summaryrefslogtreecommitdiff
path: root/usr.bin/lex/ccl.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/lex/ccl.c')
-rw-r--r--usr.bin/lex/ccl.c312
1 files changed, 234 insertions, 78 deletions
diff --git a/usr.bin/lex/ccl.c b/usr.bin/lex/ccl.c
index 5226940320f..628edcac6ab 100644
--- a/usr.bin/lex/ccl.c
+++ b/usr.bin/lex/ccl.c
@@ -1,94 +1,210 @@
-/* $OpenBSD: ccl.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */
+/* $OpenBSD: ccl.c,v 1.7 2015/11/19 19:43:40 tedu Exp $ */
/* ccl - routines for character classes */
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Vern Paxson.
- *
- * The United States Government has rights in this work pursuant
- * to contract no. DE-AC03-76SF00098 between the United States
- * Department of Energy and the University of California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE.
- */
+/* Copyright (c) 1990 The Regents of the University of California. */
+/* All rights reserved. */
+
+/* This code is derived from software contributed to Berkeley by */
+/* Vern Paxson. */
+
+/* The United States Government has rights in this work pursuant */
+/* to contract no. DE-AC03-76SF00098 between the United States */
+ /* Department of Energy and the University of California. */
+
+/* This file is part of flex. */
+
+/* Redistribution and use in source and binary forms, with or without */
+/* modification, are permitted provided that the following conditions */
+/* are met: */
+
+/* 1. Redistributions of source code must retain the above copyright */
+/* notice, this list of conditions and the following disclaimer. */
+/* 2. Redistributions in binary form must reproduce the above copyright */
+/* notice, this list of conditions and the following disclaimer in the */
+/* documentation and/or other materials provided with the distribution. */
+
+/* Neither the name of the University nor the names of its contributors */
+/* may be used to endorse or promote products derived from this software */
+/* without specific prior written permission. */
-/* $Header: /cvs/OpenBSD/src/usr.bin/lex/ccl.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */
+/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
+/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
+/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
+/* PURPOSE. */
#include "flexdef.h"
+/* return true if the chr is in the ccl. Takes negation into account. */
+static bool
+ccl_contains (const int cclp, const int ch)
+{
+ int ind, len, i;
+
+ len = ccllen[cclp];
+ ind = cclmap[cclp];
+
+ for (i = 0; i < len; ++i)
+ if (ccltbl[ind + i] == ch)
+ return !cclng[cclp];
+
+ return cclng[cclp];
+}
+
+
/* ccladd - add a single character to a ccl */
-void ccladd( cclp, ch )
-int cclp;
-int ch;
- {
- int ind, len, newpos, i;
+void ccladd (cclp, ch)
+ int cclp;
+ int ch;
+{
+ int ind, len, newpos, i;
- check_char( ch );
+ check_char (ch);
len = ccllen[cclp];
ind = cclmap[cclp];
/* check to see if the character is already in the ccl */
- for ( i = 0; i < len; ++i )
- if ( ccltbl[ind + i] == ch )
+ for (i = 0; i < len; ++i)
+ if (ccltbl[ind + i] == ch)
return;
+ /* mark newlines */
+ if (ch == nlch)
+ ccl_has_nl[cclp] = true;
+
newpos = ind + len;
- if ( newpos >= current_max_ccl_tbl_size )
- {
+ if (newpos >= current_max_ccl_tbl_size) {
current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;
++num_reallocs;
- ccltbl = reallocate_Character_array( ccltbl,
- current_max_ccl_tbl_size );
- }
+ ccltbl = reallocate_Character_array (ccltbl,
+ current_max_ccl_tbl_size);
+ }
ccllen[cclp] = len + 1;
ccltbl[newpos] = ch;
+}
+
+/* dump_cclp - same thing as list_character_set, but for cclps. */
+
+static void dump_cclp (FILE* file, int cclp)
+{
+ int i;
+
+ putc ('[', file);
+
+ for (i = 0; i < csize; ++i) {
+ if (ccl_contains(cclp, i)){
+ int start_char = i;
+
+ putc (' ', file);
+
+ fputs (readable_form (i), file);
+
+ while (++i < csize && ccl_contains(cclp,i)) ;
+
+ if (i - 1 > start_char)
+ /* this was a run */
+ fprintf (file, "-%s",
+ readable_form (i - 1));
+
+ putc (' ', file);
+ }
}
+ putc (']', file);
+}
+
+
+
+/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
+int
+ccl_set_diff (int a, int b)
+{
+ int d, ch;
+
+ /* create new class */
+ d = cclinit();
+
+ /* In order to handle negation, we spin through all possible chars,
+ * addding each char in a that is not in b.
+ * (This could be O(n^2), but n is small and bounded.)
+ */
+ for ( ch = 0; ch < csize; ++ch )
+ if (ccl_contains (a, ch) && !ccl_contains(b, ch))
+ ccladd (d, ch);
+
+ /* debug */
+ if (0){
+ fprintf(stderr, "ccl_set_diff (");
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, a);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, b);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, d);
+ fprintf(stderr, "\n)\n");
+ }
+ return d;
+}
+
+/* ccl_set_union - create a new ccl as the set union of the two given ccls. */
+int
+ccl_set_union (int a, int b)
+{
+ int d, i;
+
+ /* create new class */
+ d = cclinit();
+
+ /* Add all of a */
+ for (i = 0; i < ccllen[a]; ++i)
+ ccladd (d, ccltbl[cclmap[a] + i]);
+
+ /* Add all of b */
+ for (i = 0; i < ccllen[b]; ++i)
+ ccladd (d, ccltbl[cclmap[b] + i]);
+
+ /* debug */
+ if (0){
+ fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, a);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, b);
+ fprintf(stderr, "\n ");
+ dump_cclp (stderr, d);
+ fprintf(stderr, "\n)\n");
+ }
+ return d;
+}
+
/* cclinit - return an empty ccl */
-int cclinit()
- {
- if ( ++lastccl >= current_maxccls )
- {
+int cclinit ()
+{
+ if (++lastccl >= current_maxccls) {
current_maxccls += MAX_CCLS_INCREMENT;
++num_reallocs;
- cclmap = reallocate_integer_array( cclmap, current_maxccls );
- ccllen = reallocate_integer_array( ccllen, current_maxccls );
- cclng = reallocate_integer_array( cclng, current_maxccls );
- }
+ cclmap =
+ reallocate_integer_array (cclmap, current_maxccls);
+ ccllen =
+ reallocate_integer_array (ccllen, current_maxccls);
+ cclng = reallocate_integer_array (cclng, current_maxccls);
+ ccl_has_nl =
+ reallocate_bool_array (ccl_has_nl,
+ current_maxccls);
+ }
- if ( lastccl == 1 )
+ if (lastccl == 1)
/* we're making the first ccl */
cclmap[lastccl] = 0;
@@ -98,22 +214,25 @@ int cclinit()
* ccl, adding the length of the ccl to the cclmap pointer
* will produce a cursor to the first free space.
*/
- cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1];
+ cclmap[lastccl] =
+ cclmap[lastccl - 1] + ccllen[lastccl - 1];
ccllen[lastccl] = 0;
cclng[lastccl] = 0; /* ccl's start out life un-negated */
+ ccl_has_nl[lastccl] = false;
return lastccl;
- }
+}
/* cclnegate - negate the given ccl */
-void cclnegate( cclp )
-int cclp;
- {
+void cclnegate (cclp)
+ int cclp;
+{
cclng[cclp] = 1;
- }
+ ccl_has_nl[cclp] = !ccl_has_nl[cclp];
+}
/* list_character_set - list the members of a set of characters in CCL form
@@ -123,34 +242,71 @@ int cclp;
* has a non-zero value in the cset array.
*/
-void list_character_set( file, cset )
-FILE *file;
-int cset[];
- {
+void list_character_set (file, cset)
+ FILE *file;
+ int cset[];
+{
int i;
- putc( '[', file );
+ putc ('[', file);
- for ( i = 0; i < csize; ++i )
- {
- if ( cset[i] )
- {
+ for (i = 0; i < csize; ++i) {
+ if (cset[i]) {
int start_char = i;
- putc( ' ', file );
+ putc (' ', file);
- fputs( readable_form( i ), file );
+ fputs (readable_form (i), file);
- while ( ++i < csize && cset[i] )
- ;
+ while (++i < csize && cset[i]) ;
- if ( i - 1 > start_char )
+ if (i - 1 > start_char)
/* this was a run */
- fprintf( file, "-%s", readable_form( i - 1 ) );
+ fprintf (file, "-%s",
+ readable_form (i - 1));
- putc( ' ', file );
- }
+ putc (' ', file);
}
+ }
+
+ putc (']', file);
+}
- putc( ']', file );
+/** Determines if the range [c1-c2] is unambiguous in a case-insensitive
+ * scanner. Specifically, if a lowercase or uppercase character, x, is in the
+ * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
+ * be in the range. If not, then this range is ambiguous, and the function
+ * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that
+ * [a-z] will be labeled ambiguous because it does not include [A-Z].
+ *
+ * @param c1 the lower end of the range
+ * @param c2 the upper end of the range
+ * @return true if [c1-c2] is not ambiguous for a caseless scanner.
+ */
+bool range_covers_case (int c1, int c2)
+{
+ int i, o;
+
+ for (i = c1; i <= c2; i++) {
+ if (has_case (i)) {
+ o = reverse_case (i);
+ if (o < c1 || c2 < o)
+ return false;
+ }
}
+ return true;
+}
+
+/** Reverse the case of a character, if possible.
+ * @return c if case-reversal does not apply.
+ */
+int reverse_case (int c)
+{
+ return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);
+}
+
+/** Return true if c is uppercase or lowercase. */
+bool has_case (int c)
+{
+ return (isupper (c) || islower (c)) ? true : false;
+}