src - OpenBSD base system

diff options


context:
space:
mode:

author	Ted Unangst <tedu@cvs.openbsd.org>	2015-11-19 19:43:41 +0000
committer	Ted Unangst <tedu@cvs.openbsd.org>	2015-11-19 19:43:41 +0000
commit	3ed71134b00d13f674ae555d489ed3f7c262c849 (patch)
tree	5366e49076abb45d59627f9ccce444d886065259 /usr.bin/lex/ccl.c
parent	77d522ada02becf96f1a46c2dfc83c03dc2d48dc (diff)

Update flex from ancient 2.5.4 to the recent 2.5.39.

This work was all done by Serguey Parkhomovsky. Thanks. Some changes from upstream: * Removed autotools cruft/localization/texinfo manual/etc * Kept the old manpage, as the new manpage is content-free * Used safe string handling functions and fixed several compiler warnings * pledge(2). Flex 2.5.39 now forks/execs its filter chains and needs proc exec in addition to what was previously pledged * Removed register keyword from all variable declarations * renamed parse.c, parse.h, scan.c, skel.c with init prefix so compiling flex outside of obj by accident wouldn't clobber the bootstrap files * Minor fixes (spelling, accessing buf[strlen(buf) - 1] for zero-length strings in initscan.c/scan.l, etc) that were already in our tree This is a huge change, so it's going in the tree code bomb style. I'm not excited about the growth in complexity (like now running m4 to _post_ process lexers) but hopefully this will be a one time update and we will now take "ownership" of the code.

Diffstat (limited to 'usr.bin/lex/ccl.c')

-rw-r--r--

usr.bin/lex/ccl.c

312

1 files changed, 234 insertions, 78 deletions

diff --git a/usr.bin/lex/ccl.c b/usr.bin/lex/ccl.c
index 5226940320f..628edcac6ab 100644
--- a/usr.bin/lex/ccl.c
+++ b/usr.bin/lex/ccl.c

@@ -1,94 +1,210 @@

-/* $OpenBSD: ccl.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */

+/* $OpenBSD: ccl.c,v 1.7 2015/11/19 19:43:40 tedu Exp $ */

/* ccl - routines for character classes */

-/*-

- *

- * This code is derived from software contributed to Berkeley by

- * Vern Paxson.

- *

- * The United States Government has rights in this work pursuant

- * to contract no. DE-AC03-76SF00098 between the United States

- * Department of Energy and the University of California.

- *

- * Redistribution and use in source and binary forms, with or without

- * modification, are permitted provided that the following conditions

- * are met:

- *

- * 1. Redistributions of source code must retain the above copyright

- * notice, this list of conditions and the following disclaimer.

- * 2. Redistributions in binary form must reproduce the above copyright

- * notice, this list of conditions and the following disclaimer in the

- * documentation and/or other materials provided with the distribution.

- *

- * Neither the name of the University nor the names of its contributors

- * may be used to endorse or promote products derived from this software

- * without specific prior written permission.

- *

- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR

- * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED

- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR

- * PURPOSE.

- */

+/* This code is derived from software contributed to Berkeley by */

+/* Vern Paxson. */

+/* The United States Government has rights in this work pursuant */

+/* to contract no. DE-AC03-76SF00098 between the United States */

+ /* Department of Energy and the University of California. */

+/* This file is part of flex. */

+/* Redistribution and use in source and binary forms, with or without */

+/* modification, are permitted provided that the following conditions */

+/* are met: */

+/* 1. Redistributions of source code must retain the above copyright */

+/* notice, this list of conditions and the following disclaimer. */

+/* 2. Redistributions in binary form must reproduce the above copyright */

+/* notice, this list of conditions and the following disclaimer in the */

+/* documentation and/or other materials provided with the distribution. */

+/* Neither the name of the University nor the names of its contributors */

+/* may be used to endorse or promote products derived from this software */

+/* without specific prior written permission. */

-/* $Header: /cvs/OpenBSD/src/usr.bin/lex/ccl.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */

+/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */

+/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */

+/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */

+/* PURPOSE. */

#include "flexdef.h"

+/* return true if the chr is in the ccl. Takes negation into account. */

+static bool

+ccl_contains (const int cclp, const int ch)

+ int ind, len, i;

+ len = ccllen[cclp];

+ ind = cclmap[cclp];

+ for (i = 0; i < len; ++i)

+ if (ccltbl[ind + i] == ch)

+ return !cclng[cclp];

+ return cclng[cclp];

/* ccladd - add a single character to a ccl */

-void ccladd( cclp, ch )

-int cclp;

-int ch;

- {

- int ind, len, newpos, i;

+void ccladd (cclp, ch)

+ int cclp;

+ int ch;

+ int ind, len, newpos, i;

- check_char( ch );

+ check_char (ch);

len = ccllen[cclp];

ind = cclmap[cclp];

/* check to see if the character is already in the ccl */

- for ( i = 0; i < len; ++i )

- if ( ccltbl[ind + i] == ch )

+ for (i = 0; i < len; ++i)

+ if (ccltbl[ind + i] == ch)

return;

+ /* mark newlines */

+ if (ch == nlch)

+ ccl_has_nl[cclp] = true;

newpos = ind + len;

- if ( newpos >= current_max_ccl_tbl_size )

- {

+ if (newpos >= current_max_ccl_tbl_size) {

current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;

++num_reallocs;

- ccltbl = reallocate_Character_array( ccltbl,

- current_max_ccl_tbl_size );

- }

+ ccltbl = reallocate_Character_array (ccltbl,

+ current_max_ccl_tbl_size);

+ }

ccllen[cclp] = len + 1;

ccltbl[newpos] = ch;

+/* dump_cclp - same thing as list_character_set, but for cclps. */

+static void dump_cclp (FILE* file, int cclp)

+ int i;

+ putc ('[', file);

+ for (i = 0; i < csize; ++i) {

+ if (ccl_contains(cclp, i)){

+ int start_char = i;

+ putc (' ', file);

+ fputs (readable_form (i), file);

+ while (++i < csize && ccl_contains(cclp,i)) ;

+ if (i - 1 > start_char)

+ /* this was a run */

+ fprintf (file, "-%s",

+ readable_form (i - 1));

+ putc (' ', file);

+ }

}

+ putc (']', file);

+/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */

+int

+ccl_set_diff (int a, int b)

+ int d, ch;

+ /* create new class */

+ d = cclinit();

+ /* In order to handle negation, we spin through all possible chars,

+ * addding each char in a that is not in b.

+ * (This could be O(n^2), but n is small and bounded.)

+ */

+ for ( ch = 0; ch < csize; ++ch )

+ if (ccl_contains (a, ch) && !ccl_contains(b, ch))

+ ccladd (d, ch);

+ /* debug */

+ if (0){

+ fprintf(stderr, "ccl_set_diff (");

+ fprintf(stderr, "\n ");

+ dump_cclp (stderr, a);

+ fprintf(stderr, "\n ");

+ dump_cclp (stderr, b);

+ fprintf(stderr, "\n ");

+ dump_cclp (stderr, d);

+ fprintf(stderr, "\n)\n");

+ }

+ return d;

+/* ccl_set_union - create a new ccl as the set union of the two given ccls. */

+int

+ccl_set_union (int a, int b)

+ int d, i;

+ /* create new class */

+ d = cclinit();

+ /* Add all of a */

+ for (i = 0; i < ccllen[a]; ++i)

+ ccladd (d, ccltbl[cclmap[a] + i]);

+ /* Add all of b */

+ for (i = 0; i < ccllen[b]; ++i)

+ ccladd (d, ccltbl[cclmap[b] + i]);

+ /* debug */

+ if (0){

+ fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);

+ fprintf(stderr, "\n ");

+ dump_cclp (stderr, a);

+ fprintf(stderr, "\n ");

+ dump_cclp (stderr, b);

+ fprintf(stderr, "\n ");

+ dump_cclp (stderr, d);

+ fprintf(stderr, "\n)\n");

+ }

+ return d;

/* cclinit - return an empty ccl */

-int cclinit()

- {

- if ( ++lastccl >= current_maxccls )

- {

+int cclinit ()

+ if (++lastccl >= current_maxccls) {

current_maxccls += MAX_CCLS_INCREMENT;

++num_reallocs;

- cclmap = reallocate_integer_array( cclmap, current_maxccls );

- ccllen = reallocate_integer_array( ccllen, current_maxccls );

- cclng = reallocate_integer_array( cclng, current_maxccls );

- }

+ cclmap =

+ reallocate_integer_array (cclmap, current_maxccls);

+ ccllen =

+ reallocate_integer_array (ccllen, current_maxccls);

+ cclng = reallocate_integer_array (cclng, current_maxccls);

+ ccl_has_nl =

+ reallocate_bool_array (ccl_has_nl,

+ current_maxccls);

+ }

- if ( lastccl == 1 )

+ if (lastccl == 1)

/* we're making the first ccl */

cclmap[lastccl] = 0;

@@ -98,22 +214,25 @@ int cclinit()

* ccl, adding the length of the ccl to the cclmap pointer

* will produce a cursor to the first free space.

- cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1];

+ cclmap[lastccl] =

+ cclmap[lastccl - 1] + ccllen[lastccl - 1];

ccllen[lastccl] = 0;

cclng[lastccl] = 0; /* ccl's start out life un-negated */

+ ccl_has_nl[lastccl] = false;

return lastccl;

- }

/* cclnegate - negate the given ccl */

-void cclnegate( cclp )

-int cclp;

- {

+void cclnegate (cclp)

+ int cclp;

cclng[cclp] = 1;

- }

+ ccl_has_nl[cclp] = !ccl_has_nl[cclp];

/* list_character_set - list the members of a set of characters in CCL form

@@ -123,34 +242,71 @@ int cclp;

* has a non-zero value in the cset array.

-void list_character_set( file, cset )

-FILE *file;

-int cset[];

- {

+void list_character_set (file, cset)

+ FILE *file;

+ int cset[];

int i;

- putc( '[', file );

+ putc ('[', file);

- for ( i = 0; i < csize; ++i )

- {

- if ( cset[i] )

- {

+ for (i = 0; i < csize; ++i) {

+ if (cset[i]) {

int start_char = i;

- putc( ' ', file );

+ putc (' ', file);

- fputs( readable_form( i ), file );

+ fputs (readable_form (i), file);

- while ( ++i < csize && cset[i] )

- ;

+ while (++i < csize && cset[i]) ;

- if ( i - 1 > start_char )

+ if (i - 1 > start_char)

/* this was a run */

- fprintf( file, "-%s", readable_form( i - 1 ) );

+ fprintf (file, "-%s",

+ readable_form (i - 1));

- putc( ' ', file );

- }

+ putc (' ', file);

}

+ }

+ putc (']', file);

- putc( ']', file );

+/** Determines if the range [c1-c2] is unambiguous in a case-insensitive

+ * scanner. Specifically, if a lowercase or uppercase character, x, is in the

+ * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also

+ * be in the range. If not, then this range is ambiguous, and the function

+ * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that

+ * [a-z] will be labeled ambiguous because it does not include [A-Z].

+ *

+ * @param c1 the lower end of the range

+ * @param c2 the upper end of the range

+ * @return true if [c1-c2] is not ambiguous for a caseless scanner.

+ */

+bool range_covers_case (int c1, int c2)

+ int i, o;

+ for (i = c1; i <= c2; i++) {

+ if (has_case (i)) {

+ o = reverse_case (i);

+ if (o < c1 || c2 < o)

+ return false;

+ }

}

+ return true;

+/** Reverse the case of a character, if possible.

+ * @return c if case-reversal does not apply.

+ */

+int reverse_case (int c)

+ return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c);

+/** Return true if c is uppercase or lowercase. */

+bool has_case (int c)

+ return (isupper (c) || islower (c)) ? true : false;