/*	$OpenBSD: ccl.c,v 1.8 2015/11/19 22:55:13 tedu Exp $	*/

/* ccl - routines for character classes */

/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
 /* Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */

#include "flexdef.h"

/* return true if the chr is in the ccl. Takes negation into account. */
static bool
ccl_contains(const int cclp, const int ch)
{
	int ind, len, i;

	len = ccllen[cclp];
	ind = cclmap[cclp];

	for (i = 0; i < len; ++i)
		if (ccltbl[ind + i] == ch)
			return !cclng[cclp];

	return cclng[cclp];
}


/* ccladd - add a single character to a ccl */

void 
ccladd(cclp, ch)
	int cclp;
	int ch;
{
	int ind, len, newpos, i;

	check_char(ch);

	len = ccllen[cclp];
	ind = cclmap[cclp];

	/* check to see if the character is already in the ccl */

	for (i = 0; i < len; ++i)
		if (ccltbl[ind + i] == ch)
			return;

	/* mark newlines */
	if (ch == nlch)
		ccl_has_nl[cclp] = true;

	newpos = ind + len;

	if (newpos >= current_max_ccl_tbl_size) {
		current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT;

		++num_reallocs;

		ccltbl = reallocate_Character_array(ccltbl,
		    current_max_ccl_tbl_size);
	}
	ccllen[cclp] = len + 1;
	ccltbl[newpos] = ch;
}

/* dump_cclp - same thing as list_character_set, but for cclps.  */

static void 
dump_cclp(FILE * file, int cclp)
{
	int i;

	putc('[', file);

	for (i = 0; i < csize; ++i) {
		if (ccl_contains(cclp, i)) {
			int start_char = i;

			putc(' ', file);

			fputs(readable_form(i), file);

			while (++i < csize && ccl_contains(cclp, i));

			if (i - 1 > start_char)
				/* this was a run */
				fprintf(file, "-%s",
				    readable_form(i - 1));

			putc(' ', file);
		}
	}

	putc(']', file);
}



/* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */
int
ccl_set_diff(int a, int b)
{
	int d, ch;

	/* create new class  */
	d = cclinit();

	/*
	 * In order to handle negation, we spin through all possible chars,
	 * addding each char in a that is not in b. (This could be O(n^2),
	 * but n is small and bounded.)
	 */
	for (ch = 0; ch < csize; ++ch)
		if (ccl_contains(a, ch) && !ccl_contains(b, ch))
			ccladd(d, ch);

	/* debug */
	if (0) {
		fprintf(stderr, "ccl_set_diff (");
		fprintf(stderr, "\n    ");
		dump_cclp(stderr, a);
		fprintf(stderr, "\n    ");
		dump_cclp(stderr, b);
		fprintf(stderr, "\n    ");
		dump_cclp(stderr, d);
		fprintf(stderr, "\n)\n");
	}
	return d;
}

/* ccl_set_union - create a new ccl as the set union of the two given ccls. */
int
ccl_set_union(int a, int b)
{
	int d, i;

	/* create new class  */
	d = cclinit();

	/* Add all of a */
	for (i = 0; i < ccllen[a]; ++i)
		ccladd(d, ccltbl[cclmap[a] + i]);

	/* Add all of b */
	for (i = 0; i < ccllen[b]; ++i)
		ccladd(d, ccltbl[cclmap[b] + i]);

	/* debug */
	if (0) {
		fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d);
		fprintf(stderr, "\n    ");
		dump_cclp(stderr, a);
		fprintf(stderr, "\n    ");
		dump_cclp(stderr, b);
		fprintf(stderr, "\n    ");
		dump_cclp(stderr, d);
		fprintf(stderr, "\n)\n");
	}
	return d;
}


/* cclinit - return an empty ccl */

int 
cclinit()
{
	if (++lastccl >= current_maxccls) {
		current_maxccls += MAX_CCLS_INCREMENT;

		++num_reallocs;

		cclmap =
		    reallocate_integer_array(cclmap, current_maxccls);
		ccllen =
		    reallocate_integer_array(ccllen, current_maxccls);
		cclng = reallocate_integer_array(cclng, current_maxccls);
		ccl_has_nl =
		    reallocate_bool_array(ccl_has_nl,
		    current_maxccls);
	}
	if (lastccl == 1)
		/* we're making the first ccl */
		cclmap[lastccl] = 0;

	else
		/*
		 * The new pointer is just past the end of the last ccl.
		 * Since the cclmap points to the \first/ character of a ccl,
		 * adding the length of the ccl to the cclmap pointer will
		 * produce a cursor to the first free space.
		 */
		cclmap[lastccl] =
		    cclmap[lastccl - 1] + ccllen[lastccl - 1];

	ccllen[lastccl] = 0;
	cclng[lastccl] = 0;	/* ccl's start out life un-negated */
	ccl_has_nl[lastccl] = false;

	return lastccl;
}


/* cclnegate - negate the given ccl */

void 
cclnegate(cclp)
	int cclp;
{
	cclng[cclp] = 1;
	ccl_has_nl[cclp] = !ccl_has_nl[cclp];
}


/* list_character_set - list the members of a set of characters in CCL form
 *
 * Writes to the given file a character-class representation of those
 * characters present in the given CCL.  A character is present if it
 * has a non-zero value in the cset array.
 */

void 
list_character_set(file, cset)
	FILE *file;
	int cset[];
{
	int i;

	putc('[', file);

	for (i = 0; i < csize; ++i) {
		if (cset[i]) {
			int start_char = i;

			putc(' ', file);

			fputs(readable_form(i), file);

			while (++i < csize && cset[i]);

			if (i - 1 > start_char)
				/* this was a run */
				fprintf(file, "-%s",
				    readable_form(i - 1));

			putc(' ', file);
		}
	}

	putc(']', file);
}

/** Determines if the range [c1-c2] is unambiguous in a case-insensitive
 * scanner.  Specifically, if a lowercase or uppercase character, x, is in the
 * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also
 * be in the range. If not, then this range is ambiguous, and the function
 * returns false.  For example, [@-_] spans [a-z] but not [A-Z].  Beware that
 * [a-z] will be labeled ambiguous because it does not include [A-Z].
 *
 * @param c1 the lower end of the range
 * @param c2 the upper end of the range
 * @return true if [c1-c2] is not ambiguous for a caseless scanner.
 */
bool 
range_covers_case(int c1, int c2)
{
	int i, o;

	for (i = c1; i <= c2; i++) {
		if (has_case(i)) {
			o = reverse_case(i);
			if (o < c1 || c2 < o)
				return false;
		}
	}
	return true;
}

/** Reverse the case of a character, if possible.
 * @return c if case-reversal does not apply.
 */
int 
reverse_case(int c)
{
	return isupper(c) ? tolower(c) : (islower(c) ? toupper(c) : c);
}

/** Return true if c is uppercase or lowercase. */
bool 
has_case(int c)
{
	return (isupper(c) || islower(c)) ? true : false;
}