diff options
-rw-r--r-- | usr.bin/mklocale/Makefile | 16 | ||||
-rw-r--r-- | usr.bin/mklocale/ldef.h | 51 | ||||
-rw-r--r-- | usr.bin/mklocale/lex.l | 171 | ||||
-rw-r--r-- | usr.bin/mklocale/mklocale.1 | 282 | ||||
-rw-r--r-- | usr.bin/mklocale/yacc.y | 857 |
5 files changed, 1377 insertions, 0 deletions
diff --git a/usr.bin/mklocale/Makefile b/usr.bin/mklocale/Makefile new file mode 100644 index 00000000000..a30e04c221a --- /dev/null +++ b/usr.bin/mklocale/Makefile @@ -0,0 +1,16 @@ +# $NetBSD: Makefile,v 1.11 2003/08/01 17:04:04 lukem Exp $ +# $OpenBSD: Makefile,v 1.1 2005/08/07 10:06:25 espie Exp $ + +.include <bsd.own.mk> + +PROG= mklocale +SRCS= yacc.y lex.l +CPPFLAGS+= -I. -I${.CURDIR} -I${.CURDIR}/../../lib/libc -I${.CURDIR}/../../lib/libc/include +YHEADER= 1 + +yacc.c yacc.h: yacc.y + ${YACC} -d ${.ALLSRC} && mv y.tab.c yacc.c && mv y.tab.h yacc.h + + +CLEANFILES+= yacc.c yacc.h +.include <bsd.prog.mk> diff --git a/usr.bin/mklocale/ldef.h b/usr.bin/mklocale/ldef.h new file mode 100644 index 00000000000..2431a870a75 --- /dev/null +++ b/usr.bin/mklocale/ldef.h @@ -0,0 +1,51 @@ +/* $NetBSD: ldef.h,v 1.4 2003/08/07 11:15:14 agc Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ldef.h 8.1 (Berkeley) 6/6/93 + */ + +/* + * This should look a LOT like a _RuneEntry + */ +typedef struct rune_list { + rune_t min; + rune_t max; + rune_t map; + u_int32_t *types; + struct rune_list *next; +} rune_list; + +typedef struct rune_map { + u_int32_t map[_CACHED_RUNES]; + rune_list *root; +} rune_map; diff --git a/usr.bin/mklocale/lex.l b/usr.bin/mklocale/lex.l new file mode 100644 index 00000000000..ab56f2ed96c --- /dev/null +++ b/usr.bin/mklocale/lex.l @@ -0,0 +1,171 @@ +/* $NetBSD: lex.l,v 1.13 2003/10/27 00:12:43 lukem Exp $ */ + +%{ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +#include "locale/runetype.h" +#include <stdio.h> +#include <stdlib.h> + +#include "ldef.h" +#include "yacc.h" + +int yylex(void); +%} + +ODIGIT [0-7] +DIGIT [0-9] +XDIGIT [0-9a-fA-F] +W [\t\n\r ] + +%% +\'.\' { yylval.rune = (unsigned char)yytext[1]; + return(RUNE); } + +'\\a' { yylval.rune = '\a'; + return(RUNE); } +'\\b' { yylval.rune = '\b'; + return(RUNE); } +'\\f' { yylval.rune = '\f'; + return(RUNE); } +'\\n' { yylval.rune = '\n'; + return(RUNE); } +'\\r' { yylval.rune = '\r'; + return(RUNE); } +'\\t' { yylval.rune = '\t'; + return(RUNE); } +'\\v' { yylval.rune = '\v'; + return(RUNE); } + +0x{XDIGIT}+ { yylval.rune = strtoul(yytext, 0, 16); + return(RUNE); } +0{ODIGIT}+ { yylval.rune = strtoul(yytext, 0, 8); + return(RUNE); } +{DIGIT}+ { yylval.rune = strtoul(yytext, 0, 10); + return(RUNE); } + + +MAPLOWER { return(MAPLOWER); } +MAPUPPER { return(MAPUPPER); } +TODIGIT { return(DIGITMAP); } +INVALID { return(INVALID); } + +ALPHA { yylval.i = _RUNETYPE_A|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +CONTROL { yylval.i = _RUNETYPE_C; + return(LIST); } +DIGIT { yylval.i = _RUNETYPE_D|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +GRAPH { yylval.i = _RUNETYPE_G|_RUNETYPE_R; + return(LIST); } +LOWER { yylval.i = _RUNETYPE_L|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +PUNCT { yylval.i = _RUNETYPE_P|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +SPACE { yylval.i = _RUNETYPE_S; + return(LIST); } +UPPER { yylval.i = _RUNETYPE_U|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +XDIGIT { yylval.i = _RUNETYPE_X|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +BLANK { yylval.i = _RUNETYPE_B; + return(LIST); } +PRINT { yylval.i = _RUNETYPE_R; + return(LIST); } +IDEOGRAM { yylval.i = _RUNETYPE_I|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +SPECIAL { yylval.i = _RUNETYPE_T|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +PHONOGRAM { yylval.i = _RUNETYPE_Q|_RUNETYPE_R|_RUNETYPE_G; + return(LIST); } +SWIDTH0 { yylval.i = _RUNETYPE_SW0; return(LIST); } +SWIDTH1 { 
yylval.i = _RUNETYPE_SW1; return(LIST); } +SWIDTH2 { yylval.i = _RUNETYPE_SW2; return(LIST); } +SWIDTH3 { yylval.i = _RUNETYPE_SW3; return(LIST); } + +VARIABLE[\t ] { static char vbuf[1024]; + char *v = vbuf; + while ((*v = input()) && *v != '\n') + ++v; + if (*v) { + unput(*v); + *v = 0; + } + yylval.str = vbuf; + return(VARIABLE); + } + +CHARSET { return(CHARSET); } + +ENCODING { return(ENCODING); } + +\".*\" { char *e = yytext + 1; + yylval.str = e; + while (*e && *e != '"') + ++e; + *e = 0; + return(STRING); } + +\<|\(|\[ { return(LBRK); } + +\>|\)|\] { return(RBRK); } + +\- { return(THRU); } +\.\.\. { return(THRU); } + +\: { return(':'); } + +{W}+ ; + +^\#.*\n ; +\/\* { char lc = 0; + do { + while ((lc) != '*') + if ((lc = input()) == 0) + break; + } while((lc = input()) != '/'); + } + +\\$ ; +. { printf("Lex is skipping '%s'\n", yytext); } +%% + +#if !defined(yywrap) +int +yywrap() +{ + return(1); +} +#endif diff --git a/usr.bin/mklocale/mklocale.1 b/usr.bin/mklocale/mklocale.1 new file mode 100644 index 00000000000..8b8eb413e40 --- /dev/null +++ b/usr.bin/mklocale/mklocale.1 @@ -0,0 +1,282 @@ +.\" $NetBSD: mklocale.1,v 1.13 2004/01/24 17:03:26 wiz Exp $ +.\" FreeBSD: src/usr.bin/mklocale/mklocale.1,v 1.6 1999/09/20 09:15:21 phantom Exp +.\" +.\" Copyright (c) 1993, 1994 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Paul Borman at Krystal Technologies. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)mklocale.1 8.2 (Berkeley) 4/18/94 +.\" +.Dd April 18, 1994 +.Dt MKLOCALE 1 +.Os +.Sh NAME +.Nm mklocale +.Nd make LC_CTYPE locale files +.Sh SYNOPSIS +.Nm mklocale +.Op Fl d +\*[Lt] +.Ar src-file +\*[Gt] +.Ar language/LC_CTYPE +.Nm mklocale +.Op Fl d +.Fl o +.Ar language/LC_CTYPE +.Ar src-file +.Sh DESCRIPTION +The +.Nm mklocale +utility reads an +.Dv LC_CTYPE +source file from standard input and produces an +.Dv LC_CTYPE +binary file on standard output suitable for placement in +.Pa /usr/share/locale/\*[Lt]language\*[Gt]/LC_CTYPE . +.Pp +The format of +.Ar src-file +is quite simple. +It consists of a series of lines which start with a keyword and have +associated data following. +C style comments are used +to place comments in the file. 
+.Pp +The following options are available: +.Bl -tag -width XXX +.It Fl d +Turns on debugging messages. +.It Fl o +Specify output file. +.El +.Pp +Besides the keywords which will be listed below, +the following are valid tokens in +.Ar src-file : +.Bl -tag -width literal +.It Dv RUNE +A +.Dv RUNE +may be any of the following: +.Bl -tag -width 0x[0-9a-z]* +.It Ar 'x' +The ASCII character +.Ar x . +.It Ar '\ex' +The ANSI C character +.Ar \ex +where +.Ar \ex +is one of +.Dv \ea , +.Dv \eb , +.Dv \ef , +.Dv \en , +.Dv \er , +.Dv \et , +or +.Dv \ev . +.It Ar 0x[0-9a-z]* +A hexadecimal number representing a rune code. +.It Ar 0[0-7]* +An octal number representing a rune code. +.It Ar [1-9][0-9]* +A decimal number representing a rune code. +.El +.It Dv STRING +A string enclosed in double quotes ("). +.It Dv THRU +Either +.Dv ... +or +.Dv - . +Used to indicate ranges. +.It Ar literal +The following characters are taken literally: +.Bl -tag -width "\*[Lt]\|\|(\|\|[" +.It Dv "\*[Lt]\|(\|[" +Used to start a mapping. +All are equivalent. +.It Dv "\*[Gt]\|\^)\|]" +Used to end a mapping. +All are equivalent. +.It Dv : +Used as a delimiter in mappings. +.El +.El +.Pp +Keywords which should only appear once are: +.Bl -tag -width PHONOGRAM +.It Dv ENCODING +Followed by a +.Dv STRING +which indicates the encoding mechanism to be used for this locale. +The current encodings are: +.Bl -tag -width NONE +.It Dv NONE +No translation; this is the default. +.It Dv UTF2 +.Dv "Universal character set Transformation Format" +adopted from +.Nm "Plan 9 from Bell Labs" . +.It Dv EUC +.Dv EUC +encoding as used by several +vendors of +.Ux +systems. +.El +.It Dv VARIABLE +This keyword must be followed by a single tab or space character, +after which encoding-specific data is placed. +Currently only the +.Dv "EUC" +encoding requires variable data. +.\" See +.\" .Xr euc 4 +.\" for further details. +.It Dv INVALID +A single +.Dv RUNE +follows and is used as the invalid rune for this locale. 
+.El +.Pp +The following keywords may appear multiple times and have the following +format for data: +.in +.5i +.Bl -tag -width "XXRUNE1 THRU RUNEn : RUNE2XX" +.It Aq Dv RUNE1 RUNE2 +.Dv RUNE1 +is mapped to +.Dv RUNE2 . +.It Aq Dv RUNE1 THRU RUNEn : RUNE2 +Runes +.Dv RUNE1 +through +.Dv RUNEn +are mapped to +.Dv RUNE2 +through +.Dv RUNE2 ++ n-1. +.El +.in -.5i +.Bl -tag -width PHONOGRAM +.It Dv MAPLOWER +Defines the tolower mappings. +.Dv RUNE2 +is the lower case representation of +.Dv RUNE1 . +.It Dv MAPUPPER +Defines the toupper mappings. +.Dv RUNE2 +is the upper case representation of +.Dv RUNE1 . +.It Dv TODIGIT +Defines a map from runes to their digit value. +.Dv RUNE2 +is the integer value represented by +.Dv RUNE1 . +For example, the ASCII character +.Sq 0 +would map to the decimal value 0. +Only values up to 255 are allowed. +.El +.Pp +The following keywords may appear multiple times and have the following +format for data: +.Bl -tag -width "RUNE1 THRU RUNEn" +.It Dv RUNE +This rune has the property defined by the keyword. +.It Dv "RUNE1 THRU RUNEn" +All the runes between and including +.Dv RUNE1 +and +.Dv RUNEn +have the property defined by the keyword. +.El +.Bl -tag -width PHONOGRAM +.It Dv ALPHA +Defines runes which are alphabetic, printable, and graphic. +.It Dv CONTROL +Defines runes which are control characters. +.It Dv DIGIT +Defines runes which are decimal digits, printable, and graphic. +.It Dv GRAPH +Defines runes which are graphic and printable. +.It Dv LOWER +Defines runes which are lower case, printable, and graphic. +.It Dv PUNCT +Defines runes which are punctuation, printable, and graphic. +.It Dv SPACE +Defines runes which are spaces. +.It Dv UPPER +Defines runes which are upper case, printable, and graphic. +.It Dv XDIGIT +Defines runes which are hexadecimal digits, printable, and graphic. +.It Dv BLANK +Defines runes which are blank. +.It Dv PRINT +Defines runes which are printable. 
+.It Dv IDEOGRAM +Defines runes which are ideograms, printable, and graphic. +.It Dv SPECIAL +Defines runes which are special characters, printable, and graphic. +.It Dv PHONOGRAM +Defines runes which are phonograms, printable, and graphic. +.It Dv SWIDTHn +Defines runes with specific glyph width. +.Ar n +takes 0 to 3. +.It Dv CHARSET +Controls character set for subsequent runes. +.\" To support +.\" .Xr iso2022 4 +.\" locale definitions. +.El +.Sh SEE ALSO +.\"Xr colldef 1 , +.Xr setlocale 3 , +.\" .Xr euc 4 , +.\" .Xr utf8 4 +.Xr nls 7 +.Sh HISTORY +The +.Nm mklocale +utility first appeared in +.Bx 4.4 . +.Sh BUGS +The +.Nm mklocale +utility is overly simplistic. +.Pp +We should switch to +.Nm localedef +and its file format, which is more standard. diff --git a/usr.bin/mklocale/yacc.y b/usr.bin/mklocale/yacc.y new file mode 100644 index 00000000000..36cf0fc25b5 --- /dev/null +++ b/usr.bin/mklocale/yacc.y @@ -0,0 +1,857 @@ +/* $NetBSD: yacc.y,v 1.24 2004/01/05 23:23:36 jmmv Exp $ */ + +%{ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <sys/types.h> +#include <netinet/in.h> /* Needed by <arpa/inet.h> on NetBSD 1.5. */ +#include <arpa/inet.h> /* Needed for htonl on POSIX systems. */ + +#include <err.h> +#include "locale/runetype.h" +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> + +#include "ldef.h" + +const char *locale_file = "<stdout>"; + +rune_map maplower = { { 0, }, }; +rune_map mapupper = { { 0, }, }; +rune_map types = { { 0, }, }; + +_RuneLocale new_locale = { { 0, }, }; + +rune_t charsetbits = (rune_t)0x00000000; +#if 0 +rune_t charsetmask = (rune_t)0x0000007f; +#endif +rune_t charsetmask = (rune_t)0xffffffff; + +void set_map(rune_map *, rune_list *, u_int32_t); +void set_digitmap(rune_map *, rune_list *); +void add_map(rune_map *, rune_list *, u_int32_t); + +int main(int, char *[]); +int yyerror(const char *s); +void *xmalloc(size_t sz); +u_int32_t *xlalloc(size_t sz); +u_int32_t *xrelalloc(u_int32_t *old, size_t sz); +void dump_tables(void); +int yyparse(void); +extern int yylex(void); +%} + +%union { + rune_t rune; + int i; + char *str; + + rune_list *list; +} + +%token <rune> RUNE +%token LBRK 
+%token RBRK +%token THRU +%token MAPLOWER +%token MAPUPPER +%token DIGITMAP +%token <i> LIST +%token <str> VARIABLE +%token CHARSET +%token ENCODING +%token INVALID +%token <str> STRING + +%type <list> list +%type <list> map + + +%% + +locale : /* empty */ + | table + { dump_tables(); } + ; + +table : entry + | table entry + ; + +entry : ENCODING STRING + { strncpy(new_locale.rl_encoding, $2, sizeof(new_locale.rl_encoding)); } + | VARIABLE + { new_locale.rl_variable_len = strlen($1) + 1; + new_locale.rl_variable = strdup($1); + } + | CHARSET RUNE + { charsetbits = $2; charsetmask = 0x0000007f; } + | CHARSET RUNE RUNE + { charsetbits = $2; charsetmask = $3; } + | CHARSET STRING + { int final = $2[strlen($2) - 1] & 0x7f; + charsetbits = final << 24; + if ($2[0] == '$') { + charsetmask = 0x00007f7f; + if (strchr(",-./", $2[1])) + charsetbits |= 0x80; + if (0xd0 <= final && final <= 0xdf) + charsetmask |= 0x007f0000; + } else { + charsetmask = 0x0000007f; + if (strchr(",-./", $2[0])) + charsetbits |= 0x80; + if (strlen($2) == 2 && $2[0] == '!') + charsetbits |= ((0x80 | $2[0]) << 16); + } + + /* + * special rules + */ + if (charsetbits == ('B' << 24) + && charsetmask == 0x0000007f) { + /*ASCII: 94B*/ + charsetbits = 0; + charsetmask = 0x0000007f; + } else if (charsetbits == (('A' << 24) | 0x80) + && charsetmask == 0x0000007f) { + /*Latin1: 96A*/ + charsetbits = 0x80; + charsetmask = 0x0000007f; + } + } + | INVALID RUNE + { new_locale.rl_invalid_rune = $2; } + | LIST list + { set_map(&types, $2, $1); } + | MAPLOWER map + { set_map(&maplower, $2, 0); } + | MAPUPPER map + { set_map(&mapupper, $2, 0); } + | DIGITMAP map + { set_digitmap(&types, $2); } + ; + +list : RUNE + { + $$ = (rune_list *)malloc(sizeof(rune_list)); + $$->min = ($1 & charsetmask) | charsetbits; + $$->max = ($1 & charsetmask) | charsetbits; + $$->next = 0; + } + | RUNE THRU RUNE + { + $$ = (rune_list *)malloc(sizeof(rune_list)); + $$->min = ($1 & charsetmask) | charsetbits; + $$->max = ($3 & 
charsetmask) | charsetbits; + $$->next = 0; + } + | list RUNE + { + $$ = (rune_list *)malloc(sizeof(rune_list)); + $$->min = ($2 & charsetmask) | charsetbits; + $$->max = ($2 & charsetmask) | charsetbits; + $$->next = $1; + } + | list RUNE THRU RUNE + { + $$ = (rune_list *)malloc(sizeof(rune_list)); + $$->min = ($2 & charsetmask) | charsetbits; + $$->max = ($4 & charsetmask) | charsetbits; + $$->next = $1; + } + ; + +map : LBRK RUNE RUNE RBRK + { + $$ = (rune_list *)malloc(sizeof(rune_list)); + $$->min = ($2 & charsetmask) | charsetbits; + $$->max = ($2 & charsetmask) | charsetbits; + $$->map = $3; + $$->next = 0; + } + | map LBRK RUNE RUNE RBRK + { + $$ = (rune_list *)malloc(sizeof(rune_list)); + $$->min = ($3 & charsetmask) | charsetbits; + $$->max = ($3 & charsetmask) | charsetbits; + $$->map = $4; + $$->next = $1; + } + | LBRK RUNE THRU RUNE ':' RUNE RBRK + { + $$ = (rune_list *)malloc(sizeof(rune_list)); + $$->min = ($2 & charsetmask) | charsetbits; + $$->max = ($4 & charsetmask) | charsetbits; + $$->map = $6; + $$->next = 0; + } + | map LBRK RUNE THRU RUNE ':' RUNE RBRK + { + $$ = (rune_list *)malloc(sizeof(rune_list)); + $$->min = ($3 & charsetmask) | charsetbits; + $$->max = ($5 & charsetmask) | charsetbits; + $$->map = $7; + $$->next = $1; + } + ; +%% + +int debug = 0; +FILE *ofile; + +int +main(int ac, char *av[]) +{ + int x; + + extern char *optarg; + extern int optind; + + while ((x = getopt(ac, av, "do:")) != EOF) { + switch(x) { + case 'd': + debug = 1; + break; + case 'o': + locale_file = optarg; + if ((ofile = fopen(locale_file, "w")) == 0) + err(1, "unable to open output file %s", locale_file); + break; + default: + usage: + fprintf(stderr, "usage: mklocale [-d] [-o output] [source]\n"); + exit(1); + } + } + + switch (ac - optind) { + case 0: + break; + case 1: + if (freopen(av[optind], "r", stdin) == 0) + err(1, "unable to open input file %s", av[optind]); + break; + default: + goto usage; + } + for (x = 0; x < _CACHED_RUNES; ++x) { + 
mapupper.map[x] = x; + maplower.map[x] = x; + } + new_locale.rl_invalid_rune = _DEFAULT_INVALID_RUNE; + memcpy(new_locale.rl_magic, _RUNE_MAGIC_1, sizeof(new_locale.rl_magic)); + + yyparse(); + + return 0; +} + +int +yyerror(const char *s) +{ + fprintf(stderr, "%s\n", s); + + return 0; +} + +void * +xmalloc(size_t sz) +{ + void *r = malloc(sz); + if (!r) { + perror("xmalloc"); + abort(); + } + return(r); +} + +u_int32_t * +xlalloc(size_t sz) +{ + u_int32_t *r = (u_int32_t *)malloc(sz * sizeof(u_int32_t)); + if (!r) { + perror("xlalloc"); + abort(); + } + return(r); +} + +u_int32_t * +xrelalloc(u_int32_t *old, size_t sz) +{ + u_int32_t *r = (u_int32_t *)realloc(old, sz * sizeof(u_int32_t)); + if (!r) { + perror("xrelalloc"); + abort(); + } + return(r); +} + +void +set_map(rune_map *map, rune_list *list, u_int32_t flag) +{ + list->map &= charsetmask; + list->map |= charsetbits; + while (list) { + rune_list *nlist = list->next; + add_map(map, list, flag); + list = nlist; + } +} + +void +set_digitmap(rune_map *map, rune_list *list) +{ + rune_t i; + + while (list) { + rune_list *nlist = list->next; + for (i = list->min; i <= list->max; ++i) { + if (list->map + (i - list->min)) { + rune_list *tmp = (rune_list *)xmalloc(sizeof(rune_list)); + tmp->min = i; + tmp->max = i; + add_map(map, tmp, list->map + (i - list->min)); + } + } + free(list); + list = nlist; + } +} + +void +add_map(rune_map *map, rune_list *list, u_int32_t flag) +{ + rune_t i; + rune_list *lr = 0; + rune_list *r; + rune_t run; + + while (list->min < _CACHED_RUNES && list->min <= list->max) { + if (flag) + map->map[list->min++] |= flag; + else + map->map[list->min++] = list->map++; + } + + if (list->min > list->max) { + free(list); + return; + } + + run = list->max - list->min + 1; + + if (!(r = map->root) || (list->max < r->min - 1) + || (!flag && list->max == r->min - 1)) { + if (flag) { + list->types = xlalloc(run); + for (i = 0; i < run; ++i) + list->types[i] = flag; + } + list->next = map->root; + 
map->root = list; + return; + } + + for (r = map->root; r && r->max + 1 < list->min; r = r->next) + lr = r; + + if (!r) { + /* + * We are off the end. + */ + if (flag) { + list->types = xlalloc(run); + for (i = 0; i < run; ++i) + list->types[i] = flag; + } + list->next = 0; + lr->next = list; + return; + } + + if (list->max < r->min - 1) { + /* + * We come before this range and we do not intersect it. + * We are not before the root node, it was checked before the loop + */ + if (flag) { + list->types = xlalloc(run); + for (i = 0; i < run; ++i) + list->types[i] = flag; + } + list->next = lr->next; + lr->next = list; + return; + } + + /* + * At this point we have found that we at least intersect with + * the range pointed to by `r', we might intersect with one or + * more ranges beyond `r' as well. + */ + + if (!flag && list->map - list->min != r->map - r->min) { + /* + * There are only two cases when we are doing case maps and + * our maps needn't have the same offset. When we are adjoining + * but not intersecting. + */ + if (list->max + 1 == r->min) { + lr->next = list; + list->next = r; + return; + } + if (list->min - 1 == r->max) { + list->next = r->next; + r->next = list; + return; + } + fprintf(stderr, "Error: conflicting map entries\n"); + exit(1); + } + + if (list->min >= r->min && list->max <= r->max) { + /* + * Subset case. + */ + + if (flag) { + for (i = list->min; i <= list->max; ++i) + r->types[i - r->min] |= flag; + } + free(list); + return; + } + if (list->min <= r->min && list->max >= r->max) { + /* + * Superset case. Make him big enough to hold us. + * We might need to merge with the guy after him. 
+ */ + if (flag) { + list->types = xlalloc(list->max - list->min + 1); + + for (i = list->min; i <= list->max; ++i) + list->types[i - list->min] = flag; + + for (i = r->min; i <= r->max; ++i) + list->types[i - list->min] |= r->types[i - r->min]; + + free(r->types); + r->types = list->types; + } else { + r->map = list->map; + } + r->min = list->min; + r->max = list->max; + free(list); + } else if (list->min < r->min) { + /* + * Our tail intersects his head. + */ + if (flag) { + list->types = xlalloc(r->max - list->min + 1); + + for (i = r->min; i <= r->max; ++i) + list->types[i - list->min] = r->types[i - r->min]; + + for (i = list->min; i < r->min; ++i) + list->types[i - list->min] = flag; + + for (i = r->min; i <= list->max; ++i) + list->types[i - list->min] |= flag; + + free(r->types); + r->types = list->types; + } else { + r->map = list->map; + } + r->min = list->min; + free(list); + return; + } else { + /* + * Our head intersects his tail. + * We might need to merge with the guy after him. + */ + if (flag) { + r->types = xrelalloc(r->types, list->max - r->min + 1); + + for (i = list->min; i <= r->max; ++i) + r->types[i - r->min] |= flag; + + for (i = r->max+1; i <= list->max; ++i) + r->types[i - r->min] = flag; + } + r->max = list->max; + free(list); + } + + /* + * Okay, check to see if we grew into the next guy(s) + */ + while ((lr = r->next) && r->max >= lr->min) { + if (flag) { + if (r->max >= lr->max) { + /* + * Good, we consumed all of him. + */ + for (i = lr->min; i <= lr->max; ++i) + r->types[i - r->min] |= lr->types[i - lr->min]; + } else { + /* + * "append" him on to the end of us. 
+ */ + r->types = xrelalloc(r->types, lr->max - r->min + 1); + + for (i = lr->min; i <= r->max; ++i) + r->types[i - r->min] |= lr->types[i - lr->min]; + + for (i = r->max+1; i <= lr->max; ++i) + r->types[i - r->min] = lr->types[i - lr->min]; + + r->max = lr->max; + } + } else { + if (lr->max > r->max) + r->max = lr->max; + } + + r->next = lr->next; + + if (flag) + free(lr->types); + free(lr); + } +} + +void +dump_tables() +{ + int x, n; + rune_list *list; + _FileRuneLocale file_new_locale; + FILE *fp = (ofile ? ofile : stdout); + + memset(&file_new_locale, 0, sizeof(file_new_locale)); + + /* + * See if we can compress some of the istype arrays + */ + for(list = types.root; list; list = list->next) { + list->map = list->types[0]; + for (x = 1; x < list->max - list->min + 1; ++x) { + if (list->types[x] != list->map) { + list->map = 0; + break; + } + } + } + + memcpy(&file_new_locale.frl_magic, new_locale.rl_magic, + sizeof(file_new_locale.frl_magic)); + memcpy(&file_new_locale.frl_encoding, new_locale.rl_encoding, + sizeof(file_new_locale.frl_encoding)); + + file_new_locale.frl_invalid_rune = htonl(new_locale.rl_invalid_rune); + + /* + * Fill in our tables. Do this in network order so that + * diverse machines have a chance of sharing data. + * (Machines like Crays cannot share with little machines due to + * word size. Sigh. We tried.) + */ + for (x = 0; x < _CACHED_RUNES; ++x) { + file_new_locale.frl_runetype[x] = htonl(types.map[x]); + file_new_locale.frl_maplower[x] = htonl(maplower.map[x]); + file_new_locale.frl_mapupper[x] = htonl(mapupper.map[x]); + } + + /* + * Count up how many ranges we will need for each of the extents. 
+ */ + list = types.root; + + while (list) { + new_locale.rl_runetype_ext.rr_nranges++; + list = list->next; + } + file_new_locale.frl_runetype_ext.frr_nranges = + htonl(new_locale.rl_runetype_ext.rr_nranges); + + list = maplower.root; + + while (list) { + new_locale.rl_maplower_ext.rr_nranges++; + list = list->next; + } + file_new_locale.frl_maplower_ext.frr_nranges = + htonl(new_locale.rl_maplower_ext.rr_nranges); + + list = mapupper.root; + + while (list) { + new_locale.rl_mapupper_ext.rr_nranges++; + list = list->next; + } + file_new_locale.frl_mapupper_ext.frr_nranges = + htonl(new_locale.rl_mapupper_ext.rr_nranges); + + file_new_locale.frl_variable_len = htonl(new_locale.rl_variable_len); + + /* + * Okay, we are now ready to write the new locale file. + */ + + /* + * PART 1: The _RuneLocale structure + */ + if (fwrite((char *)&file_new_locale, sizeof(file_new_locale), 1, fp) != 1) + err(1, "writing _RuneLocale to %s", locale_file); + /* + * PART 2: The runetype_ext structures (not the actual tables) + */ + for (list = types.root, n = 0; list != NULL; list = list->next, n++) { + _FileRuneEntry re; + + memset(&re, 0, sizeof(re)); + re.fre_min = htonl(list->min); + re.fre_max = htonl(list->max); + re.fre_map = htonl(list->map); + + if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) + err(1, "writing runetype_ext #%d to %s", n, locale_file); + } + /* + * PART 3: The maplower_ext structures + */ + for (list = maplower.root, n = 0; list != NULL; list = list->next, n++) { + _FileRuneEntry re; + + memset(&re, 0, sizeof(re)); + re.fre_min = htonl(list->min); + re.fre_max = htonl(list->max); + re.fre_map = htonl(list->map); + + if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) + err(1, "writing maplower_ext #%d to %s", n, locale_file); + } + /* + * PART 4: The mapupper_ext structures + */ + for (list = mapupper.root, n = 0; list != NULL; list = list->next, n++) { + _FileRuneEntry re; + + memset(&re, 0, sizeof(re)); + re.fre_min = htonl(list->min); + re.fre_max = 
htonl(list->max); + re.fre_map = htonl(list->map); + + if (fwrite((char *)&re, sizeof(re), 1, fp) != 1) + err(1, "writing mapupper_ext #%d to %s", n, locale_file); + } + /* + * PART 5: The runetype_ext tables + */ + for (list = types.root, n = 0; list != NULL; list = list->next, n++) { + for (x = 0; x < list->max - list->min + 1; ++x) + list->types[x] = htonl(list->types[x]); + + if (!list->map) { + if (fwrite((char *)list->types, + (list->max - list->min + 1) * sizeof(u_int32_t), + 1, fp) != 1) + err(1, "writing runetype_ext table #%d to %s", n, locale_file); + } + } + /* + * PART 5: And finally the variable data + */ + if (new_locale.rl_variable_len != 0 && + fwrite((char *)new_locale.rl_variable, + new_locale.rl_variable_len, 1, fp) != 1) + err(1, "writing variable data to %s", locale_file); + fclose(fp); + + if (!debug) + return; + + if (new_locale.rl_encoding[0]) + fprintf(stderr, "ENCODING %s\n", new_locale.rl_encoding); + if (new_locale.rl_variable) + fprintf(stderr, "VARIABLE %s\n", + (char *)new_locale.rl_variable); + + fprintf(stderr, "\nMAPLOWER:\n\n"); + + for (x = 0; x < _CACHED_RUNES; ++x) { + if (isprint(maplower.map[x])) + fprintf(stderr, " '%c'", (int)maplower.map[x]); + else if (maplower.map[x]) + fprintf(stderr, "%04x", maplower.map[x]); + else + fprintf(stderr, "%4x", 0); + if ((x & 0xf) == 0xf) + fprintf(stderr, "\n"); + else + fprintf(stderr, " "); + } + fprintf(stderr, "\n"); + + for (list = maplower.root; list; list = list->next) + fprintf(stderr, "\t%04x - %04x : %04x\n", list->min, list->max, list->map); + + fprintf(stderr, "\nMAPUPPER:\n\n"); + + for (x = 0; x < _CACHED_RUNES; ++x) { + if (isprint(mapupper.map[x])) + fprintf(stderr, " '%c'", (int)mapupper.map[x]); + else if (mapupper.map[x]) + fprintf(stderr, "%04x", mapupper.map[x]); + else + fprintf(stderr, "%4x", 0); + if ((x & 0xf) == 0xf) + fprintf(stderr, "\n"); + else + fprintf(stderr, " "); + } + fprintf(stderr, "\n"); + + for (list = mapupper.root; list; list = list->next) + 
fprintf(stderr, "\t%04x - %04x : %04x\n", list->min, list->max, list->map); + + + fprintf(stderr, "\nTYPES:\n\n"); + + for (x = 0; x < _CACHED_RUNES; ++x) { + u_int32_t r = types.map[x]; + + if (r) { + if (isprint(x)) + fprintf(stderr, " '%c':%2d", x, (int)(r & 0xff)); + else + fprintf(stderr, "%04x:%2d", x, (int)(r & 0xff)); + + fprintf(stderr, " %4s", (r & _RUNETYPE_A) ? "alph" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_C) ? "ctrl" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_D) ? "dig" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_G) ? "graf" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_L) ? "low" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_P) ? "punc" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_S) ? "spac" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_U) ? "upp" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_X) ? "xdig" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_B) ? "blnk" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_R) ? "prnt" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_I) ? "ideo" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_T) ? "spec" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_Q) ? "phon" : ""); + fprintf(stderr, "\n"); + } + } + + for (list = types.root; list; list = list->next) { + if (list->map && list->min + 3 < list->max) { + u_int32_t r = list->map; + + fprintf(stderr, "%04x:%2d", list->min, r & 0xff); + + fprintf(stderr, " %4s", (r & _RUNETYPE_A) ? "alph" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_C) ? "ctrl" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_D) ? "dig" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_G) ? "graf" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_L) ? "low" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_P) ? "punc" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_S) ? "spac" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_U) ? "upp" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_X) ? "xdig" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_B) ? 
"blnk" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_R) ? "prnt" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_I) ? "ideo" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_T) ? "spec" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_Q) ? "phon" : ""); + fprintf(stderr, "\n...\n"); + + fprintf(stderr, "%04x:%2d", list->max, r & 0xff); + + fprintf(stderr, " %4s", (r & _RUNETYPE_A) ? "alph" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_C) ? "ctrl" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_D) ? "dig" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_G) ? "graf" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_L) ? "low" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_P) ? "punc" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_S) ? "spac" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_U) ? "upp" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_X) ? "xdig" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_B) ? "blnk" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_R) ? "prnt" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_I) ? "ideo" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_T) ? "spec" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_Q) ? "phon" : ""); + fprintf(stderr, " %1u", (unsigned)((r & _RUNETYPE_SWM)>>_RUNETYPE_SWS)); + fprintf(stderr, "\n"); + } else + for (x = list->min; x <= list->max; ++x) { + u_int32_t r = ntohl(list->types[x - list->min]); + + if (r) { + fprintf(stderr, "%04x:%2d", x, (int)(r & 0xff)); + + fprintf(stderr, " %4s", (r & _RUNETYPE_A) ? "alph" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_C) ? "ctrl" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_D) ? "dig" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_G) ? "graf" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_L) ? "low" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_P) ? "punc" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_S) ? "spac" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_U) ? "upp" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_X) ? 
"xdig" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_B) ? "blnk" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_R) ? "prnt" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_I) ? "ideo" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_T) ? "spec" : ""); + fprintf(stderr, " %4s", (r & _RUNETYPE_Q) ? "phon" : ""); + fprintf(stderr, " %1u", (unsigned)((r & _RUNETYPE_SWM)>>_RUNETYPE_SWS)); + fprintf(stderr, "\n"); + } + } + } +} |