summaryrefslogtreecommitdiff
path: root/gnu/egcs/gcc/mbchar.c
diff options
context:
space:
mode:
authorMarc Espie <espie@cvs.openbsd.org>1999-05-26 13:38:57 +0000
committerMarc Espie <espie@cvs.openbsd.org>1999-05-26 13:38:57 +0000
commit0126e157b87f137fc08dc7f46f6c291b9d06ac5d (patch)
treef8555e3e504eb82b4cd3cba5cec20ae4ce8124ff /gnu/egcs/gcc/mbchar.c
parentff8e9a4356e55ed142306c3a375fa280800abc86 (diff)
egcs projects compiler system
Exact copy of the snapshot, except for the removal of texinfo/ gcc/ch/ libchill/
Diffstat (limited to 'gnu/egcs/gcc/mbchar.c')
-rw-r--r--gnu/egcs/gcc/mbchar.c290
1 files changed, 290 insertions, 0 deletions
diff --git a/gnu/egcs/gcc/mbchar.c b/gnu/egcs/gcc/mbchar.c
new file mode 100644
index 00000000000..a22e52b56b3
--- /dev/null
+++ b/gnu/egcs/gcc/mbchar.c
@@ -0,0 +1,290 @@
+/* Multibyte Character Functions.
+ Copyright (C) 1998 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+/* These functions are used to manipulate multibyte characters. */
+
+/* Note regarding cross compilation:
+
+ In general translation of multibyte characters to wide characters can
+ only work in a native compiler since the translation function (mbtowc)
+ needs to know about both the source and target character encoding. However,
+ this particular implementation for JIS, SJIS and EUCJP source characters
+ will work for any compiler with a newlib target. Other targets may also
+ work provided that their wchar_t implementation is 2 bytes and the encoding
+ leaves the source character values unchanged (except for removing the
+ state shifting markers). */
+
+#ifdef MULTIBYTE_CHARS
+#include "config.h"
+#include "system.h"
+#include "mbchar.h"
+#include <locale.h>
+
+/* Character classes used by the C-JIS scanner in local_mbtowc. Each value
+ is a column index into JIS_state_table and JIS_action_table, so the order
+ of the enumerators must match the column order of those tables.
+ ESCAPE, DOLLAR, BRACKET, AT, B, J and NUL stand for the bytes
+ JIS_ESC_CHAR, '$', '(', '@', 'B', 'J' and '\0'; JIS_CHAR is any other byte
+ accepted by ISJIS; OTHER is everything else. JIS_C_NUM is the number of
+ classes, not a class itself. */
+typedef enum
+{
+ ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER, JIS_C_NUM
+} JIS_CHAR_TYPE;
+
+/* States of the C-JIS scanner in local_mbtowc. Each value is a row index
+ into JIS_state_table and JIS_action_table, so the order of the enumerators
+ must match the row order of those tables.
+ ASCII is the initial state. A_ESC and A_ESC_DL are reached from ASCII
+ after ESC and ESC '$'. JIS is JIS mode before the first byte of a
+ character; JIS_1 and JIS_2 are reached after its first and second byte.
+ J_ESC/J_ESC_BR are reached after ESC and ESC '(' seen in state JIS;
+ J2_ESC/J2_ESC_BR are the same prefixes seen in state JIS_2. INV marks an
+ invalid transition. JIS_S_NUM is the number of states, not a state. */
+typedef enum
+{
+ ASCII, A_ESC, A_ESC_DL, JIS, JIS_1, JIS_2, J_ESC, J_ESC_BR,
+ J2_ESC, J2_ESC_BR, INV, JIS_S_NUM
+} JIS_STATE;
+
+/* Actions performed by local_mbtowc for each (state, character class) pair,
+ as looked up in JIS_action_table:
+ COPYA store the byte at the current start pointer, consume the bytes
+ scanned so far;
+ COPYJ store the two bytes at the start pointer as one wide character,
+ consume the bytes scanned so far;
+ COPYJ2 store the same two-byte value, but consume only up to the end of
+ those two bytes;
+ MAKE_A, MAKE_J
+ move the start pointer to the byte after the current one;
+ NOOP keep scanning;
+ EMPTY store a zero wide character and return the count of bytes that
+ preceded the current one;
+ ERROR fail with -1. */
+typedef enum
+{
+ COPYA, COPYJ, COPYJ2, MAKE_A, MAKE_J, NOOP, EMPTY, ERROR
+} JIS_ACTION;
+
+/*****************************************************************************
+ * state/action tables for processing JIS encoding
+ * Where possible, switches to JIS are grouped with the following JIS characters
+ * and switches to ASCII are grouped with the preceding JIS characters.
+ * Thus, maximum returned length is:
+ * 3 (switch to JIS) + 2 (JIS characters) + 3 (switch back to ASCII) = 8.
+ *****************************************************************************/
+/* Next-state table for the C-JIS scanner: JIS_state_table[state][class] is
+ the state entered after reading a byte of the given JIS_CHAR_TYPE class
+ while in the given JIS_STATE. Rows follow the JIS_STATE enumerator order
+ and columns follow the JIS_CHAR_TYPE order. No row is written for INV, so
+ that row is zero-initialised; every transition into INV is paired with
+ ERROR in JIS_action_table, and local_mbtowc returns on ERROR without
+ saving the state, so the INV row is not consulted. */
+static JIS_STATE JIS_state_table[JIS_S_NUM][JIS_C_NUM] = {
+/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER*/
+/*ASCII*/ { A_ESC, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
+/*A_ESC*/ { ASCII, A_ESC_DL,ASCII, ASCII, ASCII, ASCII, ASCII,ASCII,ASCII},
+/*A_ESC_DL*/{ ASCII, ASCII, ASCII, JIS, JIS, ASCII, ASCII,ASCII,ASCII},
+/*JIS*/ { J_ESC, JIS_1, JIS_1, JIS_1, JIS_1, JIS_1, INV, JIS_1,INV },
+/*JIS_1*/ { INV, JIS_2, JIS_2, JIS_2, JIS_2, JIS_2, INV, JIS_2,INV },
+/*JIS_2*/ { J2_ESC,JIS, JIS, JIS, JIS, JIS, INV, JIS, JIS },
+/*J_ESC*/ { INV, INV, J_ESC_BR, INV, INV, INV, INV, INV, INV },
+/*J_ESC_BR*/{ INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
+/*J2_ESC*/ { INV, INV, J2_ESC_BR,INV, INV, INV, INV, INV, INV },
+/*J2_ESC_BR*/{INV, INV, INV, INV, ASCII, ASCII, INV, INV, INV },
+};
+
+/* Action table for the C-JIS scanner: JIS_action_table[state][class] is the
+ JIS_ACTION local_mbtowc performs on reading a byte of the given
+ JIS_CHAR_TYPE class while in the given JIS_STATE. Rows follow the
+ JIS_STATE enumerator order and columns follow the JIS_CHAR_TYPE order,
+ exactly as in JIS_state_table.
+
+ The J_ESC_BR row uses MAKE_A for 'B' and 'J': that escape sequence returns
+ to ASCII without a pending JIS character, so the start pointer has to be
+ moved past the escape bytes. Otherwise a later COPYA would store a stale
+ byte (for example the ESC itself) instead of the ASCII character that
+ follows the escape sequence. */
+static JIS_ACTION JIS_action_table[JIS_S_NUM][JIS_C_NUM] = {
+/* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
+/*ASCII */ {NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, EMPTY, COPYA, COPYA},
+/*A_ESC */ {COPYA, NOOP, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA, COPYA},
+/*A_ESC_DL */{COPYA, COPYA, COPYA, MAKE_J, MAKE_J, COPYA, COPYA, COPYA, COPYA},
+/*JIS */ {NOOP, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
+/*JIS_1 */ {ERROR, NOOP, NOOP, NOOP, NOOP, NOOP, ERROR, NOOP, ERROR },
+/*JIS_2 */ {NOOP, COPYJ2,COPYJ2,COPYJ2, COPYJ2, COPYJ2,ERROR, COPYJ2,COPYJ2},
+/*J_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
+/*J_ESC_BR */{ERROR, ERROR, ERROR, ERROR, MAKE_A, MAKE_A,ERROR, ERROR, ERROR },
+/*J2_ESC */ {ERROR, ERROR, NOOP, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR },
+/*J2_ESC_BR*/{ERROR, ERROR, ERROR, ERROR, COPYJ, COPYJ, ERROR, ERROR, ERROR },
+};
+
+
+/* Name of the encoding assumed for multibyte characters. local_mbtowc and
+ local_mb_cur_max compare it against "C-SJIS", "C-EUCJP" and "C-JIS"; NULL,
+ a name of at most one character, or any other name selects the default
+ ("C" or unknown locale) handling.
+ NOTE(review): nothing in this file assigns it -- presumably set elsewhere
+ in the compiler; confirm against the callers. */
+char *literal_codeset = NULL;
+
+/* Convert the multibyte character starting at S, examining at most N bytes,
+ into a wide character stored through PWC (when PWC is not NULL), using the
+ encoding named by literal_codeset.
+
+ Return value:
+ - when S is NULL: 1 for "C-JIS" (state-dependent; the saved shift state is
+ also reset to ASCII), 0 for "C-SJIS" and "C-EUCJP"; for any other
+ codeset, 0 when cross-compiling, otherwise whatever the host mbtowc
+ returns;
+ - -1 when S is not NULL and N is 0, when the input is invalid, or when N
+ bytes are not enough to complete a character;
+ - 0 when a NUL byte is decoded in the C-SJIS or C-EUCJP branches;
+ - otherwise the number of bytes consumed. For "C-JIS" this count
+ includes any escape sequences grouped with the character, and a NUL
+ yields the number of bytes that preceded it.
+
+ The C-JIS shift state is kept in a function-local static between calls
+ and is only updated when a character is returned successfully. */
+int
+local_mbtowc (pwc, s, n)
+     wchar_t *pwc;
+     const char *s;
+     size_t n;
+{
+  static JIS_STATE save_state = ASCII;
+  JIS_STATE curr_state = save_state;
+  /* Read the input as unsigned bytes so values >= 0x80 are not
+     sign-extended. */
+  const unsigned char *t = (const unsigned char *) s;
+
+  if (s != NULL && n == 0)
+    return -1;
+
+  if (literal_codeset == NULL || strlen (literal_codeset) <= 1)
+    {
+      /* This must be the "C" locale or unknown locale -- fall thru */
+    }
+  else if (! strcmp (literal_codeset, "C-SJIS"))
+    {
+      int char1;
+      if (s == NULL)
+        return 0; /* not state-dependent */
+      char1 = *t;
+      if (ISSJIS1 (char1))
+        {
+          int char2;
+          /* Check the length before touching the trail byte: with N == 1
+             the byte at t[1] is outside the caller's buffer. */
+          if (n <= 1)
+            return -1;
+          char2 = t[1];
+          if (ISSJIS2 (char2))
+            {
+              if (pwc != NULL)
+                *pwc = (((wchar_t) t[0]) << 8) + (wchar_t) t[1];
+              return 2;
+            }
+          return -1;
+        }
+      if (pwc != NULL)
+        *pwc = (wchar_t) *t;
+      if (*t == '\0')
+        return 0;
+      return 1;
+    }
+  else if (! strcmp (literal_codeset, "C-EUCJP"))
+    {
+      int char1;
+      if (s == NULL)
+        return 0; /* not state-dependent */
+      char1 = *t;
+      if (ISEUCJP (char1))
+        {
+          int char2;
+          /* As above: validate N before reading the second byte. */
+          if (n <= 1)
+            return -1;
+          char2 = t[1];
+          if (ISEUCJP (char2))
+            {
+              if (pwc != NULL)
+                *pwc = (((wchar_t) t[0]) << 8) + (wchar_t) t[1];
+              return 2;
+            }
+          return -1;
+        }
+      if (pwc != NULL)
+        *pwc = (wchar_t) *t;
+      if (*t == '\0')
+        return 0;
+      return 1;
+    }
+  else if (! strcmp (literal_codeset, "C-JIS"))
+    {
+      JIS_ACTION action;
+      JIS_CHAR_TYPE ch;
+      const unsigned char *ptr;
+      int i, curr_ch;
+
+      if (s == NULL)
+        {
+          save_state = ASCII;
+          return 1; /* state-dependent */
+        }
+
+      /* PTR marks where the character proper starts; the MAKE_* actions
+         advance it past a leading escape sequence. */
+      ptr = t;
+
+      for (i = 0; (size_t) i < n; ++i)
+        {
+          /* Classify the current byte into a table column. */
+          curr_ch = t[i];
+          switch (curr_ch)
+            {
+            case JIS_ESC_CHAR:
+              ch = ESCAPE;
+              break;
+            case '$':
+              ch = DOLLAR;
+              break;
+            case '@':
+              ch = AT;
+              break;
+            case '(':
+              ch = BRACKET;
+              break;
+            case 'B':
+              ch = B;
+              break;
+            case 'J':
+              ch = J;
+              break;
+            case '\0':
+              ch = NUL;
+              break;
+            default:
+              if (ISJIS (curr_ch))
+                ch = JIS_CHAR;
+              else
+                ch = OTHER;
+            }
+
+          /* Look up the action with the state we are leaving, then step. */
+          action = JIS_action_table[curr_state][ch];
+          curr_state = JIS_state_table[curr_state][ch];
+
+          switch (action)
+            {
+            case NOOP:
+              break;
+            case EMPTY:
+              if (pwc != NULL)
+                *pwc = (wchar_t) 0;
+              save_state = curr_state;
+              return i;
+            case COPYA:
+              if (pwc != NULL)
+                *pwc = (wchar_t) *ptr;
+              save_state = curr_state;
+              return (i + 1);
+            case COPYJ:
+              /* Two-byte character followed by a switch back to ASCII:
+                 everything scanned so far is consumed. */
+              if (pwc != NULL)
+                *pwc = (((wchar_t) ptr[0]) << 8) + (wchar_t) ptr[1];
+              save_state = curr_state;
+              return (i + 1);
+            case COPYJ2:
+              /* Two-byte character followed by another character: the
+                 byte just examined is left for the next call. */
+              if (pwc != NULL)
+                *pwc = (((wchar_t) ptr[0]) << 8) + (wchar_t) ptr[1];
+              save_state = curr_state;
+              return (ptr - t) + 2;
+            case MAKE_A:
+            case MAKE_J:
+              /* The escape sequence ends here; the character starts at
+                 the next byte. */
+              ptr = t + i + 1;
+              break;
+            case ERROR:
+            default:
+              return -1;
+            }
+        }
+
+      return -1; /* n < bytes needed */
+    }
+
+#ifdef CROSS_COMPILE
+  if (s == NULL)
+    return 0; /* not state-dependent */
+  /* NOTE(review): this reads through plain `char', which may be signed, so a
+     byte >= 0x80 may sign-extend here unlike the unsigned reads above; a NUL
+     byte also yields 1 rather than 0 -- confirm this is intended. */
+  if (pwc != NULL)
+    *pwc = *s;
+  return 1;
+#else
+  /* This must be the "C" locale or unknown locale. */
+  return mbtowc (pwc, s, n);
+#endif
+}
+
+/* Return the result of decoding the multibyte character at S, examining at
+ most N bytes, without storing the wide character: this is local_mbtowc
+ called with a NULL destination, so the return value follows that
+ function's conventions. */
+int
+local_mblen (s, n)
+     const char *s;
+     size_t n;
+{
+  int length = local_mbtowc (NULL, s, n);
+
+  return length;
+}
+
+/* Return the maximum number of bytes local_mbtowc may consume for one
+ character under the encoding named by literal_codeset: 2 for "C-SJIS" and
+ "C-EUCJP", 8 for "C-JIS". For any other setting (including NULL or a
+ name of at most one character) the result is 1 when cross-compiling,
+ otherwise the host's MB_CUR_MAX if positive, else 1. */
+int
+local_mb_cur_max ()
+{
+  if (literal_codeset != NULL && strlen (literal_codeset) > 1)
+    {
+      if (strcmp (literal_codeset, "C-SJIS") == 0
+          || strcmp (literal_codeset, "C-EUCJP") == 0)
+        return 2;
+
+      if (strcmp (literal_codeset, "C-JIS") == 0)
+        return 8; /* 3 + 2 + 3 */
+    }
+
+#ifdef CROSS_COMPILE
+  return 1;
+#else
+  /* Fall back to the host locale's limit; 1 is the default. */
+  return (MB_CUR_MAX > 0) ? MB_CUR_MAX : 1;
+#endif
+}
+#endif /* MULTIBYTE_CHARS */