summary refs log tree commit diff
diff options
context:
space:
mode:
author Miod Vallat <miod@cvs.openbsd.org> 2023-03-06 17:14:45 +0000
committer Miod Vallat <miod@cvs.openbsd.org> 2023-03-06 17:14:45 +0000
commit f9c9584b084610d30872d9cd3ad09cff1c101dca (patch)
tree b069c4e4e4719a39a38d46389f15a8278185a04d
parent 23fe24ccf2f47a0fa76d39b963db348c6460e286 (diff)
Replace old'n'wrong UTF-8 logic with a better one borrowed from Citrus;
issue reported by Crystal Kolipe on tech@
-rw-r--r-- sys/dev/wscons/wsemul_subr.c 173
-rw-r--r-- sys/dev/wscons/wsemul_sun.c 6
-rw-r--r-- sys/dev/wscons/wsemul_vt100.c 8
-rw-r--r-- sys/dev/wscons/wsemul_vt100var.h 4
-rw-r--r-- sys/dev/wscons/wsemulvar.h 5
5 files changed, 116 insertions, 80 deletions
diff --git a/sys/dev/wscons/wsemul_subr.c b/sys/dev/wscons/wsemul_subr.c
index 50d7b7a2436..95266a69a2e 100644
--- a/sys/dev/wscons/wsemul_subr.c
+++ b/sys/dev/wscons/wsemul_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: wsemul_subr.c,v 1.1 2013/10/18 22:06:41 miod Exp $ */
+/* $OpenBSD: wsemul_subr.c,v 1.2 2023/03/06 17:14:44 miod Exp $ */
/*
* Copyright (c) 2007, 2013 Miodrag Vallat.
@@ -17,6 +17,36 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
+/*
+ * Part of the UTF-8 state machine logic borrowed from citrus_utf8.c
+ * under the following licence:
+ */
+/*-
+ * Copyright (c) 2002-2004 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
@@ -38,29 +68,25 @@ int
wsemul_getchar(const u_char **inbuf, u_int *inlen,
struct wsemul_inputstate *state, int allow_utf8)
{
-#ifndef HAVE_UTF8_SUPPORT
u_int len = *inlen;
const u_char *buf = *inbuf;
+#ifdef HAVE_UTF8_SUPPORT
+ int rc;
+ u_int32_t tmpchar, lbound;
+ u_int mbleft;
+#endif
if (len == 0)
- return (EAGAIN);
+ return EAGAIN;
+#ifndef HAVE_UTF8_SUPPORT
state->inchar = *buf++;
state->mbleft = 0;
len--;
*inlen = len;
*inbuf = buf;
- return (0);
+ return 0;
#else
- u_int len = *inlen;
- const u_char *buf = *inbuf;
- int rc = EAGAIN;
- u_int32_t tmpchar;
- u_int mbleft;
-
- if (len == 0)
- return (rc);
-
/*
* If we do not allow multibyte sequences, process as quickly
* as possible.
@@ -71,10 +97,12 @@ wsemul_getchar(const u_char **inbuf, u_int *inlen,
len--;
*inlen = len;
*inbuf = buf;
- return (0);
+ return 0;
}
+ rc = EAGAIN;
tmpchar = state->inchar;
+ lbound = state->lbound;
mbleft = state->mbleft;
while (len != 0) {
@@ -87,19 +115,22 @@ wsemul_getchar(const u_char **inbuf, u_int *inlen,
*/
if (mbleft != 0) {
- if ((frag & 0xc0) != 0x80) {
- /* Abort the sequence and continue */
- mbleft = 0;
- tmpchar = 0;
- rc = EILSEQ;
- } else {
- tmpchar = (tmpchar << 6) | (frag & 0x3f);
- mbleft--;
- if (mbleft == 0) {
- rc = 0;
- break;
- }
+ if ((frag & 0xc0) != 0x80)
+ goto invalid;
+
+ tmpchar = (tmpchar << 6) | (frag & 0x3f);
+ mbleft--;
+ if (mbleft == 0) {
+ if (tmpchar < lbound)
+ goto invalid;
+ if (tmpchar >= 0xd800 && tmpchar < 0xe000)
+ goto invalid;
+ if (tmpchar >= 0x110000)
+ goto invalid;
+ rc = 0;
+ break;
}
+ continue;
}
/*
@@ -113,41 +144,38 @@ wsemul_getchar(const u_char **inbuf, u_int *inlen,
break;
}
- if (frag == 0xfe || frag == 0xff || (frag & 0x40) == 0) {
- /* Abort the sequence and continue */
- mbleft = 0;
- tmpchar = 0;
- rc = EILSEQ;
- } else {
- frag &= ~(0x80 | 0x40);
+ if ((frag & 0xe0) == 0xc0) {
+ frag &= 0x1f;
mbleft = 1;
+ lbound = 0x80;
+ } else if ((frag & 0xf0) == 0xe0) {
+ frag &= 0x0f;
+ mbleft = 2;
+ lbound = 0x800;
+ } else if ((frag & 0xf8) == 0xf0) {
+ frag &= 0x07;
+ mbleft = 3;
+ lbound = 0x10000;
+ } else {
+ goto invalid;
+ }
- if (frag & 0x20) {
- frag &= ~0x20;
- mbleft++;
- }
- if (frag & 0x10) {
- frag &= ~0x10;
- mbleft++;
- }
- if (frag & 0x08) {
- frag &= ~0x08;
- mbleft++;
- }
- if (frag & 0x04) {
- frag &= ~0x04;
- mbleft++;
- }
+ tmpchar = frag;
+ state->lbound = lbound;
+ continue;
- tmpchar = frag;
- }
+invalid:
+ /* Abort the ill-formed sequence and continue */
+ mbleft = 0;
+ tmpchar = 0;
+ rc = EILSEQ;
}
state->inchar = tmpchar;
state->mbleft = mbleft;
*inlen = len;
*inbuf = buf;
- return (rc);
+ return rc;
#endif
}
@@ -659,7 +687,7 @@ wsemul_local_translate(u_int32_t unisym, kbd_t layout, u_char *out)
/*
* Keysym to UTF-8 sequence translation function.
- * The out buffer is at least 6 characters long.
+ * The out buffer is at least 4 characters long.
*/
int
wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out,
@@ -671,28 +699,27 @@ wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out,
u_int pos, length, headpat;
if (!allow_utf8)
- return (wsemul_local_translate(unisym, layout, out));
-
- if (unisym >= 0x80000000) {
- return (0);
- } else if (unisym > 0x04000000) {
- headpat = 0xfc;
- length = 6;
- } else if (unisym > 0x00200000) {
- headpat = 0xf8;
- length = 5;
- } else if (unisym > 0x00010000) {
- headpat = 0xf0;
- length = 4;
- } else if (unisym > 0x00000800) {
- headpat = 0xe0;
- length = 3;
- } else if (unisym > 0x00000080) {
+ return wsemul_local_translate(unisym, layout, out);
+
+ if (unisym < 0x80) {
+ /* Fast path for plain ASCII characters. */
+ *out = (u_char)unisym;
+ return 1;
+ }
+
+ if (unisym < 0x800) {
headpat = 0xc0;
length = 2;
+ } else if (unisym < 0x10000) {
+ if (unisym >= 0xd800 && unisym < 0xe000)
+ return 0;
+ headpat = 0xe0;
+ length = 3;
} else {
- headpat = 0x00;
- length = 1;
+ if (unisym >= 0x110000)
+ return 0;
+ headpat = 0xf0;
+ length = 4;
}
for (pos = length - 1; pos > 0; pos--) {
@@ -701,6 +728,6 @@ wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out,
}
out[0] = headpat | unisym;
- return (length);
+ return length;
#endif
}
diff --git a/sys/dev/wscons/wsemul_sun.c b/sys/dev/wscons/wsemul_sun.c
index f75986b8dfb..c866b11eed7 100644
--- a/sys/dev/wscons/wsemul_sun.c
+++ b/sys/dev/wscons/wsemul_sun.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: wsemul_sun.c,v 1.34 2020/05/25 09:55:49 jsg Exp $ */
+/* $OpenBSD: wsemul_sun.c,v 1.35 2023/03/06 17:14:44 miod Exp $ */
/* $NetBSD: wsemul_sun.c,v 1.11 2000/01/05 11:19:36 drochner Exp $ */
/*
@@ -118,7 +118,7 @@ struct wsemul_sun_emuldata {
struct wsemul_inputstate kstate; /* kernel input state */
#ifdef HAVE_UTF8_SUPPORT
- u_char translatebuf[6];
+ u_char translatebuf[4];
#else
u_char translatebuf[1];
#endif
@@ -185,8 +185,10 @@ wsemul_sun_reset(struct wsemul_sun_emuldata *edp)
edp->bgcol = WSCOL_WHITE;
edp->scrolldist = 1;
edp->instate.inchar = 0;
+ edp->instate.lbound = 0;
edp->instate.mbleft = 0;
edp->kstate.inchar = 0;
+ edp->kstate.lbound = 0;
edp->kstate.mbleft = 0;
}
diff --git a/sys/dev/wscons/wsemul_vt100.c b/sys/dev/wscons/wsemul_vt100.c
index 7796bab1700..b58f2237dd8 100644
--- a/sys/dev/wscons/wsemul_vt100.c
+++ b/sys/dev/wscons/wsemul_vt100.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: wsemul_vt100.c,v 1.43 2023/02/26 15:09:53 miod Exp $ */
+/* $OpenBSD: wsemul_vt100.c,v 1.44 2023/03/06 17:14:44 miod Exp $ */
/* $NetBSD: wsemul_vt100.c,v 1.13 2000/04/28 21:56:16 mycroft Exp $ */
/*
@@ -309,6 +309,12 @@ wsemul_vt100_reset(struct wsemul_vt100_emuldata *edp)
edp->chartab0 = 0;
edp->chartab1 = 2;
edp->sschartab = 0;
+ edp->instate.inchar = 0;
+ edp->instate.lbound = 0;
+ edp->instate.mbleft = 0;
+ edp->kstate.inchar = 0;
+ edp->kstate.lbound = 0;
+ edp->kstate.mbleft = 0;
}
/*
diff --git a/sys/dev/wscons/wsemul_vt100var.h b/sys/dev/wscons/wsemul_vt100var.h
index 6d52490216c..4b7c7a6c716 100644
--- a/sys/dev/wscons/wsemul_vt100var.h
+++ b/sys/dev/wscons/wsemul_vt100var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: wsemul_vt100var.h,v 1.12 2023/01/12 20:39:37 nicm Exp $ */
+/* $OpenBSD: wsemul_vt100var.h,v 1.13 2023/03/06 17:14:44 miod Exp $ */
/* $NetBSD: wsemul_vt100var.h,v 1.5 2000/04/28 21:56:17 mycroft Exp $ */
/*
@@ -95,7 +95,7 @@ struct wsemul_vt100_emuldata {
struct wsemul_inputstate kstate; /* kernel input state */
#ifdef HAVE_UTF8_SUPPORT
- u_char translatebuf[6];
+ u_char translatebuf[4];
#else
u_char translatebuf[1];
#endif
diff --git a/sys/dev/wscons/wsemulvar.h b/sys/dev/wscons/wsemulvar.h
index 71dd3071c83..a0a88e727be 100644
--- a/sys/dev/wscons/wsemulvar.h
+++ b/sys/dev/wscons/wsemulvar.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: wsemulvar.h,v 1.18 2020/09/13 10:05:46 fcambus Exp $ */
+/* $OpenBSD: wsemulvar.h,v 1.19 2023/03/06 17:14:44 miod Exp $ */
/* $NetBSD: wsemulvar.h,v 1.6 1999/01/17 15:46:15 drochner Exp $ */
/*
@@ -78,7 +78,8 @@ struct wsemul_ops {
* decoding.
*/
struct wsemul_inputstate {
- u_int32_t inchar; /* character being reconstructed */
+ uint32_t inchar; /* character being reconstructed */
+ uint32_t lbound; /* lower bound of above */
u_int mbleft; /* multibyte bytes left until char complete */
};