summary | refs | log | tree | commit | diff
path: root/bin
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@cvs.openbsd.org> 2021-06-27 15:53:34 +0000
committer: Ingo Schwarze <schwarze@cvs.openbsd.org> 2021-06-27 15:53:34 +0000
commit: 8f5451587b03af0de0dc56df9392ea61649d4abf (patch)
tree: 891190dd9ce0df6c0bae20fda1222fece23f3175 /bin
parent: f51cddac0222b7a73a662718532a17752f1a3d5e (diff)
In addition to 2-byte and 3-byte UTF-8 sequences, correctly identify all
4-byte UTF-8 sequences and not just some of them, to keep them together and avoid passing them on byte by byte, helping tools like tmux(1). While here, also do all the range tests with < and > rather than & for uniformity and readability, and add some comments. Input and OK jca@ and nicm@. Soeren at Soeren dash Tempel dot net originally reported the bug and provided an incomplete patch that was used as a starting point, and he also tested this final patch.
Diffstat (limited to 'bin')
-rw-r--r-- bin/ksh/emacs.c 19
1 file changed, 13 insertions, 6 deletions
diff --git a/bin/ksh/emacs.c b/bin/ksh/emacs.c
index 694c402ff6b..1a5ff6e9927 100644
--- a/bin/ksh/emacs.c
+++ b/bin/ksh/emacs.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: emacs.c,v 1.87 2020/05/08 14:30:42 jca Exp $ */
+/* $OpenBSD: emacs.c,v 1.88 2021/06/27 15:53:33 schwarze Exp $ */
/*
* Emacs-like command line editing and history
@@ -1851,11 +1851,17 @@ x_e_getu8(char *buf, int off)
return -1;
buf[off++] = c;
- if (c == 0xf4)
+ /*
+ * In the following, comments refer to violations of
+ * the inequality tests at the ends of the lines.
+ * See the utf8(7) manual page for details.
+ */
+
+ if ((c & 0xf8) == 0xf0 && c < 0xf5) /* beyond Unicode */
len = 4;
else if ((c & 0xf0) == 0xe0)
len = 3;
- else if ((c & 0xe0) == 0xc0 && c > 0xc1)
+ else if ((c & 0xe0) == 0xc0 && c > 0xc1) /* use single byte */
len = 2;
else
len = 1;
@@ -1865,9 +1871,10 @@ x_e_getu8(char *buf, int off)
if (cc == -1)
break;
if (isu8cont(cc) == 0 ||
- (c == 0xe0 && len == 3 && cc < 0xa0) ||
- (c == 0xed && len == 3 && cc & 0x20) ||
- (c == 0xf4 && len == 4 && cc & 0x30)) {
+ (c == 0xe0 && len == 3 && cc < 0xa0) || /* use 2 bytes */
+ (c == 0xed && len == 3 && cc > 0x9f) || /* surrogates */
+ (c == 0xf0 && len == 4 && cc < 0x90) || /* use 3 bytes */
+ (c == 0xf4 && len == 4 && cc > 0x8f)) { /* beyond Uni. */
x_e_ungetc(cc);
break;
}