summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Ingo Schwarze <schwarze@cvs.openbsd.org> 2016-03-20 17:19:49 +0000
committer: Ingo Schwarze <schwarze@cvs.openbsd.org> 2016-03-20 17:19:49 +0000
commit: 6ca06ac650adf739b39345e368bfcd7888376514 (patch)
tree: b43bbfe48c2156b0d9705e0f52a605c074b80b41 /lib
parent: 862c21567d8b88224fd439b5e1e39be39841de5c (diff)
Fix the CHARSET_IS_UTF8 case in read_char().
For now, this mainly helps programs explicitly using wide-character functions like el_wgetc(3) and el_wgets(3). 1. After reading an invalid byte sequence, do not throw away additional valid bytes; fix by me using mbrtowc(3), obsoleting utf8_islead(). 2. When read(2) returns EOF, return that information to the caller, do not plod on and potentially access garbage data in the buffer; from Linas Vepstas via NetBSD read.c rev. 1.70 2013/05/27. 3. After read__fixio() failure, restore errno to the one set by read(); from Steffen Nurpmeso via NetBSD read.c rev. 1.68 2012/09/10. 4. After read__fixio() success, restore errno to the initial state upon function entry; fix by me. OK czarkoff@. Also committed to NetBSD.
Diffstat (limited to 'lib')
-rw-r--r-- lib/libedit/chartype.h 8
-rw-r--r-- lib/libedit/read.c 59
2 files changed, 43 insertions, 24 deletions
diff --git a/lib/libedit/chartype.h b/lib/libedit/chartype.h
index 505fa7b436f..505805f511e 100644
--- a/lib/libedit/chartype.h
+++ b/lib/libedit/chartype.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: chartype.h,v 1.5 2014/10/17 06:07:50 deraadt Exp $ */
+/* $OpenBSD: chartype.h,v 1.6 2016/03/20 17:19:48 schwarze Exp $ */
/* $NetBSD: chartype.h,v 1.5 2010/04/15 00:55:57 christos Exp $ */
/*-
@@ -61,8 +61,7 @@
#warning Build environment does not support non-BMP characters
#endif
-#define ct_mbtowc mbtowc
-#define ct_mbtowc_reset mbtowc(0,0,0)
+#define ct_mbrtowc mbrtowc
#define ct_wctomb wctomb
#define ct_wctomb_reset wctomb(0,0)
#define ct_wcstombs wcstombs
@@ -110,8 +109,7 @@
#else /* NARROW */
-#define ct_mbtowc error
-#define ct_mbtowc_reset
+#define ct_mbrtowc error
#define ct_wctomb error
#define ct_wctomb_reset
#define ct_wcstombs(a, b, c) (strncpy(a, b, c), strlen(a))
diff --git a/lib/libedit/read.c b/lib/libedit/read.c
index c00d86af658..c59b76fad69 100644
--- a/lib/libedit/read.c
+++ b/lib/libedit/read.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: read.c,v 1.20 2016/01/31 20:42:33 schwarze Exp $ */
+/* $OpenBSD: read.c,v 1.21 2016/03/20 17:19:48 schwarze Exp $ */
/* $NetBSD: read.c,v 1.57 2010/07/21 18:18:52 christos Exp $ */
/*-
@@ -288,18 +288,6 @@ read_getcmd(EditLine *el, el_action_t *cmdnum, Char *ch)
return OKCMD;
}
-#ifdef WIDECHAR
-/* utf8_islead():
- * Test whether a byte is a leading byte of a UTF-8 sequence.
- */
-private int
-utf8_islead(unsigned char c)
-{
- return c < 0x80 || /* single byte char */
- (c >= 0xc2 && c <= 0xf4); /* start of multibyte sequence */
-}
-#endif
-
/* read_char():
* Read a character from the tty.
*/
@@ -311,10 +299,12 @@ read_char(EditLine *el, Char *cp)
char cbuf[MB_LEN_MAX];
int cbp = 0;
int bytes = 0;
+ int save_errno = errno;
again:
el->el_signal->sig_no = 0;
while ((num_read = read(el->el_infd, cbuf + cbp, 1)) == -1) {
+ int e = errno;
switch (el->el_signal->sig_no) {
case SIGCONT:
el_set(el, EL_REFRESH);
@@ -325,26 +315,57 @@ read_char(EditLine *el, Char *cp)
default:
break;
}
- if (!tried && read__fixio(el->el_infd, errno) == 0)
+ if (!tried && read__fixio(el->el_infd, e) == 0) {
+ errno = save_errno;
tried = 1;
- else {
+ } else {
+ errno = e;
*cp = '\0';
return -1;
}
}
+ /* Test for EOF */
+ if (num_read == 0) {
+ *cp = '\0';
+ return 0;
+ }
+
#ifdef WIDECHAR
if (el->el_flags & CHARSET_IS_UTF8) {
- if (!utf8_islead((unsigned char)cbuf[0]))
- goto again; /* discard the byte we read and try again */
+ mbstate_t mbs;
+ size_t rbytes;
+again_lastbyte:
++cbp;
- if ((bytes = ct_mbtowc(cp, cbuf, cbp)) == -1) {
- ct_mbtowc_reset;
+ /* This only works because UTF8 is stateless */
+ memset(&mbs, 0, sizeof(mbs));
+ switch (rbytes = ct_mbrtowc(cp, cbuf, cbp, &mbs)) {
+ case (size_t)-1:
+ if (cbp > 1) {
+ /*
+ * Invalid sequence, discard all bytes
+ * except the last one.
+ */
+ cbuf[0] = cbuf[cbp - 1];
+ cbp = 0;
+ goto again_lastbyte;
+ } else {
+ /* Invalid byte, discard it. */
+ cbp = 0;
+ goto again;
+ }
+ case (size_t)-2:
if (cbp >= MB_LEN_MAX) { /* "shouldn't happen" */
+ errno = EILSEQ;
*cp = '\0';
return -1;
}
+ /* Incomplete sequence, read another byte. */
goto again;
+ default:
+ /* Valid character, process it. */
+ bytes = (int)rbytes;
+ break;
}
} else /* we don't support other multibyte charsets */
#endif