summary | refs | log | tree | commit | diff
path: root/usr.bin/tmux/utf8.c
diff options
context:
space:
mode:
author Nicholas Marriott <nicm@cvs.openbsd.org> 2016-03-02 15:36:04 +0000
committer Nicholas Marriott <nicm@cvs.openbsd.org> 2016-03-02 15:36:04 +0000
commit 58e1679b6ddb893e7e057b491aac76757819f994 (patch)
tree 9160603edcb49d2b7c1c991f82e444b1c12d42ab /usr.bin/tmux/utf8.c
parent 0cd5b4650ef6c671175dd8c336f0b25caaf290da (diff)
Handle wcwidth() and mbtowc() failures in better style and drop
characters where we can't find the width (wcwidth() fails) on input, the same as we drop invalid UTF-8. Suggested by schwarze@.
Diffstat (limited to 'usr.bin/tmux/utf8.c')
-rw-r--r-- usr.bin/tmux/utf8.c 45
1 files changed, 30 insertions, 15 deletions
diff --git a/usr.bin/tmux/utf8.c b/usr.bin/tmux/utf8.c
index e9571e7a22f..9c9aa223cb2 100644
--- a/usr.bin/tmux/utf8.c
+++ b/usr.bin/tmux/utf8.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: utf8.c,v 1.28 2016/03/01 12:02:08 nicm Exp $ */
+/* $OpenBSD: utf8.c,v 1.29 2016/03/02 15:36:03 nicm Exp $ */
/*
* Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
@@ -25,6 +25,8 @@
#include "tmux.h"
+static int utf8_width(wchar_t);
+
/* Set a single character. */
void
utf8_set(struct utf8_data *ud, u_char ch)
@@ -80,6 +82,9 @@ utf8_open(struct utf8_data *ud, u_char ch)
enum utf8_state
utf8_append(struct utf8_data *ud, u_char ch)
{
+ wchar_t wc;
+ int width;
+
if (ud->have >= ud->size)
fatalx("UTF-8 character overflow");
if (ud->size > sizeof ud->data)
@@ -94,39 +99,49 @@ utf8_append(struct utf8_data *ud, u_char ch)
if (ud->width == 0xff)
return (UTF8_ERROR);
- ud->width = utf8_width(utf8_combine(ud));
+
+ if (utf8_combine(ud, &wc) != UTF8_DONE)
+ return (UTF8_ERROR);
+ if ((width = utf8_width(wc)) < 0)
+ return (UTF8_ERROR);
+ ud->width = width;
+
return (UTF8_DONE);
}
/* Get width of Unicode character. */
-u_int
+static int
utf8_width(wchar_t wc)
{
- int width;
+ int width;
width = wcwidth(wc);
- if (width < 0)
- return (0);
+ if (width < 0 || width > 0xff)
+ return (-1);
return (width);
}
/* Combine UTF-8 into Unicode. */
-wchar_t
-utf8_combine(const struct utf8_data *ud)
+enum utf8_state
+utf8_combine(const struct utf8_data *ud, wchar_t *wc)
{
- wchar_t wc;
-
- if (mbtowc(&wc, ud->data, ud->size) <= 0)
- return (0xfffd);
- return (wc);
+ switch (mbtowc(wc, ud->data, ud->size)) {
+ case -1:
+ mbtowc(NULL, NULL, MB_CUR_MAX);
+ return (UTF8_ERROR);
+ case 0:
+ return (UTF8_ERROR);
+ default:
+ return (UTF8_DONE);
+ }
}
/* Split Unicode into UTF-8. */
enum utf8_state
utf8_split(wchar_t wc, struct utf8_data *ud)
{
- char s[MB_CUR_MAX];
- int slen;
+ char s[MB_LEN_MAX];
+ int slen;
slen = wctomb(s, wc);
if (slen <= 0 || slen > (int)sizeof ud->data)