diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2019-05-09 10:36:00 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2019-05-09 10:36:00 +0000 |
commit | ec39ce910cbd28208609bd58c475f1d25dd47cce (patch) | |
tree | 552e2c51eb9a4b1eaf1394ea3671a228d3f22bef | |
parent | c063612ebf75706f33c52380b19b75dc8890d3d2 (diff) |
UTF-8 cleanup in the function pshift().
Use the standard functions mbtowc(3), wcwidth(3), iscntrl(3) instead
of bad functions like get_wchar(), utf_len(), is_wide_char(),
is_composing_char(), is_combining_char(), control_char().
If only half of a double-width character is shifted off screen, do not
inspect anything following it because that clearly remains on-screen.
Improve and add comments.
OK millert@
-rw-r--r-- | usr.bin/less/line.c | 76 |
1 files changed, 37 insertions, 39 deletions
diff --git a/usr.bin/less/line.c b/usr.bin/less/line.c index 8b811d700e5..217da04d4ef 100644 --- a/usr.bin/less/line.c +++ b/usr.bin/less/line.c @@ -32,11 +32,11 @@ int ntabstops = 1; /* Number of tabstops */ int tabdefault = 8; /* Default repeated tabstops */ off_t highest_hilite; /* Pos of last hilite in file found so far */ -static int curr; /* Index into linebuf */ -static int column; /* Printable length, accounting for backspaces, etc. */ +static int curr; /* Total number of bytes in linebuf */ +static int column; /* Display columns needed to show linebuf */ static int overstrike; /* Next char should overstrike previous char */ static int is_null_line; /* There is no current line */ -static int lmargin; /* Left margin */ +static int lmargin; /* Index in linebuf of start of content */ static char pendc; static off_t pendpos; static char *end_ansi_chars; @@ -202,20 +202,21 @@ plinenum(off_t pos) /* * Shift the input line left. - * This means discarding N printable chars at the start of the buffer. + * Starting at lmargin, some bytes are discarded from the linebuf, + * until the number of display columns needed to show these bytes + * would exceed the argument. */ static void pshift(int shift) { - LWCHAR prev_ch = 0; - unsigned char c; - int shifted = 0; - int to; - int from; - int len; - int width; - int prev_attr; - int next_attr; + int shifted = 0; /* Number of display columns already discarded. */ + int from; /* Index in linebuf of the current character. */ + int to; /* Index in linebuf to move this character to. */ + int len; /* Number of bytes in this character. */ + int width = 0; /* Display columns needed for this character. */ + int prev_attr; /* Attributes of the preceding character. */ + int next_attr; /* Attributes of the following character. */ + unsigned char c; /* First byte of current character. */ if (shift > column - lmargin) shift = column - lmargin; @@ -241,44 +242,41 @@ pshift(int shift) } continue; } - - width = 0; - if (!IS_ASCII_OCTET(c) && utf_mode) { - /* Assumes well-formedness validation already done. */ - LWCHAR ch; - - len = utf_len(c); - if (from + len > curr) - break; - ch = get_wchar(linebuf + from); - if (!is_composing_char(ch) && - !is_combining_char(prev_ch, ch)) - width = is_wide_char(ch) ? 2 : 1; - prev_ch = ch; + wchar_t ch; + /* + * Before this point, UTF-8 validity was already + * checked, but for additional safety, treat + * invalid bytes as single-width characters + * if they ever make it here. Similarly, treat + * non-printable characters as width 1. + */ + len = mbtowc(&ch, linebuf + from, curr - from); + if (len == -1) + len = width = 1; + else if ((width = wcwidth(ch)) == -1) + width = 1; } else { len = 1; if (c == '\b') /* XXX - Incorrect if several '\b' in a row. */ - width = (utf_mode && is_wide_char(prev_ch)) ? - -2 : -1; - else if (!control_char(c)) - width = 1; - prev_ch = 0; + width = width > 0 ? -width : -1; + else + width = iscntrl(c) ? 0 : 1; } if (width == 2 && shift - shifted == 1) { - /* Should never happen when called by pshift_all(). */ - attr[to] = attr[from]; /* - * Assume a wide_char will never be the first half of a - * combining_char pair, so reset prev_ch in case we're - * followed by a '\b'. + * Move the first half of a double-width character + * off screen. Print a space instead of the second + * half. This should never happen when called + * by pshift_all(). */ - prev_ch = linebuf[to++] = ' '; + attr[to] = attr[from]; + linebuf[to++] = ' '; from += len; shifted++; - continue; + break; } /* Adjust width for magic cookies. */ |