From c11d303f41f15a1a25dbf1ffcbebf91b86ef9556 Mon Sep 17 00:00:00 2001
From: Ingo Schwarze
Date: Sat, 31 Aug 2019 13:44:30 +0000
Subject: New utility function mbtowc_left() because this functionality -
 decoding a UTF-8 multibyte character to the left of a given byte -
 is already needed at three places in line.c and will also be needed
 for cleanup work in cmdbuf.c in the future.
 OK millert@

---
 usr.bin/less/charset.c | 21 ++++++++++++++++++++
 usr.bin/less/funcs.h   |  1 +
 usr.bin/less/line.c    | 53 ++++++++------------------------------------------
 3 files changed, 30 insertions(+), 45 deletions(-)

diff --git a/usr.bin/less/charset.c b/usr.bin/less/charset.c
index 4be75d678a9..5739fe1b71a 100644
--- a/usr.bin/less/charset.c
+++ b/usr.bin/less/charset.c
@@ -145,6 +145,27 @@ init_charset(void)
 	setbinfmt("LESSUTFBINFMT", &utfbinfmt, "");
 }
 
+/*
+ * Like mbtowc(3), except that it converts the multibyte character
+ * preceding ps rather than the one starting at ps.
+ */
+int
+mbtowc_left(wchar_t *pwc, const char *ps, size_t psz)
+{
+	size_t sz = 0;
+	int len;
+
+	do {
+		if (++sz > psz)
+			return -1;
+	} while (utf_mode && IS_UTF8_TRAIL(ps[-sz]));
+	if ((len = mbtowc(pwc, ps - sz, sz)) == -1) {
+		(void)mbtowc(NULL, NULL, 0);
+		return -1;
+	}
+	return len == sz || (len == 0 && sz == 1) ? len : -1;
+}
+
 /*
  * Is a given character a "control" character?
  */
diff --git a/usr.bin/less/funcs.h b/usr.bin/less/funcs.h
index 2bfab74475e..5ef595a3bbe 100644
--- a/usr.bin/less/funcs.h
+++ b/usr.bin/less/funcs.h
@@ -55,6 +55,7 @@ void ch_set_eof(void);
 void ch_init(int, int);
 void ch_close(void);
 int ch_getflags(void);
+int mbtowc_left(wchar_t *, const char *, size_t);
 void init_charset(void);
 char *prchar(LWCHAR);
 char *prutfchar(LWCHAR);
diff --git a/usr.bin/less/line.c b/usr.bin/less/line.c
index aa1a2b834fc..123353d2121 100644
--- a/usr.bin/less/line.c
+++ b/usr.bin/less/line.c
@@ -437,44 +437,20 @@ backc(void)
 	wchar_t ch, prev_ch;
 	int i, len, width;
 
-	i = curr - 1;
-	if (utf_mode) {
-		while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
-			i--;
-	}
-	if (i < lmargin)
+	if ((len = mbtowc_left(&ch, linebuf + curr, curr)) <= 0)
 		return (0);
-	if (utf_mode) {
-		len = mbtowc(&ch, linebuf + i, curr - i);
-		if (len == -1 || i + len < curr) {
-			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
-			return (0);
-		}
-	} else
-		ch = linebuf[i];
+	curr -= len;
 
 	/* This assumes that there is no '\b' in linebuf. */
-	while (curr > lmargin && column > lmargin &&
-	    (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) {
-		curr = i--;
-		if (utf_mode) {
-			while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
-				i--;
-		}
-		if (i < lmargin)
+	while (curr >= lmargin && column > lmargin &&
+	    !(attr[curr] & (AT_ANSI|AT_BINARY))) {
+		if ((len = mbtowc_left(&prev_ch, linebuf + curr, curr)) <= 0)
 			prev_ch = L'\0';
-		else if (utf_mode) {
-			len = mbtowc(&prev_ch, linebuf + i, curr - i);
-			if (len == -1 || i + len < curr) {
-				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
-				prev_ch = L'\0';
-			}
-		} else
-			prev_ch = linebuf[i];
 		width = pwidth(ch, attr[curr], prev_ch);
 		column -= width;
 		if (width > 0)
 			return (1);
+		curr -= len;
 		if (prev_ch == L'\0')
 			return (0);
 		ch = prev_ch;
@@ -554,21 +530,8 @@ store_char(LWCHAR ch, char a, char *rep, off_t pos)
 	}
 	if (w == -1) {
 		wchar_t prev_ch;
-
-		if (utf_mode) {
-			for (i = curr - 1; i >= 0; i--)
-				if (!IS_UTF8_TRAIL(linebuf[i]))
-					break;
-			if (i >= 0) {
-				w = mbtowc(&prev_ch, linebuf + i, curr - i);
-				if (w == -1 || i + w < curr) {
-					(void)mbtowc(NULL, NULL, MB_CUR_MAX);
-					prev_ch = L' ';
-				}
-			} else
-				prev_ch = L' ';
-		} else
-			prev_ch = curr > 0 ? linebuf[curr - 1] : L' ';
+		if (mbtowc_left(&prev_ch, linebuf + curr, curr) <= 0)
+			prev_ch = L' ';
 		w = pwidth(ch, a, prev_ch);
 	}
 
-- 
cgit v1.2.3