summary | refs | log | tree | commit | diff
path: root/usr.bin
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@cvs.openbsd.org> 2019-08-31 13:44:30 +0000
committer Ingo Schwarze <schwarze@cvs.openbsd.org> 2019-08-31 13:44:30 +0000
commit c11d303f41f15a1a25dbf1ffcbebf91b86ef9556 (patch)
tree d76161ac9a43e36024d369d480feff877b1e7cda /usr.bin
parent 98d35f056057ba971085a40278eb7528f52627b0 (diff)
New utility function mbtowc_left(), because this functionality -
decoding a UTF-8 multibyte character to the left of a given byte - is already needed in three places in line.c and will also be needed for cleanup work in cmdbuf.c in the future. OK millert@
Diffstat (limited to 'usr.bin')
-rw-r--r-- usr.bin/less/charset.c 21
-rw-r--r-- usr.bin/less/funcs.h 1
-rw-r--r-- usr.bin/less/line.c 53
3 files changed, 30 insertions, 45 deletions
diff --git a/usr.bin/less/charset.c b/usr.bin/less/charset.c
index 4be75d678a9..5739fe1b71a 100644
--- a/usr.bin/less/charset.c
+++ b/usr.bin/less/charset.c
@@ -146,6 +146,27 @@ init_charset(void)
}
/*
+ * Like mbtowc(3), except that it converts the multibyte character
+ * preceding ps rather than the one starting at ps.
+ *
+ * pwc receives the decoded wide character.  ps points just past the
+ * character to decode.  psz is the upper bound on how many bytes to
+ * the left of ps may be examined; the scan gives up with -1 as soon
+ * as it would need more than psz bytes.
+ *
+ * The start of the character is located by stepping back one byte
+ * and, when utf_mode is set, continuing to step back for as long as
+ * IS_UTF8_TRAIL() matches the byte reached.  When utf_mode is not
+ * set, exactly one byte is handed to mbtowc(3).
+ *
+ * Returns the value returned by mbtowc(3) when the decoded character
+ * ends exactly at ps (length equals the number of bytes stepped back),
+ * or when mbtowc(3) returns 0 for a single byte, which is how it
+ * reports a null character.  Returns -1 if psz is exhausted, if
+ * mbtowc(3) fails, or if the character found is shorter than the
+ * span that was stepped over.
+ *
+ * NOTE(review): sz is a size_t, so ps[-sz] negates an unsigned value
+ * and relies on the resulting index wrapping back to ps - sz; and
+ * len == sz compares an int with a size_t.  Both appear to work as
+ * intended on common platforms - confirm, or consider *(ps - sz).
+ */
+int
+mbtowc_left(wchar_t *pwc, const char *ps, size_t psz)
+{
+ size_t sz = 0;
+ int len;
+
+ do {
+ if (++sz > psz)
+ return -1; /* no candidate start byte within psz bytes */
+ } while (utf_mode && IS_UTF8_TRAIL(ps[-sz]));
+ if ((len = mbtowc(pwc, ps - sz, sz)) == -1) {
+ (void)mbtowc(NULL, NULL, 0); /* NULL string: reset conversion state */
+ return -1;
+ }
+ return len == sz || (len == 0 && sz == 1) ? len : -1;
+}
+
+/*
* Is a given character a "control" character?
*/
static int
diff --git a/usr.bin/less/funcs.h b/usr.bin/less/funcs.h
index 2bfab74475e..5ef595a3bbe 100644
--- a/usr.bin/less/funcs.h
+++ b/usr.bin/less/funcs.h
@@ -55,6 +55,7 @@ void ch_set_eof(void);
void ch_init(int, int);
void ch_close(void);
int ch_getflags(void);
+int mbtowc_left(wchar_t *, const char *, size_t);
void init_charset(void);
char *prchar(LWCHAR);
char *prutfchar(LWCHAR);
diff --git a/usr.bin/less/line.c b/usr.bin/less/line.c
index aa1a2b834fc..123353d2121 100644
--- a/usr.bin/less/line.c
+++ b/usr.bin/less/line.c
@@ -437,44 +437,20 @@ backc(void)
wchar_t ch, prev_ch;
int i, len, width;
- i = curr - 1;
- if (utf_mode) {
- while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
- i--;
- }
- if (i < lmargin)
+ if ((len = mbtowc_left(&ch, linebuf + curr, curr)) <= 0)
return (0);
- if (utf_mode) {
- len = mbtowc(&ch, linebuf + i, curr - i);
- if (len == -1 || i + len < curr) {
- (void)mbtowc(NULL, NULL, MB_CUR_MAX);
- return (0);
- }
- } else
- ch = linebuf[i];
+ curr -= len;
/* This assumes that there is no '\b' in linebuf. */
- while (curr > lmargin && column > lmargin &&
- (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) {
- curr = i--;
- if (utf_mode) {
- while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))
- i--;
- }
- if (i < lmargin)
+ while (curr >= lmargin && column > lmargin &&
+ !(attr[curr] & (AT_ANSI|AT_BINARY))) {
+ if ((len = mbtowc_left(&prev_ch, linebuf + curr, curr)) <= 0)
prev_ch = L'\0';
- else if (utf_mode) {
- len = mbtowc(&prev_ch, linebuf + i, curr - i);
- if (len == -1 || i + len < curr) {
- (void)mbtowc(NULL, NULL, MB_CUR_MAX);
- prev_ch = L'\0';
- }
- } else
- prev_ch = linebuf[i];
width = pwidth(ch, attr[curr], prev_ch);
column -= width;
if (width > 0)
return (1);
+ curr -= len;
if (prev_ch == L'\0')
return (0);
ch = prev_ch;
@@ -554,21 +530,8 @@ store_char(LWCHAR ch, char a, char *rep, off_t pos)
}
if (w == -1) {
wchar_t prev_ch;
-
- if (utf_mode) {
- for (i = curr - 1; i >= 0; i--)
- if (!IS_UTF8_TRAIL(linebuf[i]))
- break;
- if (i >= 0) {
- w = mbtowc(&prev_ch, linebuf + i, curr - i);
- if (w == -1 || i + w < curr) {
- (void)mbtowc(NULL, NULL, MB_CUR_MAX);
- prev_ch = L' ';
- }
- } else
- prev_ch = L' ';
- } else
- prev_ch = curr > 0 ? linebuf[curr - 1] : L' ';
+ if (mbtowc_left(&prev_ch, linebuf + curr, curr) <= 0)
+ prev_ch = L' ';
w = pwidth(ch, a, prev_ch);
}