src - OpenBSD base system

diff options


context:
space:
mode:

author	Ingo Schwarze <schwarze@cvs.openbsd.org>	2019-08-31 13:44:30 +0000
committer	Ingo Schwarze <schwarze@cvs.openbsd.org>	2019-08-31 13:44:30 +0000
commit	c11d303f41f15a1a25dbf1ffcbebf91b86ef9556 (patch)
tree	d76161ac9a43e36024d369d480feff877b1e7cda /usr.bin
parent	98d35f056057ba971085a40278eb7528f52627b0 (diff)

New utility function mbtowc_left() because this functionality - decoding a UTF-8 multibyte character to the left of a given byte - is already needed at three places in line.c and will also be needed for cleanup work in cmdbuf.c in the future. OK millert@

Diffstat (limited to 'usr.bin')

-rw-r--r--

usr.bin/less/charset.c

-rw-r--r--

usr.bin/less/funcs.h

-rw-r--r--

usr.bin/less/line.c

3 files changed, 30 insertions, 45 deletions

diff --git a/usr.bin/less/charset.c b/usr.bin/less/charset.c
index 4be75d678a9..5739fe1b71a 100644
--- a/usr.bin/less/charset.c
+++ b/usr.bin/less/charset.c

@@ -146,6 +146,27 @@ init_charset(void)

}

/*

+ * Like mbtowc(3), except that it converts the multibyte character

+ * preceding ps rather than the one starting at ps.

+ */

+int

+mbtowc_left(wchar_t *pwc, const char *ps, size_t psz)

+{

+ size_t sz = 0;

+ int len;

+

+ do {

+ if (++sz > psz)

+ return -1;

+ } while (utf_mode && IS_UTF8_TRAIL(ps[-sz]));

+ if ((len = mbtowc(pwc, ps - sz, sz)) == -1) {

+ (void)mbtowc(NULL, NULL, 0);

+ return -1;

+ }

+ return len == sz || (len == 0 && sz == 1) ? len : -1;

+}

+

+/*

* Is a given character a "control" character?

*/

static int

diff --git a/usr.bin/less/funcs.h b/usr.bin/less/funcs.h
index 2bfab74475e..5ef595a3bbe 100644
--- a/usr.bin/less/funcs.h
+++ b/usr.bin/less/funcs.h

@@ -55,6 +55,7 @@ void ch_set_eof(void);

void ch_init(int, int);

void ch_close(void);

int ch_getflags(void);

+int mbtowc_left(wchar_t *, const char *, size_t);

void init_charset(void);

char *prchar(LWCHAR);

char *prutfchar(LWCHAR);

diff --git a/usr.bin/less/line.c b/usr.bin/less/line.c
index aa1a2b834fc..123353d2121 100644
--- a/usr.bin/less/line.c
+++ b/usr.bin/less/line.c

@@ -437,44 +437,20 @@ backc(void)

wchar_t ch, prev_ch;

int i, len, width;

- i = curr - 1;

- if (utf_mode) {

- while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))

- i--;

- }

- if (i < lmargin)

+ if ((len = mbtowc_left(&ch, linebuf + curr, curr)) <= 0)

return (0);

- if (utf_mode) {

- len = mbtowc(&ch, linebuf + i, curr - i);

- if (len == -1 || i + len < curr) {

- (void)mbtowc(NULL, NULL, MB_CUR_MAX);

- return (0);

- }

- } else

- ch = linebuf[i];

+ curr -= len;

/* This assumes that there is no '\b' in linebuf. */

- while (curr > lmargin && column > lmargin &&

- (!(attr[curr - 1] & (AT_ANSI|AT_BINARY)))) {

- curr = i--;

- if (utf_mode) {

- while (i >= lmargin && IS_UTF8_TRAIL(linebuf[i]))

- i--;

- }

- if (i < lmargin)

+ while (curr >= lmargin && column > lmargin &&

+ !(attr[curr] & (AT_ANSI|AT_BINARY))) {

+ if ((len = mbtowc_left(&prev_ch, linebuf + curr, curr)) <= 0)

prev_ch = L'\0';

- else if (utf_mode) {

- len = mbtowc(&prev_ch, linebuf + i, curr - i);

- if (len == -1 || i + len < curr) {

- (void)mbtowc(NULL, NULL, MB_CUR_MAX);

- prev_ch = L'\0';

- }

- } else

- prev_ch = linebuf[i];

width = pwidth(ch, attr[curr], prev_ch);

column -= width;

if (width > 0)

return (1);

+ curr -= len;

if (prev_ch == L'\0')

return (0);

ch = prev_ch;

@@ -554,21 +530,8 @@ store_char(LWCHAR ch, char a, char *rep, off_t pos)

}

if (w == -1) {

wchar_t prev_ch;

- if (utf_mode) {

- for (i = curr - 1; i >= 0; i--)

- if (!IS_UTF8_TRAIL(linebuf[i]))

- break;

- if (i >= 0) {

- w = mbtowc(&prev_ch, linebuf + i, curr - i);

- if (w == -1 || i + w < curr) {

- (void)mbtowc(NULL, NULL, MB_CUR_MAX);

- prev_ch = L' ';

- }

- } else

- prev_ch = L' ';

- } else

- prev_ch = curr > 0 ? linebuf[curr - 1] : L' ';

+ if (mbtowc_left(&prev_ch, linebuf + curr, curr) <= 0)

+ prev_ch = L' ';

w = pwidth(ch, a, prev_ch);

}