diff options
author | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2019-05-07 14:26:39 +0000 |
---|---|---|
committer | Ingo Schwarze <schwarze@cvs.openbsd.org> | 2019-05-07 14:26:39 +0000 |
commit | 64558816947626bf9651ddf70ab29f0876ad0231 (patch) | |
tree | ac76036fefaddd780fd0372cb9cfac72034e6a74 /usr.bin | |
parent | 52156f96f22bd308ecb1ff974e3b34b02f682223 (diff) |
Completely clean up UTF-8 handling in the file filename.c:
Employ the usual form of an mbtowc(3) loop, eliminating two calls
to the bad function step_char() and reducing the number of nested
loops by one. This also removes the last caller of the bad function
binary_char(), which is consequently deleted.
While here, count ASCII C0 non-whitespace control characters as
binary (except backspace and, with -R only, escape).
OK millert@
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/less/charset.c | 12 | ||||
-rw-r--r-- | usr.bin/less/filename.c | 24 | ||||
-rw-r--r-- | usr.bin/less/funcs.h | 1 |
3 files changed, 12 insertions, 25 deletions
```diff
diff --git a/usr.bin/less/charset.c b/usr.bin/less/charset.c
index 4c8db50d553..363d1a80cba 100644
--- a/usr.bin/less/charset.c
+++ b/usr.bin/less/charset.c
@@ -146,18 +146,6 @@ init_charset(void)
 }
 
 /*
- * Is a given character a "binary" character?
- */
-int
-binary_char(LWCHAR c)
-{
-	if (utf_mode)
-		return (is_ubin_char(c));
-	c &= 0377;
-	return (!isprint((unsigned char)c) && !iscntrl((unsigned char)c));
-}
-
-/*
  * Is a given character a "control" character?
  */
 int
diff --git a/usr.bin/less/filename.c b/usr.bin/less/filename.c
index 2e480e6445d..68c37906498 100644
--- a/usr.bin/less/filename.c
+++ b/usr.bin/less/filename.c
@@ -334,25 +334,25 @@ fcomplete(char *s)
 int
 bin_file(int f)
 {
-	int n;
-	int bin_count = 0;
 	char data[256];
-	char *p;
-	char *pend;
+	ssize_t i, n;
+	wchar_t ch;
+	int bin_count, len;
 
 	if (!seekable(f))
 		return (0);
 	if (lseek(f, (off_t)0, SEEK_SET) == (off_t)-1)
 		return (0);
 	n = read(f, data, sizeof (data));
-	pend = &data[n];
-	for (p = data; p < pend; ) {
-		LWCHAR c = step_char(&p, +1, pend);
-		if (ctldisp == OPT_ONPLUS && c == ESC) {
-			do {
-				c = step_char(&p, +1, pend);
-			} while (p < pend && is_ansi_middle(c));
-		} else if (binary_char(c))
+	bin_count = 0;
+	for (i = 0; i < n; i += len) {
+		len = mbtowc(&ch, data + i, n - i);
+		if (len <= 0) {
+			bin_count++;
+			len = 1;
+		} else if (iswprint(ch) == 0 && iswspace(ch) == 0 &&
+		    data[i] != '\b' &&
+		    (ctldisp != OPT_ONPLUS || data[i] != ESC))
 			bin_count++;
 	}
 	/*
diff --git a/usr.bin/less/funcs.h b/usr.bin/less/funcs.h
index abf596f2a45..5c24ba89e96 100644
--- a/usr.bin/less/funcs.h
+++ b/usr.bin/less/funcs.h
@@ -56,7 +56,6 @@ void ch_init(int, int);
 void ch_close(void);
 int ch_getflags(void);
 void init_charset(void);
-int binary_char(LWCHAR);
 int control_char(LWCHAR);
 char *prchar(LWCHAR);
 char *prutfchar(LWCHAR);
```