From b294a54bcbed199e16dad0c11ec5aee728bd1deb Mon Sep 17 00:00:00 2001
From: Ingo Schwarze
Date: Thu, 7 Jan 2016 18:02:44 +0000
Subject: UTF-8 support for fmt -c.

This implies two small changes in behaviour:

1. Let fmt -c replace invalid bytes with ASCII question marks
just like when called without -c.

2. On lines to be centered, replace each tab with a single blank,
simply because there is no useful way to define the meaning of a
tab on such a line. Having the width of a tab depend on what is
to the right of it would be completely crazy (and complicate the
code a lot), and otherwise, tabs on adjacent lines of different
length wouldn't align anyway.

OK millert@
---
 usr.bin/fmt/fmt.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'usr.bin')

diff --git a/usr.bin/fmt/fmt.c b/usr.bin/fmt/fmt.c
index 21b666bc4bf..a29accde429 100644
--- a/usr.bin/fmt/fmt.c
+++ b/usr.bin/fmt/fmt.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: fmt.c,v 1.35 2015/12/31 16:10:31 millert Exp $ */
+/* $OpenBSD: fmt.c,v 1.36 2016/01/07 18:02:43 schwarze Exp $ */
 
 /* Sensible version of fmt
  *
@@ -619,13 +619,29 @@ output_word(size_t indent0, size_t indent1, const char *word,
 static void
 center_stream(FILE *stream, const char *name)
 {
-	char *line;
-	size_t l;
+	char *line, *cp;
+	wchar_t wc;
+	size_t l; /* Display width of the line. */
+	int wcw; /* Display width of one character. */
+	int wcl; /* Length in bytes of one character. */
 
 	while ((line = get_line(stream)) != NULL) {
-		while (isspace((unsigned char)*line))
-			++line;
-		l = strlen(line);
+		l = 0;
+		for (cp = line; *cp != '\0'; cp += wcl) {
+			if (*cp == '\t')
+				*cp = ' ';
+			if ((wcl = mbtowc(&wc, cp, MB_CUR_MAX)) == -1) {
+				(void)mbtowc(NULL, NULL, MB_CUR_MAX);
+				*cp = '?';
+				wcl = 1;
+				wcw = 1;
+			} else if ((wcw = wcwidth(wc)) == -1)
+				wcw = 1;
+			if (l == 0 && iswspace(wc))
+				line += wcl;
+			else
+				l += wcw;
+		}
 		while (l < goal_length) {
 			putchar(' ');
 			l += 2;
-- 
cgit v1.2.3