diff options
-rw-r--r-- | usr.bin/ul/ul.1 | 142 | ||||
-rw-r--r-- | usr.bin/ul/ul.c | 394 |
2 files changed, 351 insertions, 185 deletions
diff --git a/usr.bin/ul/ul.1 b/usr.bin/ul/ul.1 index 6c41720ceea..a9cc036c38d 100644 --- a/usr.bin/ul/ul.1 +++ b/usr.bin/ul/ul.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: ul.1,v 1.15 2010/10/29 08:02:05 jmc Exp $ +.\" $OpenBSD: ul.1,v 1.16 2016/01/18 17:34:26 schwarze Exp $ .\" $NetBSD: ul.1,v 1.3 1994/12/07 00:28:23 jtc Exp $ .\" .\" Copyright (c) 1980, 1991, 1993 @@ -30,7 +30,7 @@ .\" .\" @(#)ul.1 8.1 (Berkeley) 6/6/93 .\" -.Dd $Mdocdate: October 29 2010 $ +.Dd $Mdocdate: January 18 2016 $ .Dt UL 1 .Os .Sh NAME @@ -44,38 +44,129 @@ .Sh DESCRIPTION .Nm reads the named files (or standard input if none are given) -and translates occurrences of underscores to the sequence -which indicates underlining for the terminal in use, as specified +and translates various kinds of in-band markup to forms +appropriate for the terminal in use, as specified by the environment variable -.Ev TERM . -The file -.Pa /etc/termcap -is read to determine the appropriate sequences for underlining. -If the terminal is incapable of underlining, but is capable of -a standout mode, then that is used instead. -If the terminal can overstrike, -or handles underlining automatically, +.Ev TERM +and the +.Xr terminfo 5 +database. +In particular, +.Xr man 1 +.Fl T Cm ascii , +.Fl T Cm utf8 , +and +.Fl T Cm locale +produce output that +.Nm +can handle as input. +.Pp +The following control characters are handled in the input stream: +.Bl -tag -width Ds +.It backspace (ASCII 0x08) +Reset the output display column to the beginning of the previous +character, to prepare for overstriking. +The display width of the previous character does not matter: +backing up over a double-width character does not require two +backspace characters. +However, if a double-width character is followed by two backspace +characters, the second one is discarded, for compatibility with +.Xr fold 1 . +.It tabulator (ASCII 0x09) +Advance the output display column to the next multiple of 8. +Tabs are always expanded into blanks. +.It newline (ASCII 0x0a) +End the current output line. +.It carriage return (ASCII 0x0d) +Reset the output display column to the beginning of the line, +to prepare for overstriking. +.It shift out (ASCII 0x0e) +Switch on reverse video mode. +.It shift in (ASCII 0x0f) +Switch off reverse video mode. +.It escape 7 (ASCII 0x1b 0x37) +Full reverse line feed. +.It escape 8 (ASCII 0x1b 0x38) +Half reverse line feed. +Sometimes used for superscripts. +.It escape 9 (ASCII 0x1b 0x39) +Half forward line feed. +Sometimes used for subscripts. +.El +.Pp +The following kinds of markup are handled: +.Bl -tag -width Ds +.It underline +Requested by putting an underscore into the same display cell as +another character, by using backspace or carriage return characters. +The usual sequence to request an underlined character is "character +backspace underscore", but "underscore backspace character" works, +too. +If the terminal cannot underline, standout mode is tried as a fallback. +.It boldface +Requested by putting two copies of the same character into the same +display cell, by using backspace or carriage return characters. +The usual sequence to request a boldface character is "character +backspace character". +If the terminal does not provide boldface display, reverse video and +standout mode are tried as fallbacks. +.It reverse video +Switched on and off by the shift out and shift in control characters, +respectively. +If the terminal does not provide reverse video, standout mode is +tried as a fallback. +.El +.Pp +If the input text contains markup the terminal cannot handle and +no working fallback is available, that markup is ignored. +Non-printable characters and invalid bytes are discarded. +Unknown escape sequences cause .Nm -degenerates to -.Xr cat 1 . -If the terminal cannot underline, underlining is ignored. +to abort with an error message and a non-zero exit code. .Pp The options are as follows: .Bl -tag -width Ds .It Fl i -Underlining is indicated by a separate line containing appropriate -dashes -.Pq Ql - . +Markup is not applied. +Instead, after each output line containing at least one marked-up +character, an additional line is printed, containing the following +ASCII codes below each character they apply to: +.Pp +.Bl -tag -width 1n -compact +.It _ +underline +.It ! +boldface +.It g +inverse video +.It ^ +one half line above the current line (superscript) +.It v +one half line below the current line (subscript) +.It X +more than one kind of markup +.El .It Fl t Ar terminal Overrides the terminal type specified in the environment with .Ar terminal . .El .Sh ENVIRONMENT -.Bl -tag -width TERM +.Bl -tag -width LC_CTYPE +.It Ev LC_CTYPE +The character set +.Xr locale 1 . +It decides which byte sequences form characters, which characters are +printable, and how many output display columns each character occupies. +If set to +.Qq C , +.Qq POSIX , +or an unsupported value, each ASCII character except the control +characters listed above is regarded as a character, and if it is +printable, of display width 1. .It Ev TERM Used to relate a tty device with its device capability description (see -.Xr termcap 5 ) . +.Xr terminfo 5 ) . .Ev TERM is set at login time, either by the default terminal type specified in @@ -85,10 +176,19 @@ or as set during the login process by the user in their file (see .Xr environ 7 ) . .El +.Sh EXIT STATUS +.Ex -std .Sh SEE ALSO -.Xr man 1 +.Xr man 1 , +.Xr terminfo 5 .Sh HISTORY The .Nm command appeared in .Bx 3.0 . +.Sh BUGS +Half reverse and half forward line feeds only work on few terminals, +and full reverse line feeds aren't very portable, either. +.Pp +If more than one kind of markup is applied to the same character, +all these markups are ignored and standout mode is used instead. diff --git a/usr.bin/ul/ul.c b/usr.bin/ul/ul.c index 6593fdbff0e..847d1013373 100644 --- a/usr.bin/ul/ul.c +++ b/usr.bin/ul/ul.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ul.c,v 1.19 2015/10/10 16:15:03 deraadt Exp $ */ +/* $OpenBSD: ul.c,v 1.20 2016/01/18 17:34:26 schwarze Exp $ */ /* $NetBSD: ul.c,v 1.3 1994/12/07 00:28:24 jtc Exp $ */ /* @@ -32,15 +32,18 @@ #include <curses.h> #include <err.h> +#include <errno.h> +#include <locale.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <term.h> #include <unistd.h> +#include <wchar.h> -#define IESC '\033' -#define SO '\016' -#define SI '\017' +#define IESC L'\033' +#define SO L'\016' +#define SI L'\017' #define HFWD '9' #define HREV '8' #define FREV '7' @@ -60,7 +63,9 @@ char *CURS_UP, *CURS_RIGHT, *CURS_LEFT, struct CHAR { char c_mode; - char c_char; + wchar_t c_char; + int c_width; + int c_pos; } ; struct CHAR obuf[MAXBUF]; @@ -78,7 +83,7 @@ void reverse(void); void fwd(void); void flushln(void); void msetmode(int); -void outc(int); +void outc(wchar_t, int); void overstrike(void); void iattr(void); @@ -98,6 +103,8 @@ main(int argc, char *argv[]) FILE *f; char termcap[1024]; + setlocale(LC_CTYPE, ""); + if (pledge("stdio rpath tty", NULL) == -1) err(1, "pledge"); @@ -154,100 +161,163 @@ main(int argc, char *argv[]) void mfilter(FILE *f) { - int c; + struct CHAR *cp; + wint_t c; + int skip_bs, w, wt; + + col = 1; + skip_bs = 0; + while (col < MAXBUF) { + switch (c = fgetwc(f)) { + case WEOF: + /* Discard invalid bytes. */ + if (ferror(f)) { + if (errno != EILSEQ) + err(1, NULL); + clearerr(f); + break; + } + + /* End of file. */ + if (maxcol) + flushln(); + return; - while ((c = getc(f)) != EOF && col < MAXBUF) switch(c) { - case '\b': - if (col > 0) - col--; - continue; - case '\t': - col = (col+8) & ~07; - if (col > maxcol) - maxcol = col; - continue; - case '\r': - col = 0; - continue; - case SO: - mode |= ALTSET; - continue; - case SI: - mode &= ~ALTSET; - continue; - case IESC: - switch (c = getc(f)) { - case HREV: - if (halfpos == 0) { - mode |= SUPERSC; - halfpos--; - } else if (halfpos > 0) { - mode &= ~SUBSC; - halfpos--; - } else { - halfpos = 0; + case L'\b': + /* + * Back up one character position, not one + * display column, but ignore a second + * backspace after a double-width character. + */ + if (skip_bs > 0) + skip_bs--; + else if (col > 1) + if (obuf[--col].c_width > 1) + skip_bs = obuf[col].c_width - 1; + continue; + + case L'\t': + /* Calculate the target position. */ + wt = (obuf[col - 1].c_pos + 8) & ~7; + + /* Advance past known positions. */ + while ((w = obuf[col].c_pos) > 0 && w <= wt) + col++; + + /* Advance beyond the end. */ + if (w == 0) { + w = obuf[col - 1].c_pos; + while (w < wt) { + obuf[col].c_width = 1; + obuf[col++].c_pos = ++w; + } + } + if (col > maxcol) + maxcol = col; + break; + + case L'\r': + col = 1; + break; + + case SO: + mode |= ALTSET; + break; + + case SI: + mode &= ~ALTSET; + break; + + case IESC: + switch (c = fgetwc(f)) { + case HREV: + if (halfpos == 0) { + mode |= SUPERSC; + halfpos--; + } else if (halfpos > 0) { + mode &= ~SUBSC; + halfpos--; + } else { + halfpos = 0; + reverse(); + } + break; + case HFWD: + if (halfpos == 0) { + mode |= SUBSC; + halfpos++; + } else if (halfpos < 0) { + mode &= ~SUPERSC; + halfpos++; + } else { + halfpos = 0; + fwd(); + } + break; + case FREV: reverse(); + break; + default: + errx(1, "0%o: unknown escape sequence", c); } - continue; - case HFWD: - if (halfpos == 0) { - mode |= SUBSC; - halfpos++; - } else if (halfpos < 0) { - mode &= ~SUPERSC; - halfpos++; - } else { - halfpos = 0; - fwd(); + break; + + case L'_': + if (obuf[col].c_char == L'\0') { + obuf[col].c_char = L'_'; + obuf[col].c_width = 1; + } else + obuf[col].c_mode |= UNDERL | mode; + /* FALLTHROUGH */ + + case L' ': + if (obuf[col].c_pos == 0) { + obuf[col].c_width = 1; + obuf[col].c_pos = obuf[col - 1].c_pos + 1; } - continue; - case FREV: - reverse(); - continue; + col++; + if (col > maxcol) + maxcol = col; + break; + + case L'\n': + flushln(); + break; + + case L'\f': + flushln(); + putwchar(L'\f'); + break; + default: - errx(1, "0%o: unknown escape sequence", c); - /* NOTREACHED */ - } - continue; + /* Discard valid, but non-printable characters. */ + if ((w = wcwidth(c)) == -1) + break; - case '_': - if (obuf[col].c_char) - obuf[col].c_mode |= UNDERL | mode; - else - obuf[col].c_char = '_'; - /* FALLTHROUGH */ - case ' ': - col++; - if (col > maxcol) - maxcol = col; - continue; - case '\n': - flushln(); - continue; - case '\f': - flushln(); - putchar('\f'); - continue; - default: - if (c < ' ') /* non printing */ - continue; - if (obuf[col].c_char == '\0') { - obuf[col].c_char = c; - obuf[col].c_mode = mode; - } else if (obuf[col].c_char == '_') { - obuf[col].c_char = c; - obuf[col].c_mode |= UNDERL|mode; - } else if (obuf[col].c_char == c) - obuf[col].c_mode |= BOLD|mode; - else - obuf[col].c_mode = mode; - col++; - if (col > maxcol) - maxcol = col; - continue; + if (obuf[col].c_char == L'\0') { + obuf[col].c_char = c; + obuf[col].c_mode = mode; + obuf[col].c_width = w; + obuf[col].c_pos = obuf[col - 1].c_pos + w; + } else if (obuf[col].c_char == L'_') { + obuf[col].c_char = c; + obuf[col].c_mode |= UNDERL|mode; + obuf[col].c_width = w; + obuf[col].c_pos = obuf[col - 1].c_pos + w; + for (cp = obuf + col; cp[1].c_pos > 0; cp++) + cp[1].c_pos = cp[0].c_pos + + cp[1].c_width; + } else if (obuf[col].c_char == c) + obuf[col].c_mode |= BOLD|mode; + else + obuf[col].c_mode = mode; + col++; + if (col > maxcol) + maxcol = col; + break; + } + skip_bs = 0; } - if (maxcol) - flushln(); } void @@ -257,26 +327,25 @@ flushln(void) int hadmodes = 0; lastmode = NORMAL; - for (i=0; i < maxcol; i++) { + for (i = 1; i < maxcol; i++) { if (obuf[i].c_mode != lastmode) { - hadmodes++; + hadmodes = 1; msetmode(obuf[i].c_mode); lastmode = obuf[i].c_mode; } - if (obuf[i].c_char == '\0') { + if (obuf[i].c_char == L'\0') { if (upln) PRINT(CURS_RIGHT); else - outc(' '); + outc(L' ', 1); } else - outc(obuf[i].c_char); + outc(obuf[i].c_char, obuf[i].c_width); } - if (lastmode != NORMAL) { + if (lastmode != NORMAL) msetmode(0); - } if (must_overstrike && hadmodes) overstrike(); - putchar('\n'); + putwchar(L'\n'); if (iflag && hadmodes) iattr(); (void)fflush(stdout); @@ -292,80 +361,74 @@ flushln(void) void overstrike(void) { - int i; - char *buf, *cp; - int hadbold = 0; + wchar_t wc; + int i, j, needspace; + + putwchar(L'\r'); + needspace = 0; + for (i = 1; i < maxcol; i++) { + if (obuf[i].c_mode != UNDERL && obuf[i].c_mode != BOLD) { + needspace += obuf[i].c_width; + continue; + } + while (needspace > 0) { + putwchar(L' '); + needspace--; + } + if (obuf[i].c_mode == BOLD) + putwchar(obuf[i].c_char); + else + for (j = 0; j < obuf[i].c_width; j++) + putwchar(L'_'); + } +} - if ((buf = malloc(maxcol + 1)) == NULL) - err(1, NULL); - cp = buf; +void +iattr(void) +{ + int i, j, needspace; + char c; - /* Set up overstrike buffer */ - for (i = 0; i < maxcol; i++) + needspace = 0; + for (i = 1; i < maxcol; i++) { switch (obuf[i].c_mode) { case NORMAL: - default: - *cp++ = ' '; + needspace += obuf[i].c_width; + continue; + case ALTSET: + c = 'g'; + break; + case SUPERSC: + c = '^'; + break; + case SUBSC: + c = 'v'; break; case UNDERL: - *cp++ = '_'; + c = '_'; break; case BOLD: - *cp++ = obuf[i].c_char; - hadbold=1; + c = '!'; + break; + default: + c = 'X'; break; } - putchar('\r'); - while (cp > buf && *(cp - 1) == ' ') - cp--; - *cp = '\0'; - for (cp = buf; *cp != '\0'; cp++) - putchar(*cp); - if (hadbold) { - putchar('\r'); - for (cp = buf; *cp != '\0'; cp++) - putchar(*cp=='_' ? ' ' : *cp); - putchar('\r'); - for (cp = buf; *cp != '\0'; cp++) - putchar(*cp=='_' ? ' ' : *cp); - } - free(buf); -} - -void -iattr(void) -{ - int i; - char *buf, *cp; - - if ((buf = malloc(maxcol + 1)) == NULL) - err(1, NULL); - cp = buf; - - for (i=0; i < maxcol; i++) - switch (obuf[i].c_mode) { - case NORMAL: *cp++ = ' '; break; - case ALTSET: *cp++ = 'g'; break; - case SUPERSC: *cp++ = '^'; break; - case SUBSC: *cp++ = 'v'; break; - case UNDERL: *cp++ = '_'; break; - case BOLD: *cp++ = '!'; break; - default: *cp++ = 'X'; break; + while (needspace > 0) { + putwchar(L' '); + needspace--; } - while (cp > buf && *(cp - 1) == ' ') - cp--; - *cp = '\0'; - for (cp = buf; *cp != '\0'; cp++) - putchar(*cp); - free(buf); - putchar('\n'); + for (j = 0; j < obuf[i].c_width; j++) + putwchar(c); + } + putwchar(L'\n'); } void initbuf(void) { bzero(obuf, sizeof (obuf)); /* depends on NORMAL == 0 */ - col = 0; + col = 1; maxcol = 0; mode &= ALTSET; } @@ -448,19 +511,22 @@ initcap(void) int outchar(int c) { - putchar(c & 0177); - return (0); + return (putwchar(c) != WEOF ? c : EOF); } static int curmode = 0; void -outc(int c) +outc(wchar_t c, int width) { - putchar(c); + int i; + + putwchar(c); if (must_use_uc && (curmode&UNDERL)) { - PRINT(CURS_LEFT); - PRINT(UNDER_CHAR); + for (i = 0; i < width; i++) + PRINT(CURS_LEFT); + for (i = 0; i < width; i++) + PRINT(UNDER_CHAR); } } |