summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- usr.bin/ul/ul.1 142
-rw-r--r-- usr.bin/ul/ul.c 394
2 files changed, 351 insertions, 185 deletions
diff --git a/usr.bin/ul/ul.1 b/usr.bin/ul/ul.1
index 6c41720ceea..a9cc036c38d 100644
--- a/usr.bin/ul/ul.1
+++ b/usr.bin/ul/ul.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: ul.1,v 1.15 2010/10/29 08:02:05 jmc Exp $
+.\" $OpenBSD: ul.1,v 1.16 2016/01/18 17:34:26 schwarze Exp $
.\" $NetBSD: ul.1,v 1.3 1994/12/07 00:28:23 jtc Exp $
.\"
.\" Copyright (c) 1980, 1991, 1993
@@ -30,7 +30,7 @@
.\"
.\" @(#)ul.1 8.1 (Berkeley) 6/6/93
.\"
-.Dd $Mdocdate: October 29 2010 $
+.Dd $Mdocdate: January 18 2016 $
.Dt UL 1
.Os
.Sh NAME
@@ -44,38 +44,129 @@
.Sh DESCRIPTION
.Nm
reads the named files (or standard input if none are given)
-and translates occurrences of underscores to the sequence
-which indicates underlining for the terminal in use, as specified
+and translates various kinds of in-band markup to forms
+appropriate for the terminal in use, as specified
by the environment variable
-.Ev TERM .
-The file
-.Pa /etc/termcap
-is read to determine the appropriate sequences for underlining.
-If the terminal is incapable of underlining, but is capable of
-a standout mode, then that is used instead.
-If the terminal can overstrike,
-or handles underlining automatically,
+.Ev TERM
+and the
+.Xr terminfo 5
+database.
+In particular,
+.Xr man 1
+.Fl T Cm ascii ,
+.Fl T Cm utf8 ,
+and
+.Fl T Cm locale
+produce output that
+.Nm
+can handle as input.
+.Pp
+The following control characters are handled in the input stream:
+.Bl -tag -width Ds
+.It backspace (ASCII 0x08)
+Reset the output display column to the beginning of the previous
+character, to prepare for overstriking.
+The display width of the previous character does not matter:
+backing up over a double-width character does not require two
+backspace characters.
+However, if a double-width character is followed by two backspace
+characters, the second one is discarded, for compatibility with
+.Xr fold 1 .
+.It tabulator (ASCII 0x09)
+Advance the output display column to the next multiple of 8.
+Tabs are always expanded into blanks.
+.It newline (ASCII 0x0a)
+End the current output line.
+.It carriage return (ASCII 0x0d)
+Reset the output display column to the beginning of the line,
+to prepare for overstriking.
+.It shift out (ASCII 0x0e)
+Switch on reverse video mode.
+.It shift in (ASCII 0x0f)
+Switch off reverse video mode.
+.It escape 7 (ASCII 0x1b 0x37)
+Full reverse line feed.
+.It escape 8 (ASCII 0x1b 0x38)
+Half reverse line feed.
+Sometimes used for superscripts.
+.It escape 9 (ASCII 0x1b 0x39)
+Half forward line feed.
+Sometimes used for subscripts.
+.El
+.Pp
+The following kinds of markup are handled:
+.Bl -tag -width Ds
+.It underline
+Requested by putting an underscore into the same display cell as
+another character, by using backspace or carriage return characters.
+The usual sequence to request an underlined character is "character
+backspace underscore", but "underscore backspace character" works,
+too.
+If the terminal cannot underline, standout mode is tried as a fallback.
+.It boldface
+Requested by putting two copies of the same character into the same
+display cell, by using backspace or carriage return characters.
+The usual sequence to request a boldface character is "character
+backspace character".
+If the terminal does not provide boldface display, reverse video and
+standout mode are tried as fallbacks.
+.It reverse video
+Switched on and off by the shift out and shift in control characters,
+respectively.
+If the terminal does not provide reverse video, standout mode is
+tried as a fallback.
+.El
+.Pp
+If the input text contains markup the terminal cannot handle and
+no working fallback is available, that markup is ignored.
+Non-printable characters and invalid bytes are discarded.
+Unknown escape sequences cause
.Nm
-degenerates to
-.Xr cat 1 .
-If the terminal cannot underline, underlining is ignored.
+to abort with an error message and a non-zero exit code.
.Pp
The options are as follows:
.Bl -tag -width Ds
.It Fl i
-Underlining is indicated by a separate line containing appropriate
-dashes
-.Pq Ql - .
+Markup is not applied.
+Instead, after each output line containing at least one marked-up
+character, an additional line is printed, containing the following
+ASCII codes below each character they apply to:
+.Pp
+.Bl -tag -width 1n -compact
+.It _
+underline
+.It !
+boldface
+.It g
+inverse video
+.It ^
+one half line above the current line (superscript)
+.It v
+one half line below the current line (subscript)
+.It X
+more than one kind of markup
+.El
.It Fl t Ar terminal
Overrides the terminal type specified in the environment with
.Ar terminal .
.El
.Sh ENVIRONMENT
-.Bl -tag -width TERM
+.Bl -tag -width LC_CTYPE
+.It Ev LC_CTYPE
+The character set
+.Xr locale 1 .
+It decides which byte sequences form characters, which characters are
+printable, and how many output display columns each character occupies.
+If set to
+.Qq C ,
+.Qq POSIX ,
+or an unsupported value, each ASCII character except the control
+characters listed above is regarded as a character, and if it is
+printable, of display width 1.
.It Ev TERM
Used to relate a tty device
with its device capability description (see
-.Xr termcap 5 ) .
+.Xr terminfo 5 ) .
.Ev TERM
is set at login time, either by the default terminal type
specified in
@@ -85,10 +176,19 @@ or as set during the login process by the user in their
file (see
.Xr environ 7 ) .
.El
+.Sh EXIT STATUS
+.Ex -std
.Sh SEE ALSO
-.Xr man 1
+.Xr man 1 ,
+.Xr terminfo 5
.Sh HISTORY
The
.Nm
command appeared in
.Bx 3.0 .
+.Sh BUGS
+Half reverse and half forward line feeds only work on few terminals,
+and full reverse line feeds aren't very portable, either.
+.Pp
+If more than one kind of markup is applied to the same character,
+all these markups are ignored and standout mode is used instead.
diff --git a/usr.bin/ul/ul.c b/usr.bin/ul/ul.c
index 6593fdbff0e..847d1013373 100644
--- a/usr.bin/ul/ul.c
+++ b/usr.bin/ul/ul.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ul.c,v 1.19 2015/10/10 16:15:03 deraadt Exp $ */
+/* $OpenBSD: ul.c,v 1.20 2016/01/18 17:34:26 schwarze Exp $ */
/* $NetBSD: ul.c,v 1.3 1994/12/07 00:28:24 jtc Exp $ */
/*
@@ -32,15 +32,18 @@
#include <curses.h>
#include <err.h>
+#include <errno.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <term.h>
#include <unistd.h>
+#include <wchar.h>
-#define IESC '\033'
-#define SO '\016'
-#define SI '\017'
+#define IESC L'\033'
+#define SO L'\016'
+#define SI L'\017'
#define HFWD '9'
#define HREV '8'
#define FREV '7'
@@ -60,7 +63,9 @@ char *CURS_UP, *CURS_RIGHT, *CURS_LEFT,
struct CHAR {
char c_mode;
- char c_char;
+ wchar_t c_char;
+ int c_width;
+ int c_pos;
} ;
struct CHAR obuf[MAXBUF];
@@ -78,7 +83,7 @@ void reverse(void);
void fwd(void);
void flushln(void);
void msetmode(int);
-void outc(int);
+void outc(wchar_t, int);
void overstrike(void);
void iattr(void);
@@ -98,6 +103,8 @@ main(int argc, char *argv[])
FILE *f;
char termcap[1024];
+ setlocale(LC_CTYPE, "");
+
if (pledge("stdio rpath tty", NULL) == -1)
err(1, "pledge");
@@ -154,100 +161,163 @@ main(int argc, char *argv[])
void
mfilter(FILE *f)
{
- int c;
+ struct CHAR *cp;
+ wint_t c;
+ int skip_bs, w, wt;
+
+ col = 1;
+ skip_bs = 0;
+ while (col < MAXBUF) {
+ switch (c = fgetwc(f)) {
+ case WEOF:
+ /* Discard invalid bytes. */
+ if (ferror(f)) {
+ if (errno != EILSEQ)
+ err(1, NULL);
+ clearerr(f);
+ break;
+ }
+
+ /* End of file. */
+ if (maxcol)
+ flushln();
+ return;
- while ((c = getc(f)) != EOF && col < MAXBUF) switch(c) {
- case '\b':
- if (col > 0)
- col--;
- continue;
- case '\t':
- col = (col+8) & ~07;
- if (col > maxcol)
- maxcol = col;
- continue;
- case '\r':
- col = 0;
- continue;
- case SO:
- mode |= ALTSET;
- continue;
- case SI:
- mode &= ~ALTSET;
- continue;
- case IESC:
- switch (c = getc(f)) {
- case HREV:
- if (halfpos == 0) {
- mode |= SUPERSC;
- halfpos--;
- } else if (halfpos > 0) {
- mode &= ~SUBSC;
- halfpos--;
- } else {
- halfpos = 0;
+ case L'\b':
+ /*
+ * Back up one character position, not one
+ * display column, but ignore a second
+ * backspace after a double-width character.
+ */
+ if (skip_bs > 0)
+ skip_bs--;
+ else if (col > 1)
+ if (obuf[--col].c_width > 1)
+ skip_bs = obuf[col].c_width - 1;
+ continue;
+
+ case L'\t':
+ /* Calculate the target position. */
+ wt = (obuf[col - 1].c_pos + 8) & ~7;
+
+ /* Advance past known positions. */
+ while ((w = obuf[col].c_pos) > 0 && w <= wt)
+ col++;
+
+ /* Advance beyond the end. */
+ if (w == 0) {
+ w = obuf[col - 1].c_pos;
+ while (w < wt) {
+ obuf[col].c_width = 1;
+ obuf[col++].c_pos = ++w;
+ }
+ }
+ if (col > maxcol)
+ maxcol = col;
+ break;
+
+ case L'\r':
+ col = 1;
+ break;
+
+ case SO:
+ mode |= ALTSET;
+ break;
+
+ case SI:
+ mode &= ~ALTSET;
+ break;
+
+ case IESC:
+ switch (c = fgetwc(f)) {
+ case HREV:
+ if (halfpos == 0) {
+ mode |= SUPERSC;
+ halfpos--;
+ } else if (halfpos > 0) {
+ mode &= ~SUBSC;
+ halfpos--;
+ } else {
+ halfpos = 0;
+ reverse();
+ }
+ break;
+ case HFWD:
+ if (halfpos == 0) {
+ mode |= SUBSC;
+ halfpos++;
+ } else if (halfpos < 0) {
+ mode &= ~SUPERSC;
+ halfpos++;
+ } else {
+ halfpos = 0;
+ fwd();
+ }
+ break;
+ case FREV:
reverse();
+ break;
+ default:
+ errx(1, "0%o: unknown escape sequence", c);
}
- continue;
- case HFWD:
- if (halfpos == 0) {
- mode |= SUBSC;
- halfpos++;
- } else if (halfpos < 0) {
- mode &= ~SUPERSC;
- halfpos++;
- } else {
- halfpos = 0;
- fwd();
+ break;
+
+ case L'_':
+ if (obuf[col].c_char == L'\0') {
+ obuf[col].c_char = L'_';
+ obuf[col].c_width = 1;
+ } else
+ obuf[col].c_mode |= UNDERL | mode;
+ /* FALLTHROUGH */
+
+ case L' ':
+ if (obuf[col].c_pos == 0) {
+ obuf[col].c_width = 1;
+ obuf[col].c_pos = obuf[col - 1].c_pos + 1;
}
- continue;
- case FREV:
- reverse();
- continue;
+ col++;
+ if (col > maxcol)
+ maxcol = col;
+ break;
+
+ case L'\n':
+ flushln();
+ break;
+
+ case L'\f':
+ flushln();
+ putwchar(L'\f');
+ break;
+
default:
- errx(1, "0%o: unknown escape sequence", c);
- /* NOTREACHED */
- }
- continue;
+ /* Discard valid, but non-printable characters. */
+ if ((w = wcwidth(c)) == -1)
+ break;
- case '_':
- if (obuf[col].c_char)
- obuf[col].c_mode |= UNDERL | mode;
- else
- obuf[col].c_char = '_';
- /* FALLTHROUGH */
- case ' ':
- col++;
- if (col > maxcol)
- maxcol = col;
- continue;
- case '\n':
- flushln();
- continue;
- case '\f':
- flushln();
- putchar('\f');
- continue;
- default:
- if (c < ' ') /* non printing */
- continue;
- if (obuf[col].c_char == '\0') {
- obuf[col].c_char = c;
- obuf[col].c_mode = mode;
- } else if (obuf[col].c_char == '_') {
- obuf[col].c_char = c;
- obuf[col].c_mode |= UNDERL|mode;
- } else if (obuf[col].c_char == c)
- obuf[col].c_mode |= BOLD|mode;
- else
- obuf[col].c_mode = mode;
- col++;
- if (col > maxcol)
- maxcol = col;
- continue;
+ if (obuf[col].c_char == L'\0') {
+ obuf[col].c_char = c;
+ obuf[col].c_mode = mode;
+ obuf[col].c_width = w;
+ obuf[col].c_pos = obuf[col - 1].c_pos + w;
+ } else if (obuf[col].c_char == L'_') {
+ obuf[col].c_char = c;
+ obuf[col].c_mode |= UNDERL|mode;
+ obuf[col].c_width = w;
+ obuf[col].c_pos = obuf[col - 1].c_pos + w;
+ for (cp = obuf + col; cp[1].c_pos > 0; cp++)
+ cp[1].c_pos = cp[0].c_pos +
+ cp[1].c_width;
+ } else if (obuf[col].c_char == c)
+ obuf[col].c_mode |= BOLD|mode;
+ else
+ obuf[col].c_mode = mode;
+ col++;
+ if (col > maxcol)
+ maxcol = col;
+ break;
+ }
+ skip_bs = 0;
}
- if (maxcol)
- flushln();
}
void
@@ -257,26 +327,25 @@ flushln(void)
int hadmodes = 0;
lastmode = NORMAL;
- for (i=0; i < maxcol; i++) {
+ for (i = 1; i < maxcol; i++) {
if (obuf[i].c_mode != lastmode) {
- hadmodes++;
+ hadmodes = 1;
msetmode(obuf[i].c_mode);
lastmode = obuf[i].c_mode;
}
- if (obuf[i].c_char == '\0') {
+ if (obuf[i].c_char == L'\0') {
if (upln)
PRINT(CURS_RIGHT);
else
- outc(' ');
+ outc(L' ', 1);
} else
- outc(obuf[i].c_char);
+ outc(obuf[i].c_char, obuf[i].c_width);
}
- if (lastmode != NORMAL) {
+ if (lastmode != NORMAL)
msetmode(0);
- }
if (must_overstrike && hadmodes)
overstrike();
- putchar('\n');
+ putwchar(L'\n');
if (iflag && hadmodes)
iattr();
(void)fflush(stdout);
@@ -292,80 +361,74 @@ flushln(void)
void
overstrike(void)
{
- int i;
- char *buf, *cp;
- int hadbold = 0;
+ wchar_t wc;
+ int i, j, needspace;
+
+ putwchar(L'\r');
+ needspace = 0;
+ for (i = 1; i < maxcol; i++) {
+ if (obuf[i].c_mode != UNDERL && obuf[i].c_mode != BOLD) {
+ needspace += obuf[i].c_width;
+ continue;
+ }
+ while (needspace > 0) {
+ putwchar(L' ');
+ needspace--;
+ }
+ if (obuf[i].c_mode == BOLD)
+ putwchar(obuf[i].c_char);
+ else
+ for (j = 0; j < obuf[i].c_width; j++)
+ putwchar(L'_');
+ }
+}
- if ((buf = malloc(maxcol + 1)) == NULL)
- err(1, NULL);
- cp = buf;
+void
+iattr(void)
+{
+ int i, j, needspace;
+ char c;
- /* Set up overstrike buffer */
- for (i = 0; i < maxcol; i++)
+ needspace = 0;
+ for (i = 1; i < maxcol; i++) {
switch (obuf[i].c_mode) {
case NORMAL:
- default:
- *cp++ = ' ';
+ needspace += obuf[i].c_width;
+ continue;
+ case ALTSET:
+ c = 'g';
+ break;
+ case SUPERSC:
+ c = '^';
+ break;
+ case SUBSC:
+ c = 'v';
break;
case UNDERL:
- *cp++ = '_';
+ c = '_';
break;
case BOLD:
- *cp++ = obuf[i].c_char;
- hadbold=1;
+ c = '!';
+ break;
+ default:
+ c = 'X';
break;
}
- putchar('\r');
- while (cp > buf && *(cp - 1) == ' ')
- cp--;
- *cp = '\0';
- for (cp = buf; *cp != '\0'; cp++)
- putchar(*cp);
- if (hadbold) {
- putchar('\r');
- for (cp = buf; *cp != '\0'; cp++)
- putchar(*cp=='_' ? ' ' : *cp);
- putchar('\r');
- for (cp = buf; *cp != '\0'; cp++)
- putchar(*cp=='_' ? ' ' : *cp);
- }
- free(buf);
-}
-
-void
-iattr(void)
-{
- int i;
- char *buf, *cp;
-
- if ((buf = malloc(maxcol + 1)) == NULL)
- err(1, NULL);
- cp = buf;
-
- for (i=0; i < maxcol; i++)
- switch (obuf[i].c_mode) {
- case NORMAL: *cp++ = ' '; break;
- case ALTSET: *cp++ = 'g'; break;
- case SUPERSC: *cp++ = '^'; break;
- case SUBSC: *cp++ = 'v'; break;
- case UNDERL: *cp++ = '_'; break;
- case BOLD: *cp++ = '!'; break;
- default: *cp++ = 'X'; break;
+ while (needspace > 0) {
+ putwchar(L' ');
+ needspace--;
}
- while (cp > buf && *(cp - 1) == ' ')
- cp--;
- *cp = '\0';
- for (cp = buf; *cp != '\0'; cp++)
- putchar(*cp);
- free(buf);
- putchar('\n');
+ for (j = 0; j < obuf[i].c_width; j++)
+ putwchar(c);
+ }
+ putwchar(L'\n');
}
void
initbuf(void)
{
bzero(obuf, sizeof (obuf)); /* depends on NORMAL == 0 */
- col = 0;
+ col = 1;
maxcol = 0;
mode &= ALTSET;
}
@@ -448,19 +511,22 @@ initcap(void)
int
outchar(int c)
{
- putchar(c & 0177);
- return (0);
+ return (putwchar(c) != WEOF ? c : EOF);
}
static int curmode = 0;
void
-outc(int c)
+outc(wchar_t c, int width)
{
- putchar(c);
+ int i;
+
+ putwchar(c);
if (must_use_uc && (curmode&UNDERL)) {
- PRINT(CURS_LEFT);
- PRINT(UNDER_CHAR);
+ for (i = 0; i < width; i++)
+ PRINT(CURS_LEFT);
+ for (i = 0; i < width; i++)
+ PRINT(UNDER_CHAR);
}
}