summary refs log tree commit diff
path: root/usr.bin/lam
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@cvs.openbsd.org> 2018-07-29 11:27:16 +0000
committer Ingo Schwarze <schwarze@cvs.openbsd.org> 2018-07-29 11:27:16 +0000
commit 8ce9e984cde9dca0dcc314238f944ac7e83aa363 (patch)
tree 9583a42f2626eecfbd08840d34c13ad767850876 /usr.bin/lam
parent c4b6915160cfb12c0cbc2bec189375d2b6c1f633 (diff)
UTF-8 support: use wcwidth(3) when calculating column widths;
written during g2k18; no objection when shown on tech@
Diffstat (limited to 'usr.bin/lam')
-rw-r--r-- usr.bin/lam/Makefile 3
-rw-r--r-- usr.bin/lam/lam.1 24
-rw-r--r-- usr.bin/lam/lam.c 83
-rw-r--r-- usr.bin/lam/utf8.c 47
4 files changed, 126 insertions, 31 deletions
diff --git a/usr.bin/lam/Makefile b/usr.bin/lam/Makefile
index 956087cd7f2..74a2ab00bfa 100644
--- a/usr.bin/lam/Makefile
+++ b/usr.bin/lam/Makefile
@@ -1,5 +1,6 @@
-# $OpenBSD: Makefile,v 1.3 1997/09/21 11:49:24 deraadt Exp $
+# $OpenBSD: Makefile,v 1.4 2018/07/29 11:27:14 schwarze Exp $
PROG= lam
+SRCS= lam.c utf8.c
.include <bsd.prog.mk>
diff --git a/usr.bin/lam/lam.1 b/usr.bin/lam/lam.1
index fd9d5d03934..bbdeb1e10b4 100644
--- a/usr.bin/lam/lam.1
+++ b/usr.bin/lam/lam.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: lam.1,v 1.9 2016/01/04 23:21:28 schwarze Exp $
+.\" $OpenBSD: lam.1,v 1.10 2018/07/29 11:27:14 schwarze Exp $
.\" $NetBSD: lam.1,v 1.4 2002/02/08 01:36:25 ross Exp $
.\"
.\" Copyright (c) 1993
@@ -30,7 +30,7 @@
.\"
.\" @(#)lam.1 8.1 (Berkeley) 6/6/93
.\"
-.Dd $Mdocdate: January 4 2016 $
+.Dd $Mdocdate: July 29 2018 $
.Dt LAM 1
.Os
.Sh NAME
@@ -74,8 +74,8 @@ is the minimum field width and
the maximum field width.
If
.Ar min
-begins with a zero, zeros will be added to make up the field width,
-and if it begins with a
+begins with a zero, zeros will be prepended to make up the field width
+instead of blanks, and if it begins with a
.Sq \&- ,
the fragment will be left-adjusted
within the field.
@@ -98,6 +98,22 @@ The newline normally appended to each output line is omitted.
.Pp
To print files simultaneously for easy viewing use
.Xr pr 1 .
+.Sh ENVIRONMENT
+.Bl -tag -width LC_CTYPE
+.It Ev LC_CTYPE
+The character encoding
+.Xr locale 1 .
+It determines the display widths of characters used by the
+.Fl f
+and
+.Fl p
+options.
+If unset or set to
+.Qq C ,
+.Qq POSIX ,
+or an unsupported value, each byte is regarded as a character
+of display width 1.
+.El
.Sh EXAMPLES
Join four files together along each line:
.Pp
diff --git a/usr.bin/lam/lam.c b/usr.bin/lam/lam.c
index 1e169eff265..9b84b17924f 100644
--- a/usr.bin/lam/lam.c
+++ b/usr.bin/lam/lam.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lam.c,v 1.21 2018/07/11 11:42:17 schwarze Exp $ */
+/* $OpenBSD: lam.c,v 1.22 2018/07/29 11:27:14 schwarze Exp $ */
/* $NetBSD: lam.c,v 1.2 1994/11/14 20:27:42 jtc Exp $ */
/*-
@@ -39,6 +39,7 @@
#include <ctype.h>
#include <err.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -48,11 +49,13 @@
struct openfile { /* open file structure */
FILE *fp; /* file pointer */
+ int minwidth; /* pad this column to this width */
+ int maxwidth; /* truncate this column */
short eof; /* eof flag */
short pad; /* pad flag for missing columns */
char eol; /* end of line character */
+ char align; /* '0' for zero fill, '-' for left align */
char *sepstring; /* string to print before each line */
- char *format; /* printf(3) style string spec. */
} input[NOFILE_MAX + 1]; /* last one is for the last -s arg. */
#define INPUTSIZE sizeof(input) / sizeof(*input)
@@ -61,6 +64,8 @@ int nofinalnl; /* normally append \n to each output line */
char line[BIGBUFSIZ];
char *linep;
+int mbswidth_truncate(char *, int); /* utf8.c */
+
void usage(void);
char *gatherline(struct openfile *);
void getargs(int, char *[]);
@@ -71,6 +76,8 @@ main(int argc, char *argv[])
{
int i;
+ setlocale(LC_CTYPE, "");
+
if (pledge("stdio rpath", NULL) == -1)
err(1, "pledge");
@@ -106,9 +113,9 @@ void
getargs(int argc, char *argv[])
{
struct openfile *ip = input;
- char *p;
+ const char *errstr;
+ char *p, *q;
int ch, P, S, F, T;
- size_t siz;
P = S = F = T = 0; /* capitalized options */
while (optind < argc) {
@@ -120,17 +127,28 @@ getargs(int argc, char *argv[])
case 'F': case 'f':
F = (ch == 'F');
/* Validate format string argument. */
- for (p = optarg; *p != '\0'; p++)
- if (!isdigit((unsigned char)*p) &&
- *p != '.' && *p != '-')
- errx(1, "%s: invalid width specified",
- optarg);
- /* '%' + width + 's' + '\0' */
- siz = p - optarg + 3;
- if ((p = realloc(ip->format, siz)) == NULL)
- err(1, NULL);
- snprintf(p, siz, "%%%ss", optarg);
- ip->format = p;
+ p = optarg;
+ if (*p == '0' || *p == '-')
+ ip->align = *p++;
+ else
+ ip->align = ' ';
+ if ((q = strchr(p, '.')) != NULL)
+ *q++ = '\0';
+ if (*p != '\0') {
+ ip->minwidth = strtonum(p, 1, INT_MAX,
+ &errstr);
+ if (errstr != NULL)
+ errx(1, "minimum width is %s: %s",
+ errstr, p);
+ }
+ if (q != NULL) {
+ ip->maxwidth = strtonum(q, 1, INT_MAX,
+ &errstr);
+ if (errstr != NULL)
+ errx(1, "maximum width is %s: %s",
+ errstr, q);
+ } else
+ ip->maxwidth = INT_MAX;
break;
case 'S': case 's':
S = (ch == 'S');
@@ -157,10 +175,16 @@ getargs(int argc, char *argv[])
ip->pad = P;
if (ip->sepstring == NULL)
ip->sepstring = S ? (ip-1)->sepstring : "";
- if (ip->format == NULL)
- ip->format = (P || F) ? (ip-1)->format : "%s";
if (ip->eol == '\0')
ip->eol = T ? (ip-1)->eol : '\n';
+ if (ip->align == '\0') {
+ if (F || P) {
+ ip->align = (ip-1)->align;
+ ip->minwidth = (ip-1)->minwidth;
+ ip->maxwidth = (ip-1)->maxwidth;
+ } else
+ ip->maxwidth = INT_MAX;
+ }
ip++;
optind++;
break;
@@ -179,14 +203,14 @@ pad(struct openfile *ip)
{
size_t n;
char *lp = linep;
+ int i = 0;
n = strlcpy(lp, ip->sepstring, line + sizeof(line) - lp);
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
- if (ip->pad) {
- n = snprintf(lp, line + sizeof(line) - lp, ip->format, "");
- if (n > 0)
- lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
- }
+ if (ip->pad)
+ while (i++ < ip->minwidth && lp + 1 < line + sizeof(line))
+ *lp++ = ' ';
+ *lp = '\0';
return (lp);
}
@@ -202,7 +226,7 @@ gatherline(struct openfile *ip)
char *p;
char *lp = linep;
char *end = s + BUFSIZ - 1;
- int c;
+ int c, width;
if (ip->eof)
return (pad(ip));
@@ -220,9 +244,16 @@ gatherline(struct openfile *ip)
numfiles++;
n = strlcpy(lp, ip->sepstring, line + sizeof(line) - lp);
lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
- n = snprintf(lp, line + sizeof(line) - lp, ip->format, s);
- if (n > 0)
- lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
+ width = mbswidth_truncate(s, ip->maxwidth);
+ if (ip->align != '-')
+ while (width++ < ip->minwidth && lp + 1 < line + sizeof(line))
+ *lp++ = ip->align;
+ n = strlcpy(lp, s, line + sizeof(line) - lp);
+ lp += (n < line + sizeof(line) - lp) ? n : strlen(lp);
+ if (ip->align == '-')
+ while (width++ < ip->minwidth && lp + 1 < line + sizeof(line))
+ *lp++ = ' ';
+ *lp = '\0';
return (lp);
}
diff --git a/usr.bin/lam/utf8.c b/usr.bin/lam/utf8.c
new file mode 100644
index 00000000000..0f6892466a3
--- /dev/null
+++ b/usr.bin/lam/utf8.c
@@ -0,0 +1,47 @@
+/* $OpenBSD: utf8.c,v 1.1 2018/07/29 11:27:15 schwarze Exp $ */
+/*
+ * Copyright (c) 2018 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <wchar.h>
+
+/*
+ * Measure the display width of the multibyte string mbs.
+ * Count each invalid byte and each non-printable character as width 1.
+ * Truncate mbs in place so that its display width does not exceed maxwidth.
+ * Return the total width, possibly after truncation.
+ */
+int
+mbswidth_truncate(char *mbs, int maxwidth)
+{
+ wchar_t wc;
+ int len, width, sum; /* bytes consumed, columns of this char, columns so far */
+
+ sum = 0;
+ while (*mbs != '\0') {
+ if ((len = mbtowc(&wc, mbs, MB_CUR_MAX)) == -1)
+ len = width = 1; /* invalid byte: skip it, count one column */
+ else if ((width = wcwidth(wc)) < 0)
+ width = 1; /* non-printable: count one column */
+ if (sum + width > maxwidth) {
+ *mbs = '\0'; /* cut before the character that would not fit */
+ break;
+ }
+ sum += width;
+ mbs += len;
+ }
+ return sum;
+}