summary refs log tree commit diff
path: root/usr.bin/rs
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-12-03 12:23:16 +0000
committer Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-12-03 12:23:16 +0000
commit 1368ff20b846a89d84d16a6a6b5a3e99c6f0008d (patch)
tree 30950ed601006bfbc0e121a18493262bfeeee3cf /usr.bin/rs
parent bf1238a62fc939582179123c95fc26d7d76dc5f2 (diff)
UTF-8 support: In a UTF-8 locale, properly align columns in the
presence of zero-width and double-width characters and replace non-printable codepoints and invalid bytes with ASCII question marks. No change in the C/POSIX locale. As a side effect, get rid of all pointer to pointer variables and simplify some of the code. Partially based on ideas from tedu@. Feedback and OK czarkoff@, OK tedu@.
Diffstat (limited to 'usr.bin/rs')
-rw-r--r-- usr.bin/rs/Makefile 4
-rw-r--r-- usr.bin/rs/rs.c 140
-rw-r--r-- usr.bin/rs/utf8.c 61
3 files changed, 132 insertions, 73 deletions
diff --git a/usr.bin/rs/Makefile b/usr.bin/rs/Makefile
index 6c658e84a99..7fb1d6b9758 100644
--- a/usr.bin/rs/Makefile
+++ b/usr.bin/rs/Makefile
@@ -1,6 +1,6 @@
-# $OpenBSD: Makefile,v 1.2 1996/06/26 05:38:46 deraadt Exp $
-
+# $OpenBSD: Makefile,v 1.3 2015/12/03 12:23:15 schwarze Exp $
PROG= rs
+SRCS= rs.c utf8.c
.include <bsd.prog.mk>
diff --git a/usr.bin/rs/rs.c b/usr.bin/rs/rs.c
index f8ca9bbb46a..bb149ff5d08 100644
--- a/usr.bin/rs/rs.c
+++ b/usr.bin/rs/rs.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: rs.c,v 1.29 2015/11/14 17:03:02 schwarze Exp $ */
+/* $OpenBSD: rs.c,v 1.30 2015/12/03 12:23:15 schwarze Exp $ */
/*-
* Copyright (c) 1993
@@ -39,11 +39,17 @@
#include <err.h>
#include <errno.h>
#include <limits.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+struct entry { /* One table cell: its text and the width used when padding columns. */
+ int w; /* Display width of s: the value returned by mbsavis(), or 0 for an empty cell. */
+ char *s; /* Multibyte string: the copy produced by mbsavis(), or the literal "" for an empty cell. */
+};
+
long flags;
#define TRANSPOSE 000001
#define MTRANSPOSE 000002
@@ -63,26 +69,27 @@ long flags;
short *colwidths;
int nelem;
-char **elem;
-char **endelem;
+struct entry *elem;
+struct entry *endelem;
char *curline;
int allocsize = BUFSIZ;
-ssize_t curlen;
int irows, icols;
int orows, ocols;
-ssize_t maxlen;
+int maxwidth;
int skip;
int propgutter;
char isep = ' ', osep = ' ';
int owidth = 80, gutter = 2;
+int mbsavis(char **, const char *);
+
void usage(void);
void getargs(int, char *[]);
void getfile(void);
int get_line(void);
-char **getptrs(char **);
+struct entry *getptrs(struct entry *);
void prepfile(void);
-void prints(char *, int);
+void prints(struct entry *, int);
void putfile(void);
#define INCR(ep) do { \
@@ -93,6 +100,8 @@ void putfile(void);
int
main(int argc, char *argv[])
{
+ setlocale(LC_CTYPE, "");
+
if (pledge("stdio", NULL) == -1)
err(1, "pledge");
@@ -110,13 +119,14 @@ main(int argc, char *argv[])
void
getfile(void)
{
+ const char delim[2] = { isep, '\0' };
char *p;
- char *endp;
- char **ep = NULL;
+ struct entry *ep;
int multisep = (flags & ONEISEPONLY ? 0 : 1);
int nullpad = flags & NULLPAD;
- char **padto;
+ struct entry *padto;
+ curline = NULL;
while (skip--) {
if (get_line() == EOF)
return;
@@ -125,67 +135,67 @@ getfile(void)
}
if (get_line() == EOF)
return;
- if (flags & NOARGS && curlen < owidth)
+ if (flags & NOARGS && strlen(curline) < (size_t)owidth)
flags |= ONEPERLINE;
if (flags & ONEPERLINE)
icols = 1;
else /* count cols on first line */
- for (p = curline, endp = curline + curlen; p < endp; p++) {
+ for (p = curline; *p != '\0'; p++) {
if (*p == isep && multisep)
continue;
icols++;
while (*p && *p != isep)
p++;
}
- ep = getptrs(elem);
+ ep = getptrs(NULL);
p = curline;
do {
if (flags & ONEPERLINE) {
- *ep = curline;
+ ep->w = mbsavis(&ep->s, curline);
+ if (maxwidth < ep->w)
+ maxwidth = ep->w;
INCR(ep); /* prepare for next entry */
- if (maxlen < curlen)
- maxlen = curlen;
irows++;
continue;
}
- for (p = curline, endp = curline + curlen; p < endp; p++) {
- if (*p == isep && multisep)
- continue; /* eat up column separators */
- if (*p == isep) /* must be an empty column */
- *ep = "";
- else /* store column entry */
- *ep = p;
- while (p < endp && *p != isep)
- p++; /* find end of entry */
- *p = '\0'; /* mark end of entry */
- if (maxlen < p - *ep) /* update maxlen */
- maxlen = p - *ep;
+ p = curline;
+ while (p != NULL && *p != '\0') {
+ if (*p == isep) {
+ p++;
+ if (multisep)
+ continue;
+ ep->s = ""; /* empty column */
+ ep->w = 0;
+ } else
+ ep->w = mbsavis(&ep->s, strsep(&p, delim));
+ if (maxwidth < ep->w)
+ maxwidth = ep->w;
INCR(ep); /* prepare for next entry */
}
irows++; /* update row count */
if (nullpad) { /* pad missing entries */
padto = elem + irows * icols;
while (ep < padto) {
- *ep = "";
+ ep->s = "";
+ ep->w = 0;
INCR(ep);
}
}
} while (get_line() != EOF);
- *ep = NULL; /* mark end of pointers */
nelem = ep - elem;
}
void
putfile(void)
{
- char **ep;
+ struct entry *ep;
int i, j, n;
ep = elem;
if (flags & TRANSPOSE) {
for (i = 0; i < orows; i++) {
for (j = i; j < nelem; j += orows)
- prints(ep[j], (j - i) / orows);
+ prints(ep + j, (j - i) / orows);
putchar('\n');
}
} else {
@@ -193,7 +203,7 @@ putfile(void)
for (j = 0; j < ocols; j++) {
if (n++ >= nelem)
break;
- prints(*ep++, j);
+ prints(ep++, j);
}
putchar('\n');
}
@@ -201,19 +211,15 @@ putfile(void)
}
void
-prints(char *s, int col)
+prints(struct entry *ep, int col)
{
int n;
- char *p = s;
- while (*p)
- p++;
- n = (flags & ONEOSEPONLY ? 1 : colwidths[col] - (p - s));
+ n = (flags & ONEOSEPONLY ? 1 : colwidths[col] - ep->w);
if (flags & RIGHTADJUST)
while (n-- > 0)
putchar(osep);
- for (p = s; *p; p++)
- putchar(*p);
+ fputs(ep->s, stdout);
while (n-- > 0)
putchar(osep);
}
@@ -232,18 +238,18 @@ usage(void)
void
prepfile(void)
{
- char **ep;
+ struct entry *ep;
int i;
int j;
- char **lp;
+ struct entry *lp;
int colw;
int max = 0;
int n;
if (!nelem)
exit(0);
- gutter += maxlen * propgutter / 100.0;
- colw = maxlen + gutter;
+ gutter += maxwidth * propgutter / 100.0;
+ colw = maxwidth + gutter;
if (flags & MTRANSPOSE) {
orows = icols;
ocols = irows;
@@ -263,14 +269,11 @@ prepfile(void)
orows = nelem / ocols + (nelem % ocols ? 1 : 0);
else if (ocols == 0) /* decide on cols */
ocols = nelem / orows + (nelem % orows ? 1 : 0);
- lp = elem + orows * ocols;
- while (lp > endelem) {
- getptrs(elem + nelem);
- lp = elem + orows * ocols;
- }
+ while ((lp = elem + orows * ocols) > endelem)
+ (void)getptrs(NULL);
if (flags & RECYCLE) {
for (ep = elem + nelem; ep < lp; ep++)
- *ep = *(ep - nelem);
+ memcpy(ep, ep - nelem, sizeof(*ep));
nelem = lp - elem;
}
if (!(colwidths = calloc(ocols, sizeof(short))))
@@ -279,13 +282,13 @@ prepfile(void)
for (ep = elem, i = 0; i < ocols; i++) {
max = 0;
if (flags & TRANSPOSE) {
- for (j = 0; j < orows; j++)
- if ((n = strlen(*ep++)) > max)
- max = n;
+ for (j = 0; j < orows; j++, ep++)
+ if (ep->w > max)
+ max = ep->w;
} else {
for (j = i; j < nelem; j += ocols)
- if ((n = strlen(ep[j])) > max)
- max = n;
+ if (ep[j].w > max)
+ max = ep[j].w;
}
colwidths[i] = max + gutter;
}
@@ -305,43 +308,38 @@ prepfile(void)
}
int
-get_line(void) /* get line; maintain curline, curlen; manage storage */
+get_line(void)
{
- static char *ibuf = NULL;
- static size_t ibufsz = 0;
+ static size_t cursz;
+ static ssize_t curlen;
if (irows > 0 && flags & DETAILSHAPE)
printf(" %zd line %d\n", curlen, irows);
- if ((curlen = getline(&ibuf, &ibufsz, stdin)) == EOF) {
+ if ((curlen = getline(&curline, &cursz, stdin)) == EOF) {
if (ferror(stdin))
err(1, NULL);
return EOF;
}
- if (curlen > 0 && ibuf[curlen - 1] == '\n')
- ibuf[--curlen] = '\0';
-
- if (skip >= 0 || flags & SHAPEONLY)
- curline = ibuf;
- else if ((curline = strdup(ibuf)) == NULL)
- err(1, NULL);
+ if (curlen > 0 && curline[curlen - 1] == '\n')
+ curline[--curlen] = '\0';
return 0;
}
-char **
-getptrs(char **sp)
+struct entry *
+getptrs(struct entry *sp)
{
- char **p;
+ struct entry *p;
int newsize;
newsize = allocsize * 2;
- p = reallocarray(elem, newsize, sizeof(char *));
+ p = reallocarray(elem, newsize, sizeof(*p));
if (p == NULL)
err(1, "no memory");
allocsize = newsize;
- sp += p - elem;
+ sp = sp == NULL ? p : p + (sp - elem);
elem = p;
endelem = elem + allocsize;
return(sp);
diff --git a/usr.bin/rs/utf8.c b/usr.bin/rs/utf8.c
new file mode 100644
index 00000000000..c779d74fae1
--- /dev/null
+++ b/usr.bin/rs/utf8.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2015 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <err.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+int /* Returns the width of the copy stored in *outp; see the two return statements. */
+mbsavis(char** outp, const char *mbs) /* *outp receives a malloc()ed, sanitized copy of mbs. */
+{
+ const char *src; /* Iterate mbs. */
+ char *dst; /* Iterate *outp. */
+ wchar_t wc; /* Character most recently decoded by mbtowc(). */
+ int total_width; /* Display width of the whole string. */
+ int width; /* Display width of a single character, from wcwidth(). */
+ int len; /* Byte length of mbs; reused below as the byte length of one character. */
+
+ len = strlen(mbs); /* NOTE(review): size_t is narrowed to int here; confirm inputs stay below INT_MAX. */
+ if ((*outp = malloc(len + 1)) == NULL) /* Large enough: each '?' written below replaces at least one input byte. */
+ err(1, NULL); /* Exit on allocation failure. */
+
+ if (MB_CUR_MAX == 1) { /* Single-byte locale: no decoding and no replacement. */
+ memcpy(*outp, mbs, len + 1); /* Copy including the terminating NUL. */
+ return len; /* The byte count is used as the width in this case. */
+ }
+
+ src = mbs;
+ dst = *outp;
+ total_width = 0;
+ while (*src != '\0') {
+ if ((len = mbtowc(&wc, src, MB_CUR_MAX)) == -1) { /* Not a valid character: emit '?' and skip one byte. */
+ total_width++; /* NOTE(review): mbtowc() state is not reset after the failure; confirm whether mbtowc(NULL, NULL, 0) is needed. */
+ *dst++ = '?';
+ src++;
+ } else if ((width = wcwidth(wc)) == -1) { /* wcwidth() reports no width: replace the whole character with one '?'. */
+ total_width++;
+ *dst++ = '?';
+ src += len;
+ } else { /* Character with a known width: copy its bytes unchanged. */
+ total_width += width;
+ while (len-- > 0)
+ *dst++ = *src++;
+ }
+ }
+ *dst = '\0';
+ return total_width; /* Sum of the per-character widths, counting 1 for each '?'. */
+}