diff options
author | Martijn van Duren <martijn@cvs.openbsd.org> | 2018-11-06 13:51:29 +0000 |
---|---|---|
committer | Martijn van Duren <martijn@cvs.openbsd.org> | 2018-11-06 13:51:29 +0000 |
commit | 824a940081f19236d42c77da6553cf59f8a9128f (patch) | |
tree | a1ea811d0dc5654db85a8647a8a5c6ea3204ef66 /usr.bin | |
parent | ce79af513bef1298c5c0072990f7b30a7abc9198 (diff) |
Allow join(1) to join on a multibyte separator.
small nit and OK otto@
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/join/join.c | 49 |
1 files changed, 44 insertions, 5 deletions
diff --git a/usr.bin/join/join.c b/usr.bin/join/join.c
index c4e829918b4..bb79487b166 100644
--- a/usr.bin/join/join.c
+++ b/usr.bin/join/join.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: join.c,v 1.30 2018/10/23 08:41:45 martijn Exp $ */
+/* $OpenBSD: join.c,v 1.31 2018/11/06 13:51:28 martijn Exp $ */
 
 /*-
  * Copyright (c) 1991, 1993, 1994
@@ -34,10 +34,14 @@
  */
 
 #include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>
 
 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
 
@@ -81,11 +85,12 @@ int joinout = 1; /* show lines with matched join fields (-v) */
 int needsep; /* need separator character */
 int spans = 1; /* span multiple delimiters (-t) */
 char *empty; /* empty field replacement string (-e) */
-char *tabchar = " \t"; /* delimiter characters (-t) */
+wchar_t tabchar[] = L" \t"; /* delimiter characters (-t) */
 
 int cmp(LINE *, u_long, LINE *, u_long);
 void fieldarg(char *);
 void joinlines(INPUT *, INPUT *);
+char *mbssep(char **, const wchar_t *);
 void obsolete(char **);
 void outfield(LINE *, u_long, int);
 void outoneline(INPUT *, LINE *);
@@ -101,6 +106,8 @@ main(int argc, char *argv[])
 	int aflag, ch, cval, vflag;
 	char *end;
 
+	setlocale(LC_CTYPE, "");
+
 	if (pledge("stdio rpath", NULL) == -1)
 		err(1, "pledge");
 
@@ -161,8 +168,10 @@ main(int argc, char *argv[])
 			break;
 		case 't':
 			spans = 0;
-			if (strlen(tabchar = optarg) != 1)
+			if (mbtowc(tabchar, optarg, MB_CUR_MAX) !=
+			    strlen(optarg))
 				errx(1, "illegal tab character specification");
+			tabchar[1] = L'\0';
 			break;
 		case 'v':
 			vflag = 1;
@@ -333,7 +342,7 @@ slurp(INPUT *F)
 		/* Split the line into fields, allocate space as necessary. */
 		lp->fieldcnt = 0;
 		bp = lp->line;
-		while ((fieldp = strsep(&bp, tabchar)) != NULL) {
+		while ((fieldp = mbssep(&bp, tabchar)) != NULL) {
 			if (spans && *fieldp == '\0')
 				continue;
 			if (lp->fieldcnt == lp->fieldalloc) {
@@ -358,6 +367,36 @@ slurp(INPUT *F)
 	free(line);
 }
 
+char *
+mbssep(char **stringp, const wchar_t *wcdelim)
+{
+	char *s, *p;
+	size_t ndelim;
+	int i;
+	/* tabchar is never more than 2 */
+	char mbdelim[2][MB_LEN_MAX + 1];
+	size_t mblen[2];
+
+	if ((s = *stringp) == NULL)
+		return NULL;
+	ndelim = wcslen(wcdelim);
+	for (i = 0; i < ndelim; i++) {
+		/* wcdelim generated via mbtowc */
+		mblen[i] = wctomb(mbdelim[i], wcdelim[i]);
+	}
+	for (p = s; *p != '\0'; p++) {
+		for (i = 0; i < ndelim; i++) {
+			if (strncmp(p, mbdelim[i], mblen[i]) == 0) {
+				*p = '\0';
+				*stringp = p + mblen[i];
+				return s;
+			}
+		}
+	}
+	*stringp = NULL;
+	return s;
+}
+
 int
 cmp(LINE *lp1, u_long fieldno1, LINE *lp2, u_long fieldno2)
 {
@@ -463,7 +502,7 @@ void
 outfield(LINE *lp, u_long fieldno, int out_empty)
 {
 	if (needsep++)
-		putchar((int)*tabchar);
+		putwchar(*tabchar);
 	if (!ferror(stdout)) {
 		if (lp->fieldcnt <= fieldno || out_empty) {
 			if (empty != NULL)