summary refs log tree commit diff
path: root/usr.bin/uniq
diff options
context:
space:
mode:
author Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-12-19 10:21:02 +0000
committer Ingo Schwarze <schwarze@cvs.openbsd.org> 2015-12-19 10:21:02 +0000
commit 11df82023be3eadafdd1c5a52cf0a5d890c968f4 (patch)
tree c02cab2c81dc898a40fc22bc76e8a9905df2b6c0 /usr.bin/uniq
parent 27894a4364d6b49a029b610d0b6297ac66d4712a (diff)
UTF-8 support:
Let -f recognize non-ASCII blank characters and let -s count characters rather than bytes. OK zhuk@ bentley@
Diffstat (limited to 'usr.bin/uniq')
-rw-r--r-- usr.bin/uniq/uniq.1 | 12
-rw-r--r-- usr.bin/uniq/uniq.c | 35
2 files changed, 38 insertions, 9 deletions
diff --git a/usr.bin/uniq/uniq.1 b/usr.bin/uniq/uniq.1
index 7858404e913..d7e275372e2 100644
--- a/usr.bin/uniq/uniq.1
+++ b/usr.bin/uniq/uniq.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: uniq.1,v 1.17 2010/09/03 11:09:29 jmc Exp $
+.\" $OpenBSD: uniq.1,v 1.18 2015/12/19 10:21:01 schwarze Exp $
.\" $NetBSD: uniq.1,v 1.5 1994/12/06 07:51:15 jtc Exp $
.\"
.\" Copyright (c) 1991, 1993
@@ -33,7 +33,7 @@
.\"
.\" @(#)uniq.1 8.1 (Berkeley) 6/6/93
.\"
-.Dd $Mdocdate: September 3 2010 $
+.Dd $Mdocdate: December 19 2015 $
.Dt UNIQ 1
.Os
.Sh NAME
@@ -114,6 +114,14 @@ A file name of
.Ql -
denotes the standard input or the standard output
.Pq depending on its position on the command line .
+.Sh ENVIRONMENT
+.Bl -tag -width LC_CTYPE
+.It Ev LC_CTYPE
+The character set
+.Xr locale 1 .
+Determines which groups of bytes are treated as characters
+and which characters are considered blank.
+.El
.Sh EXIT STATUS
.Ex -std uniq
.Sh SEE ALSO
diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c
index 43e462e1463..7e4d7b86a11 100644
--- a/usr.bin/uniq/uniq.c
+++ b/usr.bin/uniq/uniq.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uniq.c,v 1.23 2015/11/02 20:25:42 mmcc Exp $ */
+/* $OpenBSD: uniq.c,v 1.24 2015/12/19 10:21:01 schwarze Exp $ */
/* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */
/*
@@ -37,10 +37,13 @@
#include <err.h>
#include <errno.h>
#include <limits.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
#define MAXLINELEN (8 * 1024)
@@ -61,6 +64,8 @@ main(int argc, char *argv[])
int ch;
char *prevline, *thisline;
+ setlocale(LC_CTYPE, "");
+
if (pledge("stdio rpath wpath cpath", NULL) == -1)
err(1, "pledge");
@@ -176,16 +181,32 @@ show(FILE *ofp, char *str)
char *
skip(char *str)
{
+ wchar_t wc;
int nchars, nfields;
+ int len;
+ int field_started;
for (nfields = numfields; nfields && *str; nfields--) {
- while (isblank((unsigned char)*str))
- str++;
- while (*str && !isblank((unsigned char)*str))
- str++;
+ /* Skip one field, including preceding blanks. */
+ for (field_started = 0; *str != '\0'; str += len) {
+ if ((len = mbtowc(&wc, str, MB_CUR_MAX)) == -1) {
+ (void)mbtowc(NULL, NULL, MB_CUR_MAX);
+ wc = L'?';
+ len = 1;
+ }
+ if (iswblank(wc)) {
+ if (field_started)
+ break;
+ } else
+ field_started = 1;
+ }
}
- for (nchars = numchars; nchars-- && *str && *str != '\n'; ++str)
- ;
+
+ /* Skip some additional characters. */
+ for (nchars = numchars; nchars-- && *str != '\0'; str += len)
+ if ((len = mblen(str, MB_CUR_MAX)) == -1)
+ len = 1;
+
return (str);
}