summaryrefslogtreecommitdiff
path: root/usr.bin/uniq
diff options
context:
space:
mode:
author Scott Soule Cheloha <cheloha@cvs.openbsd.org> 2021-11-17 23:09:39 +0000
committer Scott Soule Cheloha <cheloha@cvs.openbsd.org> 2021-11-17 23:09:39 +0000
commit daaecf5f4fdf77ddf4e7152626365415ba57840b (patch)
tree 0363975bebd3d780f72093182680cc5d904ddeb7 /usr.bin/uniq
parent c6a370580753cdef2a818e447ad4911083eeae8e (diff)
uniq(1): ignore trailing newlines when comparing lines
POSIX.1-2008 tweaked the uniq definition in light of AGI 1003.1-2001 #133. uniq must now *ignore* the trailing newline when comparing lines from the input. In practice this means that if the last line in the input is missing a trailing newline it isn't necessarily different from the line preceding it. So, uniq(1) now stubs the trailing newline before doing any line comparisons. For the sake of simplicity, this patch introduces a second change: if the last line in the input is missing a trailing newline and we choose to print the line, a newline is appended when we print it. Adopting the newline change aligns our implementation with POSIX.1-2008 (which we already claim in the manpage). Adopting both changes aligns our behavior with that of FreeBSD and GNU uniq. For better or worse, OpenBSD's uniq no longer behaves like NetBSD's uniq in this corner case. References: POSIX.1-2001 uniq: https://pubs.opengroup.org/onlinepubs/009695399/utilities/uniq.html Austin Group Interpretation 1003.1-2001 #133: https://collaboration.opengroup.org/austin/interps/documents/14355/AI-133.txt POSIX.1-2008 uniq: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html -- Discussed with millert@. With input from schwarze@. Positive feedback from bcallah@. Thread: https://marc.info/?l=openbsd-tech&m=163581613829524&w=2 ok millert@
Diffstat (limited to 'usr.bin/uniq')
-rw-r--r-- usr.bin/uniq/uniq.c 16
1 files changed, 11 insertions, 5 deletions
diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c
index 67698f04399..7356d922390 100644
--- a/usr.bin/uniq/uniq.c
+++ b/usr.bin/uniq/uniq.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uniq.c,v 1.28 2021/11/01 23:20:35 cheloha Exp $ */
+/* $OpenBSD: uniq.c,v 1.29 2021/11/17 23:09:38 cheloha Exp $ */
/* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */
/*
@@ -60,6 +60,7 @@ main(int argc, char *argv[])
char *prevline, *t1, *t2, *thisline;
FILE *ifp = NULL, *ofp = NULL;
size_t prevsize, thissize, tmpsize;
+ ssize_t len;
int ch;
setlocale(LC_CTYPE, "");
@@ -133,16 +134,21 @@ main(int argc, char *argv[])
prevsize = 0;
prevline = NULL;
- if (getline(&prevline, &prevsize, ifp) == -1) {
+ if ((len = getline(&prevline, &prevsize, ifp)) == -1) {
free(prevline);
if (ferror(ifp))
err(1, "getline");
exit(0);
}
+ if (prevline[len - 1] == '\n')
+ prevline[len - 1] = '\0';
thissize = 0;
thisline = NULL;
- while (getline(&thisline, &thissize, ifp) != -1) {
+ while ((len = getline(&thisline, &thissize, ifp)) != -1) {
+ if (thisline[len - 1] == '\n')
+ thisline[len - 1] = '\0';
+
/* If requested get the chosen fields + character offsets. */
if (numfields || numchars) {
t1 = skip(thisline);
@@ -185,9 +191,9 @@ show(FILE *ofp, char *str)
{
if ((dflag && repeats) || (uflag && !repeats)) {
if (cflag)
- (void)fprintf(ofp, "%4d %s", repeats + 1, str);
+ fprintf(ofp, "%4d %s\n", repeats + 1, str);
else
- (void)fprintf(ofp, "%s", str);
+ fprintf(ofp, "%s\n", str);
}
}