diff options
author | Scott Soule Cheloha <cheloha@cvs.openbsd.org> | 2021-11-01 23:20:36 +0000 |
---|---|---|
committer | Scott Soule Cheloha <cheloha@cvs.openbsd.org> | 2021-11-01 23:20:36 +0000 |
commit | d0771c204075a285b76a10cb0b8255414fb9570a (patch) | |
tree | 1c3e48e55b009e15468593a8527766f78737e0cb | |
parent | 4826bb143de0d29c57e632181b3770ecc6e302ba (diff) |
uniq(1): support arbitrarily long input lines

Switch from fgets(3) to getline(3) to support input lines of any
length.

Tested by sthen@, who uncovered a dumb bug that cut throughput in
half. getline(3) is indeed slower than fgets(3), but not *twice* as
slow.

millert@ suggests that preallocating both line buffers might be
worthwhile. I will need to do some additional testing to figure out
whether 8KB buffers (like we had for fgets(3)) are appropriate
starting lengths. For now I am not preallocating either buffer.

ok millert@ sthen@
-rw-r--r-- | usr.bin/uniq/uniq.c | 36 |
1 file changed, 23 insertions, 13 deletions
diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c index ff972b58b24..67698f04399 100644 --- a/usr.bin/uniq/uniq.c +++ b/usr.bin/uniq/uniq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uniq.c,v 1.27 2018/07/31 02:55:57 deraadt Exp $ */ +/* $OpenBSD: uniq.c,v 1.28 2021/11/01 23:20:35 cheloha Exp $ */ /* $NetBSD: uniq.c,v 1.7 1995/08/31 22:03:48 jtc Exp $ */ /* @@ -45,8 +45,6 @@ #include <wchar.h> #include <wctype.h> -#define MAXLINELEN (8 * 1024) - int cflag, dflag, iflag, uflag; int numchars, numfields, repeats; @@ -59,10 +57,10 @@ __dead void usage(void); int main(int argc, char *argv[]) { - char *t1, *t2; + char *prevline, *t1, *t2, *thisline; FILE *ifp = NULL, *ofp = NULL; + size_t prevsize, thissize, tmpsize; int ch; - char *prevline, *thisline; setlocale(LC_CTYPE, ""); @@ -133,15 +131,18 @@ main(int argc, char *argv[]) if (pledge("stdio", NULL) == -1) err(1, "pledge"); - prevline = malloc(MAXLINELEN); - thisline = malloc(MAXLINELEN); - if (prevline == NULL || thisline == NULL) - err(1, "malloc"); - - if (fgets(prevline, MAXLINELEN, ifp) == NULL) + prevsize = 0; + prevline = NULL; + if (getline(&prevline, &prevsize, ifp) == -1) { + free(prevline); + if (ferror(ifp)) + err(1, "getline"); exit(0); - - while (fgets(thisline, MAXLINELEN, ifp)) { + } + + thissize = 0; + thisline = NULL; + while (getline(&thisline, &thissize, ifp) != -1) { /* If requested get the chosen fields + character offsets. */ if (numfields || numchars) { t1 = skip(thisline); @@ -157,11 +158,20 @@ main(int argc, char *argv[]) t1 = prevline; prevline = thisline; thisline = t1; + tmpsize = prevsize; + prevsize = thissize; + thissize = tmpsize; repeats = 0; } else ++repeats; } + free(thisline); + if (ferror(ifp)) + err(1, "getline"); + show(ofp, prevline); + free(prevline); + exit(0); } |