diff options
Diffstat (limited to 'usr.bin/sort')
-rw-r--r-- | usr.bin/sort/append.c | 36 | ||||
-rw-r--r-- | usr.bin/sort/extern.h | 5 | ||||
-rw-r--r-- | usr.bin/sort/fields.c | 31 | ||||
-rw-r--r-- | usr.bin/sort/files.c | 127 | ||||
-rw-r--r-- | usr.bin/sort/fsort.c | 55 | ||||
-rw-r--r-- | usr.bin/sort/fsort.h | 6 | ||||
-rw-r--r-- | usr.bin/sort/msort.c | 34 | ||||
-rw-r--r-- | usr.bin/sort/sort.1 | 21 | ||||
-rw-r--r-- | usr.bin/sort/sort.c | 9 | ||||
-rw-r--r-- | usr.bin/sort/sort.h | 6 |
10 files changed, 198 insertions, 132 deletions
diff --git a/usr.bin/sort/append.c b/usr.bin/sort/append.c index 64e5a06b2b8..fc162f95452 100644 --- a/usr.bin/sort/append.c +++ b/usr.bin/sort/append.c @@ -1,4 +1,4 @@ -/* $OpenBSD: append.c,v 1.5 1999/05/24 17:57:17 millert Exp $ */ +/* $OpenBSD: append.c,v 1.6 2001/02/04 21:27:00 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -40,7 +40,7 @@ #if 0 static char sccsid[] = "@(#)append.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: append.c,v 1.5 1999/05/24 17:57:17 millert Exp $"; +static char rcsid[] = "$OpenBSD: append.c,v 1.6 2001/02/04 21:27:00 ericj Exp $"; #endif #endif /* not lint */ @@ -70,16 +70,16 @@ void append(keylist, nelem, depth, fp, put, ftbl) u_char **keylist; int nelem; - register int depth; + int depth; FILE *fp; void (*put)(RECHEADER *, FILE *); struct field *ftbl; { - register u_char *wts, *wts1; - register int n, odepth; - register u_char **cpos, **ppos, **lastkey; - register u_char *cend, *pend, *start; - register RECHEADER *crec, *prec; + u_char *wts, *wts1; + int n, odepth; + u_char **cpos, **ppos, **lastkey; + u_char *cend, *pend, *start; + RECHEADER *crec, *prec; if (*keylist == '\0' && UNIQUE) return; @@ -98,11 +98,15 @@ append(keylist, nelem, depth, fp, put, ftbl) prec = (RECHEADER *) (*ppos - depth); if (UNIQUE) put(prec, fp); - for (cpos = keylist+1; cpos < lastkey; cpos++) { + for (cpos = &keylist[1]; cpos < lastkey; cpos++) { crec = (RECHEADER *) (*cpos - depth); if (crec->length == prec->length) { - pend = (u_char *) &prec->offset + prec->length; - cend = (u_char *) &crec->offset + crec->length; + /* + * Set pend and cend so that trailing NUL and + * record separator is ignored. 
+ */ + pend = (u_char *)&prec->data + prec->length - 2; + cend = (u_char *)&crec->data + crec->length - 2; for (start = *cpos; cend >= start; cend--) { if (wts[*cend] != wts[*pend]) break; @@ -131,11 +135,15 @@ append(keylist, nelem, depth, fp, put, ftbl) ppos = keylist; prec = (RECHEADER *) (*ppos - depth); put(prec, fp); - for (cpos = keylist+1; cpos < lastkey; cpos++) { + for (cpos = &keylist[1]; cpos < lastkey; cpos++) { crec = (RECHEADER *) (*cpos - depth); if (crec->offset == prec->offset) { - pend = (u_char *) &prec->offset + prec->offset; - cend = (u_char *) &crec->offset + crec->offset; + /* + * Set pend and cend so that trailing NUL and + * record separator is ignored. + */ + pend = (u_char *)&prec->data + prec->offset - 2; + cend = (u_char *)&crec->data + crec->offset - 2; for (start = *cpos; cend >= start; cend--) { if (wts[*cend] != wts[*pend]) break; diff --git a/usr.bin/sort/extern.h b/usr.bin/sort/extern.h index 91776f3cec2..1a468b346d1 100644 --- a/usr.bin/sort/extern.h +++ b/usr.bin/sort/extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: extern.h,v 1.2 1997/06/30 05:36:15 millert Exp $ */ +/* $OpenBSD: extern.h,v 1.3 2001/02/04 21:27:00 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -58,12 +58,11 @@ int makeline __P((int, union f_handle, int, RECHEADER *, u_char *, struct field *)); void merge __P((int, int, int (*)(), FILE *, void (*)(), struct field *)); void num_init __P((void)); -void onepass __P((u_char **, int, int, int *, u_char *, FILE *)); +void onepass __P((u_char **, int, long, long *, u_char *, FILE *)); int optval __P((int, int)); void order __P((union f_handle, int (*)(), struct field *)); void putline __P((RECHEADER *, FILE *)); void putrec __P((RECHEADER *, FILE *)); void rd_append __P((int, union f_handle, int, FILE *, u_char *, u_char *)); -int seq __P((FILE *, DBT *, DBT *)); int setfield __P((char *, struct field *, int)); void settables __P((int)); diff --git a/usr.bin/sort/fields.c b/usr.bin/sort/fields.c index 46e955a59bc..74387e0eebe 
100644 --- a/usr.bin/sort/fields.c +++ b/usr.bin/sort/fields.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fields.c,v 1.4 1999/05/24 17:57:17 millert Exp $ */ +/* $OpenBSD: fields.c,v 1.5 2001/02/04 21:27:00 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -40,7 +40,7 @@ #if 0 static char sccsid[] = "@(#)fields.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: fields.c,v 1.4 1999/05/24 17:57:17 millert Exp $"; +static char rcsid[] = "$OpenBSD: fields.c,v 1.5 2001/02/04 21:27:00 ericj Exp $"; #endif #endif /* not lint */ @@ -86,11 +86,11 @@ enterkey(keybuf, line, size, fieldtable) struct field fieldtable[]; { int i; - register u_char *l_d_mask; - register u_char *lineend, *pos; + u_char *l_d_mask; + u_char *lineend, *pos; u_char *endkey, *keypos; - register struct coldesc *clpos; - register int col = 1; + struct coldesc *clpos; + int col = 1; struct field *ftpos; l_d_mask = d_mask; pos = (u_char *) line->data - 1; @@ -100,8 +100,9 @@ enterkey(keybuf, line, size, fieldtable) for (i = 0; i < ncols; i++) { clpos = clist + i; - for (; (col < clpos->num) && (pos < lineend); col++) - { NEXTCOL(pos); } + for (; (col < clpos->num) && (pos < lineend); col++) { + NEXTCOL(pos); + } if (pos >= lineend) break; clpos->start = SEP_FLAG ? 
pos + 1 : pos; @@ -140,12 +141,12 @@ enterkey(keybuf, line, size, fieldtable) u_char * enterfield(tablepos, endkey, cur_fld, gflags) struct field *cur_fld; - register u_char *tablepos, *endkey; + u_char *tablepos, *endkey; int gflags; { - register u_char *start, *end, *lineend, *mask, *lweight; + u_char *start, *end, *lineend, *mask, *lweight; struct column icol, tcol; - register u_int flags; + u_int flags; u_int Rflag; icol = cur_fld->icol; @@ -210,13 +211,13 @@ enterfield(tablepos, endkey, cur_fld, gflags) u_char * number(pos, bufend, line, lineend, Rflag) - register u_char *line, *pos, *bufend, *lineend; + u_char *line, *pos, *bufend, *lineend; int Rflag; { - register int or_sign, parity = 0; - register int expincr = 1, exponent = -1; + int or_sign, parity = 0; + int expincr = 1, exponent = -1; int bite, expsign = 1, sign = 1; - register u_char lastvalue, *nonzero, *tline, *C_TENS; + u_char lastvalue, *nonzero, *tline, *C_TENS; u_char *nweights; if (Rflag) diff --git a/usr.bin/sort/files.c b/usr.bin/sort/files.c index 90317da4872..20056cdea47 100644 --- a/usr.bin/sort/files.c +++ b/usr.bin/sort/files.c @@ -1,4 +1,4 @@ -/* $OpenBSD: files.c,v 1.6 2000/06/30 16:00:23 millert Exp $ */ +/* $OpenBSD: files.c,v 1.7 2001/02/04 21:27:00 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -40,7 +40,7 @@ #if 0 static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: files.c,v 1.6 2000/06/30 16:00:23 millert Exp $"; +static char rcsid[] = "$OpenBSD: files.c,v 1.7 2001/02/04 21:27:00 ericj Exp $"; #endif #endif /* not lint */ @@ -49,6 +49,8 @@ static char rcsid[] = "$OpenBSD: files.c,v 1.6 2000/06/30 16:00:23 millert Exp $ #include <string.h> +static int seq __P((FILE *, DBT *, DBT *)); + /* * this is the subroutine for file management for fsort(). * It keeps the buffers for all temporary files. 
@@ -58,13 +60,13 @@ getnext(binno, infl0, nfiles, pos, end, dummy) int binno; union f_handle infl0; int nfiles; - register RECHEADER *pos; - register u_char *end; + RECHEADER *pos; + u_char *end; struct field *dummy; { - register int i; - register u_char *hp; - static int nleft = 0; + int i; + u_char *hp; + static size_t nleft = 0; static int cnt = 0, flag = -1; static u_char maxb = 0; static FILE *fp; @@ -86,9 +88,7 @@ getnext(binno, infl0, nfiles, pos, end, dummy) return (EOF); } fp = fstack[infl0.top + cnt].fp; - hp = (u_char *) &nleft; - for (i = sizeof(TRECHEADER); i; --i) - *hp++ = getc(fp); + fread(&nleft, sizeof(nleft), 1, fp); if (binno < maxb) fstack[infl0.top+cnt].max_o += sizeof(nleft) + nleft; @@ -109,10 +109,9 @@ getnext(binno, infl0, nfiles, pos, end, dummy) } if ((u_char *) pos > end - sizeof(TRECHEADER)) return (BUFFEND); - hp = (u_char *) pos; - for (i = sizeof(TRECHEADER); i ; --i) - *hp++ = (u_char) getc(fp); + fread(pos, sizeof(TRECHEADER), 1, fp); if (end - pos->data < pos->length) { + hp = ((u_char *)pos) + sizeof(TRECHEADER); for (i = sizeof(TRECHEADER); i ; i--) ungetc(*--hp, fp); return (BUFFEND); @@ -137,17 +136,27 @@ makeline(flno, filelist, nfiles, buffer, bufend, dummy2) u_char *bufend; struct field *dummy2; { - static char *opos; - register char *end, *pos; + static u_char *obufend; + static size_t osz; + char *pos; static int fileno = 0, overflow = 0; static FILE *fp = 0; - register int c; + int c; pos = (char *) buffer->data; - end = min((char *) bufend, pos + MAXLLEN); if (overflow) { - memmove(pos, opos, bufend - (u_char *) opos); - pos += ((char *) bufend - opos); + /* + * Buffer shortage is solved by either of two ways: + * * Flush previous buffered data and start using the + * buffer from start (see fsort()) + * * realloc buffer and bump bufend + * + * The former is preferred, realloc is only done when + * there is exactly one item in buffer which does not fit. 
+ */ + if (bufend == obufend) + memmove(pos, bufend - osz, osz); + pos+=osz; overflow = 0; } for (;;) { @@ -160,22 +169,22 @@ makeline(flno, filelist, nfiles, buffer, bufend, dummy2) err(2, "%s", filelist.names[fileno]); fileno++; } - while ((pos < end) && ((c = getc(fp)) != EOF)) { + while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { if ((*pos++ = c) == REC_D) { buffer->offset = 0; buffer->length = pos - (char *) buffer->data; return (0); } } - if (pos >= end && end == (char *) bufend) { - if ((char *) buffer->data < end) { + if (pos >= (char *)bufend) { + if (buffer->data < bufend) { overflow = 1; - opos = (char *) buffer->data; + obufend = bufend; + osz = (pos - (char *)buffer->data); } return (BUFFEND); } else if (c == EOF) { if (buffer->data != (u_char *) pos) { - warnx("last character not record delimiter"); *pos++ = REC_D; buffer->offset = 0; buffer->length = pos - (char *) buffer->data; @@ -186,8 +195,16 @@ makeline(flno, filelist, nfiles, buffer, bufend, dummy2) if (flno >= 0) fstack[flno].fp = 0; } else { - buffer->data[100] = '\000'; - warnx("line too long: ignoring %s...", buffer->data); + warnx("line too long: ignoring %100s...", buffer->data); + + /* Consume the rest of the line from input */ + while((c = getc(fp)) != REC_D && c != EOF) + ; + + buffer->offset = 0; + buffer->length = 0; + + return (BUFFEND); } } } @@ -203,21 +220,23 @@ makekey(flno, filelist, nfiles, buffer, bufend, ftbl) u_char *bufend; struct field *ftbl; { - static int (*get)(); static int fileno = 0; static FILE *dbdesc = 0; static DBT dbkey[1], line[1]; static int overflow = 0; - int c; + static int c; if (overflow) { - overflow = 0; - enterkey(buffer, line, bufend - (u_char *) buffer, ftbl); - return (0); + overflow = enterkey(buffer, line, bufend - (u_char *)buffer, + ftbl); + if (overflow) + return (BUFFEND); + else + return (0); } + for (;;) { if (flno >= 0) { - get = seq; if (!(dbdesc = fstack[flno].fp)) return (EOF); } else if (!dbdesc) { @@ -227,14 +246,14 @@ 
makekey(flno, filelist, nfiles, buffer, bufend, ftbl) if (!dbdesc) err(2, "%s", filelist.names[fileno]); fileno++; - get = seq; } - if (!(c = get(dbdesc, line, dbkey))) { - if ((signed)line->size > bufend - buffer->data) + if (!(c = seq(dbdesc, line, dbkey))) { + if ((signed)line->size > bufend - buffer->data) { overflow = 1; - else + } else { overflow = enterkey(buffer, line, bufend - (u_char *) buffer, ftbl); + } if (overflow) return (BUFFEND); else @@ -256,20 +275,20 @@ makekey(flno, filelist, nfiles, buffer, bufend, ftbl) /* * get a key/line pair from fp */ -int +static int seq(fp, line, key) FILE *fp; DBT *line; DBT *key; { static char *buf, flag = 1; - register char *end, *pos; - register int c; + char *end, *pos; + int c; if (flag) { flag = 0; buf = (char *) linebuf; - end = buf + MAXLLEN; + end = buf + linebuf_size; line->data = buf; } pos = buf; @@ -279,16 +298,18 @@ seq(fp, line, key) return (0); } if (pos == end) { - line->size = MAXLLEN; - *--pos = REC_D; - while ((c = getc(fp)) != EOF) { - if (c == REC_D) - return (BUFFEND); - } + linebuf_size *= 2; + linebuf = realloc(linebuf, linebuf_size); + if (!linebuf) + err(2, "realloc of linebuf to %lu bytes failed", + (unsigned long)linebuf_size); + end = linebuf + linebuf_size; + pos = linebuf + (pos - buf); + line->data = buf = (char *)linebuf; + continue; } } if (pos != buf) { - warnx("last character not record delimiter"); *pos++ = REC_D; line->size = pos - buf; return (0); @@ -301,8 +322,8 @@ seq(fp, line, key) */ void putrec(rec, fp) - register RECHEADER *rec; - register FILE *fp; + RECHEADER *rec; + FILE *fp; { EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); } @@ -312,8 +333,8 @@ putrec(rec, fp) */ void putline(rec, fp) - register RECHEADER *rec; - register FILE *fp; + RECHEADER *rec; + FILE *fp; { EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); } @@ -325,8 +346,8 @@ int geteasy(flno, filelist, nfiles, rec, end, dummy2) int flno, nfiles; union f_handle filelist; - register 
RECHEADER *rec; - register u_char *end; + RECHEADER *rec; + u_char *end; struct field *dummy2; { int i; diff --git a/usr.bin/sort/fsort.c b/usr.bin/sort/fsort.c index 7b605d424d8..6036f38fdbb 100644 --- a/usr.bin/sort/fsort.c +++ b/usr.bin/sort/fsort.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fsort.c,v 1.7 1999/05/24 17:57:18 millert Exp $ */ +/* $OpenBSD: fsort.c,v 1.8 2001/02/04 21:27:00 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -40,7 +40,7 @@ #if 0 static char sccsid[] = "@(#)fsort.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: fsort.c,v 1.7 1999/05/24 17:57:18 millert Exp $"; +static char rcsid[] = "$OpenBSD: fsort.c,v 1.8 2001/02/04 21:27:00 ericj Exp $"; #endif #endif /* not lint */ @@ -58,6 +58,7 @@ static char rcsid[] = "$OpenBSD: fsort.c,v 1.7 1999/05/24 17:57:18 millert Exp $ #include <string.h> u_char **keylist = 0, *buffer = 0, *linebuf = 0; +size_t bufsize, linebuf_size; struct tempfile fstack[MAXFCT]; extern char *toutpath; #define FSORTMAX 4 @@ -65,21 +66,21 @@ int PANIC = FSORTMAX; void fsort(binno, depth, infiles, nfiles, outfp, ftbl) - register int binno, depth; - register union f_handle infiles; - register int nfiles; + int binno, depth; + union f_handle infiles; + int nfiles; FILE *outfp; - register struct field *ftbl; + struct field *ftbl; { - register u_char *bufend, **keypos, *tmpbuf; + u_char *bufend, **keypos, *tmpbuf; u_char *weights; int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0; int c, nelem; - int sizes [NBINS+1]; + long sizes[NBINS+1]; union f_handle tfiles, mstart = {MAXFCT-16}; - register int (*get)(int, union f_handle, int, RECHEADER *, + int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *); - register RECHEADER *crec; + RECHEADER *crec; struct field tfield[2]; FILE *prevfp, *tailfp[FSORTMAX+1]; @@ -93,15 +94,17 @@ fsort(binno, depth, infiles, nfiles, outfp, ftbl) tfield[0].icol.num = 1; weights = ftbl[0].weights; if (!buffer) { - if ((buffer = malloc(BUFSIZE + 1)) == NULL || + bufsize = 
BUFSIZE; + if ((buffer = malloc(bufsize + 1)) == NULL || (keylist = calloc(MAXNUM, sizeof(u_char *))) == NULL) errx(2, "cannot allocate memory"); if (!SINGL_FLD) { - if ((linebuf = malloc(MAXLLEN)) == NULL) + linebuf_size = MAXLLEN; + if ((linebuf = malloc(linebuf_size)) == NULL) errx(2, "cannot allocate memory"); } } - bufend = buffer + BUFSIZE; + bufend = buffer + bufsize; if (binno >= 0) { tfiles.top = infiles.top + nfiles; get = getnext; @@ -139,6 +142,18 @@ fsort(binno, depth, infiles, nfiles, outfp, ftbl) crec =(RECHEADER *) ((char *) crec + SALIGN(crec->length) + sizeof(TRECHEADER)); } + /* + * buffer was too small for data, allocate + * a bigger buffer. + */ + if (c == BUFFEND && nelem == 0) { + bufsize *= 2; + buffer = realloc(buffer, bufsize); + if (!buffer) + err(2, "failed to realloc buffer"); + bufend = buffer + bufsize; + continue; + } if (c == BUFFEND || ntfiles || mfct) { /* push */ if (panic >= PANIC) { fstack[MAXFCT-16+mfct].fp = ftmp(); @@ -253,22 +268,22 @@ void onepass(a, depth, n, sizes, tr, fp) u_char **a; int depth; - int n; - int sizes[]; + long n; + long sizes[]; u_char *tr; FILE *fp; { - int tsizes[NBINS+1]; + size_t tsizes[NBINS+1]; u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp; static int histo[256]; int *hp; - register int c; + int c; u_char **an, *t, **aj; - register u_char **ak, *r; + u_char **ak, *r; memset(tsizes, 0, sizeof(tsizes)); depth += sizeof(TRECHEADER); - an = a + n; + an = &a[n]; for (ak = a; ak < an; ak++) { histo[c = tr[**ak]]++; tsizes[c] += ((RECHEADER *) (*ak -= depth))->length; @@ -292,7 +307,7 @@ onepass(a, depth, n, sizes, tr, fp) n = an - ak; tsizes[c] += n * sizeof(TRECHEADER); /* tell getnext how many elements in this bin, this segment. 
*/ - EWRITE(tsizes+c, sizeof(int), 1, fp); + EWRITE(&tsizes[c], sizeof(size_t), 1, fp); sizes[c] += tsizes[c]; for (; ak < an; ++ak) putrec((RECHEADER *) *ak, fp); diff --git a/usr.bin/sort/fsort.h b/usr.bin/sort/fsort.h index 6aec0b19685..61b8563962e 100644 --- a/usr.bin/sort/fsort.h +++ b/usr.bin/sort/fsort.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fsort.h,v 1.2 1997/06/30 05:36:17 millert Exp $ */ +/* $OpenBSD: fsort.h,v 1.3 2001/02/04 21:27:01 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -42,10 +42,12 @@ #define BUFSIZE (1 << POW) #define MAXNUM (BUFSIZE/10) /* lowish guess at average record size */ #define BUFFEND (EOF-2) +#define BUFFSMALL (EOF-3) /* buffer is too small to hold line */ #define MAXFCT 1000 #define MAXLLEN ((1 << min(POW-4, 16)) - 14) -extern u_char **keylist, **l2buf, *buffer, *linebuf; +extern u_char **keylist, *buffer, *linebuf; +extern size_t bufsize, linebuf_size; /* temp files in the stack have a file descriptor, a largest bin (maxb) * which becomes the last non-empty bin (lastb) when the actual largest diff --git a/usr.bin/sort/msort.c b/usr.bin/sort/msort.c index ff4877aa615..2b6297fef6c 100644 --- a/usr.bin/sort/msort.c +++ b/usr.bin/sort/msort.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msort.c,v 1.8 2000/06/30 16:00:23 millert Exp $ */ +/* $OpenBSD: msort.c,v 1.9 2001/02/04 21:27:01 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -40,7 +40,7 @@ #if 0 static char sccsid[] = "@(#)msort.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: msort.c,v 1.8 2000/06/30 16:00:23 millert Exp $"; +static char rcsid[] = "$OpenBSD: msort.c,v 1.9 2001/02/04 21:27:01 ericj Exp $"; #endif #endif /* not lint */ @@ -87,7 +87,7 @@ fmerge(binno, files, nfiles, get, outfp, fput, ftbl) struct tempfile *l_fstack; wts = ftbl->weights; - if (!UNIQUE && SINGL_FLD && ftbl->flags & F) + if (!UNIQUE && SINGL_FLD && (ftbl->flags & F)) wts1 = (ftbl->flags & R) ? 
Rascii : ascii; if (!cfilebuf) { cfilebuf = malloc(MAXLLEN + sizeof(TMFILE)); @@ -207,8 +207,8 @@ insert(flist, rec, ttop, delete) struct mfile **flist, **rec; int delete, ttop; /* delete = 0 or 1 */ { - register struct mfile *tmprec; - register int top, mid, bot = 0, cmpv = 1; + struct mfile *tmprec; + int top, mid, bot = 0, cmpv = 1; tmprec = *rec; top = ttop; for (mid = top/2; bot +1 != top; mid = (bot+top)/2) { @@ -263,7 +263,7 @@ order(infile, get, ftbl) int (*get)(); struct field *ftbl; { - u_char *end; + u_char *crec_end, *prec_end, *trec_end; int c; RECHEADER *crec, *prec, *trec; @@ -274,16 +274,17 @@ order(infile, get, ftbl) buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER))); if (buffer == NULL) errx(2, "cannot allocate memory"); - end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER)); crec = (RECHEADER *) buffer; + crec_end = buffer + MAXLLEN + sizeof(TRECHEADER); prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER)); + prec_end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER)); wts = ftbl->weights; - if (SINGL_FLD && ftbl->flags & F) + if (SINGL_FLD && (ftbl->flags & F)) wts1 = ftbl->flags & R ? Rascii : ascii; else wts1 = 0; - if (get(-1, infile, 1, prec, end, ftbl) == 0) - while (0 == get(-1, infile, 1, crec, end, ftbl)) { + if (get(-1, infile, 1, prec, prec_end, ftbl) == 0) + while (get(-1, infile, 1, crec, crec_end, ftbl) == 0) { if (0 < (c = cmp(prec, crec))) { crec->data[crec->length-1] = 0; errx(1, "found disorder: %s", @@ -294,9 +295,16 @@ order(infile, get, ftbl) errx(1, "found non-uniqueness: %s", crec->data+crec->offset); } + /* Swap pointers so that this record is on place + * pointed to by prec and new record is read to place + * pointed to by crec. 
+ */ trec = prec; prec = crec; crec = trec; + trec_end = prec_end; + prec_end = crec_end; + crec_end = trec_end; } exit(0); } @@ -305,9 +313,9 @@ static int cmp(rec1, rec2) RECHEADER *rec1, *rec2; { - register int r; - register u_char *pos1, *pos2, *end; - register u_char *cwts; + int r; + u_char *pos1, *pos2, *end; + u_char *cwts; for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) { pos1 = rec1->data; pos2 = rec2->data; diff --git a/usr.bin/sort/sort.1 b/usr.bin/sort/sort.1 index 159c5c0b544..b174c58330e 100644 --- a/usr.bin/sort/sort.1 +++ b/usr.bin/sort/sort.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: sort.1,v 1.13 2000/11/09 17:52:39 aaron Exp $ +.\" $OpenBSD: sort.1,v 1.14 2001/02/04 21:27:01 ericj Exp $ .\" .\" Copyright (c) 1991, 1993 .\" The Regents of the University of California. All rights reserved. @@ -364,6 +364,7 @@ already exists .Sh SEE ALSO .Xr comm 1 , .Xr join 1 , +.Xr radixsort 3 , .Xr uniq 1 .Sh HISTORY A @@ -371,6 +372,19 @@ A command appeared in .At v5 . .Sh NOTES +.Nm +has no limits on input line length (other than imposed by available +memory) or any restrictions on bytes allowed within lines. +.Pp +To protect data +.Nm +.Fl o +calls +.Xr link 2 +and +.Xr unlink 2 , +and thus fails on protected directories. +.Pp The current sort command uses lexicographic radix sorting, which requires that sort keys be kept in memory (as opposed to previous versions which used quick and merge sorts and did not). @@ -389,12 +403,7 @@ is equivalent to .Fl f and may take twice as long. .Sh BUGS -Lines longer than 65522 characters are discarded and processing continues. To sort files larger than 60Mb, use .Nm .Fl H ; files larger than 704Mb must be sorted in smaller pieces, then merged. -To protect data -.Nm -.Fl o -calls link and unlink, and thus fails in protected directories. 
diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c index 7da39b63aeb..d9d1676c202 100644 --- a/usr.bin/sort/sort.c +++ b/usr.bin/sort/sort.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sort.c,v 1.15 2001/01/19 17:58:18 deraadt Exp $ */ +/* $OpenBSD: sort.c,v 1.16 2001/02/04 21:27:01 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -46,7 +46,7 @@ static char copyright[] = #if 0 static char sccsid[] = "@(#)sort.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: sort.c,v 1.15 2001/01/19 17:58:18 deraadt Exp $"; +static char rcsid[] = "$OpenBSD: sort.c,v 1.16 2001/02/04 21:27:01 ericj Exp $"; #endif #endif /* not lint */ @@ -62,6 +62,7 @@ static char rcsid[] = "$OpenBSD: sort.c,v 1.15 2001/01/19 17:58:18 deraadt Exp $ #include <sys/types.h> #include <sys/stat.h> +#include <locale.h> #include <paths.h> #include <signal.h> #include <stdlib.h> @@ -119,6 +120,8 @@ main(argc, argv) FILE *outfp = NULL; void *p; + setlocale(LC_ALL, ""); + if ((clist = calloc((ND+1)*2, sizeof(struct coldesc))) == NULL || (ftpos = fldtab = calloc(ND+2, sizeof(struct field))) == NULL) errx(2, "cannot allocate memory"); @@ -331,7 +334,7 @@ usage(msg) { extern char *__progname; - if (msg) + if (msg != NULL) warnx("%s", msg); (void)fprintf(stderr, "usage: %s [-T dir] [-o output] [-cmubdfinrH] " "[-t char] [-R char] [-k keydef] ... [files]\n", __progname); diff --git a/usr.bin/sort/sort.h b/usr.bin/sort/sort.h index 6146674bff0..ff450e4f136 100644 --- a/usr.bin/sort/sort.h +++ b/usr.bin/sort/sort.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sort.h,v 1.4 1999/05/24 17:57:19 millert Exp $ */ +/* $OpenBSD: sort.h,v 1.5 2001/02/04 21:27:01 ericj Exp $ */ /*- * Copyright (c) 1993 @@ -79,8 +79,8 @@ err(2, "fwrite"); \ } -/* length of record is currently limited to 2^16 - 1 */ -typedef u_short length_t; +/* length of record is currently limited to maximum string length (size_t) */ +typedef size_t length_t; #define SALIGN(n) ((n+(sizeof(length_t)-1)) & ~(sizeof(length_t)-1)) |