diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 1997-01-20 19:39:58 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 1997-01-20 19:39:58 +0000 |
commit | 3c0b175fc3d554c29f4f3b775ddcb1446094450d (patch) | |
tree | 320261622aef8879e9311e903428351e55581905 /usr.bin | |
parent | 18c3bcb7a6fea4c4a332bf3f4b9121038ceaa362 (diff) |
sort(1) from 4.4BSD-lite2 with minor tweaks and bug fixes by me.
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/sort/Makefile | 7 | ||||
-rw-r--r-- | usr.bin/sort/append.c | 194 | ||||
-rw-r--r-- | usr.bin/sort/extern.h | 69 | ||||
-rw-r--r-- | usr.bin/sort/fields.c | 325 | ||||
-rw-r--r-- | usr.bin/sort/files.c | 344 | ||||
-rw-r--r-- | usr.bin/sort/fsort.c | 292 | ||||
-rw-r--r-- | usr.bin/sort/fsort.h | 62 | ||||
-rw-r--r-- | usr.bin/sort/init.c | 332 | ||||
-rw-r--r-- | usr.bin/sort/msort.c | 310 | ||||
-rw-r--r-- | usr.bin/sort/pathnames.h | 41 | ||||
-rw-r--r-- | usr.bin/sort/sort.1 | 395 | ||||
-rw-r--r-- | usr.bin/sort/sort.c | 297 | ||||
-rw-r--r-- | usr.bin/sort/sort.h | 145 | ||||
-rw-r--r-- | usr.bin/sort/tmp.c | 85 |
14 files changed, 2898 insertions, 0 deletions
diff --git a/usr.bin/sort/Makefile b/usr.bin/sort/Makefile new file mode 100644 index 00000000000..aed00b47835 --- /dev/null +++ b/usr.bin/sort/Makefile @@ -0,0 +1,7 @@ +# $OpenBSD: Makefile,v 1.1 1997/01/20 19:39:46 millert Exp $ +# from: @(#)Makefile 8.1 (Berkeley) 6/6/93 + +PROG= sort +SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c + +.include <bsd.prog.mk> diff --git a/usr.bin/sort/append.c b/usr.bin/sort/append.c new file mode 100644 index 00000000000..867d412c6a7 --- /dev/null +++ b/usr.bin/sort/append.c @@ -0,0 +1,194 @@ +/* $OpenBSD: append.c,v 1.1 1997/01/20 19:39:47 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)append.c 8.1 (Berkeley) 6/6/93"; +#else +static char rcsid[] = "$OpenBSD: append.c,v 1.1 1997/01/20 19:39:47 millert Exp $"; +#endif +#endif /* not lint */ + +#include "sort.h" + +#include <stdlib.h> +#include <string.h> + +#define OUTPUT { \ + if ((n = cpos - ppos) > 1) { \ + for (; ppos < cpos; ++ppos) \ + *ppos -= odepth; \ + ppos -= n; \ + radixsort((const u_char **)ppos, n, wts1, REC_D); \ + for (; ppos < cpos; ppos++) { \ + prec = (RECHEADER *) (*ppos - sizeof(TRECHEADER));\ + put(prec, fp); \ + } \ + } else put(prec, fp); \ +} + +/* + * copy sorted lines to output; check for uniqueness + */ +void +append(keylist, nelem, depth, fp, put, ftbl) + u_char **keylist; + int nelem; + register int depth; + FILE *fp; + void (*put)(RECHEADER *, FILE *); + struct field *ftbl; +{ + register u_char *wts, *wts1; + register n, odepth; + register u_char **cpos, **ppos, **lastkey; + register u_char *cend, *pend, *start; + register struct recheader *crec, *prec; + + if (*keylist == '\0' && UNIQUE) + return; + wts1 = wts = ftbl[0].weights; + if ((!UNIQUE) && SINGL_FLD) { + if (ftbl[0].flags & F && ftbl[0].flags & R) + wts1 = Rascii; + else if 
(ftbl[0].flags & F) + wts1 = ascii; + odepth = depth; + } + lastkey = keylist + nelem; + depth += sizeof(TRECHEADER); + if (SINGL_FLD && (UNIQUE || wts1 != wts)) { + ppos = keylist; + prec = (RECHEADER *) (*ppos - depth); + if (UNIQUE) + put(prec, fp); + for (cpos = keylist+1; cpos < lastkey; cpos++) { + crec = (RECHEADER *) (*cpos - depth); + if (crec->length == prec->length) { + pend = (u_char *) &prec->offset + prec->length; + cend = (u_char *) &crec->offset + crec->length; + for (start = *cpos; cend >= start; cend--) { + if (wts[*cend] != wts[*pend]) + break; + pend--; + } + if (pend + 1 != *ppos) { + if (!UNIQUE) { + OUTPUT; + } else + put(crec, fp); + ppos = cpos; + prec = crec; + } + } else { + if (!UNIQUE) { + OUTPUT; + } else + put(crec, fp); + ppos = cpos; + prec = crec; + } + } + if (!UNIQUE) { OUTPUT; } + } else if (UNIQUE) { + ppos = keylist; + prec = (RECHEADER *) (*ppos - depth); + put(prec, fp); + for (cpos = keylist+1; cpos < lastkey; cpos++) { + crec = (RECHEADER *) (*cpos - depth); + if (crec->offset == prec->offset) { + pend = (u_char *) &prec->offset + prec->offset; + cend = (u_char *) &crec->offset + crec->offset; + for (start = *cpos; cend >= start; cend--) { + if (wts[*cend] != wts[*pend]) + break; + pend--; + } + if (pend + 1 != *ppos) { + ppos = cpos; + prec = crec; + put(prec, fp); + } + } else { + ppos = cpos; + prec = crec; + put(prec, fp); + } + } + } else for (cpos = keylist; cpos < lastkey; cpos++) { + crec = (RECHEADER *) (*cpos - depth); + put(crec, fp); + } +} + +/* + * output the already sorted eol bin. 
+ */ +void +rd_append(binno, infl0, nfiles, outfp, buffer, bufend) + u_char *buffer, *bufend; + int binno, nfiles; + union f_handle infl0; + FILE *outfp; +{ + struct recheader *rec; + rec = (RECHEADER *) buffer; + if (!getnext(binno, infl0, nfiles, (RECHEADER *) buffer, bufend, 0)) { + putline(rec, outfp); + while (getnext(binno, infl0, nfiles, (RECHEADER *) buffer, + bufend, 0) == 0) { + if (!UNIQUE) + putline(rec, outfp); + } + } +} + +/* + * append plain text--used after sorting the biggest bin. + */ +void +concat(a, b) + FILE *a, *b; +{ + int nread; + char buffer[4096]; + + rewind(b); + while ((nread = fread(buffer, 1, 4096, b)) > 0) + EWRITE(buffer, 1, nread, a); +} diff --git a/usr.bin/sort/extern.h b/usr.bin/sort/extern.h new file mode 100644 index 00000000000..475251cbda5 --- /dev/null +++ b/usr.bin/sort/extern.h @@ -0,0 +1,69 @@ +/* $OpenBSD: extern.h,v 1.1 1997/01/20 19:39:48 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.1 (Berkeley) 6/6/93 + */ + +void append __P((u_char **, int, int, FILE *, void (*)(), struct field *)); +void concat __P((FILE *, FILE *)); +length_t enterkey __P((struct recheader *, + DBT *, int, struct field *)); +void fixit __P((int *, char **)); +void fldreset __P((struct field *)); +FILE *ftmp __P((void)); +void fmerge __P((int, union f_handle, + int, int (*)(), FILE *, void (*)(), struct field *)); +void fsort __P((int, int, union f_handle, int, FILE *, struct field *)); +int geteasy __P((int, union f_handle, + int, struct recheader *, u_char *, struct field *)); +int getnext __P((int, union f_handle, + int, struct recheader *, u_char *, struct field *)); +int makekey __P((int, union f_handle, + int, struct recheader *, u_char *, struct field *)); +int makeline __P((int, union f_handle, + int, struct recheader *, u_char *, struct field *)); +void merge __P((int, int, int (*)(), FILE *, void (*)(), struct field *)); +void num_init __P((void)); +void onepass __P((u_char **, int, 
long, long *, u_char *, FILE *)); +int optval __P((int, int)); +void order __P((union f_handle, int (*)(), struct field *)); +void putline __P((struct recheader *, FILE *)); +void putrec __P((struct recheader *, FILE *)); +void rd_append __P((int, union f_handle, int, FILE *, u_char *, u_char *)); +int seq __P((FILE *, DBT *, DBT *)); +int setfield __P((char *, struct field *, int)); +void settables __P((int)); diff --git a/usr.bin/sort/fields.c b/usr.bin/sort/fields.c new file mode 100644 index 00000000000..39551254be6 --- /dev/null +++ b/usr.bin/sort/fields.c @@ -0,0 +1,325 @@ +/* $OpenBSD: fields.c,v 1.1 1997/01/20 19:39:49 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)fields.c 8.1 (Berkeley) 6/6/93"; +#else +static char rcsid[] = "$OpenBSD: fields.c,v 1.1 1997/01/20 19:39:49 millert Exp $"; +#endif +#endif /* not lint */ + +/* Subroutines to generate sort keys. */ + +#include "sort.h" + +#define blancmange(ptr) { \ + if (BLANK & d_mask[*(ptr)]) \ + while (BLANK & d_mask[*(++(ptr))]); \ +} + +#define NEXTCOL(pos) { \ + if (!SEP_FLAG) \ + while (BLANK & l_d_mask[*(++pos)]); \ + while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \ +} + +extern u_char *enterfield __P((u_char *, u_char *, struct field *, int)); + +extern u_char *number __P((u_char *, u_char *, u_char *, u_char *, int)); + +extern struct coldesc clist[(ND+1)*2]; +extern int ncols; + +#define DECIMAL '.' +#define OFFSET 128 + +u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */ +u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */ +u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */ +u_char fnum[NBINS], rnum[NBINS]; + +/* + * constructs sort key with leading recheader, followed by the key, + * followed by the original line. 
+ */
+length_t
+enterkey(keybuf, line, size, fieldtable)
+	struct recheader *keybuf;	/* pointer to start of key */
+	DBT *line;
+	int size;
+	struct field fieldtable[];
+{
+	/*
+	 * keybuf receives the key bytes followed by a copy of line;
+	 * size is the space available starting at keybuf.
+	 * fieldtable[0].flags is handed to enterfield() as the global
+	 * flags; fieldtable[1..] are the fields, ended by icol.num == 0.
+	 * Returns 0 on success and 1 when the key or the record does
+	 * not fit.
+	 */
+	int i;
+	register u_char *l_d_mask;
+	register u_char *lineend, *pos;
+	u_char *endkey, *keypos;
+	register struct coldesc *clpos;
+	register int col = 1;
+	struct field *ftpos;
+	l_d_mask = d_mask;		/* read by the NEXTCOL macro */
+	pos = (u_char *) line->data - 1;
+	lineend = (u_char *) line->data + line->size-1;
+				/* don't include rec_delimiter */
+	keypos = keybuf->data;
+
+	/* record where each column listed in clist starts and ends */
+	for (i = 0; i < ncols; i++) {
+		clpos = clist + i;
+		for (; (col < clpos->num) && (pos < lineend); col++)
+			{ NEXTCOL(pos); }
+		if (pos >= lineend)
+			break;
+		clpos->start = SEP_FLAG ? pos + 1 : pos;
+		NEXTCOL(pos);
+		clpos->end = pos;
+		col++;
+		if (pos >= lineend) {
+			clpos->end = lineend;
+			++i;
+			break;
+		}
+	}
+	/* columns that were not reached, and clist[ncols], are empty */
+	for (; i <= ncols; i++)
+		clist[i].start = clist[i].end = lineend;
+	/* pos began one byte before the line; move such a start inside it */
+	if (clist[0].start < (u_char *) line->data)
+		++clist[0].start;
+	/* the last line->size bytes are kept free for the copy of the line */
+	endkey = (u_char *) keybuf + size - line->size;
+	for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
+		if ((keypos = enterfield(keypos, endkey, ftpos,
+		    fieldtable->flags)) == NULL)
+			return (1);
+
+	if (UNIQUE)
+		*(keypos-1) = REC_D;	/* replaces the last key byte */
+	keybuf->offset = keypos - keybuf->data;
+	keybuf->length = keybuf->offset + line->size;
+	if (keybuf->length + sizeof(TRECHEADER) > size)
+		return (1);		/* line too long for buffer */
+	memcpy(keybuf->data + keybuf->offset, line->data, line->size);
+	return (0);
+}
+
+/*
+ * constructs a field (as defined by -k) within a key
+ *
+ * tablepos is where the field is written and endkey the limit of the
+ * key space; cur_fld describes the field and gflags carries the flags
+ * of fieldtable[0].  Returns the position following the field, or NULL
+ * when the write position reached endkey.  Fields flagged N are
+ * produced by number() and its result is returned unchanged.
+ */
+u_char *
+enterfield(tablepos, endkey, cur_fld, gflags)
+	struct field *cur_fld;
+	register u_char *tablepos, *endkey;
+	int gflags;
+{
+	register u_char *start, *end, *lineend, *mask, *lweight;
+	struct column icol, tcol;
+	register u_int flags;
+	u_int Rflag;
+	icol = cur_fld->icol;
+	tcol = cur_fld->tcol;
+	flags = cur_fld->flags;
+	start = icol.p->start;
+	lineend = clist[ncols].end;
+	if (flags & BI)
+		blancmange(start);
+	start += icol.indent;
+	start = min(start, lineend);
+	if (!tcol.num)
+		end = lineend;		/* no end column: field runs to lineend */
+	else {
+		if (tcol.indent) {
+			end = tcol.p->start;
+			if (flags & BT) blancmange(end);
+			end += tcol.indent;
+			end = min(end, lineend);
+		} else
+			end = tcol.p->end;
+	}
+	if (flags & N) {
+		/* reverse when exactly one of the global and field R flags is set */
+		Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
+		tablepos = number(tablepos, endkey, start, end, Rflag);
+		return (tablepos);
+	}
+	mask = cur_fld->mask;
+	lweight = cur_fld->weights;
+	for (; start < end; start++)
+		if (mask[*start]) {
+			if (*start <= 1) {
+				/* bytes 0 and 1 are written as two weights */
+				if (tablepos+2 >= endkey)
+					return (NULL);
+				*tablepos++ = lweight[1];
+				*tablepos++ = lweight[*start ? 2 : 1];
+			} else {
+				*tablepos++ = lweight[*start];
+				if (tablepos == endkey)
+					return (NULL);
+			}
+		}
+	*tablepos++ = lweight[0];	/* field terminator */
+	return (tablepos == endkey ? NULL : tablepos);
+}
+
+/* Uses the first bin to assign sign, expsign, 0, and the first
+ * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
+ * When sorting in forward order:
+ * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
+ * else use (0-99)->(2-102).
+ * If the exponent is >=61, use another byte for each additional 253
+ * in the exponent. Cutoff is at 567.
+ * To avoid confusing the exponent and the mantissa, use a field delimiter
+ * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
+ * only time a field delimiter can come in that position.
+ * Reverse order is done analogously.
+*/ + +u_char * +number(pos, bufend, line, lineend, Rflag) + register u_char *line, *pos, *bufend, *lineend; + int Rflag; +{ + register int or_sign, parity = 0; + register int expincr = 1, exponent = -1; + int bite, expsign = 1, sign = 1; + register u_char lastvalue, *nonzero, *tline, *C_TENS; + u_char *nweights; + + if (Rflag) + nweights = rnum; + else + nweights = fnum; + if (pos > bufend - 8) + return (NULL); + /* or_sign sets the sort direction: + * (-r: +/-)(sign: +/-)(expsign: +/-) */ + or_sign = sign ^ expsign ^ Rflag; + blancmange(line); + if (*line == '-') { /* set the sign */ + or_sign ^= 1; + sign = 0; + line++; + } + /* eat initial zeroes */ + for (; *line == '0' && line < lineend; line++); + /* calculate exponents < 0 */ + if (*line == DECIMAL) { + exponent = 1; + while (*++line == '0' && line < lineend) + exponent++; + expincr = 0; + expsign = 0; + } + /* next character better be a digit */ + if (*line < '1' || *line > '9' || line >= lineend) { + *pos++ = nweights[127]; + return (pos); + } + if (expincr) { + for (tline = line-1; *++tline >= '0' && + *tline <= '9' && tline < lineend;) + exponent++; + } + if (exponent > 567) { + *pos++ = nweights[sign ? (expsign ? 254 : 128) + : (expsign ? 0 : 126)]; + warnx("exponent out of bounds"); + return (pos); + } + bite = min(exponent, 61); + *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite) + : (expsign ? 64-bite : 64+bite)]; + if (bite >= 61) { + do { + exponent -= bite; + bite = min(exponent, 254); + *pos++ = nweights[or_sign ? 254-bite : bite]; + } while (bite == 254); + } + C_TENS = or_sign ? OFF_NTENS : OFF_TENS; + for (; line < lineend; line++) { + if (*line >= '0' && *line <= '9') { + if (parity) { + *pos++ = C_TENS[lastvalue] + (or_sign ? 
- *line + : *line); + if (pos == bufend) + return (NULL); + if (*line != '0' || lastvalue != '0') + nonzero = pos; + } else + lastvalue = *line; + parity ^= 1; + } else if(*line == DECIMAL) { + if(!expincr) /* a decimal already occurred once */ + break; + expincr = 0; + } else + break; + } + if (parity && lastvalue != '0') { + *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' : + OFF_TENS[lastvalue] + '0'; + } else + pos = nonzero; + if (pos > bufend-1) + return (NULL); + *pos++ = or_sign ? nweights[254] : nweights[0]; + return (pos); +} + +/* This forces a gap around the record delimiter + * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255)); + * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0)) +*/ +void +num_init() +{ + int i; + TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0'; + NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0'; + OFF_TENS = TENS - '0'; + OFF_NTENS = NEGTENS - '0'; + for (i = 1; i < 10; i++) { + TENS[i] = TENS[i-1] + 10; + NEGTENS[i] = NEGTENS[i-1] - 10; + } + for (i = 0; i < REC_D; i++) { + fnum[i] = i; + rnum[255-i] = i; + } + for (i = REC_D; i <255; i++) { + fnum[i] = i+1; + rnum[255-i] = i-1; + } +} diff --git a/usr.bin/sort/files.c b/usr.bin/sort/files.c new file mode 100644 index 00000000000..f345877e48e --- /dev/null +++ b/usr.bin/sort/files.c @@ -0,0 +1,344 @@ +/* $OpenBSD: files.c,v 1.1 1997/01/20 19:39:50 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93"; +#else +static char rcsid[] = "$OpenBSD: files.c,v 1.1 1997/01/20 19:39:50 millert Exp $"; +#endif +#endif /* not lint */ + +#include "sort.h" +#include "fsort.h" + +#include <string.h> + +/* + * this is the subroutine for file management for fsort(). + * It keeps the buffers for all temporary files. 
+ */
+int
+getnext(binno, infl0, nfiles, pos, end, dummy)
+	int binno, nfiles;	/* bin requested; number of input files */
+	union f_handle infl0;	/* infl0.top is the fstack index of the first input file */
+	register struct recheader *pos;	/* where the record is stored */
+	register u_char *end;	/* end of the space available at pos */
+	struct field *dummy;	/* not referenced in this function */
+{
+	register int i;
+	register u_char *hp;
+	static long nleft = 0;	/* count read from the file, reduced by every record returned */
+	static int cnt = 0, flag = -1;	/* cnt: index of the file being read; flag is only assigned here */
+	static u_char maxb = 0;	/* copy of fstack[infl0.top].maxb */
+	static FILE *fp;	/* stream of the file being read */
+
+	if (nleft == 0) {	/* nothing pending: reset, or fetch the next count */
+		if (binno < 0) /* reset files: rewind each one and clear its max_o */ {
+			for (i = 0; i < nfiles; i++) {
+				rewind(fstack[infl0.top + i].fp);
+				fstack[infl0.top + i].max_o = 0;
+			}
+			flag = -1;
+			nleft = cnt = 0;
+			return(-1);
+		}
+		maxb = fstack[infl0.top].maxb;
+		for (; nleft == 0; cnt++) {	/* advance through the files until a non-zero count is read */
+			if (cnt >= nfiles) {
+				cnt = 0;
+				return (EOF);	/* every file has been visited for this bin */
+			}
+			fp = fstack[infl0.top + cnt].fp;
+			hp = (u_char *) &nleft;
+			for (i = sizeof(TRECHEADER); i; --i)	/* NOTE(review): fills nleft with sizeof(TRECHEADER) bytes while the freads below use sizeof(nleft) -- confirm the sizes agree */
+				*hp++ = getc(fp);	/* NOTE(review): an EOF result from getc is stored unchecked */
+			if (binno < maxb)
+				fstack[infl0.top+cnt].max_o
+					+= sizeof(nleft) + nleft;	/* add this count and its data to the file's saved offset */
+			else if (binno == maxb) {
+				if (binno != fstack[infl0.top].lastb) {
+					fseek(fp, fstack[infl0.top+
+					    cnt].max_o, SEEK_SET);	/* go to the saved offset and read the count stored there */
+					fread(&nleft, sizeof(nleft), 1, fp);
+				}
+				if (nleft == 0)
+					fclose(fp);
+			} else if (binno == maxb + 1) {		/* skip a bin */
+				fseek(fp, nleft, SEEK_CUR);
+				fread(&nleft, sizeof(nleft), 1, fp);
+				flag = cnt;
+			}
+		}
+	}
+	if ((u_char *) pos > end - sizeof(TRECHEADER))
+		return (BUFFEND);	/* no room left for a record header */
+	hp = (u_char *) pos;
+	for (i = sizeof(TRECHEADER); i ; --i)	/* read the record header one byte at a time */
+		*hp++ = (u_char) getc(fp);
+	if (end - pos->data < pos->length) {
+		for (i = sizeof(TRECHEADER); i ; i--)	/* NOTE(review): pushes back sizeof(TRECHEADER) bytes; ISO C guarantees only one byte of pushback */
+			ungetc(*--hp, fp);
+		return (BUFFEND);	/* record body does not fit before end */
+	}
+	fread(pos->data, pos->length, 1, fp);	/* return value is not checked */
+	nleft -= pos->length + sizeof(TRECHEADER);
+	if (nleft == 0 && binno == fstack[infl0.top].maxb)
+		fclose(fp);	/* count used up while reading bin maxb */
+	return (0);
+}
+
+/*
+ * this is called when there is no special key. It's only called
+ * in the first fsort pass.
+ */
+int
+makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
+	int flno, nfiles;
+	union f_handle filelist;
+	struct recheader *buffer;
+	u_char *bufend;
+	struct field *dummy2;
+{
+	static char *opos;
+	register char *end, *pos;
+	static int fileno = 0, overflow = 0;
+	static FILE *fp = 0;
+	register int c;
+
+	/*
+	 * Copies one record, up to and including REC_D, into buffer->data.
+	 * Input is fstack[flno].fp when flno >= 0; otherwise the files in
+	 * filelist.names[0..nfiles-1] are opened one after another.
+	 * Returns 0 with buffer->offset and buffer->length set, EOF when
+	 * no input is left, or BUFFEND when bufend was reached before the
+	 * record ended.  dummy2 is not used.
+	 */
+	pos = (char *) buffer->data;
+	end = min((char *) bufend, pos + MAXLLEN);
+	if (overflow) {
+		/* carry over the bytes left at opos by the call that returned BUFFEND */
+		memmove(pos, opos, bufend - (u_char *) opos);
+		pos += ((char *) bufend - opos);
+		overflow = 0;
+	}
+	for (;;) {
+		if (flno >= 0) {
+			if (!(fp = fstack[flno].fp))
+				return (EOF);
+		} else if (!fp) {
+			if (fileno >= nfiles) return(EOF);
+			if (!(fp = fopen(filelist.names[fileno], "r")))
+				err(2, "%s", filelist.names[fileno]);
+			++fileno;
+		}
+		while ((pos < end) && ((c = getc(fp)) != EOF)) {
+			if ((*pos++ = c) == REC_D) {
+				buffer->offset = 0;
+				buffer->length = pos - (char *) buffer->data;
+				return (0);
+			}
+		}
+		if (pos >= end && end == (char *) bufend) {
+			/* out of buffer: remember where the partial record starts */
+			if ((char *) buffer->data < end) {
+				overflow = 1;
+				opos = (char *) buffer->data;
+			}
+			return (BUFFEND);
+		} else if (c == EOF) {
+			if (buffer->data != (u_char *) pos) {
+				/* input ended inside a record: terminate it here */
+				warnx("last character not record delimiter");
+				*pos++ = REC_D;
+				buffer->offset = 0;
+				buffer->length = pos - (char *) buffer->data;
+				return(0);
+			}
+			FCLOSE(fp);
+			fp = 0;
+			if(flno >= 0) fstack[flno].fp = 0;
+		} else {
+			buffer->data[100] = '\000';
+			warnx("line too long:ignoring %s...", buffer->data);
+			/*
+			 * pos has reached end, so the read loop above cannot
+			 * run again; without the steps below this branch
+			 * would repeat forever.  Discard the rest of the
+			 * record and start collecting the next one.
+			 */
+			while ((c = getc(fp)) != EOF && c != REC_D)
+				continue;
+			pos = (char *) buffer->data;
+		}
+	}
+}
+
+/*
+ * This generates keys.
It's only called in the first fsort pass + */ +int +makekey(flno, filelist, nfiles, buffer, bufend, ftbl) + int flno, nfiles; + union f_handle filelist; + struct recheader *buffer; + u_char *bufend; + struct field *ftbl; +{ + static int (*get)(); + static int fileno = 0; + static FILE *dbdesc = 0; + static DBT dbkey[1], line[1]; + static int overflow = 0; + int c; + if (overflow) { + overflow = 0; + enterkey(buffer, line, bufend - (u_char *) buffer, ftbl); + return (0); + } + for (;;) { + if (flno >= 0) { + get = seq; + if (!(dbdesc = fstack[flno].fp)) + return(EOF); + } else if (!dbdesc) { + if (fileno >= nfiles) + return (EOF); + dbdesc = fopen(filelist.names[fileno], "r"); + if (!dbdesc) + err(2, "%s", filelist.names[fileno]); + ++fileno; + get = seq; + } + if (!(c = get(dbdesc, line, dbkey))) { + if ((signed)line->size > bufend - buffer->data) + overflow = 1; + else + overflow = enterkey(buffer, line, + bufend - (u_char *) buffer, ftbl); + if (overflow) + return (BUFFEND); + else + return (0); + } + if (c == EOF) { + FCLOSE(dbdesc); + dbdesc = 0; + if (flno >= 0) fstack[flno].fp = 0; + } else { + + ((char *) line->data)[60] = '\000'; + warnx("line too long: ignoring %.100s...", + (char *)line->data); + } + + } +} + +/* + * get a key/line pair from fp + */ +int +seq(fp, line, key) + FILE *fp; + DBT *key, *line; +{ + static char *buf, flag = 1; + register char *end, *pos; + register int c; + if (flag) { + flag = 0; + buf = (char *) linebuf; + end = buf + MAXLLEN; + line->data = buf; + } + pos = buf; + while ((c = getc(fp)) != EOF) { + if ((*pos++ = c) == REC_D) { + line->size = pos - buf; + return (0); + } + if (pos == end) { + line->size = MAXLLEN; + *--pos = REC_D; + while ((c = getc(fp)) != EOF) { + if (c == REC_D) + return (BUFFEND); + } + } + } + if (pos != buf) { + warnx("last character not record delimiter"); + *pos++ = REC_D; + line->size = pos - buf; + return (0); + } else + return (EOF); +} + +/* + * write a key/line pair to a temporary file + */ +void 
+putrec(rec, fp) + register struct recheader *rec; + register FILE *fp; +{ + EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); +} + +/* + * write a line to output + */ +void +putline(rec, fp) + register struct recheader *rec; + register FILE *fp; +{ + EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); +} + +/* + * get a record from a temporary file. (Used by merge sort.) + */ +int +geteasy(flno, filelist, nfiles, rec, end, dummy2) + int flno, nfiles; + union f_handle filelist; + register struct recheader *rec; + register u_char *end; + struct field *dummy2; +{ + int i; + FILE *fp; + fp = fstack[flno].fp; + if ((u_char *) rec > end - sizeof(TRECHEADER)) + return (BUFFEND); + if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { + fclose(fp); + fstack[flno].fp = 0; + return (EOF); + } + if (end - rec->data < rec->length) { + for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) + ungetc(*((char *) rec + i), fp); + return (BUFFEND); + } + fread(rec->data, rec->length, 1, fp); + return (0); +} diff --git a/usr.bin/sort/fsort.c b/usr.bin/sort/fsort.c new file mode 100644 index 00000000000..8365edde872 --- /dev/null +++ b/usr.bin/sort/fsort.c @@ -0,0 +1,292 @@ +/* $OpenBSD: fsort.c,v 1.1 1997/01/20 19:39:51 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)fsort.c 8.1 (Berkeley) 6/6/93"; +#else +static char rcsid[] = "$OpenBSD: fsort.c,v 1.1 1997/01/20 19:39:51 millert Exp $"; +#endif +#endif /* not lint */ + +/* + * Read in the next bin. If it fits in one segment sort it; + * otherwise refine it by segment deeper by one character, + * and try again on smaller bins. Sort the final bin at this level + * of recursion to keep the head of fstack at 0. + * After PANIC passes, abort to merge sort. 
+*/ +#include "sort.h" +#include "fsort.h" + +#include <stdlib.h> +#include <string.h> + +u_char **keylist = 0, *buffer = 0, *linebuf = 0; +struct tempfile fstack[MAXFCT]; +extern char *toutpath; +#define FSORTMAX 4 +int PANIC = FSORTMAX; + +void +fsort(binno, depth, infiles, nfiles, outfp, ftbl) + register int binno, depth, nfiles; + register union f_handle infiles; + FILE *outfp; + register struct field *ftbl; +{ + register u_char *bufend, **keypos, *tmpbuf; + u_char *weights; + int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0; + register int c, nelem; + long sizes [NBINS+1]; + union f_handle tfiles, mstart = {MAXFCT-16}; + register int (*get)(int, union f_handle, int, RECHEADER *, + u_char *, struct field *); + register struct recheader *crec; + struct field tfield[2]; + FILE *prevfp, *tailfp[FSORTMAX+1]; + + memset(tailfp, 0, sizeof(tailfp)); + prevfp = outfp; + memset(tfield, 0, sizeof(tfield)); + if (ftbl[0].flags & R) + tfield[0].weights = Rascii; + else + tfield[0].weights = ascii; + tfield[0].icol.num = 1; + weights = ftbl[0].weights; + if (!buffer) { + buffer = malloc(BUFSIZE); + keylist = malloc(MAXNUM * sizeof(u_char *)); + if (!SINGL_FLD) + linebuf = malloc(MAXLLEN); + } + bufend = buffer + BUFSIZE; + if (binno >= 0) { + tfiles.top = infiles.top + nfiles; + get = getnext; + } else { + tfiles.top = 0; + if (SINGL_FLD) + get = makeline; + else + get = makekey; + } + for (;;) { + memset(sizes, 0, sizeof(sizes)); + c = ntfiles = 0; + if (binno == weights[REC_D] && + !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */ + rd_append(weights[REC_D], + infiles, nfiles, prevfp, buffer, bufend); + break; + } else if (binno == weights[REC_D]) { + depth = 0; /* start over on flat weights */ + ftbl = tfield; + weights = ftbl[0].weights; + } + while (c != EOF) { + keypos = keylist; + nelem = 0; + crec = (RECHEADER *) buffer; + while((c = get(binno, infiles, nfiles, crec, bufend, + ftbl)) == 0) { + *keypos++ = crec->data + depth; + if (++nelem == MAXNUM) { + c = 
BUFFEND; + break; + } + crec =(RECHEADER *) ((char *) crec + + SALIGN(crec->length) + sizeof(TRECHEADER)); + } + if (c == BUFFEND || ntfiles || mfct) { /* push */ + if (panic >= PANIC) { + fstack[MAXFCT-16+mfct].fp = ftmp(); + if (radixsort((const u_char **)keylist, + nelem, weights, REC_D)) + err(2, NULL); + append(keylist, nelem, depth, fstack[ + MAXFCT-16+mfct].fp, putrec, ftbl); + mfct++; + /* reduce number of open files */ + if (mfct == 16 ||(c == EOF && ntfiles)) { + tmpbuf = malloc(bufend - + crec->data); + memmove(tmpbuf, crec->data, + bufend - crec->data); + fstack[tfiles.top + ntfiles].fp + = ftmp(); + fmerge(0, mstart, mfct, geteasy, + fstack[tfiles.top+ntfiles].fp, + putrec, ftbl); + ++ntfiles; + mfct = 0; + memmove(crec->data, tmpbuf, + bufend - crec->data); + free(tmpbuf); + } + } else { + fstack[tfiles.top + ntfiles].fp= ftmp(); + onepass(keylist, depth, nelem, sizes, + weights, fstack[tfiles.top+ntfiles].fp); + ++ntfiles; + } + } + } + get = getnext; + if (!ntfiles && !mfct) { /* everything in memory--pop */ + if (nelem > 1 && radixsort((const u_char **)keylist, + nelem, weights, REC_D)) + err(2, NULL); + append(keylist, nelem, depth, outfp, putline, ftbl); + break; /* pop */ + } + if (panic >= PANIC) { + if (!ntfiles) + fmerge(0, mstart, mfct, geteasy, + outfp, putline, ftbl); + else + fmerge(0, tfiles, ntfiles, geteasy, + outfp, putline, ftbl); + break; + + } + total = maxb = lastb = 0; /* find if one bin dominates */ + for (i = 0; i < NBINS; i++) + if (sizes[i]) { + if (sizes[i] > sizes[maxb]) + maxb = i; + lastb = i; + total += sizes[i]; + } + if (sizes[maxb] < max((total / 2) , BUFSIZE)) + maxb = lastb; /* otherwise pop after last bin */ + fstack[tfiles.top].lastb = lastb; + fstack[tfiles.top].maxb = maxb; + + /* start refining next level. 
*/ + get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */ + for (i = 0; i < maxb; i++) { + if (!sizes[i]) /* bin empty; step ahead file offset */ + get(i, tfiles, ntfiles, crec, bufend, 0); + else + fsort(i, depth+1, tfiles, ntfiles, outfp, ftbl); + } + if (lastb != maxb) { + if (prevfp != outfp) + tailfp[panic] = prevfp; + prevfp = ftmp(); + for (i = maxb+1; i <= lastb; i++) + if (!sizes[i]) + get(i, tfiles, ntfiles, crec, bufend,0); + else + fsort(i, depth+1, tfiles, ntfiles, + prevfp, ftbl); + } + + /* sort biggest (or last) bin at this level */ + depth++; + panic++; + binno = maxb; + infiles.top = tfiles.top; /* getnext will free tfiles, */ + nfiles = ntfiles; /* so overwrite them */ + } + if (prevfp != outfp) { + concat(outfp, prevfp); + fclose(prevfp); + } + for (i = panic; i >= 0; --i) + if (tailfp[i]) { + concat(outfp, tailfp[i]); + fclose(tailfp[i]); + } +} + +/* + This is one pass of radix exchange, dumping the bins to disk. + */ +#define swap(a, b, t) t = a, a = b, b = t +void +onepass(a, depth, n, sizes, tr, fp) + u_char **a; + int depth; + long n, sizes[]; + u_char *tr; + FILE *fp; +{ + long tsizes[NBINS+1]; + u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp; + static histo[256]; + int *hp; + register int c; + u_char **an, *t, **aj; + register u_char **ak, *r; + + memset(tsizes, 0, sizeof(tsizes)); + depth += sizeof(TRECHEADER); + an = a + n; + for (ak = a; ak < an; ak++) { + histo[c = tr[**ak]]++; + tsizes[c] += ((RECHEADER *) (*ak -= depth))->length; + } + + bin[0] = a; + bpmax = bin + 256; + tp = top, hp = histo; + for (bp = bin; bp < bpmax; bp++) { + *tp++ = *(bp+1) = *bp + (c = *hp); + *hp++ = 0; + if (c <= 1) + continue; + } + for(aj = a; aj < an; *aj = r, aj = bin[c+1]) + for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;) + swap(*ak, r, t); + + for (ak = a, c = 0; c < 256; c++) { + an = bin[c+1]; + n = an - ak; + tsizes[c] += n * sizeof(TRECHEADER); + /* tell getnext how many elements in this bin, this segment. 
*/ + EWRITE(tsizes+c, sizeof(long), 1, fp); + sizes[c] += tsizes[c]; + for (; ak < an; ++ak) + putrec((RECHEADER *) *ak, fp); + } +} diff --git a/usr.bin/sort/fsort.h b/usr.bin/sort/fsort.h new file mode 100644 index 00000000000..5c7ba7ebea8 --- /dev/null +++ b/usr.bin/sort/fsort.h @@ -0,0 +1,62 @@ +/* $OpenBSD: fsort.h,v 1.1 1997/01/20 19:39:51 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fsort.h 8.1 (Berkeley) 6/6/93 + */ + +#define POW 20 /* exponent for buffer size */ +#define BUFSIZE (1 << POW) +#define MAXNUM (BUFSIZE/10) /* lowish guess at average record size */ +#define BUFFEND (EOF-2) +#define MAXFCT 1000 +#define MAXLLEN ((1 << min(POW-4, 16)) - 14) + +extern u_char **keylist, **l2buf, *buffer, *linebuf; + +/* temp files in the stack have a file descriptor, a largest bin (maxb) + * which becomes the last non-empty bin (lastb) when the actual largest + * bin is smaller than max(half the total file, BUFSIZE) + * Max_o is the offset of maxb so it can be sought after the other bins + * are sorted. +*/ +struct tempfile { + FILE *fp; + u_char maxb; + u_char lastb; + long max_o; +}; +extern struct tempfile fstack[MAXFCT]; diff --git a/usr.bin/sort/init.c b/usr.bin/sort/init.c new file mode 100644 index 00000000000..06724e4f252 --- /dev/null +++ b/usr.bin/sort/init.c @@ -0,0 +1,332 @@ +/* OpenBSD$ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)init.c 8.1 (Berkeley) 6/6/93"; +#else +static char rcsid[] = "$OpenBSD: init.c,v 1.1 1997/01/20 19:39:52 millert Exp $"; +#endif +#endif /* not lint */ + +#include "sort.h" + +#include <ctype.h> +#include <string.h> + +extern struct coldesc clist[(ND+1)*2]; +extern int ncols; +u_char gweights[NBINS]; + +/* + * clist (list of columns which correspond to one or more icol or tcol) + * is in increasing order of columns. + * Fields are kept in increasing order of fields. 
+ */ + +/* + * keep clist in order--inserts a column in a sorted array + */ +static void +insertcol(field) + struct field *field; +{ + int i; + for (i = 0; i < ncols; i++) + if (field->icol.num <= clist[i].num) + break; + if (field->icol.num != clist[i].num) { + memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i)); + clist[i].num = field->icol.num; + ncols++; + } + if (field->tcol.num && field->tcol.num != field->icol.num) { + for (i = 0; i < ncols; i++) + if (field->tcol.num <= clist[i].num) + break; + if (field->tcol.num != clist[i].num) { + memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i)); + clist[i].num = field->tcol.num; + ncols++; + } + } +} + +/* + * matches fields with the appropriate columns--n^2 but who cares? + */ +void +fldreset(fldtab) + struct field *fldtab; +{ + int i; + fldtab[0].tcol.p = clist+ncols-1; + for (++fldtab; fldtab->icol.num; ++fldtab) { + for (i = 0; fldtab->icol.num != clist[i].num; i++); + fldtab->icol.p = clist + i; + if (!fldtab->tcol.num) + continue; + for (i = 0; fldtab->tcol.num != clist[i].num; i++); + fldtab->tcol.p = clist + i; + } +} + +/* + * interprets a column in a -k field + */ +char * +setcolumn(pos, cur_fld, gflag) + char *pos; + struct field *cur_fld; + int gflag; +{ + struct column *col; + int tmp; + col = cur_fld->icol.num ? 
(&(*cur_fld).tcol) : (&(*cur_fld).icol); + pos += sscanf(pos, "%d", &(col->num)); + while (isdigit(*pos)) + pos++; + if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol))) + errx(2, "field numbers must be positive"); + if (*pos == '.') { + if (!col->num) + errx(2, "cannot indent end of line"); + pos += sscanf(++pos, "%d", &(col->indent)); + while (isdigit(*pos)) + pos++; + if (&cur_fld->icol == col) + col->indent--; + if (col->indent < 0) + errx(2, "illegal offset"); + } + if (optval(*pos, cur_fld->tcol.num)) + while ((tmp = optval(*pos, cur_fld->tcol.num))) { + cur_fld->flags |= tmp; + pos++; + } + if (cur_fld->icol.num == 0) + cur_fld->icol.num = 1; + return (pos); +} + +int +setfield(pos, cur_fld, gflag) + char *pos; + struct field *cur_fld; + int gflag; +{ + static int nfields = 0; + int tmp; + char *setcolumn(); + if (++nfields == ND) + errx(2, "too many sort keys. (Limit is %d)", ND-1); + cur_fld->weights = ascii; + cur_fld->mask = alltable; + pos = setcolumn(pos, cur_fld, gflag); + if (*pos == '\0') /* key extends to EOL. */ + cur_fld->tcol.num = 0; + else { + if (*pos != ',') + errx(2, "illegal field descriptor"); + setcolumn((++pos), cur_fld, gflag); + } + if (!cur_fld->flags) + cur_fld->flags = gflag; + tmp = cur_fld->flags; + + /* + * Assign appropriate mask table and weight table. + * If the global weights are reversed, the local field + * must be "re-reversed". 
+ */ + if (((tmp & R) ^ (gflag & R)) && tmp & F) + cur_fld->weights = RFtable; + else if (tmp & F) + cur_fld->weights = Ftable; + else if ((tmp & R) ^ (gflag & R)) + cur_fld->weights = Rascii; + if (tmp & I) + cur_fld->mask = itable; + else if (tmp & D) + cur_fld->mask = dtable; + cur_fld->flags |= (gflag & (BI | BT)); + if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */ + cur_fld->flags &= (D|F|I|N|R|BI); + if (cur_fld->tcol.num && !(!(cur_fld->flags & BI) + && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num + && cur_fld->tcol.indent < cur_fld->icol.indent)) + errx(2, "fields out of order"); + insertcol(cur_fld); + return (cur_fld->tcol.num); +} + +int +optval(desc, tcolflag) + int desc, tcolflag; +{ + switch(desc) { + case 'b': + if (!tcolflag) + return(BI); + else + return(BT); + case 'd': return(D); + case 'f': return(F); + case 'i': return(I); + case 'n': return(N); + case 'r': return(R); + default: return(0); + } +} + +void +fixit(argc, argv) + int *argc; + char **argv; +{ + int i, j, v, w, x; + static char vbuf[ND*20], *vpos, *tpos; + vpos = vbuf; + + for (i = 1; i < *argc; i++) { + if (argv[i][0] == '+') { + tpos = argv[i]+1; + argv[i] = vpos; + vpos += sprintf(vpos, "-k"); + tpos += sscanf(tpos, "%d", &v); + while (isdigit(*tpos)) + tpos++; + vpos += sprintf(vpos, "%d", v+1); + if (*tpos == '.') { + tpos += sscanf(++tpos, "%d", &x); + vpos += sprintf(vpos, ".%d", x+1); + } + while (*tpos) + *vpos++ = *tpos++; + vpos += sprintf(vpos, ","); + if (argv[i+1] && + argv[i+1][0] == '-' && isdigit(argv[i+1][1])) { + tpos = argv[i+1] + 1; + tpos += sscanf(tpos, "%d", &w); + while (isdigit(*tpos)) + tpos++; + x = 0; + if (*tpos == '.') { + tpos += sscanf(++tpos, "%d", &x); + while (isdigit(*tpos)) + tpos++; + } + if (x) { + vpos += sprintf(vpos, "%d", w+1); + vpos += sprintf(vpos, ".%d", x); + } else + vpos += sprintf(vpos, "%d", w); + while (*tpos) + *vpos++ = *tpos++; + for (j= i+1; j < *argc; j++) + argv[j] = argv[j+1]; + *argc -= 
1; + } + } + } +} + +/* + * ascii, Rascii, Ftable, and RFtable map + * REC_D -> REC_D; {not REC_D} -> {not REC_D}. + * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}. + * Note: when sorting in forward order, to encode character zero in a key, + * use \001\001; character 1 becomes \001\002. In this case, character 0 + * is reserved for the field delimiter. Analagously for -r (fld_d = 255). + * Note: this is only good for ASCII sorting. For different LC 's, + * all bets are off. See also num_init in number.c + */ +void +settables(gflags) + int gflags; +{ + u_char *wts; + int i, incr; + for (i=0; i < 256; i++) { + ascii[i] = i; + if (i > REC_D && i < 255 - REC_D+1) + Rascii[i] = 255 - i + 1; + else + Rascii[i] = 255 - i; + if (islower(i)) { + Ftable[i] = Ftable[i- ('a' -'A')]; + RFtable[i] = RFtable[i - ('a' - 'A')]; + } else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) { + Ftable[i] = i + 1; + RFtable[i] = Rascii[i] - 1; + } else { + Ftable[i] = i; + RFtable[i] = Rascii[i]; + } + alltable[i] = 1; + if (i == '\n' || isprint(i)) + itable[i] = 1; + else itable[i] = 0; + if (i == '\n' || i == '\t' || i == ' ' || isalnum(i)) + dtable[i] = 1; + else dtable[i] = 0; + } + Rascii[REC_D] = RFtable[REC_D] = REC_D; + if (REC_D >= 'A' && REC_D < 'Z') + ++Ftable[REC_D + ('a' - 'A')]; + if (gflags & R && (!(gflags & F) || !SINGL_FLD)) + wts = Rascii; + else if (!(gflags & F) || !SINGL_FLD) + wts = ascii; + else if (gflags & R) + wts = RFtable; + else + wts = Ftable; + memmove(gweights, wts, sizeof(gweights)); + incr = (gflags & R) ? -1 : 1; + for (i = 0; i < REC_D; i++) + gweights[i] += incr; + gweights[REC_D] = ((gflags & R) ? 
255 : 0); + if (SINGL_FLD && gflags & F) { + for (i = 0; i < REC_D; i++) { + ascii[i] += incr; + Rascii[i] += incr; + } + ascii[REC_D] = Rascii[REC_D] = gweights[REC_D]; + } +} diff --git a/usr.bin/sort/msort.c b/usr.bin/sort/msort.c new file mode 100644 index 00000000000..09081480e67 --- /dev/null +++ b/usr.bin/sort/msort.c @@ -0,0 +1,310 @@ +/* $OpenBSD: msort.c,v 1.1 1997/01/20 19:39:53 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)msort.c 8.1 (Berkeley) 6/6/93"; +#else +static char rcsid[] = "$OpenBSD: msort.c,v 1.1 1997/01/20 19:39:53 millert Exp $"; +#endif +#endif /* not lint */ + +#include "sort.h" +#include "fsort.h" + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* Subroutines using comparisons: merge sort and check order */ +#define DELETE (1) +#define LALIGN(n) ((n+3) & ~3) + +typedef struct mfile { + u_char *end; + short flno; + struct recheader rec[1]; +} MFILE; +typedef struct tmfile { + u_char *end; + short flno; + struct trecheader rec[1]; +} TMFILE; +u_char *wts, *wts1 = 0; +struct mfile *cfilebuf; + +static int cmp __P((struct recheader *, struct recheader *)); +static int insert __P((struct mfile **, struct mfile **, int, int)); + +void +fmerge(binno, files, nfiles, get, outfp, fput, ftbl) + union f_handle files; + int binno, nfiles; + int (*get)(); + FILE *outfp; + void (*fput)(); + struct field *ftbl; +{ + FILE *tout; + int i, j, last; + void (*put)(struct recheader *, FILE *); + extern int geteasy(); + struct tempfile *l_fstack; + + wts = ftbl->weights; + if (!UNIQUE && SINGL_FLD && ftbl->flags & F) + wts1 = (ftbl->flags & R) ? Rascii : ascii; + if (!cfilebuf) + cfilebuf = malloc(MAXLLEN + sizeof(TMFILE)); + + i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE)); + if (!buffer || i > BUFSIZE) { + buffer = buffer ? 
realloc(buffer, i) : malloc(i); + if (!buffer) + err(2, NULL); + if (!SINGL_FLD) + linebuf = malloc(MAXLLEN); + } + + if (binno >= 0) + l_fstack = fstack + files.top; + else + l_fstack = fstack; + while (nfiles) { + put = putrec; + for (j = 0; j < nfiles; j += 16) { + if (nfiles <= 16) { + tout = outfp; + put = fput; + } + else + tout = ftmp(); + last = min(16, nfiles - j); + if (binno < 0) { + for (i = 0; i < last; i++) + if (!(l_fstack[i+MAXFCT-1-16].fp = + fopen(files.names[j + i], "r"))) + err(2, "%s", files.names[j+i]); + merge(MAXFCT-1-16, last, get, tout, put, ftbl); + } + else { + for (i = 0; i< last; i++) + rewind(l_fstack[i+j].fp); + merge(files.top+j, last, get, tout, put, ftbl); + } + if (nfiles > 16) l_fstack[j/16].fp = tout; + } + nfiles = (nfiles + 15) / 16; + if (nfiles == 1) + nfiles = 0; + if (binno < 0) { + binno = 0; + get = geteasy; + files.top = 0; + } + } +} + +void +merge(infl0, nfiles, get, outfp, put, ftbl) + int infl0, nfiles; + int (*get)(); + void (*put)(struct recheader *, FILE *); + FILE *outfp; + struct field *ftbl; +{ + int c, i, j; + union f_handle dummy = {0}; + struct mfile *flist[16], *cfile; + for (i = j = 0; i < nfiles; i++) { + cfile = (MFILE *) (buffer + + i * LALIGN(MAXLLEN + sizeof(TMFILE))); + cfile->flno = j + infl0; + cfile->end = cfile->rec->data + MAXLLEN; + for (c = 1; c == 1;) { + if (EOF == (c = get(j+infl0, dummy, nfiles, + cfile->rec, cfile->end, ftbl))) { + --i; + --nfiles; + break; + } + if (i) + c = insert(flist, &cfile, i, !DELETE); + else + flist[0] = cfile; + } + j++; + } + cfile = cfilebuf; + cfile->flno = flist[0]->flno; + cfile->end = cfile->rec->data + MAXLLEN; + while (nfiles) { + for (c = 1; c == 1;) { + if (EOF == (c = get(cfile->flno, dummy, nfiles, + cfile->rec, cfile->end, ftbl))) { + put(flist[0]->rec, outfp); + memmove(flist, flist + 1, + sizeof(MFILE *) * (--nfiles)); + cfile->flno = flist[0]->flno; + break; + } + if (!(c = insert(flist, &cfile, nfiles, DELETE))) + put(cfile->rec, outfp); + } + 
} +} + +/* + * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec; + * otherwise just inserts *rec in flist. +*/ +static int +insert(flist, rec, ttop, delete) + struct mfile **flist, **rec; + int delete, ttop; /* delete = 0 or 1 */ +{ + register struct mfile *tmprec; + register int top, mid, bot = 0, cmpv = 1; + tmprec = *rec; + top = ttop; + for (mid = top/2; bot +1 != top; mid = (bot+top)/2) { + cmpv = cmp(tmprec->rec, flist[mid]->rec); + if (cmpv < 0) + top = mid; + else if (cmpv > 0) + bot = mid; + else { + if (!UNIQUE) + bot = mid - 1; + break; + } + } + if (delete) { + if (UNIQUE) { + if (!bot && cmpv) + cmpv = cmp(tmprec->rec, flist[0]->rec); + if (!cmpv) + return(1); + } + tmprec = flist[0]; + if (bot) + memmove(flist, flist+1, bot * sizeof(MFILE **)); + flist[bot] = *rec; + *rec = tmprec; + (*rec)->flno = (*flist)->flno; + return (0); + } + else { + if (!bot && !(UNIQUE && !cmpv)) { + cmpv = cmp(tmprec->rec, flist[0]->rec); + if (cmpv < 0) + bot = -1; + } + if (UNIQUE && !cmpv) + return (1); + bot++; + memmove(flist + bot+1, flist + bot, + (ttop - bot) * sizeof(MFILE **)); + flist[bot] = *rec; + return (0); + } +} + +/* + * check order on one file + */ +void +order(infile, get, ftbl) + union f_handle infile; + int (*get)(); + struct field *ftbl; +{ + u_char *end; + int c; + struct recheader *crec, *prec, *trec; + + if (!SINGL_FLD) + linebuf = malloc(MAXLLEN); + buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER))); + end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER)); + crec = (RECHEADER *) buffer; + prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER)); + wts = ftbl->weights; + if (SINGL_FLD && ftbl->flags & F) + wts1 = ftbl->flags & R ? 
Rascii : ascii; + else + wts1 = 0; + if (0 == get(-1, infile, 1, prec, end, ftbl)) + while (0 == get(-1, infile, 1, crec, end, ftbl)) { + if (0 < (c = cmp(prec, crec))) { + crec->data[crec->length-1] = 0; + errx(1, "found disorder: %s", crec->data+crec->offset); + } + if (UNIQUE && !c) { + crec->data[crec->length-1] = 0; + errx(1, "found non-uniqueness: %s", + crec->data+crec->offset); + } + trec = prec; + prec = crec; + crec = trec; + } + exit(0); +} + +static int +cmp(rec1, rec2) + struct recheader *rec1, *rec2; +{ + register r; + register u_char *pos1, *pos2, *end; + register u_char *cwts; + for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) { + pos1 = rec1->data; + pos2 = rec2->data; + if (!SINGL_FLD && UNIQUE) + end = pos1 + min(rec1->offset, rec2->offset); + else + end = pos1 + min(rec1->length, rec2->length); + for (; pos1 < end; ) { + if ((r = cwts[*pos1++] - cwts[*pos2++])) + return (r); + } + } + return (0); +} diff --git a/usr.bin/sort/pathnames.h b/usr.bin/sort/pathnames.h new file mode 100644 index 00000000000..1a2ae6b6c56 --- /dev/null +++ b/usr.bin/sort/pathnames.h @@ -0,0 +1,41 @@ +/* $OpenBSD: pathnames.h,v 1.1 1997/01/20 19:39:54 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pathnames.h 8.1 (Berkeley) 6/6/93 + */ + +#define _PATH_STDIN "/dev/stdin" diff --git a/usr.bin/sort/sort.1 b/usr.bin/sort/sort.1 new file mode 100644 index 00000000000..07d6a935c34 --- /dev/null +++ b/usr.bin/sort/sort.1 @@ -0,0 +1,395 @@ +.\" $OpenBSD: sort.1,v 1.1 1997/01/20 19:39:55 millert Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)sort.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd June 6, 1993 +.Dt SORT 1 +.Os +.Sh NAME +.Nm sort +.Nd sort or merge text files +.Sh SYNOPSIS +.Nm sort +.Op Fl cmubdfinr +.Op Fl t Ar char +.Op Fl R Ar char +.Oo +.Cm Fl k Ar field1[,field2] +.Oc +.Ar ... +.Op Fl T Ar dir +.Op Fl o Ar output +.Op Ar file +.Ar ... +.Sh DESCRIPTION +The +.Nm sort +utility +sorts text files by lines. 
+Comparisons are based on one or more sort keys extracted +from each line of input, and are performed +lexicographically. By default, if keys are not given, +.Nm sort +regards each input line as a single field. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl c +Check that the single input file is sorted. +If the file is not sorted, +.Nm sort +produces the appropriate error messages and exits with code 1; +otherwise, +.Nm sort +returns 0. +.Nm Sort +.Fl c +produces no output. +.It Fl m +Merge only; the input files are assumed to be pre-sorted. +.It Fl o Ar output +The argument given is the name of an +.Ar output +file to +be used instead of the standard output. +This file +can be the same as one of the input files. +.It Fl T Ar dir +Use +.Ar dir +as the directory for temporary files. The default is the contents +of the environment variable +.Ev TMPDIR +or +.Pa /var/tmp +if +.Ev TMPDIR +does not exist. +.It Fl u +Unique: suppress all but one in each set of lines +having equal keys. +If used with the +.Fl c +option, +check that there are no lines with duplicate keys. +.El +.Pp +The following options override the default ordering rules. +When ordering options appear independent of key field +specifications, the requested field ordering rules are +applied globally to all sort keys. +When attached to a specific key (see +.Fl k ) , +the ordering options override +all global ordering options for that key. +.Bl -tag -width indent +.It Fl d +Only blank space and alphanumeric characters +.\" according +.\" to the current setting of LC_CTYPE +are used +in making comparisons. +.It Fl f +Considers all lowercase characters that have uppercase +equivalents to be the same for purposes of +comparison. +.It Fl i +Ignore all non-printable characters. 
+.It Fl n +An initial numeric string, consisting of optional +blank space, optional minus sign, and zero or more +digits (including decimal point) +.\" with +.\" optional radix character and thousands +.\" separator +.\" (as defined in the current locale), +is sorted by arithmetic value. +(The +.Fl n +option no longer implies +the +.Fl b +option.) +.It Fl r +Reverse the sense of comparisons. +.It Fl H +Use a merge sort instead of a radix sort. This option should be +used for files larger than 60Mb. +.El +.Pp +The treatment of field separators can be altered using the +options: +.Bl -tag -width indent +.It Fl b +Ignores leading blank space when determining the start +and end of a restricted sort key. +A +.Fl b +option specified before the first +.Fl k +option applies globally to all +.Fl k +options. +Otherwise, the +.Fl b +option can be +attached independently to each +.Ar field +argument of the +.Fl k +option (see below). +Note that the +.Fl b +option +has no effect unless key fields are specified. +.It Fl t Ar char +.Ar Char +is used as the field separator character. The initial +.Ar char +is not considered to be part of a field when determining +key offsets (see below). +Each occurrence of +.Ar char +is significant (for example, +.Dq Ar charchar +delimits an empty field). +If +.Fl t +is not specified, +blank space characters are used as default field +separators. +.It Fl R Ar char +.Ar Char +is used as the record separator character. +This should be used with discretion; +.Fl R Ar <alphanumeric> +usually produces undesirable results. +The default line separator is newline. +.It Fl k Ar field1[,field2] +Designates the starting position, +.Ar field1 , +and optional ending position, +.Ar field2 , +of a key field. +The +.Fl k +option replaces the obsolescent options +.Cm \(pl Ns Ar pos1 +and +.Fl Ns Ar pos2 . +.El +.Pp +The following operands are available: +.Bl -tag -width indent +.It Ar file +The pathname of a file to be sorted, merged, or checked. 
+If no file +operands are specified, or if +a file operand is +.Fl , +the standard input is used. +.El +.Pp +A field is +defined as a minimal sequence of characters followed by a +field separator or a newline character. +By default, the first +blank space of a sequence of blank spaces acts as the field separator. +All blank spaces in a sequence of blank spaces are considered +as part of the next field; for example, all blank spaces at +the beginning of a line are considered to be part of the +first field. +.Pp +Fields are specified +by the +.Fl k Ar field1[,field2] +argument. A missing +.Ar field2 +argument defaults to the end of a line. +.Pp +The arguments +.Ar field1 +and +.Ar field2 +have the form +.Em m.n +followed by zero or more of the options +.Fl b , d , f , i , +.Fl n , r . +A +.Ar field1 +position specified by +.Em m.n +.Em (m,n > 0) +is interpreted as the +.Em n Ns th +character in the +.Em m Ns th +field. +A missing +.Em \&.n +in +.Ar field1 +means +.Ql \&.1 , +indicating the first character of the +.Em m Ns th +field. +If the +.Fl b +option is in effect, +.Em n +is counted from the first +non-blank character in the +.Em m Ns th +field; +.Em m Ns \&.1b +refers to the first +non-blank character in the +.Em m Ns th +field. +.Pp +A +.Ar field2 +position specified by +.Em m.n +is interpreted as +the +.Em n Ns th +character (including separators) of the +.Em m Ns th +field. +A missing +.Em \&.n +indicates the last character of the +.Em m Ns th +field; +.Em m += \&0 +designates the end of a line. +Thus the option +.Fl k Ar v.x,w.y +is synonymous with the obsolescent option +.Cm \(pl Ns Ar v-\&1.x-\&1 +.Fl Ns Ar w-\&1.y ; +when +.Em y +is omitted, +.Fl k Ar v.x,w +is synonymous with +.Cm \(pl Ns Ar v-\&1.x-\&1 +.Fl Ns Ar w+1.0 . +The obsolescent +.Cm \(pl Ns Ar pos1 +.Fl Ns Ar pos2 +option is still supported, except for +.Fl Ns Ar w\&.0b , +which has no +.Fl k +equivalent. +.Sh ENVIRONMENT +If the following environment variable exists, it is utilized by +.Nm sort . 
+.Bl -tag -width Fl +.It Ev TMPDIR +.Nm Sort +uses the contents of the +.Ev TMPDIR +environment variable as the path in which to store +temporary files. Note that +.Ev TMPDIR +may be overridden by the +.Fl T +option. +.El +.Sh FILES +.Bl -tag -width Pa -compact +.It Pa /var/tmp/sort.* +Default temporary files. +.It Pa Ar output Ns XXXX +Temporary name for +.Ar output +if +.Ar output +already exists; the trailing +.Pa XXXX +is replaced by +.Xr mkstemp 3 . +.El +.Sh SEE ALSO +.Xr comm 1 , +.Xr uniq 1 , +.Xr join 1 +.Sh RETURN VALUES +.Nm Sort +exits with one of the following values: +.Bl -tag -width flag -compact +.It Pa 0: +normal behavior. +.It Pa 1: +on disorder (or non-uniqueness) with the +.Fl c +option. +.It Pa 2: +an error occurred. +.El +.Sh BUGS +Lines longer than 65522 characters are discarded and processing continues. +To sort files larger than 60Mb, use +.Nm sort +.Fl H ; +files larger than 704Mb must be sorted in smaller pieces, then merged. +To protect data +.Nm sort +.Fl o +calls link and unlink, and thus fails in protected directories. +.Sh HISTORY +A +.Nm sort +command appeared in +.At v6 . +.Sh NOTES +The current sort command uses lexicographic radix sorting, which requires +that sort keys be kept in memory (as opposed to previous versions which used quick +and merge sorts and did not). +Thus performance depends highly on efficient choice of sort keys, and the +.Fl b +option and the +.Ar field2 +argument of the +.Fl k +option should be used whenever possible. +Similarly, +.Nm sort +.Fl k1f +is equivalent to +.Nm sort +.Fl f +and may take twice as long. diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c new file mode 100644 index 00000000000..90a21fedd50 --- /dev/null +++ b/usr.bin/sort/sort.c @@ -0,0 +1,297 @@ +/* $OpenBSD: sort.c,v 1.1 1997/01/20 19:39:55 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1993\n\ + The Regents of the University of California. 
All rights reserved.\n";
+#endif /* not lint */
+
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)sort.c 8.1 (Berkeley) 6/6/93";
+#else
+static char rcsid[] = "$OpenBSD: sort.c,v 1.1 1997/01/20 19:39:55 millert Exp $";
+#endif
+#endif /* not lint */
+
+/* Sort sorts a file using an optional user-defined key.
+ * Sort uses radix sort for internal sorting, and allows
+ * a choice of merge sort and radix sort for external sorting.
+ */
+
+#include "sort.h"
+#include "fsort.h"
+#include "pathnames.h"
+
+#include <paths.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+int REC_D = '\n';
+u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */
+/*
+ * weight tables. Gweights is one of ascii, Rascii..
+ * modified to weight rec_d = 0 (or 255)
+ */
+extern u_char gweights[NBINS];
+u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
+/*
+ * masks of ignored characters. Alltable is 256 ones
+ */
+u_char dtable[NBINS], itable[NBINS], alltable[NBINS];
+int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
+struct coldesc clist[(ND+1)*2];
+int ncols = 0;
+extern struct coldesc clist[(ND+1)*2];
+extern int ncols;
+
+char devstdin[] = _PATH_STDIN;
+char toutpath[_POSIX_PATH_MAX];
+char *tmpdir = _PATH_VARTMP;
+
+static void cleanup __P((void));
+static void onsig __P((int));
+static void usage __P((char *));
+
+/*
+ * main --
+ *	Parse the command line, set up the delimiter masks (d_mask) and the
+ *	key-field table (fldtab), then check (-c), merge (-m) or sort the
+ *	named files; standard input is used when no file is given.
+ *
+ *	When the -o target already exists and is not under _PATH_DEV, the
+ *	output is written to a mkstemp(3) file named after the target and
+ *	linked over the target once sorting has finished, so the target may
+ *	also be one of the inputs.
+ *
+ *	Exits 0 on success; errors and usage problems exit 2.  With -c,
+ *	control passes to order(), which is marked below as not returning.
+ */
+int
+main(argc, argv)
+	int argc;
+	char *argv[];
+{
+	extern int optind;
+	extern char *optarg;
+	int (*get)();
+	int ch, i, stdinflag = 0, tmp = 0;
+	char cflag = 0, mflag = 0, nflag = 0;
+	char *outfile, *outpath = 0;
+	struct field fldtab[ND+2], *ftpos;
+	union f_handle filelist;
+	FILE *outfp = NULL;
+	memset(fldtab, 0, (ND+2)*sizeof(struct field));
+	memset(d_mask, 0, NBINS);
+	d_mask[REC_D = '\n'] = REC_D_F;
+	SINGL_FLD = SEP_FLAG = 0;
+	d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
+	ftpos = fldtab;
+	fixit(&argc, argv);
+	/* TMPDIR is honoured only when issetugid() reports 0 */
+	if (!issetugid() && (outfile = getenv("TMPDIR")))
+		tmpdir = outfile;
+	while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:t:T:uy:")) != -1) {
+		switch (ch) {
+		case 'b': fldtab->flags |= BI | BT;
+			break;
+		case 'd':
+		case 'i':
+		case 'f':
+		case 'r': tmp |= optval(ch, 0);
+			if (tmp & R && tmp & F)
+				fldtab->weights = RFtable;
+			else if (tmp & F)
+				fldtab->weights = Ftable;
+			else if(tmp & R)
+				fldtab->weights = Rascii;
+			fldtab->flags |= tmp;
+			break;
+		case 'o':
+			outpath = optarg;
+			break;
+		case 'n':
+			nflag = 1;
+			setfield("1n", ++ftpos, fldtab->flags&(~R));
+			break;
+		case 'k':
+			setfield(optarg, ++ftpos, fldtab->flags);
+			break;
+		case 't':
+			if (SEP_FLAG)
+				usage("multiple field delimiters");
+			SEP_FLAG = 1;
+			d_mask[' '] &= ~FLD_D;
+			d_mask['\t'] &= ~FLD_D;
+			/*
+			 * Index through u_char: plain char may be signed, so
+			 * a delimiter byte >= 0x80 would otherwise index
+			 * d_mask[] with a negative subscript.
+			 */
+			d_mask[(u_char)*optarg] |= FLD_D;
+			/* errx, not err: errno carries no information here */
+			if (d_mask[(u_char)*optarg] & REC_D_F)
+				errx(2, "record/field delimiter clash");
+			break;
+		case 'R':
+			if (REC_D != '\n')
+				usage("multiple record delimiters");
+			/* same signedness concern as -t: keep REC_D in 0..255 */
+			if ('\n' == (REC_D = (u_char)*optarg))
+				break;
+			/* newline becomes an ordinary blank/field delimiter */
+			d_mask['\n'] = d_mask[' '];
+			d_mask[REC_D] = REC_D_F;
+			break;
+		case 'T':
+			tmpdir = optarg;
+			break;
+		case 'u':
+			UNIQUE = 1;
+			break;
+		case 'c':
+			cflag = 1;
+			break;
+		case 'm':
+			mflag = 1;
+			break;
+		case 'H':
+			PANIC = 0;
+			break;
+		case 'y':
+			/* accept -y for backwards compat. */
+			break;
+		case '?':
+		default:
+			/* NULL: print only the usage summary, no "sort: " line */
+			usage(NULL);
+		}
+	}
+	if (cflag && argc > optind+1)
+		errx(2, "too many input files for -c option");
+	/* historic syntax: a trailing "-o output" after the file operands */
+	if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
+		outpath = argv[argc-1];
+		argc -= 2;
+	}
+	if (mflag && argc - optind > (MAXFCT - (16+1))*16)
+		errx(2, "too many input files for -m option");
+	for (i = optind; i < argc; i++) {
+		/* allow one occurrence of /dev/stdin */
+		if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) {
+			if (stdinflag)
+				warnx("ignoring extra \"%s\" in file list",
+				    argv[i]);
+			else {
+				stdinflag = 1;
+				argv[i] = devstdin;
+			}
+		} else if ((ch = access(argv[i], R_OK)))
+			err(2, "%s", argv[i]);
+	}
+	/* no -d/-i and no key given: the whole line is the single key */
+	if (!(fldtab->flags & (I|D) || fldtab[1].icol.num)) {
+		SINGL_FLD = 1;
+		fldtab[0].icol.num = 1;
+	} else {
+		if (!fldtab[1].icol.num) {
+			fldtab[0].flags &= ~(BI|BT);
+			setfield("1", ++ftpos, fldtab->flags);
+		}
+		if (nflag)
+			fldtab[1].flags |= fldtab->flags;
+		fldreset(fldtab);
+		fldtab[0].flags &= ~F;
+	}
+	settables(fldtab[0].flags);
+	num_init();
+	fldtab->weights = gweights;
+	/* no file operands: read standard input */
+	if (optind == argc)
+		argv[--optind] = devstdin;
+	filelist.names = argv+optind;
+	if (SINGL_FLD)
+		get = makeline;
+	else
+		get = makekey;
+	if (cflag) {
+		order(filelist, get, fldtab);
+		/* NOT REACHED */
+	}
+	if (!outpath) {
+		(void)snprintf(toutpath,
+		    sizeof(toutpath), "%sstdout", _PATH_DEV);
+		outfile = outpath = toutpath;
+	} else if (!(ch = access(outpath, 0)) &&
+	    strncmp(_PATH_DEV, outpath, 5)) {
+		/*
+		 * NOTE(review): positional initializer; which members receive
+		 * SIG_BLOCK and 6 depends on the platform's struct sigaction
+		 * layout -- confirm the intended sa_mask/sa_flags.
+		 */
+		struct sigaction act = {0, SIG_BLOCK, 6};
+		int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ,
+		    SIGVTALRM, SIGPROF, 0};
+		int outfd;
+		errno = 0;
+		if (access(outpath, W_OK))
+			err(2, "%s", outpath);
+		act.sa_handler = onsig;
+		(void)snprintf(toutpath, sizeof(toutpath), "%sXXXX", outpath);
+		/* "%s": the path is user supplied and may contain '%' */
+		if ((outfd = mkstemp(toutpath)) < 0 ||
+		    (outfp = fdopen(outfd, "w")) == 0)
+			err(2, "%s", toutpath);
+		outfile = toutpath;
+		(void)atexit(cleanup);
+		for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */
+			sigaction(sigtable[i], &act, 0);
+	} else
+		outfile = outpath;
+	/* "%s": the path is user supplied and may contain '%' */
+	if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL)
+		err(2, "%s", outfile);
+	if (mflag)
+		fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab);
+	else
+		fsort(-1, 0, filelist, argc-optind, outfp, fldtab);
+	/* a temporary was used: move it over the real output file */
+	if (outfile != outpath) {
+		if (access(outfile, 0))
+			err(2, "%s", outfile);
+		(void)unlink(outpath);
+		if (link(outfile, outpath))
+			err(2, "cannot link %s: output left in %s",
+			    outpath, outfile);
+		(void)unlink(outfile);
+	}
+	exit(0);
+}
+
+/*
+ * onsig --
+ *	Signal handler installed while a temporary output file exists:
+ *	remove the temporary and terminate with status 2.
+ *
+ *	_exit() is used rather than exit(): exit() is not async-signal-safe
+ *	and would run the atexit() handler (cleanup) a second time.
+ */
+static void
+onsig(s)
+	int s;
+{
+	cleanup();
+	_exit(2); /* return 2 on error/interrupt */
+}
+
+/*
+ * cleanup --
+ *	Unlink the path held in toutpath, if one has been set.
+ */
+static void
+cleanup()
+{
+	if (toutpath[0])
+		(void)unlink(toutpath);
+}
+
+/*
+ * usage --
+ *	Print "sort: msg" when msg is non-NULL, then the usage summary,
+ *	both to stderr, and exit with status 2.
+ */
+static void
+usage(msg)
+	char *msg;
+{
+	if (msg)
+		(void)fprintf(stderr, "sort: %s\n", msg);
+	(void)fprintf(stderr, "usage: sort [-o output] [-cmubdfinrH] [-t char] ");
+	(void)fprintf(stderr, "[-R char] [-T dir] [-k keydef] ... [files]\n");
+	exit(2);
+}
diff --git a/usr.bin/sort/sort.h b/usr.bin/sort/sort.h
new file mode 100644
index 00000000000..1aa7ee9ea7d
--- /dev/null
+++ b/usr.bin/sort/sort.h
@@ -0,0 +1,145 @@
+/* $OpenBSD: sort.h,v 1.1 1997/01/20 19:39:56 millert Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3.
All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sort.h 8.1 (Berkeley) 6/6/93
+ */
+
+#include <sys/param.h>
+
+#include <db.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* NBINS: size of every per-byte lookup table (one entry per u_char value) */
+#define NBINS 256
+#define MAXMERGE 16
+
+/* values for masks, weights, and other flags. */
+#define I 1 /* mask out non-printable characters */
+#define D 2 /* sort alphanumeric characters only */
+#define N 4 /* Field is a number */
+#define F 8 /* weight lower and upper case the same */
+#define R 16 /* Field is reversed with respect to the global weight */
+#define BI 32 /* ignore blanks in icol */
+#define BT 64 /* ignore blanks in tcol */
+
+/* masks for delimiters: blanks, fields, and termination. */
+#define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */
+#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
+#define REC_D_F 4 /* '\n' default; from -R otherwise */
+
+#define ND 10 /* limit on number of -k options. */
+
+/* NOTE: both macros evaluate their arguments more than once. */
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
+/*
+ * FCLOSE: fclose() the stream; on failure report via err() and exit 2.
+ * NOTE: expands to a bare brace block, so "FCLOSE(f);" as the body of an
+ * un-braced if that is followed by else will not compile.
+ */
+#define FCLOSE(file) { \
+	if (EOF == fclose(file)) \
+		err(2, "fclose"); \
+}
+
+/*
+ * EWRITE: fwrite() wrapper that exits 2 via err() when fwrite() returns 0.
+ * NOTE: a short write that still transfers at least one item is not
+ * detected.  Same bare-brace-block caveat as FCLOSE.
+ */
+#define EWRITE(ptr, size, n, f) { \
+	if (!fwrite(ptr, size, n, f)) \
+		err(2, NULL); \
+}
+
+/* length of record is currently limited to 2^16 - 1 */
+typedef u_short length_t;
+
+/*
+ * SALIGN: round n up to the next even value.
+ * NOTE: n is not parenthesized in the expansion; pass simple expressions.
+ */
+#define SALIGN(n) ((n+1) & ~1)
+
+/* a record is a key/line pair starting at rec.data. It has a total length
+ * and an offset to the start of the line half of the pair.
+ */
+typedef struct recheader {
+	length_t length;
+	length_t offset;
+	u_char data[1];
+} RECHEADER;
+
+/* the two leading members of RECHEADER, without the data[] member */
+typedef struct trecheader {
+	length_t length;
+	length_t offset;
+} TRECHEADER;
+
+/* This is the column as seen by struct field. It is used by enterfield.
+ * They are matched with corresponding coldescs during initialization.
+ */
+struct column {
+	struct coldesc *p;
+	int num;
+	int indent;
+};
+
+/* a coldesc has a number and pointers to the beginning and end of the
+ * corresponding column in the current line. This is determined in enterkey.
+ */
+typedef struct coldesc {
+	u_char *start;
+	u_char *end;
+	int num;
+} COLDESC;
+
+/* A field has an initial and final column; an omitted final column
+ * implies the end of the line. Flags regulate omission of blanks and
+ * numerical sorts; mask determines which characters are ignored (from -i, -d);
+ * weights determines the sort weights of a character (from -f, -r).
+ */
+struct field {
+	struct column icol;
+	struct column tcol;
+	u_int flags;
+	u_char *mask;
+	u_char *weights;
+};
+
+/*
+ * Handle for a set of input files.  main() sets names to point into argv.
+ * NOTE(review): top is not used by the code visible here -- presumably an
+ * index into the temporary-file set; confirm against the users of f_handle.
+ */
+union f_handle {
+	int top;
+	char **names;
+};
+extern int PANIC; /* maximum depth of fsort before fmerge is called */
+extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
+extern u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
+extern u_char d_mask[NBINS];
+extern int SINGL_FLD, SEP_FLAG, UNIQUE;
+extern int REC_D;
+extern char *tmpdir;
+
+#include "extern.h"
diff --git a/usr.bin/sort/tmp.c b/usr.bin/sort/tmp.c
new file mode 100644
index 00000000000..a5ca2347f71
--- /dev/null
+++ b/usr.bin/sort/tmp.c
@@ -0,0 +1,85 @@
+/* $OpenBSD: tmp.c,v 1.1 1997/01/20 19:39:57 millert Exp $ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef lint
+#if 0
+static char sccsid[] = "@(#)tmp.c 8.1 (Berkeley) 6/6/93";
+#else
+static char rcsid[] = "$OpenBSD: tmp.c,v 1.1 1997/01/20 19:39:57 millert Exp $";
+#endif
+#endif /* not lint */
+
+#include <sys/param.h>
+
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "sort.h"
+#include "pathnames.h"
+
+#define _NAME_TMP "sort.XXXXXXXX"
+
+/*
+ * ftmp --
+ *	Create an anonymous temporary file under tmpdir and return it as a
+ *	stdio stream opened "w+".  The name is removed before returning, so
+ *	the file disappears when the stream is closed or the process exits.
+ *	All signals are blocked while the name exists on disk.
+ *
+ *	An empty tmpdir yields a name relative to the current directory.
+ *	Any failure is fatal: the process exits with status 2.
+ */
+FILE *
+ftmp()
+{
+	sigset_t set, oset;
+	FILE *fp;
+	int fd, n;
+	size_t len;
+	char path[_POSIX_PATH_MAX];
+
+	/* take the length first: tmpdir[len - 1] is invalid when len is 0 */
+	len = strlen(tmpdir);
+	n = snprintf(path, sizeof(path), "%s%s%s", tmpdir,
+	    (len > 0 && tmpdir[len - 1] != '/') ? "/" : "", _NAME_TMP);
+	/* a truncated template would lose its trailing X's */
+	if (n < 0 || (size_t)n >= sizeof(path))
+		errx(2, "%s: temporary file path too long", tmpdir);
+
+	sigfillset(&set);
+	(void)sigprocmask(SIG_BLOCK, &set, &oset);
+	/* "%s": the path derives from TMPDIR/-T and may contain '%' */
+	if ((fd = mkstemp(path)) < 0)
+		err(2, "%s", path);
+	/* drop the name at once so a failed fdopen() leaves nothing behind */
+	(void)unlink(path);
+	if (!(fp = fdopen(fd, "w+")))
+		err(2, "%s", path);
+
+	(void)sigprocmask(SIG_SETMASK, &oset, NULL);
+	return (fp);
+}
|