diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 1997-01-20 19:39:58 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 1997-01-20 19:39:58 +0000 |
commit | 3c0b175fc3d554c29f4f3b775ddcb1446094450d (patch) | |
tree | 320261622aef8879e9311e903428351e55581905 /usr.bin/sort/files.c | |
parent | 18c3bcb7a6fea4c4a332bf3f4b9121038ceaa362 (diff) |
sort(1) from 4.4BSD-lite2 with minor tweaks and bug fixes by me.
Diffstat (limited to 'usr.bin/sort/files.c')
-rw-r--r-- | usr.bin/sort/files.c | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/usr.bin/sort/files.c b/usr.bin/sort/files.c new file mode 100644 index 00000000000..f345877e48e --- /dev/null +++ b/usr.bin/sort/files.c @@ -0,0 +1,344 @@ +/* $OpenBSD: files.c,v 1.1 1997/01/20 19:39:50 millert Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)files.c 8.1 (Berkeley) 6/6/93"; +#else +static char rcsid[] = "$OpenBSD: files.c,v 1.1 1997/01/20 19:39:50 millert Exp $"; +#endif +#endif /* not lint */ + +#include "sort.h" +#include "fsort.h" + +#include <string.h> + +/* + * this is the subroutine for file management for fsort(). + * It keeps the buffers for all temporary files. + */ +int +getnext(binno, infl0, nfiles, pos, end, dummy) + int binno, nfiles; + union f_handle infl0; + register struct recheader *pos; + register u_char *end; + struct field *dummy; +{ + register int i; + register u_char *hp; + static long nleft = 0; + static int cnt = 0, flag = -1; + static u_char maxb = 0; + static FILE *fp; + + if (nleft == 0) { + if (binno < 0) /* reset files. */ { + for (i = 0; i < nfiles; i++) { + rewind(fstack[infl0.top + i].fp); + fstack[infl0.top + i].max_o = 0; + } + flag = -1; + nleft = cnt = 0; + return(-1); + } + maxb = fstack[infl0.top].maxb; + for (; nleft == 0; cnt++) { + if (cnt >= nfiles) { + cnt = 0; + return (EOF); + } + fp = fstack[infl0.top + cnt].fp; + hp = (u_char *) &nleft; + for (i = sizeof(TRECHEADER); i; --i) + *hp++ = getc(fp); + if (binno < maxb) + fstack[infl0.top+cnt].max_o + += sizeof(nleft) + nleft; + else if (binno == maxb) { + if (binno != fstack[infl0.top].lastb) { + fseek(fp, fstack[infl0.top+ + cnt].max_o, SEEK_SET); + fread(&nleft, sizeof(nleft), 1, fp); + } + if (nleft == 0) + fclose(fp); + } else if (binno == maxb + 1) { /* skip a bin */ + fseek(fp, nleft, SEEK_CUR); + fread(&nleft, sizeof(nleft), 1, fp); + flag = cnt; + } + } + } + if ((u_char *) pos > end - sizeof(TRECHEADER)) + return (BUFFEND); + hp = (u_char *) pos; + for (i = sizeof(TRECHEADER); i ; --i) + *hp++ = (u_char) getc(fp); + if (end - pos->data < pos->length) { + for (i = sizeof(TRECHEADER); i ; i--) + ungetc(*--hp, fp); + return (BUFFEND); + } + fread(pos->data, pos->length, 1, fp); + nleft -= pos->length + sizeof(TRECHEADER); + if (nleft == 0 && binno == fstack[infl0.top].maxb) + fclose(fp); + return (0); +} + +/* + * this is called when there is no special key. It's only called + * in the first fsort pass. + */ +int +makeline(flno, filelist, nfiles, buffer, bufend, dummy2) + int flno, nfiles; + union f_handle filelist; + struct recheader *buffer; + u_char *bufend; + struct field *dummy2; +{ + static char *opos; + register char *end, *pos; + static int fileno = 0, overflow = 0; + static FILE *fp = 0; + register int c; + + pos = (char *) buffer->data; + end = min((char *) bufend, pos + MAXLLEN); + if (overflow) { + memmove(pos, opos, bufend - (u_char *) opos); + pos += ((char *) bufend - opos); + overflow = 0; + } + for (;;) { + if (flno >= 0) { + if (!(fp = fstack[flno].fp)) + return (EOF); + } else if (!fp) { + if (fileno >= nfiles) return(EOF); + if (!(fp = fopen(filelist.names[fileno], "r"))) + err(2, "%s", filelist.names[fileno]); + ++fileno; + } + while ((pos < end) && ((c = getc(fp)) != EOF)) { + if ((*pos++ = c) == REC_D) { + buffer->offset = 0; + buffer->length = pos - (char *) buffer->data; + return (0); + } + } + if (pos >= end && end == (char *) bufend) { + if ((char *) buffer->data < end) { + overflow = 1; + opos = (char *) buffer->data; + } + return (BUFFEND); + } else if (c == EOF) { + if (buffer->data != (u_char *) pos) { + warnx("last character not record delimiter"); + *pos++ = REC_D; + buffer->offset = 0; + buffer->length = pos - (char *) buffer->data; + return(0); + } + FCLOSE(fp); + fp = 0; + if(flno >= 0) fstack[flno].fp = 0; + } else { + buffer->data[100] = '\000'; + warnx("line too long:ignoring %s...", buffer->data); + } + } +} + +/* + * This generates keys. It's only called in the first fsort pass + */ +int +makekey(flno, filelist, nfiles, buffer, bufend, ftbl) + int flno, nfiles; + union f_handle filelist; + struct recheader *buffer; + u_char *bufend; + struct field *ftbl; +{ + static int (*get)(); + static int fileno = 0; + static FILE *dbdesc = 0; + static DBT dbkey[1], line[1]; + static int overflow = 0; + int c; + if (overflow) { + overflow = 0; + enterkey(buffer, line, bufend - (u_char *) buffer, ftbl); + return (0); + } + for (;;) { + if (flno >= 0) { + get = seq; + if (!(dbdesc = fstack[flno].fp)) + return(EOF); + } else if (!dbdesc) { + if (fileno >= nfiles) + return (EOF); + dbdesc = fopen(filelist.names[fileno], "r"); + if (!dbdesc) + err(2, "%s", filelist.names[fileno]); + ++fileno; + get = seq; + } + if (!(c = get(dbdesc, line, dbkey))) { + if ((signed)line->size > bufend - buffer->data) + overflow = 1; + else + overflow = enterkey(buffer, line, + bufend - (u_char *) buffer, ftbl); + if (overflow) + return (BUFFEND); + else + return (0); + } + if (c == EOF) { + FCLOSE(dbdesc); + dbdesc = 0; + if (flno >= 0) fstack[flno].fp = 0; + } else { + + ((char *) line->data)[60] = '\000'; + warnx("line too long: ignoring %.100s...", + (char *)line->data); + } + + } +} + +/* + * get a key/line pair from fp + */ +int +seq(fp, line, key) + FILE *fp; + DBT *key, *line; +{ + static char *buf, flag = 1; + register char *end, *pos; + register int c; + if (flag) { + flag = 0; + buf = (char *) linebuf; + end = buf + MAXLLEN; + line->data = buf; + } + pos = buf; + while ((c = getc(fp)) != EOF) { + if ((*pos++ = c) == REC_D) { + line->size = pos - buf; + return (0); + } + if (pos == end) { + line->size = MAXLLEN; + *--pos = REC_D; + while ((c = getc(fp)) != EOF) { + if (c == REC_D) + return (BUFFEND); + } + } + } + if (pos != buf) { + warnx("last character not record delimiter"); + *pos++ = REC_D; + line->size = pos - buf; + return (0); + } else + return (EOF); +} + +/* + * write a key/line pair to a temporary file + */ +void +putrec(rec, fp) + register struct recheader *rec; + register FILE *fp; +{ + EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); +} + +/* + * write a line to output + */ +void +putline(rec, fp) + register struct recheader *rec; + register FILE *fp; +{ + EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); +} + +/* + * get a record from a temporary file. (Used by merge sort.) + */ +int +geteasy(flno, filelist, nfiles, rec, end, dummy2) + int flno, nfiles; + union f_handle filelist; + register struct recheader *rec; + register u_char *end; + struct field *dummy2; +{ + int i; + FILE *fp; + fp = fstack[flno].fp; + if ((u_char *) rec > end - sizeof(TRECHEADER)) + return (BUFFEND); + if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { + fclose(fp); + fstack[flno].fp = 0; + return (EOF); + } + if (end - rec->data < rec->length) { + for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) + ungetc(*((char *) rec + i), fp); + return (BUFFEND); + } + fread(rec->data, rec->length, 1, fp); + return (0); +} |