diff options
author | michaels <michaels@cvs.openbsd.org> | 1996-10-20 00:52:59 +0000 |
---|---|---|
committer | michaels <michaels@cvs.openbsd.org> | 1996-10-20 00:52:59 +0000 |
commit | 78d30b21a3fa6e2f7d7fb077246e1e015a2dafdf (patch) | |
tree | d479f38e77fb28e62f88c9f298881d7f0bc9498e /usr.bin/locate/code/locate.code.c | |
parent | 0f3f4f59e71d2e635fe303099f894b4be45d18d3 (diff) |
8-Bit character support. From wosch@freebsd.
Diffstat (limited to 'usr.bin/locate/code/locate.code.c')
-rw-r--r-- | usr.bin/locate/code/locate.code.c | 89 |
1 files changed, 59 insertions, 30 deletions
diff --git a/usr.bin/locate/code/locate.code.c b/usr.bin/locate/code/locate.code.c index 9daa9f5a0d0..b95718ed4be 100644 --- a/usr.bin/locate/code/locate.code.c +++ b/usr.bin/locate/code/locate.code.c @@ -1,6 +1,6 @@ -/* $OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $ */ - /* + * $OpenBSD: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $ + * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * @@ -35,7 +35,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $ + * $Id: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $ */ #ifndef lint @@ -48,7 +48,7 @@ static char copyright[] = #if 0 static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $"; +static char rcsid[] = "$OpenBSD: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $"; #endif #endif /* not lint */ @@ -78,13 +78,22 @@ static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michael * * 0-28 likeliest differential counts + offset to make nonnegative * 30 switch code for out-of-range count to follow in next word + * 31 an 8 bit char followed * 128-255 bigram codes (128 most common, as determined by 'updatedb') * 32-127 single character (printable) ascii residue (ie, literal) * - * SEE ALSO: updatedb.csh, bigram.c + * The locate database stores any character except newline ('\n') + * and NUL ('\0'). The 8-bit character support doesn't waste extra + * space until you have characters in file names less than 32 + * or greater than 127. + * + * + * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c * * AUTHOR: James A.
Woods, Informatics General Corp., * NASA Ames Research Center, 10/82 + * 8-bit file names characters: + * Wolfram Schneider, Berlin September 1996 */ #include <sys/param.h> @@ -99,14 +108,14 @@ static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michael u_char buf1[MAXPATHLEN] = " "; u_char buf2[MAXPATHLEN]; -char bigrams[BGBUFSIZE + 1] = { 0 }; +u_char bigrams[BGBUFSIZE + 1] = { 0 }; #define LOOKUP 1 /* use a lookup array instead a function, 3x faster */ #ifdef LOOKUP -#define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)]) -typedef u_char bg_t; -bg_t big[UCHAR_MAX][UCHAR_MAX]; +#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)]) +typedef short bg_t; +bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1]; #else #define BGINDEX(x) bgindex(x) typedef int bg_t; @@ -151,12 +160,13 @@ main(argc, argv) #ifdef LOOKUP /* init lookup table */ - for (i = 0; i < UCHAR_MAX; i++) - for (j = 0; j < UCHAR_MAX; j++) + for (i = 0; i < UCHAR_MAX + 1; i++) + for (j = 0; j < UCHAR_MAX + 1; j++) big[i][j] = (bg_t)-1; for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2) - big[(int)*cp][(int)*(cp + 1)] = (bg_t)i; + big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i; + #endif /* LOOKUP */ oldpath = buf1; @@ -165,22 +175,21 @@ main(argc, argv) while (fgets(path, sizeof(buf2), stdin) != NULL) { - /* skip empty lines */ + /* skip empty lines */ if (*path == '\n') continue; - /* Squelch characters that would botch the decoding. */ + /* remove newline */ for (cp = path; *cp != '\0'; cp++) { /* chop newline */ if (*cp == '\n') *cp = '\0'; - /* range */ - else if (*cp < ASCII_MIN || *cp > ASCII_MAX) - *cp = '?'; } /* Skip longest common prefix. 
*/ - for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++); + for (cp = path; *cp == *oldpath; cp++, oldpath++) + if (*cp == '\0') + break; count = cp - path; diffcount = count - oldcount + OFFSET; @@ -194,22 +203,42 @@ main(argc, argv) err(1, "stdout"); while (*cp != '\0') { - if (*(cp + 1) == '\0') { - if (putchar(*cp) == EOF) - err(1, "stdout"); - break; - } - if ((code = BGINDEX(cp)) == (bg_t)-1) { - if (putchar(*cp++) == EOF || - putchar(*cp++) == EOF) - err(1, "stdout"); - } else { - /* Found, so mark byte with parity bit. */ + /* print *two* characters */ + + if ((code = BGINDEX(cp)) != (bg_t)-1) { + /* + * print *one* as bigram + * Found, so mark byte with + * parity bit. + */ if (putchar((code / 2) | PARITY) == EOF) err(1, "stdout"); cp += 2; } + + else { + for (i = 0; i < 2; i++) { + if (*cp == '\0') + break; + + /* print umlauts in file names */ + if (*cp < ASCII_MIN || + *cp > ASCII_MAX) { + if (putchar(UMLAUT) == EOF || + putchar(*cp++) == EOF) + err(1, "stdout"); + } + + else { + /* normal character */ + if(putchar(*cp++) == EOF) + err(1, "stdout"); + } + } + + } } + if (path == buf1) { /* swap pointers */ path = buf2; oldpath = buf1; @@ -236,7 +265,7 @@ bgindex(bg) /* Return location of bg in bigrams or -1. */ for (p = bigrams; *p != NULL; p++) if (*p++ == bg0 && *p == bg1) break; - return (*p == NUL ? -1 : (--p - bigrams)); + return (*p == NULL ? -1 : (--p - bigrams)); } #endif /* !LOOKUP */ |