summaryrefslogtreecommitdiff
path: root/usr.bin/locate/code/locate.code.c
diff options
context:
space:
mode:
authormichaels <michaels@cvs.openbsd.org>1996-10-20 00:52:59 +0000
committermichaels <michaels@cvs.openbsd.org>1996-10-20 00:52:59 +0000
commit78d30b21a3fa6e2f7d7fb077246e1e015a2dafdf (patch)
treed479f38e77fb28e62f88c9f298881d7f0bc9498e /usr.bin/locate/code/locate.code.c
parent0f3f4f59e71d2e635fe303099f894b4be45d18d3 (diff)
8-Bit character support. From wosch@freebsd.
Diffstat (limited to 'usr.bin/locate/code/locate.code.c')
-rw-r--r--usr.bin/locate/code/locate.code.c89
1 files changed, 59 insertions, 30 deletions
diff --git a/usr.bin/locate/code/locate.code.c b/usr.bin/locate/code/locate.code.c
index 9daa9f5a0d0..b95718ed4be 100644
--- a/usr.bin/locate/code/locate.code.c
+++ b/usr.bin/locate/code/locate.code.c
@@ -1,6 +1,6 @@
-/* $OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $ */
-
/*
+ * $OpenBSD: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $
+ *
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -35,7 +35,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $
+ * $Id: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $
*/
#ifndef lint
@@ -48,7 +48,7 @@ static char copyright[] =
#if 0
static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93";
#else
-static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $";
+static char rcsid[] = "$OpenBSD: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $";
#endif
#endif /* not lint */
@@ -78,13 +78,22 @@ static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michael
*
* 0-28 likeliest differential counts + offset to make nonnegative
* 30 switch code for out-of-range count to follow in next word
+ * 31 switch code for an 8-bit char to follow in next byte
* 128-255 bigram codes (128 most common, as determined by 'updatedb')
* 32-127 single character (printable) ascii residue (ie, literal)
*
- * SEE ALSO: updatedb.csh, bigram.c
+ * The locate database stores any character except newline ('\n')
+ * and NUL ('\0'). The 8-bit character support doesn't waste extra
+ * space unless file names contain characters less than 32
+ * or greater than 127.
+ *
+ *
+ * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c
*
* AUTHOR: James A. Woods, Informatics General Corp.,
* NASA Ames Research Center, 10/82
+ * 8-bit file name characters:
+ * Wolfram Schneider, Berlin September 1996
*/
#include <sys/param.h>
@@ -99,14 +108,14 @@ static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michael
u_char buf1[MAXPATHLEN] = " ";
u_char buf2[MAXPATHLEN];
-char bigrams[BGBUFSIZE + 1] = { 0 };
+u_char bigrams[BGBUFSIZE + 1] = { 0 };
#define LOOKUP 1 /* use a lookup array instead a function, 3x faster */
#ifdef LOOKUP
-#define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)])
-typedef u_char bg_t;
-bg_t big[UCHAR_MAX][UCHAR_MAX];
+#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)])
+typedef short bg_t;
+bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1];
#else
#define BGINDEX(x) bgindex(x)
typedef int bg_t;
@@ -151,12 +160,13 @@ main(argc, argv)
#ifdef LOOKUP
/* init lookup table */
- for (i = 0; i < UCHAR_MAX; i++)
- for (j = 0; j < UCHAR_MAX; j++)
+ for (i = 0; i < UCHAR_MAX + 1; i++)
+ for (j = 0; j < UCHAR_MAX + 1; j++)
big[i][j] = (bg_t)-1;
for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)
- big[(int)*cp][(int)*(cp + 1)] = (bg_t)i;
+ big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i;
+
#endif /* LOOKUP */
oldpath = buf1;
@@ -165,22 +175,21 @@ main(argc, argv)
while (fgets(path, sizeof(buf2), stdin) != NULL) {
- /* skip empty lines */
+ /* skip empty lines */
if (*path == '\n')
continue;
- /* Squelch characters that would botch the decoding. */
+ /* remove newline */
for (cp = path; *cp != '\0'; cp++) {
/* chop newline */
if (*cp == '\n')
*cp = '\0';
- /* range */
- else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
- *cp = '?';
}
/* Skip longest common prefix. */
- for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++);
+ for (cp = path; *cp == *oldpath; cp++, oldpath++)
+ if (*cp == '\0')
+ break;
count = cp - path;
diffcount = count - oldcount + OFFSET;
@@ -194,22 +203,42 @@ main(argc, argv)
err(1, "stdout");
while (*cp != '\0') {
- if (*(cp + 1) == '\0') {
- if (putchar(*cp) == EOF)
- err(1, "stdout");
- break;
- }
- if ((code = BGINDEX(cp)) == (bg_t)-1) {
- if (putchar(*cp++) == EOF ||
- putchar(*cp++) == EOF)
- err(1, "stdout");
- } else {
- /* Found, so mark byte with parity bit. */
+ /* print *two* characters */
+
+ if ((code = BGINDEX(cp)) != (bg_t)-1) {
+ /*
+ * print *one* as bigram
+ * Found, so mark byte with
+ * parity bit.
+ */
if (putchar((code / 2) | PARITY) == EOF)
err(1, "stdout");
cp += 2;
}
+
+ else {
+ for (i = 0; i < 2; i++) {
+ if (*cp == '\0')
+ break;
+
+ /* print umlauts in file names */
+ if (*cp < ASCII_MIN ||
+ *cp > ASCII_MAX) {
+ if (putchar(UMLAUT) == EOF ||
+ putchar(*cp++) == EOF)
+ err(1, "stdout");
+ }
+
+ else {
+ /* normal character */
+ if(putchar(*cp++) == EOF)
+ err(1, "stdout");
+ }
+ }
+
+ }
}
+
if (path == buf1) { /* swap pointers */
path = buf2;
oldpath = buf1;
@@ -236,7 +265,7 @@ bgindex(bg) /* Return location of bg in bigrams or -1. */
for (p = bigrams; *p != NULL; p++)
if (*p++ == bg0 && *p == bg1)
break;
- return (*p == NUL ? -1 : (--p - bigrams));
+ return (*p == NULL ? -1 : (--p - bigrams));
}
#endif /* !LOOKUP */