src - OpenBSD base system

diff options


context:
space:
mode:

author	michaels <michaels@cvs.openbsd.org>	1996-10-20 00:52:59 +0000
committer	michaels <michaels@cvs.openbsd.org>	1996-10-20 00:52:59 +0000
commit	78d30b21a3fa6e2f7d7fb077246e1e015a2dafdf (patch)
tree	d479f38e77fb28e62f88c9f298881d7f0bc9498e /usr.bin/locate/code/locate.code.c
parent	0f3f4f59e71d2e635fe303099f894b4be45d18d3 (diff)

8-Bit character support. From wosch@freebsd.

Diffstat (limited to 'usr.bin/locate/code/locate.code.c')

-rw-r--r--

usr.bin/locate/code/locate.code.c

1 files changed, 59 insertions, 30 deletions

diff --git a/usr.bin/locate/code/locate.code.c b/usr.bin/locate/code/locate.code.c
index 9daa9f5a0d0..b95718ed4be 100644
--- a/usr.bin/locate/code/locate.code.c
+++ b/usr.bin/locate/code/locate.code.c

@@ -1,6 +1,6 @@

-/* $OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $ */

+ * $OpenBSD: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $

+ *

@@ -35,7 +35,7 @@

* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

* SUCH DAMAGE.

- * $Id: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $

+ * $Id: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $

#ifndef lint

@@ -48,7 +48,7 @@ static char copyright[] =

#if 0

static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93";

#else

-static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michaels Exp $";

+static char rcsid[] = "$OpenBSD: locate.code.c,v 1.6 1996/10/20 00:52:53 michaels Exp $";

#endif

#endif /* not lint */

@@ -78,13 +78,22 @@ static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michael

* 0-28 likeliest differential counts + offset to make nonnegative

* 30 switch code for out-of-range count to follow in next word

+ * 31 switch code for an 8-bit char to follow in next byte

* 128-255 bigram codes (128 most common, as determined by 'updatedb')

* 32-127 single character (printable) ascii residue (ie, literal)

- * SEE ALSO: updatedb.csh, bigram.c

+ * The locate database stores any character except newline ('\n')

+ * and NUL ('\0'). The 8-bit character support doesn't waste extra

+ * space unless file names contain characters less than 32

+ * or greater than 127.

+ *

+ * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c

* AUTHOR: James A. Woods, Informatics General Corp.,

* NASA Ames Research Center, 10/82

+ * 8-bit file name characters:

+ * Wolfram Schneider, Berlin September 1996

#include <sys/param.h>

@@ -99,14 +108,14 @@ static char rcsid[] = "$OpenBSD: locate.code.c,v 1.5 1996/09/15 16:50:36 michael

u_char buf1[MAXPATHLEN] = " ";

u_char buf2[MAXPATHLEN];

-char bigrams[BGBUFSIZE + 1] = { 0 };

+u_char bigrams[BGBUFSIZE + 1] = { 0 };

#define LOOKUP 1 /* use a lookup array instead of a function, 3x faster */

#ifdef LOOKUP

-#define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)])

-typedef u_char bg_t;

-bg_t big[UCHAR_MAX][UCHAR_MAX];

+#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)])

+typedef short bg_t;

+bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1];

#else

#define BGINDEX(x) bgindex(x)

typedef int bg_t;

@@ -151,12 +160,13 @@ main(argc, argv)

#ifdef LOOKUP

/* init lookup table */

- for (i = 0; i < UCHAR_MAX; i++)

- for (j = 0; j < UCHAR_MAX; j++)

+ for (i = 0; i < UCHAR_MAX + 1; i++)

+ for (j = 0; j < UCHAR_MAX + 1; j++)

big[i][j] = (bg_t)-1;

for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)

- big[(int)*cp][(int)*(cp + 1)] = (bg_t)i;

+ big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i;

#endif /* LOOKUP */

oldpath = buf1;

@@ -165,22 +175,21 @@ main(argc, argv)

while (fgets(path, sizeof(buf2), stdin) != NULL) {

- /* skip empty lines */

+ /* skip empty lines */

if (*path == '\n')

continue;

- /* Squelch characters that would botch the decoding. */

+ /* remove newline */

for (cp = path; *cp != '\0'; cp++) {

/* chop newline */

if (*cp == '\n')

*cp = '\0';

- /* range */

- else if (*cp < ASCII_MIN || *cp > ASCII_MAX)

- *cp = '?';

}

/* Skip longest common prefix. */

- for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++);

+ for (cp = path; *cp == *oldpath; cp++, oldpath++)

+ if (*cp == '\0')

+ break;

count = cp - path;

diffcount = count - oldcount + OFFSET;

@@ -194,22 +203,42 @@ main(argc, argv)

err(1, "stdout");

while (*cp != '\0') {

- if (*(cp + 1) == '\0') {

- if (putchar(*cp) == EOF)

- err(1, "stdout");

- break;

- }

- if ((code = BGINDEX(cp)) == (bg_t)-1) {

- if (putchar(*cp++) == EOF ||

- putchar(*cp++) == EOF)

- err(1, "stdout");

- } else {

- /* Found, so mark byte with parity bit. */

+ /* print *two* characters */

+ if ((code = BGINDEX(cp)) != (bg_t)-1) {

+ /*

+ * print *one* as bigram

+ * Found, so mark byte with

+ * parity bit.

+ */

if (putchar((code / 2) | PARITY) == EOF)

err(1, "stdout");

cp += 2;

}

+ else {

+ for (i = 0; i < 2; i++) {

+ if (*cp == '\0')

+ break;

+ /* print umlauts in file names */

+ if (*cp < ASCII_MIN ||

+ *cp > ASCII_MAX) {

+ if (putchar(UMLAUT) == EOF ||

+ putchar(*cp++) == EOF)

+ err(1, "stdout");

+ }

+ else {

+ /* normal character */

+ if(putchar(*cp++) == EOF)

+ err(1, "stdout");

+ }

}

if (path == buf1) { /* swap pointers */

path = buf2;

oldpath = buf1;

@@ -236,7 +265,7 @@ bgindex(bg) /* Return location of bg in bigrams or -1. */

for (p = bigrams; *p != NULL; p++)

if (*p++ == bg0 && *p == bg1)

break;

- return (*p == NUL ? -1 : (--p - bigrams));

+ return (*p == NULL ? -1 : (--p - bigrams));

}

#endif /* !LOOKUP */