diff options
Diffstat (limited to 'usr.bin/locate/bigram/locate.bigram.c')
-rw-r--r-- | usr.bin/locate/bigram/locate.bigram.c | 78 |
1 files changed, 53 insertions, 25 deletions
diff --git a/usr.bin/locate/bigram/locate.bigram.c b/usr.bin/locate/bigram/locate.bigram.c
index d8c0f0e0204..d0c13d86e26 100644
--- a/usr.bin/locate/bigram/locate.bigram.c
+++ b/usr.bin/locate/bigram/locate.bigram.c
@@ -1,6 +1,4 @@
-/* $OpenBSD: locate.bigram.c,v 1.2 1996/06/26 05:35:49 deraadt Exp $ */
-/* $NetBSD: locate.bigram.c,v 1.5 1995/09/01 23:48:13 thorpej Exp $ */
-
+/* $OpenBSD: locate.bigram.c,v 1.3 1996/08/16 22:00:10 michaels Exp $ */
 /*
  * Copyright (c) 1989, 1993
  * The Regents of the University of California. All rights reserved.
@@ -45,47 +43,77 @@ static char copyright[] =
 
 #ifndef lint
 #if 0
-static char sccsid[] = "@(#)locate.bigram.c 8.2 (Berkeley) 4/28/95";
+static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93";
+#else
+static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.3 1996/08/16 22:00:10 michaels Exp $";
 #endif
-static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.2 1996/06/26 05:35:49 deraadt Exp $";
 #endif /* not lint */
 
 /*
  * bigram < text > bigrams
- * 
+ *
  * List bigrams for 'updatedb' script.
  * Use 'code' to encode a file using this output.
  */
 
 #include <stdio.h>
 #include <sys/param.h> /* for MAXPATHLEN */
+#include <string.h> /* memchr */
+#include "locate.h"
 
-char buf1[MAXPATHLEN] = " ";
-char buf2[MAXPATHLEN];
+u_char buf1[MAXPATHLEN] = " ";
+u_char buf2[MAXPATHLEN];
+unsigned int bigram[UCHAR_MAX][UCHAR_MAX];
 
-main ( )
+int main(void)
 {
-	register char *cp;
-	register char *oldpath = buf1, *path = buf2;
+	register u_char *cp;
+	register u_char *oldpath = buf1, *path = buf2;
+	register int i, j;
 
-	while ( fgets ( path, sizeof(buf2), stdin ) != NULL ) {
+	while (fgets(path, sizeof(buf2), stdin) != NULL) {
+		/* skip empty lines */
+		if (*path == '\n')
+			continue;
+		/* Squelch characters that would botch the decoding. */
+		for (cp = path; *cp != NULL; cp++) {
+			/* chop newline */
+			if (*cp == '\n')
+				*cp = NULL;
+			/* range */
+			else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
+				*cp = '?';
+		}
 
 		/* skip longest common prefix */
-		for ( cp = path; *cp == *oldpath; cp++, oldpath++ )
-			if ( *oldpath == '\0' )
-				break;
+		for (cp = path; *cp == *oldpath && *cp != NULL; cp++, oldpath++)
+			;
 		/*
 		 * output post-residue bigrams only
 		 */
-		while ( *cp != '\0' && *(cp + 1) != '\0' ) {
-			putchar ( *cp++ );
-			putchar ( *cp++ );
-			putchar ( '\n' );
+
+		/* check later for boundary */
+		while ( *cp != NULL && *(cp+1) != NULL ) {
+			bigram[*cp][*(cp+1)]++;
+			cp += 2;
 		}
-		if ( path == buf1 ) /* swap pointers */
-			path = buf2, oldpath = buf1;
-		else
-			path = buf1, oldpath = buf2;
-	}
-	return (0);
+
+		if ( path == buf1 ) { /* swap pointers */
+			path = buf2;
+			oldpath = buf1;
+		}
+		else {
+			path = buf1;
+			oldpath = buf2;
+		}
+	}
+
+	/* output, boundary check */
+	for (i = ASCII_MIN; i <= ASCII_MAX; i++)
+		for (j = ASCII_MIN; j <= ASCII_MAX; j++)
+			if (bigram[i][j] != 0)
+				fprintf(stdout, "%4d %c%c\n",
+				    bigram[i][j], i, j);
+
+	return 0;
 }