summary | refs | log | tree | commit | diff
path: root/usr.bin/locate/bigram/locate.bigram.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/locate/bigram/locate.bigram.c')
-rw-r--r-- usr.bin/locate/bigram/locate.bigram.c 78
1 files changed, 53 insertions, 25 deletions
diff --git a/usr.bin/locate/bigram/locate.bigram.c b/usr.bin/locate/bigram/locate.bigram.c
index d8c0f0e0204..d0c13d86e26 100644
--- a/usr.bin/locate/bigram/locate.bigram.c
+++ b/usr.bin/locate/bigram/locate.bigram.c
@@ -1,6 +1,4 @@
-/* $OpenBSD: locate.bigram.c,v 1.2 1996/06/26 05:35:49 deraadt Exp $ */
-/* $NetBSD: locate.bigram.c,v 1.5 1995/09/01 23:48:13 thorpej Exp $ */
-
+/* $OpenBSD: locate.bigram.c,v 1.3 1996/08/16 22:00:10 michaels Exp $ */
/*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
@@ -45,47 +43,77 @@ static char copyright[] =
#ifndef lint
#if 0
-static char sccsid[] = "@(#)locate.bigram.c 8.2 (Berkeley) 4/28/95";
+static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93";
+#else
+static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.3 1996/08/16 22:00:10 michaels Exp $";
#endif
-static char rcsid[] = "$OpenBSD: locate.bigram.c,v 1.2 1996/06/26 05:35:49 deraadt Exp $";
#endif /* not lint */
/*
* bigram < text > bigrams
- *
+ *
* List bigrams for 'updatedb' script.
* Use 'code' to encode a file using this output.
*/
#include <stdio.h>
#include <sys/param.h> /* for MAXPATHLEN */
+#include <string.h> /* memchr */
+#include "locate.h"
-char buf1[MAXPATHLEN] = " ";
-char buf2[MAXPATHLEN];
+u_char buf1[MAXPATHLEN] = " ";
+u_char buf2[MAXPATHLEN];
+unsigned int bigram[UCHAR_MAX][UCHAR_MAX];
-main ( )
+int main(void)
{
- register char *cp;
- register char *oldpath = buf1, *path = buf2;
+ register u_char *cp;
+ register u_char *oldpath = buf1, *path = buf2;
+ register int i, j;
- while ( fgets ( path, sizeof(buf2), stdin ) != NULL ) {
+ while (fgets(path, sizeof(buf2), stdin) != NULL) {
+ /* skip empty lines */
+ if (*path == '\n')
+ continue;
+ /* Squelch characters that would botch the decoding. */
+ for (cp = path; *cp != NULL; cp++) {
+ /* chop newline */
+ if (*cp == '\n')
+ *cp = NULL;
+ /* range */
+ else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
+ *cp = '?';
+ }
/* skip longest common prefix */
- for ( cp = path; *cp == *oldpath; cp++, oldpath++ )
- if ( *oldpath == '\0' )
- break;
+ for (cp = path; *cp == *oldpath && *cp != NULL; cp++, oldpath++)
+ ;
/*
* output post-residue bigrams only
*/
- while ( *cp != '\0' && *(cp + 1) != '\0' ) {
- putchar ( *cp++ );
- putchar ( *cp++ );
- putchar ( '\n' );
+
+ /* check later for boundary */
+ while ( *cp != NULL && *(cp+1) != NULL ) {
+ bigram[*cp][*(cp+1)]++;
+ cp += 2;
}
- if ( path == buf1 ) /* swap pointers */
- path = buf2, oldpath = buf1;
- else
- path = buf1, oldpath = buf2;
- }
- return (0);
+
+ if ( path == buf1 ) { /* swap pointers */
+ path = buf2;
+ oldpath = buf1;
+ }
+ else {
+ path = buf1;
+ oldpath = buf2;
+ }
+ }
+
+ /* output, boundary check */
+ for (i = ASCII_MIN; i <= ASCII_MAX; i++)
+ for (j = ASCII_MIN; j <= ASCII_MAX; j++)
+ if (bigram[i][j] != 0)
+ fprintf(stdout, "%4d %c%c\n",
+ bigram[i][j], i, j);
+
+ return 0;
}