diff options
Diffstat (limited to 'usr.bin/file/ascmagic.c')
-rw-r--r-- | usr.bin/file/ascmagic.c | 144 |
1 files changed, 86 insertions, 58 deletions
diff --git a/usr.bin/file/ascmagic.c b/usr.bin/file/ascmagic.c index 23542545e7f..b5cc8b991d6 100644 --- a/usr.bin/file/ascmagic.c +++ b/usr.bin/file/ascmagic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ascmagic.c,v 1.8 2004/05/19 02:32:35 tedu Exp $ */ +/* $OpenBSD: ascmagic.c,v 1.9 2008/05/08 01:40:56 chl Exp $ */ /* * Copyright (c) Ian F. Darwin 1986-1995. * Software written by Ian F. Darwin and others; @@ -50,7 +50,7 @@ #include "names.h" #ifndef lint -FILE_RCSID("@(#)$Id: ascmagic.c,v 1.8 2004/05/19 02:32:35 tedu Exp $") +FILE_RCSID("@(#)$Id: ascmagic.c,v 1.9 2008/05/08 01:40:56 chl Exp $") #endif /* lint */ typedef unsigned long unichar; @@ -72,10 +72,11 @@ protected int file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) { size_t i; - unsigned char nbuf[HOWMANY+1]; /* one extra for terminating '\0' */ - unichar ubuf[HOWMANY+1]; /* one extra for terminating '\0' */ + unsigned char *nbuf = NULL; + unichar *ubuf = NULL; size_t ulen; struct names *p; + int rv = -1; const char *code = NULL; const char *code_mime = NULL; @@ -85,26 +86,27 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) int has_escapes = 0; int has_backspace = 0; + int seen_cr = 0; int n_crlf = 0; int n_lf = 0; int n_cr = 0; int n_nel = 0; - int last_line_end = -1; + size_t last_line_end = (size_t)-1; int has_long_lines = 0; /* * Undo the NUL-termination kindly provided by process() * but leave at least one byte to look at */ - while (nbytes > 1 && buf[nbytes - 1] == '\0') nbytes--; - /* nbuf and ubuf relies on this */ - if (nbytes > HOWMANY) - nbytes = HOWMANY; + if ((nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0]))) == NULL) + goto done; + if ((ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0]))) == NULL) + goto done; /* * Then try to determine whether it's any character code we can @@ -148,10 +150,16 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) type = "character data"; code_mime = "ebcdic"; } else { - return 0; /* doesn't look like text at all */ + rv = 0; + goto done; /* doesn't look like text at all */ } } + if (nbytes <= 1) { + rv = 0; + goto done; + } + /* * for troff, look for . + letter + letter or .\"; * this must be done to disambiguate tar archives' ./file @@ -160,7 +168,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) * I believe Plan 9 troff allows non-ASCII characters in the names * of macros, so this test might possibly fail on such a file. */ - if (*ubuf == '.') { + if ((ms->flags & MAGIC_NO_CHECK_TROFF) == 0 && *ubuf == '.') { unichar *tp = ubuf + 1; while (ISSPC(*tp)) @@ -177,7 +185,8 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) } } - if ((*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) { + if ((ms->flags & MAGIC_NO_CHECK_FORTRAN) == 0 && + (*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) { subtype_mime = "text/fortran"; subtype = "fortran program"; goto subtype_identified; @@ -185,6 +194,9 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) /* look for tokens from names.h - this is expensive! */ + if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0) + goto subtype_identified; + i = 0; while (i < ulen) { size_t end; @@ -225,66 +237,75 @@ subtype_identified: * Now try to discover other details about the file. */ for (i = 0; i < ulen; i++) { - if (i > last_line_end + MAXLINELEN) - has_long_lines = 1; - - if (ubuf[i] == '\033') - has_escapes = 1; - if (ubuf[i] == '\b') - has_backspace = 1; - - if (ubuf[i] == '\r' && (i + 1 < ulen && ubuf[i + 1] == '\n')) { - n_crlf++; + if (ubuf[i] == '\n') { + if (seen_cr) + n_crlf++; + else + n_lf++; last_line_end = i; - } - if (ubuf[i] == '\r' && (i + 1 >= ulen || ubuf[i + 1] != '\n')) { + } else if (seen_cr) n_cr++; + + seen_cr = (ubuf[i] == '\r'); + if (seen_cr) last_line_end = i; - } - if (ubuf[i] == '\n' && ((int)i - 1 < 0 || ubuf[i - 1] != '\r')){ - n_lf++; - last_line_end = i; - } + if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */ n_nel++; last_line_end = i; } + + /* If this line is _longer_ than MAXLINELEN, remember it. */ + if (i > last_line_end + MAXLINELEN) + has_long_lines = 1; + + if (ubuf[i] == '\033') + has_escapes = 1; + if (ubuf[i] == '\b') + has_backspace = 1; } + /* Beware, if the data has been truncated, the final CR could have + been followed by a LF. If we have HOWMANY bytes, it indicates + that the data might have been truncated, probably even before + this function was called. */ + if (seen_cr && nbytes < HOWMANY) + n_cr++; + if ((ms->flags & MAGIC_MIME)) { if (subtype_mime) { if (file_printf(ms, subtype_mime) == -1) - return -1; + goto done; } else { if (file_printf(ms, "text/plain") == -1) - return -1; + goto done; } if (code_mime) { if (file_printf(ms, "; charset=") == -1) - return -1; + goto done; if (file_printf(ms, code_mime) == -1) - return -1; + goto done; } } else { if (file_printf(ms, code) == -1) - return -1; + goto done; if (subtype) { if (file_printf(ms, " ") == -1) - return -1; + goto done; if (file_printf(ms, subtype) == -1) - return -1; + goto done; } if (file_printf(ms, " ") == -1) - return -1; + goto done; if (file_printf(ms, type) == -1) - return -1; + goto done; if (has_long_lines) if (file_printf(ms, ", with very long lines") == -1) - return -1; + goto done; /* * Only report line terminators if we find one other than LF, @@ -293,51 +314,57 @@ subtype_identified: if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) || (n_crlf != 0 || n_cr != 0 || n_nel != 0)) { if (file_printf(ms, ", with") == -1) - return -1; + goto done; if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) { if (file_printf(ms, " no") == -1) - return -1; + goto done; } else { if (n_crlf) { if (file_printf(ms, " CRLF") == -1) - return -1; + goto done; if (n_cr || n_lf || n_nel) if (file_printf(ms, ",") == -1) - return -1; + goto done; } if (n_cr) { if (file_printf(ms, " CR") == -1) - return -1; + goto done; if (n_lf || n_nel) if (file_printf(ms, ",") == -1) - return -1; + goto done; } if (n_lf) { if (file_printf(ms, " LF") == -1) - return -1; + goto done; if (n_nel) if (file_printf(ms, ",") == -1) - return -1; + goto done; } if (n_nel) if (file_printf(ms, " NEL") == -1) - return -1; + goto done; } if (file_printf(ms, " line terminators") == -1) - return -1; + goto done; } if (has_escapes) if (file_printf(ms, ", with escape sequences") == -1) - return -1; + goto done; if (has_backspace) if (file_printf(ms, ", with overstriking") == -1) - return -1; + goto done; } + rv = 1; +done: + if (nbuf) + free(nbuf); + if (ubuf) + free(ubuf); - return 1; + return rv; } private int @@ -439,7 +466,7 @@ private int looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) { - int i; + size_t i; *ulen = 0; @@ -458,7 +485,7 @@ looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf, private int looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) { - int i; + size_t i; *ulen = 0; @@ -478,7 +505,7 @@ private int looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) { - int i; + size_t i; *ulen = 0; @@ -497,7 +524,8 @@ looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, private int looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) { - int i, n; + size_t i; + int n; unichar c; int gotone = 0; @@ -561,7 +589,7 @@ looks_unicode(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) { int bigend; - int i; + size_t i; if (nbytes < 2) return 0; @@ -680,7 +708,7 @@ private unsigned char ebcdic_1047_to_8859[] = { private void from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out) { - int i; + size_t i; for (i = 0; i < nbytes; i++) { out[i] = ebcdic_to_ascii[buf[i]]; |