summaryrefslogtreecommitdiff
path: root/usr.bin/file/ascmagic.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/file/ascmagic.c')
-rw-r--r--usr.bin/file/ascmagic.c144
1 files changed, 86 insertions, 58 deletions
diff --git a/usr.bin/file/ascmagic.c b/usr.bin/file/ascmagic.c
index 23542545e7f..b5cc8b991d6 100644
--- a/usr.bin/file/ascmagic.c
+++ b/usr.bin/file/ascmagic.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ascmagic.c,v 1.8 2004/05/19 02:32:35 tedu Exp $ */
+/* $OpenBSD: ascmagic.c,v 1.9 2008/05/08 01:40:56 chl Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
@@ -50,7 +50,7 @@
#include "names.h"
#ifndef lint
-FILE_RCSID("@(#)$Id: ascmagic.c,v 1.8 2004/05/19 02:32:35 tedu Exp $")
+FILE_RCSID("@(#)$Id: ascmagic.c,v 1.9 2008/05/08 01:40:56 chl Exp $")
#endif /* lint */
typedef unsigned long unichar;
@@ -72,10 +72,11 @@ protected int
file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
{
size_t i;
- unsigned char nbuf[HOWMANY+1]; /* one extra for terminating '\0' */
- unichar ubuf[HOWMANY+1]; /* one extra for terminating '\0' */
+ unsigned char *nbuf = NULL;
+ unichar *ubuf = NULL;
size_t ulen;
struct names *p;
+ int rv = -1;
const char *code = NULL;
const char *code_mime = NULL;
@@ -85,26 +86,27 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
int has_escapes = 0;
int has_backspace = 0;
+ int seen_cr = 0;
int n_crlf = 0;
int n_lf = 0;
int n_cr = 0;
int n_nel = 0;
- int last_line_end = -1;
+ size_t last_line_end = (size_t)-1;
int has_long_lines = 0;
/*
* Undo the NUL-termination kindly provided by process()
* but leave at least one byte to look at
*/
-
while (nbytes > 1 && buf[nbytes - 1] == '\0')
nbytes--;
- /* nbuf and ubuf relies on this */
- if (nbytes > HOWMANY)
- nbytes = HOWMANY;
+ if ((nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0]))) == NULL)
+ goto done;
+ if ((ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0]))) == NULL)
+ goto done;
/*
* Then try to determine whether it's any character code we can
@@ -148,10 +150,16 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
type = "character data";
code_mime = "ebcdic";
} else {
- return 0; /* doesn't look like text at all */
+ rv = 0;
+ goto done; /* doesn't look like text at all */
}
}
+ if (nbytes <= 1) {
+ rv = 0;
+ goto done;
+ }
+
/*
* for troff, look for . + letter + letter or .\";
* this must be done to disambiguate tar archives' ./file
@@ -160,7 +168,7 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
* I believe Plan 9 troff allows non-ASCII characters in the names
* of macros, so this test might possibly fail on such a file.
*/
- if (*ubuf == '.') {
+ if ((ms->flags & MAGIC_NO_CHECK_TROFF) == 0 && *ubuf == '.') {
unichar *tp = ubuf + 1;
while (ISSPC(*tp))
@@ -177,7 +185,8 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
}
}
- if ((*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) {
+ if ((ms->flags & MAGIC_NO_CHECK_FORTRAN) == 0 &&
+ (*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) {
subtype_mime = "text/fortran";
subtype = "fortran program";
goto subtype_identified;
@@ -185,6 +194,9 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
/* look for tokens from names.h - this is expensive! */
+ if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
+ goto subtype_identified;
+
i = 0;
while (i < ulen) {
size_t end;
@@ -225,66 +237,75 @@ subtype_identified:
* Now try to discover other details about the file.
*/
for (i = 0; i < ulen; i++) {
- if (i > last_line_end + MAXLINELEN)
- has_long_lines = 1;
-
- if (ubuf[i] == '\033')
- has_escapes = 1;
- if (ubuf[i] == '\b')
- has_backspace = 1;
-
- if (ubuf[i] == '\r' && (i + 1 < ulen && ubuf[i + 1] == '\n')) {
- n_crlf++;
+ if (ubuf[i] == '\n') {
+ if (seen_cr)
+ n_crlf++;
+ else
+ n_lf++;
last_line_end = i;
- }
- if (ubuf[i] == '\r' && (i + 1 >= ulen || ubuf[i + 1] != '\n')) {
+ } else if (seen_cr)
n_cr++;
+
+ seen_cr = (ubuf[i] == '\r');
+ if (seen_cr)
last_line_end = i;
- }
- if (ubuf[i] == '\n' && ((int)i - 1 < 0 || ubuf[i - 1] != '\r')){
- n_lf++;
- last_line_end = i;
- }
+
if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
n_nel++;
last_line_end = i;
}
+
+ /* If this line is _longer_ than MAXLINELEN, remember it. */
+ if (i > last_line_end + MAXLINELEN)
+ has_long_lines = 1;
+
+ if (ubuf[i] == '\033')
+ has_escapes = 1;
+ if (ubuf[i] == '\b')
+ has_backspace = 1;
}
+ /* Beware, if the data has been truncated, the final CR could have
+ been followed by a LF. If we have HOWMANY bytes, it indicates
+ that the data might have been truncated, probably even before
+ this function was called. */
+ if (seen_cr && nbytes < HOWMANY)
+ n_cr++;
+
if ((ms->flags & MAGIC_MIME)) {
if (subtype_mime) {
if (file_printf(ms, subtype_mime) == -1)
- return -1;
+ goto done;
} else {
if (file_printf(ms, "text/plain") == -1)
- return -1;
+ goto done;
}
if (code_mime) {
if (file_printf(ms, "; charset=") == -1)
- return -1;
+ goto done;
if (file_printf(ms, code_mime) == -1)
- return -1;
+ goto done;
}
} else {
if (file_printf(ms, code) == -1)
- return -1;
+ goto done;
if (subtype) {
if (file_printf(ms, " ") == -1)
- return -1;
+ goto done;
if (file_printf(ms, subtype) == -1)
- return -1;
+ goto done;
}
if (file_printf(ms, " ") == -1)
- return -1;
+ goto done;
if (file_printf(ms, type) == -1)
- return -1;
+ goto done;
if (has_long_lines)
if (file_printf(ms, ", with very long lines") == -1)
- return -1;
+ goto done;
/*
* Only report line terminators if we find one other than LF,
@@ -293,51 +314,57 @@ subtype_identified:
if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
(n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
if (file_printf(ms, ", with") == -1)
- return -1;
+ goto done;
if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
if (file_printf(ms, " no") == -1)
- return -1;
+ goto done;
} else {
if (n_crlf) {
if (file_printf(ms, " CRLF") == -1)
- return -1;
+ goto done;
if (n_cr || n_lf || n_nel)
if (file_printf(ms, ",") == -1)
- return -1;
+ goto done;
}
if (n_cr) {
if (file_printf(ms, " CR") == -1)
- return -1;
+ goto done;
if (n_lf || n_nel)
if (file_printf(ms, ",") == -1)
- return -1;
+ goto done;
}
if (n_lf) {
if (file_printf(ms, " LF") == -1)
- return -1;
+ goto done;
if (n_nel)
if (file_printf(ms, ",") == -1)
- return -1;
+ goto done;
}
if (n_nel)
if (file_printf(ms, " NEL") == -1)
- return -1;
+ goto done;
}
if (file_printf(ms, " line terminators") == -1)
- return -1;
+ goto done;
}
if (has_escapes)
if (file_printf(ms, ", with escape sequences") == -1)
- return -1;
+ goto done;
if (has_backspace)
if (file_printf(ms, ", with overstriking") == -1)
- return -1;
+ goto done;
}
+ rv = 1;
+done:
+ if (nbuf)
+ free(nbuf);
+ if (ubuf)
+ free(ubuf);
- return 1;
+ return rv;
}
private int
@@ -439,7 +466,7 @@ private int
looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
size_t *ulen)
{
- int i;
+ size_t i;
*ulen = 0;
@@ -458,7 +485,7 @@ looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
private int
looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
{
- int i;
+ size_t i;
*ulen = 0;
@@ -478,7 +505,7 @@ private int
looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
size_t *ulen)
{
- int i;
+ size_t i;
*ulen = 0;
@@ -497,7 +524,8 @@ looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
private int
looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
{
- int i, n;
+ size_t i;
+ int n;
unichar c;
int gotone = 0;
@@ -561,7 +589,7 @@ looks_unicode(const unsigned char *buf, size_t nbytes, unichar *ubuf,
size_t *ulen)
{
int bigend;
- int i;
+ size_t i;
if (nbytes < 2)
return 0;
@@ -680,7 +708,7 @@ private unsigned char ebcdic_1047_to_8859[] = {
private void
from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
{
- int i;
+ size_t i;
for (i = 0; i < nbytes; i++) {
out[i] = ebcdic_to_ascii[buf[i]];