diff options
author | Ted Unangst <tedu@cvs.openbsd.org> | 2004-05-19 02:32:37 +0000 |
---|---|---|
committer | Ted Unangst <tedu@cvs.openbsd.org> | 2004-05-19 02:32:37 +0000 |
commit | 05cabd309934408959a36a9feb018f1bf1e897ee (patch) | |
tree | a528b45949c0f3f8e8f232f70213ce635a357a5d /usr.bin/file/names.h | |
parent | 2243525ab32c3771528c08a31aa8d25645ee7abc (diff) |
big update to file 4.09. ok ian@
Diffstat (limited to 'usr.bin/file/names.h')
-rw-r--r-- | usr.bin/file/names.h | 125 |
1 files changed, 96 insertions, 29 deletions
diff --git a/usr.bin/file/names.h b/usr.bin/file/names.h index d421f832720..3367cfe1294 100644 --- a/usr.bin/file/names.h +++ b/usr.bin/file/names.h @@ -1,12 +1,5 @@ -/* $OpenBSD: names.h,v 1.5 2003/06/13 18:31:14 deraadt Exp $ */ - +/* $OpenBSD: names.h,v 1.6 2004/05/19 02:32:35 tedu Exp $ */ /* - * Names.h - names and types used by ascmagic in file(1). - * These tokens are here because they can appear anywhere in - * the first HOWMANY bytes, while tokens in /etc/magic must - * appear at fixed offsets into the file. Don't make HOWMANY - * too high unless you have a very fast CPU. - * * Copyright (c) Ian F. Darwin 1986-1995. * Software written by Ian F. Darwin and others; * maintained 1995-present by Christos Zoulas and others. @@ -33,39 +26,108 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +/* + * Names.h - names and types used by ascmagic in file(1). + * These tokens are here because they can appear anywhere in + * the first HOWMANY bytes, while tokens in MAGIC must + * appear at fixed offsets into the file. Don't make HOWMANY + * too high unless you have a very fast CPU. + * + * $Id: names.h,v 1.6 2004/05/19 02:32:35 tedu Exp $ + */ + +/* + modified by Chris Lowth - 9 April 2000 + to add mime type strings to the types table. +*/ /* these types are used to index the table 'types': keep em in sync! */ -#define L_C 0 /* first and foremost on UNIX */ -#define L_CC 1 /* Bjarne's postincrement */ +#define L_C 0 /* first and foremost on UNIX */ +#define L_CC 1 /* Bjarne's postincrement */ #define L_FORT 2 /* the oldest one */ -#define L_MAKE 3 /* Makefiles */ -#define L_PLI 4 /* PL/1 */ -#define L_MACH 5 /* some kinda assembler */ -#define L_ENG 6 /* English */ +#define L_MAKE 3 /* Makefiles */ +#define L_PLI 4 /* PL/1 */ +#define L_MACH 5 /* some kinda assembler */ +#define L_ENG 6 /* English */ #define L_PAS 7 /* Pascal */ #define L_MAIL 8 /* Electronic mail */ #define L_NEWS 9 /* Usenet Netnews */ +#define L_JAVA 10 /* Java code */ +#define L_HTML 11 /* HTML */ +#define L_BCPL 12 /* BCPL */ +#define L_M4 13 /* M4 */ +#define L_PO 14 /* PO */ -static char *types[] = { - "C program text", - "C++ program text", - "FORTRAN program text", - "make commands text" , - "PL/1 program text", - "assembler program text", - "English text", - "Pascal program text", - "mail text", - "news text", - "can't happen error on names.h/types", - 0}; +static const struct { + const char *human; + const char *mime; +} types[] = { + { "C program", "text/x-c", }, + { "C++ program", "text/x-c++" }, + { "FORTRAN program", "text/x-fortran" }, + { "make commands", "text/x-makefile" }, + { "PL/1 program", "text/x-pl1" }, + { "assembler program", "text/x-asm" }, + { "English", "text/plain" }, + { "Pascal program", "text/x-pascal" }, + { "mail", "text/x-mail" }, + { "news", "text/x-news" }, + { "Java program", "text/x-java" }, + { "HTML document", "text/html", }, + { "BCPL program", "text/x-bcpl" }, + { "M4 macro language pre-processor", "text/x-m4" }, + { "PO (gettext message catalogue)", "text/x-po" }, + { "cannot happen error on names.h/types", "error/x-error" }, + { 0, 0} +}; +/* + * XXX - how should we distinguish Java from C++? + * The trick used in a Debian snapshot, of having "extends" or "implements" + * as tags for Java, doesn't work very well, given that those keywords + * are often preceded by "class", which flags it as C++. + * + * Perhaps we need to be able to say + * + * If "class" then + * + * if "extends" or "implements" then + * Java + * else + * C++ + * endif + * + * Or should we use other keywords, such as "package" or "import"? + * Unfortunately, Ada95 uses "package", and Modula-3 uses "import", + * although I infer from the language spec at + * + * http://www.research.digital.com/SRC/m3defn/html/m3.html + * + * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be + * in all caps. + * + * So, for now, we go with "import". We must put it before the C++ + * stuff, so that we don't misidentify Java as C++. Not using "package" + * means we won't identify stuff that defines a package but imports + * nothing; hopefully, very little Java code imports nothing (one of the + * reasons for doing OO programming is to import as much as possible + * and write only what you need to, right?). + * + * Unfortunately, "import" may cause us to misidentify English text + * as Java, as it comes after "the" and "The". Perhaps we need a fancier + * heuristic to identify Java? + */ static struct names { - char *name; + const char *name; short type; } names[] = { /* These must be sorted by eye for optimal hit rate */ /* Add to this list only after substantial meditation */ + {"msgid", L_PO}, + {"dnl", L_M4}, + {"import", L_JAVA}, + {"\"libhdr\"", L_BCPL}, + {"\"LIBHDR\"", L_BCPL}, {"//", L_CC}, {"template", L_CC}, {"virtual", L_CC}, @@ -80,7 +142,6 @@ static struct names { {"double", L_C}, {"extern", L_C}, {"float", L_C}, - {"real", L_C}, {"struct", L_C}, {"union", L_C}, {"CFLAGS", L_MAKE}, @@ -115,6 +176,12 @@ static struct names { {"Newsgroups:", L_NEWS}, {"Path:", L_NEWS}, {"Organization:",L_NEWS}, + {"href=", L_HTML}, + {"HREF=", L_HTML}, + {"<body", L_HTML}, + {"<BODY", L_HTML}, + {"<html", L_HTML}, + {"<HTML", L_HTML}, {NULL, 0} }; #define NNAMES ((sizeof(names)/sizeof(struct names)) - 1) |