summary | refs | log | tree | commit | diff
path: root/usr.bin/file/names.h
diff options
context:
space:
mode:
author: Ted Unangst <tedu@cvs.openbsd.org> 2004-05-19 02:32:37 +0000
committer: Ted Unangst <tedu@cvs.openbsd.org> 2004-05-19 02:32:37 +0000
commit: 05cabd309934408959a36a9feb018f1bf1e897ee (patch)
tree: a528b45949c0f3f8e8f232f70213ce635a357a5d /usr.bin/file/names.h
parent: 2243525ab32c3771528c08a31aa8d25645ee7abc (diff)
big update to file 4.09. ok ian@
Diffstat (limited to 'usr.bin/file/names.h')
-rw-r--r-- usr.bin/file/names.h 125
1 files changed, 96 insertions, 29 deletions
diff --git a/usr.bin/file/names.h b/usr.bin/file/names.h
index d421f832720..3367cfe1294 100644
--- a/usr.bin/file/names.h
+++ b/usr.bin/file/names.h
@@ -1,12 +1,5 @@
-/* $OpenBSD: names.h,v 1.5 2003/06/13 18:31:14 deraadt Exp $ */
-
+/* $OpenBSD: names.h,v 1.6 2004/05/19 02:32:35 tedu Exp $ */
/*
- * Names.h - names and types used by ascmagic in file(1).
- * These tokens are here because they can appear anywhere in
- * the first HOWMANY bytes, while tokens in /etc/magic must
- * appear at fixed offsets into the file. Don't make HOWMANY
- * too high unless you have a very fast CPU.
- *
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
@@ -33,39 +26,108 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+/*
+ * Names.h - names and types used by ascmagic in file(1).
+ * These tokens are here because they can appear anywhere in
+ * the first HOWMANY bytes, while tokens in MAGIC must
+ * appear at fixed offsets into the file. Don't make HOWMANY
+ * too high unless you have a very fast CPU.
+ *
+ * $Id: names.h,v 1.6 2004/05/19 02:32:35 tedu Exp $
+ */
+
+/*
+ modified by Chris Lowth - 9 April 2000
+ to add mime type strings to the types table.
+*/
/* these types are used to index the table 'types': keep em in sync! */
-#define L_C 0 /* first and foremost on UNIX */
-#define L_CC 1 /* Bjarne's postincrement */
+#define L_C 0 /* first and foremost on UNIX */
+#define L_CC 1 /* Bjarne's postincrement */
#define L_FORT 2 /* the oldest one */
-#define L_MAKE 3 /* Makefiles */
-#define L_PLI 4 /* PL/1 */
-#define L_MACH 5 /* some kinda assembler */
-#define L_ENG 6 /* English */
+#define L_MAKE 3 /* Makefiles */
+#define L_PLI 4 /* PL/1 */
+#define L_MACH 5 /* some kinda assembler */
+#define L_ENG 6 /* English */
#define L_PAS 7 /* Pascal */
#define L_MAIL 8 /* Electronic mail */
#define L_NEWS 9 /* Usenet Netnews */
+#define L_JAVA 10 /* Java code */
+#define L_HTML 11 /* HTML */
+#define L_BCPL 12 /* BCPL */
+#define L_M4 13 /* M4 */
+#define L_PO 14 /* PO */
-static char *types[] = {
- "C program text",
- "C++ program text",
- "FORTRAN program text",
- "make commands text" ,
- "PL/1 program text",
- "assembler program text",
- "English text",
- "Pascal program text",
- "mail text",
- "news text",
- "can't happen error on names.h/types",
- 0};
+static const struct { /* description table: entry i describes the L_* type whose value is i */
+ const char *human; /* human-readable description, e.g. "C program" */
+ const char *mime; /* corresponding MIME type string, e.g. "text/x-c" */
+} types[] = {
+ { "C program", "text/x-c", }, /* L_C */
+ { "C++ program", "text/x-c++" }, /* L_CC */
+ { "FORTRAN program", "text/x-fortran" }, /* L_FORT */
+ { "make commands", "text/x-makefile" }, /* L_MAKE */
+ { "PL/1 program", "text/x-pl1" }, /* L_PLI */
+ { "assembler program", "text/x-asm" }, /* L_MACH */
+ { "English", "text/plain" }, /* L_ENG */
+ { "Pascal program", "text/x-pascal" }, /* L_PAS */
+ { "mail", "text/x-mail" }, /* L_MAIL */
+ { "news", "text/x-news" }, /* L_NEWS */
+ { "Java program", "text/x-java" }, /* L_JAVA */
+ { "HTML document", "text/html", }, /* L_HTML */
+ { "BCPL program", "text/x-bcpl" }, /* L_BCPL */
+ { "M4 macro language pre-processor", "text/x-m4" }, /* L_M4 */
+ { "PO (gettext message catalogue)", "text/x-po" }, /* L_PO */
+ { "cannot happen error on names.h/types", "error/x-error" }, /* index 15, one past L_PO; NOTE(review): presumably a fallback for an out-of-range type -- confirm against the code that indexes types[] */
+ { 0, 0} /* all-null entry closing the table */
+};
+/*
+ * XXX - how should we distinguish Java from C++?
+ * The trick used in a Debian snapshot, of having "extends" or "implements"
+ * as tags for Java, doesn't work very well, given that those keywords
+ * are often preceded by "class", which flags it as C++.
+ *
+ * Perhaps we need to be able to say
+ *
+ * If "class" then
+ *
+ * if "extends" or "implements" then
+ * Java
+ * else
+ * C++
+ * endif
+ *
+ * Or should we use other keywords, such as "package" or "import"?
+ * Unfortunately, Ada95 uses "package", and Modula-3 uses "import",
+ * although I infer from the language spec at
+ *
+ * http://www.research.digital.com/SRC/m3defn/html/m3.html
+ *
+ * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be
+ * in all caps.
+ *
+ * So, for now, we go with "import". We must put it before the C++
+ * stuff, so that we don't misidentify Java as C++. Not using "package"
+ * means we won't identify stuff that defines a package but imports
+ * nothing; hopefully, very little Java code imports nothing (one of the
+ * reasons for doing OO programming is to import as much as possible
+ * and write only what you need to, right?).
+ *
+ * Unfortunately, "import" may cause us to misidentify English text
+ * as Java, as it comes after "the" and "The". Perhaps we need a fancier
+ * heuristic to identify Java?
+ */
static struct names {
- char *name;
+ const char *name;
short type;
} names[] = {
/* These must be sorted by eye for optimal hit rate */
/* Add to this list only after substantial meditation */
+ {"msgid", L_PO},
+ {"dnl", L_M4},
+ {"import", L_JAVA},
+ {"\"libhdr\"", L_BCPL},
+ {"\"LIBHDR\"", L_BCPL},
{"//", L_CC},
{"template", L_CC},
{"virtual", L_CC},
@@ -80,7 +142,6 @@ static struct names {
{"double", L_C},
{"extern", L_C},
{"float", L_C},
- {"real", L_C},
{"struct", L_C},
{"union", L_C},
{"CFLAGS", L_MAKE},
@@ -115,6 +176,12 @@ static struct names {
{"Newsgroups:", L_NEWS},
{"Path:", L_NEWS},
{"Organization:",L_NEWS},
+ {"href=", L_HTML},
+ {"HREF=", L_HTML},
+ {"<body", L_HTML},
+ {"<BODY", L_HTML},
+ {"<html", L_HTML},
+ {"<HTML", L_HTML},
{NULL, 0}
};
#define NNAMES ((sizeof(names)/sizeof(struct names)) - 1)