summaryrefslogtreecommitdiff
path: root/usr.bin/file
diff options
context:
space:
mode:
author Nicholas Marriott <nicm@cvs.openbsd.org> 2015-04-24 16:24:12 +0000
committer Nicholas Marriott <nicm@cvs.openbsd.org> 2015-04-24 16:24:12 +0000
commit a553ff22a8cd53c3eb353b19a1a0a362908494a9 (patch)
tree 269caf640a9fa91c70d408594cf95449e6121854 /usr.bin/file
parent 10a4abcb01b177650f75566f3a39cefe7a4948ec (diff)
New implementation of the file(1) utility. This is a simplified,
modernised version with a nearly complete magic(5) parser but omits some of the complex builtin tests (notably ELF) and has a reduced set of options. ok deraadt
Diffstat (limited to 'usr.bin/file')
-rw-r--r-- usr.bin/file/LEGAL.NOTICE 29
-rw-r--r-- usr.bin/file/MAINT 44
-rw-r--r-- usr.bin/file/Makefile 41
-rw-r--r-- usr.bin/file/README 108
-rw-r--r-- usr.bin/file/apprentice.c 2129
-rw-r--r-- usr.bin/file/ascmagic.c 789
-rw-r--r-- usr.bin/file/compress.c 479
-rw-r--r-- usr.bin/file/config.h 38
-rw-r--r-- usr.bin/file/elfclass.h 68
-rw-r--r-- usr.bin/file/file.1 490
-rw-r--r-- usr.bin/file/file.c 822
-rw-r--r-- usr.bin/file/file.h 404
-rw-r--r-- usr.bin/file/file_opts.h 49
-rw-r--r-- usr.bin/file/fsmagic.c 309
-rw-r--r-- usr.bin/file/funcs.c 332
-rw-r--r-- usr.bin/file/is_tar.c 153
-rw-r--r-- usr.bin/file/magic-common.c 83
-rw-r--r-- usr.bin/file/magic-dump.c 53
-rw-r--r-- usr.bin/file/magic-load.c 1020
-rw-r--r-- usr.bin/file/magic-test.c 1121
-rw-r--r-- usr.bin/file/magic.c 395
-rw-r--r-- usr.bin/file/magic.h 255
-rw-r--r-- usr.bin/file/names.h 174
-rw-r--r-- usr.bin/file/patchlevel.h 348
-rw-r--r-- usr.bin/file/print.c 233
-rw-r--r-- usr.bin/file/readelf.c 1020
-rw-r--r-- usr.bin/file/readelf.h 237
-rw-r--r-- usr.bin/file/softmagic.c 1821
-rw-r--r-- usr.bin/file/tar.h 74
-rw-r--r-- usr.bin/file/text.c 168
-rw-r--r-- usr.bin/file/xmalloc.c 103
-rw-r--r-- usr.bin/file/xmalloc.h 31
32 files changed, 3287 insertions, 10133 deletions
diff --git a/usr.bin/file/LEGAL.NOTICE b/usr.bin/file/LEGAL.NOTICE
deleted file mode 100644
index 630960a1411..00000000000
--- a/usr.bin/file/LEGAL.NOTICE
+++ /dev/null
@@ -1,29 +0,0 @@
-$OpenBSD: LEGAL.NOTICE,v 1.8 2008/05/08 01:40:56 chl Exp $
-Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
-Software written by Ian F. Darwin and others;
-maintained 1994- Christos Zoulas.
-
-This software is not subject to any export provision of the United States
-Department of Commerce, and may be exported to any country or planet.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-1. Redistributions of source code must retain the above copyright
- notice immediately at the beginning of the file, without modification,
- this list of conditions, and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
diff --git a/usr.bin/file/MAINT b/usr.bin/file/MAINT
deleted file mode 100644
index a7079c4b41f..00000000000
--- a/usr.bin/file/MAINT
+++ /dev/null
@@ -1,44 +0,0 @@
-$OpenBSD: MAINT,v 1.5 2009/04/24 18:54:34 chl Exp $
-
-Maintenance notes:
-
-I am continuing to maintain the file command. I welcome your help,
-but to make my life easier I'd like to request the following:
-
-- Do not distribute changed versions.
-
-People trying to be helpful occasionally put up their hacked versions
-of the file command for anonymous FTP, and people all over the
-world get copies of the hacked versions. Within a day or two I am
-getting email from around the world asking me why "my" file command
-won't compile!!! Needless to say this detracts from the limited
-time I have available to work on the actual software. Therefore I
-ask you again to please NOT distribute your changed version. If
-you need to make changes, please add a patch file next to the
-distribution tar, and a README file that clearly explains what you
-are trying to fix.
-
-Thank you for your assistance and cooperation.
-
-Code Overview
-
-This is a rough idea of the control flow from the main program:
-
-file.c main()
-file.c process (called for each file)
- printf file name
-magic.c magic_file()
-fsmagic.c file_fsmagic()
- (handles statbuf modes for DEV)
- (handles statbuf modes for executable &c.
- reads data from file.
-funcs.c: file_buffer()
-compress.c file_zmagic()
-is_tar.c file_is_tar()
-softmagic.c file_softmagic()
- match() - looks for match against main magic database
-ascmagic.c file_ascmagic()
-readelf.c file_tryelf()
- "unknown"
-
-Christos Zoulas (see README for email address)
diff --git a/usr.bin/file/Makefile b/usr.bin/file/Makefile
index 28cf145e4f1..5c65f930493 100644
--- a/usr.bin/file/Makefile
+++ b/usr.bin/file/Makefile
@@ -1,36 +1,39 @@
-# $OpenBSD: Makefile,v 1.12 2009/04/14 21:28:10 chl Exp $
+# $OpenBSD: Makefile,v 1.13 2015/04/24 16:24:11 nicm Exp $
+
+PROG= file
+SRCS= file.c magic-dump.c magic-load.c magic-test.c magic-common.c text.c \
+ xmalloc.c
+MAN= file.1 magic.5
+
+CDIAGFLAGS+= -Wno-long-long -Wall -W -Wnested-externs -Wformat=2
+CDIAGFLAGS+= -Wmissing-prototypes -Wstrict-prototypes -Wmissing-declarations
+CDIAGFLAGS+= -Wwrite-strings -Wshadow -Wpointer-arith -Wsign-compare
+CDIAGFLAGS+= -Wundef -Wbad-function-cast -Winline -Wcast-align
MAGIC= /etc/magic
MAGICOWN= root
MAGICGRP= bin
MAGICMODE= 444
-PROG= file
-SRCS= file.c apprentice.c fsmagic.c softmagic.c ascmagic.c is_tar.c \
- print.c compress.c readelf.c magic.c funcs.c
-CFLAGS+= -DMAGIC='"$(MAGIC)"' -DUSE_UTIMES -DHAVE_CONFIG_H
-MAN= file.1 magic.5
-
CLEANFILES+= magic post-magic
-all: file magic
-MAG1= $(.CURDIR)/magdir/Header\
- $(.CURDIR)/magdir/Localstuff\
+MAG1= $(.CURDIR)/magdir/Header \
+ $(.CURDIR)/magdir/Localstuff \
$(.CURDIR)/magdir/OpenBSD
MAGFILES= $(.CURDIR)/magdir/[0-9a-z]*
-post-magic: $(MAGFILES)
- for i in ${.ALLSRC:N*.orig}; \
- do \
+post-magic: $(MAGFILES)
+ for i in ${.ALLSRC:N*.orig}; do \
echo $$i; \
- done|sort|xargs -n 1024 cat > $(.TARGET)
-
-magic: $(MAG1) post-magic
- cat ${MAG1} post-magic > $(.TARGET)
+ done|sort|xargs -n 1024 cat >$(.TARGET)
+magic: $(MAG1) post-magic
+ cat ${MAG1} post-magic >$(.TARGET)
afterinstall:
- ${INSTALL} ${INSTALL_COPY} -o $(MAGICOWN) -g $(MAGICGRP) -m $(MAGICMODE) magic \
- $(DESTDIR)$(MAGIC)
+ ${INSTALL} ${INSTALL_COPY} -o $(MAGICOWN) -g $(MAGICGRP) \
+ -m $(MAGICMODE) magic $(DESTDIR)$(MAGIC)
+
+all: file magic
.include <bsd.prog.mk>
diff --git a/usr.bin/file/README b/usr.bin/file/README
deleted file mode 100644
index 0fada4f9f4f..00000000000
--- a/usr.bin/file/README
+++ /dev/null
@@ -1,108 +0,0 @@
-** README for file(1) Command **
-@(#) $OpenBSD: README,v 1.5 2009/04/24 18:54:34 chl Exp $
-
-E-mail: christos@astron.com
-Mailing List: file@mx.gw.com
-
-Phone: Do not even think of telephoning me about this program. Send cash first!
-
-This is Release 4.x of Ian Darwin's (copyright but distributable)
-file(1) command. This version is the standard "file" command for Linux,
-*BSD, and other systems. (See "patchlevel.h" for the exact release number).
-
-The major feature of 4.x is the refactoring of the code into a library,
-and the re-write of the file command in terms of that library. The library
-itself, libmagic can be used by 3rd party programs that wish to identify
-file types without having to fork() and exec() file. The prime contributor
-for 4.0 was Måns Rullgård.
-
-UNIX is a trademark of UNIX System Laboratories.
-
-The prime contributor to Release 3.8 was Guy Harris, who put in megachanges
-including byte-order independence.
-
-The prime contributor to Release 3.0 was Christos Zoulas, who put
-in hundreds of lines of source code changes, including his own
-ANSIfication of the code (I liked my own ANSIfication better, but
-his (__P()) is the "Berkeley standard" way of doing it, and I wanted UCB
-to include the code...), his HP-like "indirection" (a feature of
-the HP file command, I think), and his mods that finally got the
-uncompress (-z) mode finished and working.
-
-This release has compiled in numerous environments; see PORTING
-for a list and problems.
-
-This fine freeware file(1) follows the USG (System V) model of the file
-command, rather than the Research (V7) version or the V7-derived 4.[23]
-Berkeley one. That is, the file /etc/magic contains much of the ritual
-information that is the source of this program's power. My version
-knows a little more magic (including tar archives) than System V; the
-/etc/magic parsing seems to be compatible with the (poorly documented)
-System V /etc/magic format (with one exception; see the man page).
-
-In addition, the /etc/magic file is built from a subdirectory
-for easier(?) maintenance. I will act as a clearinghouse for
-magic numbers assigned to all sorts of data files that
-are in reasonable circulation. Send your magic numbers,
-in magic(5) format please, to the maintainer, Christos Zoulas.
-
-LEGAL.NOTICE - read this first.
-README - read this second (you are currently reading this file).
-PORTING - read this only if the program won't compile.
-Makefile - read this next, adapt it as needed (particularly
- the location of the old existing file command and
- the man page layouts), type "make" to compile,
- "make try" to try it out against your old version.
- Expect some diffs, particularly since your original
- file(1) may not grok the embedded-space ("\ ") in
- the current magic file, or may even not use the
- magic file.
-apprentice.c - parses /etc/magic to learn magic
-ascmagic.c - third & last set of tests, based on hardwired assumptions.
-core - not included in distribution due to mailer limitations.
-debug.c - includes -c printout routine
-file.1 - man page for the command
-magic.4 - man page for the magic file, courtesy Guy Harris.
- Install as magic.4 on USG and magic.5 on V7 or Berkeley; cf Makefile.
-file.c - main program
-file.h - header file
-fsmagic.c - first set of tests the program runs, based on filesystem info
-is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore).
-magdir - directory of /etc/magic pieces
- magdir/Makefile - ADJUST THIS FOR YOUR CONFIGURATION
-names.h - header file for ascmagic.c
-softmagic.c - 2nd set of tests, based on /etc/magic
-readelf.[ch] - Stand-alone elf parsing code.
-compress.c - on-the-fly decompression.
-print.c - print results, errors, warnings.
-
-You can download the latest version of file from:
-
- ftp://ftp.astron.com/pub/file/
-
-If your gzip sometimes fails to decompress things complaining about a short
-file, apply this patch [which is going to be in the next version of gzip]:
-*** - Tue Oct 29 02:06:35 1996
---- util.c Sun Jul 21 21:51:38 1996
-*** 106,111 ****
---- 108,114 ----
-
- if (insize == 0) {
- if (eof_ok) return EOF;
-+ flush_window();
- read_error();
- }
- bytes_in += (ulg)insize;
-
-Parts of this software were developed at SoftQuad Inc., developers
-of SGML/HTML/XML publishing software, in Toronto, Canada.
-SoftQuad was swallowed up by Corel in 2002
-and does not exist any longer.
-
-From: Kees Zeelenberg
-
-An MS-Windows (Win32) port of File-4.17 is available from
-http://gnuwin32.sourceforge.net/
-
-File is an implementation of the Unix File(1) command.
-It knows the 'magic number' of several thousands of file types.
diff --git a/usr.bin/file/apprentice.c b/usr.bin/file/apprentice.c
deleted file mode 100644
index 295c899abf4..00000000000
--- a/usr.bin/file/apprentice.c
+++ /dev/null
@@ -1,2129 +0,0 @@
-/* $OpenBSD: apprentice.c,v 1.34 2015/01/16 18:08:15 millert Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * apprentice - make one pass through /etc/magic, learning its secrets.
- */
-
-
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "file.h"
-#include "magic.h"
-#include "patchlevel.h"
-#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <limits.h>
-#include <string.h>
-#include <assert.h>
-#include <ctype.h>
-#include <fcntl.h>
-#ifdef QUICK
-#include <sys/mman.h>
-#endif
-#include <dirent.h>
-
-#define EATAB {while (isascii((unsigned char) *l) && \
- isspace((unsigned char) *l)) ++l;}
-#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
- tolower((unsigned char) (l)) : (l))
-/*
- * Work around a bug in headers on Digital Unix.
- * At least confirmed for: OSF1 V4.0 878
- */
-#if defined(__osf__) && defined(__DECC)
-#ifdef MAP_FAILED
-#undef MAP_FAILED
-#endif
-#endif
-
-#ifndef MAP_FAILED
-#define MAP_FAILED (void *) -1
-#endif
-
-#ifndef MAP_FILE
-#define MAP_FILE 0
-#endif
-
-struct magic_entry {
- struct magic *mp;
- uint32_t cont_count;
- uint32_t max_count;
-};
-
-int file_formats[FILE_NAMES_SIZE];
-const size_t file_nformats = FILE_NAMES_SIZE;
-const char *file_names[FILE_NAMES_SIZE];
-const size_t file_nnames = FILE_NAMES_SIZE;
-
-private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
-private int hextoint(int);
-private const char *getstr(struct magic_set *, const char *, char *, int,
- int *, int);
-private int parse(struct magic_set *, struct magic_entry **, uint32_t *,
- const char *, size_t, int);
-private int parse_mime(struct magic_set *, struct magic_entry **, uint32_t *,
- const char *);
-private void eatsize(const char **);
-private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
-private size_t apprentice_magic_strength(const struct magic *);
-private int apprentice_sort(const void *, const void *);
-private int apprentice_load(struct magic_set *, struct magic **, uint32_t *,
- const char *, int);
-private void byteswap(struct magic *, uint32_t);
-private void bs1(struct magic *);
-private uint16_t swap2(uint16_t);
-private uint32_t swap4(uint32_t);
-private uint64_t swap8(uint64_t);
-private void mkdbname(const char *, char **, int);
-private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
- const char *);
-private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
- const char *);
-private int check_format_type(const char *, int);
-private int check_format(struct magic_set *, struct magic *);
-private int get_op(char);
-
-private size_t maxmagic = 0;
-private size_t magicsize = sizeof(struct magic);
-
-private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
-private const char mime_marker[] = "!:mime";
-private const size_t mime_marker_len = sizeof(mime_marker) - 1;
-
-#ifdef COMPILE_ONLY
-
-int main(int, char *[]);
-
-int
-main(int argc, char *argv[])
-{
- int ret;
- struct magic_set *ms;
- char *progname;
-
- if ((progname = strrchr(argv[0], '/')) != NULL)
- progname++;
- else
- progname = argv[0];
-
- if (argc != 2) {
- (void)fprintf(stderr, "Usage: %s file\n", progname);
- return 1;
- }
-
- if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
- (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
- return 1;
- }
- ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
- if (ret == 1)
- (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
- magic_close(ms);
- return ret;
-}
-#endif /* COMPILE_ONLY */
-
-static const struct type_tbl_s {
- const char name[16];
- const size_t len;
- const int type;
- const int format;
-} type_tbl[] = {
-# define XX(s) s, (sizeof(s) - 1)
-# define XX_NULL "", 0
- { XX("byte"), FILE_BYTE, FILE_FMT_NUM },
- { XX("short"), FILE_SHORT, FILE_FMT_NUM },
- { XX("default"), FILE_DEFAULT, FILE_FMT_STR },
- { XX("long"), FILE_LONG, FILE_FMT_NUM },
- { XX("string"), FILE_STRING, FILE_FMT_STR },
- { XX("date"), FILE_DATE, FILE_FMT_STR },
- { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM },
- { XX("belong"), FILE_BELONG, FILE_FMT_NUM },
- { XX("bedate"), FILE_BEDATE, FILE_FMT_STR },
- { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM },
- { XX("lelong"), FILE_LELONG, FILE_FMT_NUM },
- { XX("ledate"), FILE_LEDATE, FILE_FMT_STR },
- { XX("pstring"), FILE_PSTRING, FILE_FMT_STR },
- { XX("ldate"), FILE_LDATE, FILE_FMT_STR },
- { XX("beldate"), FILE_BELDATE, FILE_FMT_STR },
- { XX("leldate"), FILE_LELDATE, FILE_FMT_STR },
- { XX("regex"), FILE_REGEX, FILE_FMT_STR },
- { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR },
- { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR },
- { XX("search"), FILE_SEARCH, FILE_FMT_STR },
- { XX("medate"), FILE_MEDATE, FILE_FMT_STR },
- { XX("meldate"), FILE_MELDATE, FILE_FMT_STR },
- { XX("melong"), FILE_MELONG, FILE_FMT_NUM },
- { XX("quad"), FILE_QUAD, FILE_FMT_QUAD },
- { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD },
- { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD },
- { XX("qdate"), FILE_QDATE, FILE_FMT_STR },
- { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR },
- { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR },
- { XX("qldate"), FILE_QLDATE, FILE_FMT_STR },
- { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR },
- { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR },
- { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT },
- { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT },
- { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT },
- { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE },
- { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE },
- { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE },
- { XX_NULL, FILE_INVALID, FILE_FMT_NONE },
-# undef XX
-# undef XX_NULL
-};
-
-private int
-get_type(const char *l, const char **t)
-{
- const struct type_tbl_s *p;
-
- for (p = type_tbl; p->len; p++) {
- if (strncmp(l, p->name, p->len) == 0) {
- if (t)
- *t = l + p->len;
- break;
- }
- }
- return p->type;
-}
-
-private void
-init_file_tables(void)
-{
- static int done = 0;
- const struct type_tbl_s *p;
-
- if (done)
- return;
- done++;
-
- for (p = type_tbl; p->len; p++) {
- assert(p->type < FILE_NAMES_SIZE);
- file_names[p->type] = p->name;
- file_formats[p->type] = p->format;
- }
-}
-
-/*
- * Handle one file or directory.
- */
-private int
-apprentice_1(struct magic_set *ms, const char *fn, int action,
- struct mlist *mlist)
-{
- struct magic *magic = NULL;
- uint32_t nmagic = 0;
- struct mlist *ml;
- int rv = -1;
- int mapped;
-
- if (magicsize != FILE_MAGICSIZE) {
- file_error(ms, 0, "magic element size %lu != %lu",
- (unsigned long)sizeof(*magic),
- (unsigned long)FILE_MAGICSIZE);
- return -1;
- }
-
- if (action == FILE_COMPILE) {
- rv = apprentice_load(ms, &magic, &nmagic, fn, action);
- if (rv != 0)
- return -1;
- rv = apprentice_compile(ms, &magic, &nmagic, fn);
- free(magic);
- return rv;
- }
-
-#ifndef COMPILE_ONLY
- if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
- if (ms->flags & MAGIC_CHECK)
- file_magwarn(ms, "using regular magic file `%s'", fn);
- rv = apprentice_load(ms, &magic, &nmagic, fn, action);
- if (rv != 0)
- return -1;
- }
-
- mapped = rv;
-
- if (magic == NULL) {
- file_delmagic(magic, mapped, nmagic);
- return -1;
- }
-
- if ((ml = malloc(sizeof(*ml))) == NULL) {
- file_delmagic(magic, mapped, nmagic);
- file_oomem(ms, sizeof(*ml));
- return -1;
- }
-
- ml->magic = magic;
- ml->nmagic = nmagic;
- ml->mapped = mapped;
-
- mlist->prev->next = ml;
- ml->prev = mlist->prev;
- ml->next = mlist;
- mlist->prev = ml;
-
- return 0;
-#endif /* COMPILE_ONLY */
-}
-
-protected void
-file_delmagic(struct magic *p, int type, size_t entries)
-{
- if (p == NULL)
- return;
- switch (type) {
-#ifdef QUICK
- case 2:
- p--;
- (void)munmap((void *)p, sizeof(*p) * (entries + 1));
- break;
-#endif
- case 1:
- p--;
- /*FALLTHROUGH*/
- case 0:
- free(p);
- break;
- default:
- abort();
- }
-}
-
-/* const char *fn: list of magic files and directories */
-protected struct mlist *
-file_apprentice(struct magic_set *ms, const char *fn, int action)
-{
- char *p, *mfn;
- int file_err, errs = -1;
- struct mlist *mlist;
-
- init_file_tables();
-
- if (fn == NULL)
- fn = getenv("MAGIC");
- if (fn == NULL)
- fn = MAGIC;
-
- if ((mfn = strdup(fn)) == NULL) {
- file_oomem(ms, strlen(fn));
- return NULL;
- }
- fn = mfn;
-
- if ((mlist = malloc(sizeof(*mlist))) == NULL) {
- free(mfn);
- file_oomem(ms, sizeof(*mlist));
- return NULL;
- }
- mlist->next = mlist->prev = mlist;
-
- while (fn) {
- p = strchr(fn, PATHSEP);
- if (p)
- *p++ = '\0';
- if (*fn == '\0')
- break;
- file_err = apprentice_1(ms, fn, action, mlist);
- errs = MAX(errs, file_err);
- fn = p;
- }
- if (errs == -1) {
- free(mfn);
- free(mlist);
- mlist = NULL;
- file_error(ms, 0, "could not find any magic files!");
- return NULL;
- }
- free(mfn);
- return mlist;
-}
-
-/*
- * Get weight of this magic entry, for sorting purposes.
- */
-private size_t
-apprentice_magic_strength(const struct magic *m)
-{
-#define MULT 10
- size_t val = 2 * MULT; /* baseline strength */
-
- switch (m->type) {
- case FILE_DEFAULT: /* make sure this sorts last */
- return 0;
-
- case FILE_BYTE:
- val += 1 * MULT;
- break;
-
- case FILE_SHORT:
- case FILE_LESHORT:
- case FILE_BESHORT:
- val += 2 * MULT;
- break;
-
- case FILE_LONG:
- case FILE_LELONG:
- case FILE_BELONG:
- case FILE_MELONG:
- val += 4 * MULT;
- break;
-
- case FILE_PSTRING:
- case FILE_STRING:
- val += m->vallen * MULT;
- break;
-
- case FILE_BESTRING16:
- case FILE_LESTRING16:
- val += m->vallen * MULT / 2;
- break;
-
- case FILE_SEARCH:
- case FILE_REGEX:
- val += m->vallen * MAX(MULT / m->vallen, 1);
- break;
-
- case FILE_DATE:
- case FILE_LEDATE:
- case FILE_BEDATE:
- case FILE_MEDATE:
- case FILE_LDATE:
- case FILE_LELDATE:
- case FILE_BELDATE:
- case FILE_MELDATE:
- case FILE_FLOAT:
- case FILE_BEFLOAT:
- case FILE_LEFLOAT:
- val += 4 * MULT;
- break;
-
- case FILE_QUAD:
- case FILE_BEQUAD:
- case FILE_LEQUAD:
- case FILE_QDATE:
- case FILE_LEQDATE:
- case FILE_BEQDATE:
- case FILE_QLDATE:
- case FILE_LEQLDATE:
- case FILE_BEQLDATE:
- case FILE_DOUBLE:
- case FILE_BEDOUBLE:
- case FILE_LEDOUBLE:
- val += 8 * MULT;
- break;
-
- default:
- val = 0;
- (void)fprintf(stderr, "Bad type %d\n", m->type);
- abort();
- }
-
- switch (m->reln) {
- case 'x': /* matches anything penalize */
- case '!': /* matches almost anything penalize */
- val = 0;
- break;
-
- case '=': /* Exact match, prefer */
- val += MULT;
- break;
-
- case '>':
- case '<': /* comparison match reduce strength */
- val -= 2 * MULT;
- break;
-
- case '^':
- case '&': /* masking bits, we could count them too */
- val -= MULT;
- break;
-
- default:
- (void)fprintf(stderr, "Bad relation %c\n", m->reln);
- abort();
- }
-
- if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */
- val = 1;
-
- return val;
-}
-
-/*
- * Sort callback for sorting entries by "strength" (basically length)
- */
-private int
-apprentice_sort(const void *a, const void *b)
-{
- const struct magic_entry *ma = a;
- const struct magic_entry *mb = b;
- size_t sa = apprentice_magic_strength(ma->mp);
- size_t sb = apprentice_magic_strength(mb->mp);
- if (sa == sb)
- return 0;
- else if (sa > sb)
- return -1;
- else
- return 1;
-}
-
-private void
-set_test_type(struct magic *mstart, struct magic *m)
-{
- switch (m->type) {
- case FILE_BYTE:
- case FILE_SHORT:
- case FILE_LONG:
- case FILE_DATE:
- case FILE_BESHORT:
- case FILE_BELONG:
- case FILE_BEDATE:
- case FILE_LESHORT:
- case FILE_LELONG:
- case FILE_LEDATE:
- case FILE_LDATE:
- case FILE_BELDATE:
- case FILE_LELDATE:
- case FILE_MEDATE:
- case FILE_MELDATE:
- case FILE_MELONG:
- case FILE_QUAD:
- case FILE_LEQUAD:
- case FILE_BEQUAD:
- case FILE_QDATE:
- case FILE_LEQDATE:
- case FILE_BEQDATE:
- case FILE_QLDATE:
- case FILE_LEQLDATE:
- case FILE_BEQLDATE:
- case FILE_FLOAT:
- case FILE_BEFLOAT:
- case FILE_LEFLOAT:
- case FILE_DOUBLE:
- case FILE_BEDOUBLE:
- case FILE_LEDOUBLE:
- case FILE_STRING:
- case FILE_PSTRING:
- case FILE_BESTRING16:
- case FILE_LESTRING16:
- /* binary test, set flag */
- mstart->flag |= BINTEST;
- break;
- case FILE_REGEX:
- case FILE_SEARCH:
- /* binary test if pattern is not text */
- if (file_looks_utf8(m->value.s, m->vallen, NULL, NULL) == 0)
- mstart->flag |= BINTEST;
- break;
- case FILE_DEFAULT:
- /* can't deduce anything; we shouldn't see this at the
- top level anyway */
- break;
- case FILE_INVALID:
- default:
- /* invalid search type, but no need to complain here */
- break;
- }
-}
-
-/*
- * Load and parse one file.
- */
-private void
-load_1(struct magic_set *ms, int action, const char *fn, int *errs,
- struct magic_entry **marray, uint32_t *marraycount)
-{
- char line[BUFSIZ];
- size_t lineno = 0;
- FILE *f = fopen(ms->file = fn, "r");
- if (f == NULL) {
- if (errno != ENOENT)
- file_error(ms, errno, "cannot read magic file `%s'",
- fn);
- (*errs)++;
- } else {
- /* read and parse this file */
- for (ms->line = 1; fgets(line, sizeof(line), f) != NULL; ms->line++) {
- size_t len;
- len = strlen(line);
- if (len == 0) /* null line, garbage, etc */
- continue;
- if (line[len - 1] == '\n') {
- lineno++;
- line[len - 1] = '\0'; /* delete newline */
- }
- if (line[0] == '\0') /* empty, do not parse */
- continue;
- if (line[0] == '#') /* comment, do not parse */
- continue;
- if (len > mime_marker_len &&
- memcmp(line, mime_marker, mime_marker_len) == 0) {
- /* MIME type */
- if (parse_mime(ms, marray, marraycount,
- line + mime_marker_len) != 0)
- (*errs)++;
- continue;
- }
- if (parse(ms, marray, marraycount, line, lineno, action) != 0)
- (*errs)++;
- }
-
- (void)fclose(f);
- }
-}
-
-/*
- * parse a file or directory of files
- * const char *fn: name of magic file or directory
- */
-private int
-apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
- const char *fn, int action)
-{
- int errs = 0;
- struct magic_entry *marray;
- uint32_t marraycount, i, mentrycount = 0, starttest;
- char subfn[PATH_MAX];
- struct stat st;
- DIR *dir;
- struct dirent *d;
-
- ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */
-
- maxmagic = MAXMAGIS;
- if ((marray = calloc(maxmagic, sizeof(*marray))) == NULL) {
- file_oomem2(ms, maxmagic, sizeof(*marray));
- return -1;
- }
- marraycount = 0;
-
- /* print silly verbose header for USG compat. */
- if (action == FILE_CHECK)
- (void)fprintf(stderr, "%s\n", usg_hdr);
-
- /* load directory or file */
- if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
- dir = opendir(fn);
- if (dir) {
- while ((d = readdir(dir)) != NULL) {
- snprintf(subfn, sizeof(subfn), "%s/%s",
- fn, d->d_name);
- if (stat(subfn, &st) == 0 && S_ISREG(st.st_mode)) {
- load_1(ms, action, subfn, &errs,
- &marray, &marraycount);
- }
- }
- closedir(dir);
- } else
- errs++;
- } else
- load_1(ms, action, fn, &errs, &marray, &marraycount);
- if (errs)
- goto out;
-
- /* Set types of tests */
- for (i = 0; i < marraycount; ) {
- if (marray[i].mp->cont_level != 0) {
- i++;
- continue;
- }
-
- starttest = i;
- do {
- set_test_type(marray[starttest].mp, marray[i].mp);
- if (ms->flags & MAGIC_DEBUG) {
- (void)fprintf(stderr, "%s%s%s: %s\n",
- marray[i].mp->mimetype,
- marray[i].mp->mimetype[0] == '\0' ? "" : "; ",
- marray[i].mp->desc[0] ? marray[i].mp->desc : "(no description)",
- marray[i].mp->flag & BINTEST ? "binary" : "text");
- if (marray[i].mp->flag & BINTEST) {
-#define SYMBOL "text"
-#define SYMLEN sizeof(SYMBOL)
- char *p = strstr(marray[i].mp->desc, "text");
- if (p && (p == marray[i].mp->desc || isspace(p[-1])) &&
- (p + SYMLEN - marray[i].mp->desc == MAXstring ||
- (p[SYMLEN] == '\0' || isspace(p[SYMLEN])))) {
- (void)fprintf(stderr,
- "*** Possible binary test for text type\n");
- }
-#undef SYMBOL
-#undef SYMLEN
- }
- }
- } while (++i < marraycount && marray[i].mp->cont_level != 0);
- }
-
- qsort(marray, marraycount, sizeof(*marray), apprentice_sort);
-
- /*
- * Make sure that any level 0 "default" line is last (if one exists).
- */
- for (i = 0; i < marraycount; i++) {
- if (marray[i].mp->cont_level == 0 &&
- marray[i].mp->type == FILE_DEFAULT) {
- while (++i < marraycount)
- if (marray[i].mp->cont_level == 0)
- break;
- if (i != marraycount) {
- ms->line = marray[i].mp->lineno; /* XXX - Ugh! */
- file_magwarn(ms,
- "level 0 \"default\" did not sort last");
- }
- break;
- }
- }
-
- for (i = 0; i < marraycount; i++)
- mentrycount += marray[i].cont_count;
-
- if ((*magicp = reallocarray(NULL, mentrycount, sizeof(**magicp))) == NULL) {
- file_oomem2(ms, mentrycount, sizeof(**magicp));
- errs++;
- goto out;
- }
-
- mentrycount = 0;
- for (i = 0; i < marraycount; i++) {
- (void)memcpy(*magicp + mentrycount, marray[i].mp,
- marray[i].cont_count * sizeof(**magicp));
- mentrycount += marray[i].cont_count;
- }
-out:
- for (i = 0; i < marraycount; i++)
- free(marray[i].mp);
- free(marray);
- if (errs) {
- *magicp = NULL;
- *nmagicp = 0;
- return errs;
- } else {
- *nmagicp = mentrycount;
- return 0;
- }
-
-}
-
-/*
- * extend the sign bit if the comparison is to be signed
- */
-protected uint64_t
-file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
-{
- if (!(m->flag & UNSIGNED)) {
- switch(m->type) {
- /*
- * Do not remove the casts below. They are
- * vital. When later compared with the data,
- * the sign extension must have happened.
- */
- case FILE_BYTE:
- v = (char) v;
- break;
- case FILE_SHORT:
- case FILE_BESHORT:
- case FILE_LESHORT:
- v = (short) v;
- break;
- case FILE_DATE:
- case FILE_BEDATE:
- case FILE_LEDATE:
- case FILE_MEDATE:
- case FILE_LDATE:
- case FILE_BELDATE:
- case FILE_LELDATE:
- case FILE_MELDATE:
- case FILE_LONG:
- case FILE_BELONG:
- case FILE_LELONG:
- case FILE_MELONG:
- case FILE_FLOAT:
- case FILE_BEFLOAT:
- case FILE_LEFLOAT:
- v = (int32_t) v;
- break;
- case FILE_QUAD:
- case FILE_BEQUAD:
- case FILE_LEQUAD:
- case FILE_QDATE:
- case FILE_QLDATE:
- case FILE_BEQDATE:
- case FILE_BEQLDATE:
- case FILE_LEQDATE:
- case FILE_LEQLDATE:
- case FILE_DOUBLE:
- case FILE_BEDOUBLE:
- case FILE_LEDOUBLE:
- v = (int64_t) v;
- break;
- case FILE_STRING:
- case FILE_PSTRING:
- case FILE_BESTRING16:
- case FILE_LESTRING16:
- case FILE_REGEX:
- case FILE_SEARCH:
- case FILE_DEFAULT:
- break;
- default:
- if (ms->flags & MAGIC_CHECK)
- file_magwarn(ms, "cannot happen: m->type=%d\n",
- m->type);
- return ~0U;
- }
- }
- return v;
-}
-
-/*
- * Validate the string modifiers recorded in m against its type.
- *
- * Nothing is checked unless MAGIC_CHECK is set in ms->flags.  Returns 0
- * when the modifiers are acceptable (or checking is off) and -1 after
- * issuing a warning.  For FILE_SEARCH with no range, the default range is
- * stored in m->str_range before -1 is returned.
- */
-private int
-string_modifier_check(struct magic_set *ms, struct magic *m)
-{
-	if (!(ms->flags & MAGIC_CHECK))
-		return 0;
-
-	if (m->type == FILE_BESTRING16 || m->type == FILE_LESTRING16) {
-		if (m->str_flags == 0)
-			return 0;
-		file_magwarn(ms,
-		    "no modifiers allowed for 16-bit strings\n");
-		return -1;
-	}
-
-	if (m->type == FILE_STRING || m->type == FILE_PSTRING) {
-		if ((m->str_flags & REGEX_OFFSET_START) == 0)
-			return 0;
-		file_magwarn(ms,
-		    "'/%c' only allowed on regex and search\n",
-		    CHAR_REGEX_OFFSET_START);
-		return -1;
-	}
-
-	if (m->type == FILE_SEARCH) {
-		if (m->str_range != 0)
-			return 0;
-		file_magwarn(ms,
-		    "missing range; defaulting to %d\n",
-		    STRING_DEFAULT_RANGE);
-		m->str_range = STRING_DEFAULT_RANGE;
-		return -1;
-	}
-
-	if (m->type == FILE_REGEX) {
-		if ((m->str_flags & STRING_COMPACT_BLANK) != 0) {
-			file_magwarn(ms, "'/%c' not allowed on regex\n",
-			    CHAR_COMPACT_BLANK);
-			return -1;
-		}
-		if ((m->str_flags & STRING_COMPACT_OPTIONAL_BLANK) != 0) {
-			file_magwarn(ms, "'/%c' not allowed on regex\n",
-			    CHAR_COMPACT_OPTIONAL_BLANK);
-			return -1;
-		}
-		return 0;
-	}
-
-	/* Any other type should never reach this function. */
-	file_magwarn(ms, "coding error: m->type=%d\n",
-	    m->type);
-	return -1;
-}
-
-/*
- * Map an operator character to its FILE_OP* code.
- * Returns -1 if c is not one of & | ^ + - * / %.
- */
-private int
-get_op(char c)
-{
-	static const struct {
-		char sym;
-		int code;
-	} ops[] = {
-		{ '&', FILE_OPAND },
-		{ '|', FILE_OPOR },
-		{ '^', FILE_OPXOR },
-		{ '+', FILE_OPADD },
-		{ '-', FILE_OPMINUS },
-		{ '*', FILE_OPMULTIPLY },
-		{ '/', FILE_OPDIVIDE },
-		{ '%', FILE_OPMODULO },
-	};
-	size_t k;
-
-	for (k = 0; k < sizeof(ops) / sizeof(ops[0]); k++) {
-		if (ops[k].sym == c)
-			return ops[k].code;
-	}
-	return -1;
-}
-
-#ifdef ENABLE_CONDITIONALS
-/*
- * Recognise a conditional keyword ("if", "elif" or "else") at the start of
- * l.  The keyword must be followed by whitespace.
- *
- * On a match, *t (when t is non-NULL) is set to point just past the keyword
- * and the matching COND_* value is returned.  Otherwise COND_NONE is
- * returned and *t is left untouched.
- */
-private int
-get_cond(const char *l, const char **t)
-{
-	static const struct cond_tbl_s {
-		char name[8];
-		size_t len;
-		int cond;
-	} cond_tbl[] = {
-		{ "if",		2,	COND_IF },
-		{ "elif",	4,	COND_ELIF },
-		{ "else",	4,	COND_ELSE },
-		{ "",		0,	COND_NONE },
-	};
-	size_t k;
-
-	/* The zero-length entry terminates the table. */
-	for (k = 0; cond_tbl[k].len != 0; k++) {
-		const struct cond_tbl_s *e = &cond_tbl[k];
-
-		if (strncmp(l, e->name, e->len) != 0)
-			continue;
-		if (!isspace((unsigned char)l[e->len]))
-			continue;
-		if (t != NULL)
-			*t = l + e->len;
-		return e->cond;
-	}
-	return cond_tbl[k].cond;
-}
-
-/*
- * Check that conditional `cond' may follow the previous conditional seen at
- * continuation level cont_level, and record the new state for that level.
- *
- * Returns 0 if the sequence is valid, -1 on a syntax error (with a warning
- * when MAGIC_CHECK is set); the recorded state is not changed on error.
- */
-private int
-check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
-{
-	int prev = ms->c.li[cont_level].last_cond;
-	int next = prev;
-
-	if (cond == COND_IF) {
-		if (prev != COND_NONE && prev != COND_ELIF) {
-			if (ms->flags & MAGIC_CHECK)
-				file_magwarn(ms, "syntax error: `if'");
-			return -1;
-		}
-		next = COND_IF;
-	} else if (cond == COND_ELIF) {
-		if (prev != COND_IF && prev != COND_ELIF) {
-			if (ms->flags & MAGIC_CHECK)
-				file_magwarn(ms, "syntax error: `elif'");
-			return -1;
-		}
-		next = COND_ELIF;
-	} else if (cond == COND_ELSE) {
-		if (prev != COND_IF && prev != COND_ELIF) {
-			if (ms->flags & MAGIC_CHECK)
-				file_magwarn(ms, "syntax error: `else'");
-			return -1;
-		}
-		next = COND_NONE;
-	} else if (cond == COND_NONE) {
-		next = COND_NONE;
-	}
-
-	ms->c.li[cont_level].last_cond = next;
-	return 0;
-}
-#endif /* ENABLE_CONDITIONALS */
-
-/*
- * Parse one line from a magic file and store it in the entry array.
- *
- * A line with no leading '>' starts a new entry at (*mentryp)[*nmentryp]
- * and increments *nmentryp on success; a line with leading '>' characters
- * is appended as a continuation to the most recent entry.  Both the entry
- * array and each entry's continuation array are grown on demand.
- *
- * ms:       magic set; flags select whether warnings are issued.
- * mentryp:  in/out pointer to the entry array (may be reallocated).
- * nmentryp: in/out number of entries in use.
- * line:     NUL-terminated text of the line.
- * lineno:   line number, recorded in the parsed entry.
- * action:   passed on to getvalue(); FILE_CHECK also dumps the entry.
- *
- * Returns 0 on success and -1 on error.
- */
-private int
-parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
-    const char *line, size_t lineno, int action)
-{
-#ifdef ENABLE_CONDITIONALS
-	static uint32_t last_cont_level = 0;
-#endif
-	size_t i;
-	struct magic_entry *me;
-	struct magic *m;
-	const char *l = line;
-	char *t;
-	int op;
-	uint32_t cont_level;
-
-	cont_level = 0;
-
-	/* Each leading '>' is one continuation level. */
-	while (*l == '>') {
-		++l;		/* step over */
-		cont_level++;
-	}
-#ifdef ENABLE_CONDITIONALS
-	if (cont_level == 0 || cont_level > last_cont_level)
-		if (file_check_mem(ms, cont_level) == -1)
-			return -1;
-	last_cont_level = cont_level;
-#endif
-
-#define ALLOC_CHUNK	(size_t)10
-#define ALLOC_INCR	(size_t)200
-
-	if (cont_level != 0) {
-		if (*nmentryp == 0) {
-			file_error(ms, 0, "No current entry for continuation");
-			return -1;
-		}
-		me = &(*mentryp)[*nmentryp - 1];
-		if (me->cont_count == me->max_count) {
-			struct magic *nm;
-			size_t cnt = me->max_count + ALLOC_CHUNK;
-			if ((nm = reallocarray(me->mp, cnt, sizeof(*nm)))
-			    == NULL) {
-				file_oomem2(ms, cnt, sizeof(*nm));
-				return -1;
-			}
-			me->mp = m = nm;
-			me->max_count = cnt;
-		}
-		m = &me->mp[me->cont_count++];
-		(void)memset(m, 0, sizeof(*m));
-		m->cont_level = cont_level;
-	} else {
-		if (*nmentryp == maxmagic) {
-			struct magic_entry *mp;
-
-			maxmagic += ALLOC_INCR;
-			if ((mp = reallocarray(*mentryp, maxmagic,
-			    sizeof(*mp))) == NULL) {
-				file_oomem2(ms, maxmagic, sizeof(*mp));
-				return -1;
-			}
-			/* Zero the newly added slots so me->mp reads NULL. */
-			(void)memset(&mp[*nmentryp], 0, sizeof(*mp) *
-			    ALLOC_INCR);
-			*mentryp = mp;
-		}
-		me = &(*mentryp)[*nmentryp];
-		if (me->mp == NULL) {
-			if ((m = reallocarray(NULL, ALLOC_CHUNK, sizeof(*m))) == NULL) {
-				file_oomem2(ms, ALLOC_CHUNK, sizeof(*m));
-				return -1;
-			}
-			me->mp = m;
-			me->max_count = ALLOC_CHUNK;
-		} else
-			m = me->mp;
-		(void)memset(m, 0, sizeof(*m));
-		m->cont_level = 0;
-		me->cont_count = 1;
-	}
-	m->lineno = lineno;
-
-	if (*l == '&') {  /* m->cont_level == 0 checked below. */
-		++l;		/* step over */
-		m->flag |= OFFADD;
-	}
-	if (*l == '(') {
-		++l;		/* step over */
-		m->flag |= INDIR;
-		if (m->flag & OFFADD)
-			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
-
-		if (*l == '&') {  /* m->cont_level == 0 checked below */
-			++l;		/* step over */
-			m->flag |= OFFADD;
-		}
-	}
-	/* Relative offsets are not valid at level 0. */
-	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
-		if (ms->flags & MAGIC_CHECK)
-			file_magwarn(ms, "relative offset at level 0");
-
-	/* get offset, then skip over it */
-	m->offset = (uint32_t)strtoul(l, &t, 0);
-	if (l == t)
-		if (ms->flags & MAGIC_CHECK)
-			file_magwarn(ms, "offset `%s' invalid", l);
-	l = t;
-
-	if (m->flag & INDIR) {
-		m->in_type = FILE_LONG;
-		m->in_offset = 0;
-		/*
-		 * read [.lbs][+-]nnnnn)
-		 */
-		if (*l == '.') {
-			l++;
-			switch (*l) {
-			case 'l':
-				m->in_type = FILE_LELONG;
-				break;
-			case 'L':
-				m->in_type = FILE_BELONG;
-				break;
-			case 'm':
-				m->in_type = FILE_MELONG;
-				break;
-			case 'h':
-			case 's':
-				m->in_type = FILE_LESHORT;
-				break;
-			case 'H':
-			case 'S':
-				m->in_type = FILE_BESHORT;
-				break;
-			case 'c':
-			case 'b':
-			case 'C':
-			case 'B':
-				m->in_type = FILE_BYTE;
-				break;
-			case 'e':
-			case 'f':
-			case 'g':
-				m->in_type = FILE_LEDOUBLE;
-				break;
-			case 'E':
-			case 'F':
-			case 'G':
-				m->in_type = FILE_BEDOUBLE;
-				break;
-			default:
-				if (ms->flags & MAGIC_CHECK)
-					file_magwarn(ms,
-					    "indirect offset type `%c' invalid",
-					    *l);
-				break;
-			}
-			l++;
-		}
-
-		m->in_op = 0;
-		if (*l == '~') {
-			m->in_op |= FILE_OPINVERSE;
-			l++;
-		}
-		if ((op = get_op(*l)) != -1) {
-			m->in_op |= op;
-			l++;
-		}
-		if (*l == '(') {
-			m->in_op |= FILE_OPINDIRECT;
-			l++;
-		}
-		if (isdigit((unsigned char)*l) || *l == '-') {
-			m->in_offset = (int32_t)strtol(l, &t, 0);
-			if (l == t)
-				if (ms->flags & MAGIC_CHECK)
-					file_magwarn(ms,
-					    "in_offset `%s' invalid", l);
-			l = t;
-		}
-		if (*l++ != ')' ||
-		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
-			if (ms->flags & MAGIC_CHECK)
-				file_magwarn(ms,
-				    "missing ')' in indirect offset");
-	}
-	EATAB;
-
-#ifdef ENABLE_CONDITIONALS
-	m->cond = get_cond(l, &l);
-	if (check_cond(ms, m->cond, cont_level) == -1)
-		return -1;
-
-	EATAB;
-#endif
-
-	if (*l == 'u') {
-		++l;
-		m->flag |= UNSIGNED;
-	}
-
-	m->type = get_type(l, &l);
-	if (m->type == FILE_INVALID) {
-		if (ms->flags & MAGIC_CHECK)
-			file_magwarn(ms, "type `%s' invalid", l);
-		return -1;
-	}
-
-	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
-	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
-
-	m->mask_op = 0;
-	if (*l == '~') {
-		if (!IS_STRING(m->type))
-			m->mask_op |= FILE_OPINVERSE;
-		else if (ms->flags & MAGIC_CHECK)
-			file_magwarn(ms, "'~' invalid for string types");
-		++l;
-	}
-	m->str_range = 0;
-	m->str_flags = 0;
-	m->num_mask = 0;
-	if ((op = get_op(*l)) != -1) {
-		if (!IS_STRING(m->type)) {
-			uint64_t val;
-			++l;
-			m->mask_op |= op;
-			val = (uint64_t)strtoull(l, &t, 0);
-			l = t;
-			m->num_mask = file_signextend(ms, m, val);
-			eatsize(&l);
-		}
-		else if (op == FILE_OPDIVIDE) {
-			int have_range = 0;
-			while (!isspace((unsigned char)*++l)) {
-				switch (*l) {
-				case '0':  case '1':  case '2':
-				case '3':  case '4':  case '5':
-				case '6':  case '7':  case '8':
-				case '9':
-					if (have_range &&
-					    (ms->flags & MAGIC_CHECK))
-						file_magwarn(ms,
-						    "multiple ranges");
-					have_range = 1;
-					m->str_range = strtoul(l, &t, 0);
-					/* Warn only when checking, like
-					 * every other warning here. */
-					if (m->str_range == 0 &&
-					    (ms->flags & MAGIC_CHECK))
-						file_magwarn(ms,
-						    "zero range");
-					/* The loop's ++l steps past the
-					 * last digit. */
-					l = t - 1;
-					break;
-				case CHAR_COMPACT_BLANK:
-					m->str_flags |= STRING_COMPACT_BLANK;
-					break;
-				case CHAR_COMPACT_OPTIONAL_BLANK:
-					m->str_flags |=
-					    STRING_COMPACT_OPTIONAL_BLANK;
-					break;
-				case CHAR_IGNORE_LOWERCASE:
-					m->str_flags |= STRING_IGNORE_LOWERCASE;
-					break;
-				case CHAR_IGNORE_UPPERCASE:
-					m->str_flags |= STRING_IGNORE_UPPERCASE;
-					break;
-				case CHAR_REGEX_OFFSET_START:
-					m->str_flags |= REGEX_OFFSET_START;
-					break;
-				default:
-					if (ms->flags & MAGIC_CHECK)
-						file_magwarn(ms,
-						"string extension `%c' invalid",
-						*l);
-					return -1;
-				}
-				/* allow multiple '/' for readability */
-				if (l[1] == '/' &&
-				    !isspace((unsigned char)l[2]))
-					l++;
-			}
-			if (string_modifier_check(ms, m) == -1)
-				return -1;
-		}
-		else {
-			/* l still points at the rejected operator. */
-			if (ms->flags & MAGIC_CHECK)
-				file_magwarn(ms, "invalid string op: %c", *l);
-			return -1;
-		}
-	}
-	/*
-	 * We used to set mask to all 1's here, instead let's just not do
-	 * anything if mask = 0 (unless you have a better idea)
-	 */
-	EATAB;
-
-	switch (*l) {
-	case '>':
-	case '<':
-	/* Old-style anding: "0 byte &0x80 dynamically linked" */
-	case '&':
-	case '^':
-	case '=':
-		m->reln = *l;
-		++l;
-		if (*l == '=') {
-		   /* HP compat: ignore &= etc. */
-		   ++l;
-		}
-		break;
-	case '!':
-		m->reln = *l;
-		++l;
-		break;
-	default:
-		m->reln = '=';	/* the default relation */
-		if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
-		    isspace((unsigned char)l[1])) || !l[1])) {
-			m->reln = *l;
-			++l;
-		}
-		break;
-	}
-	/*
-	 * Grab the value part, except for an 'x' reln.
-	 */
-	if (m->reln != 'x' && getvalue(ms, m, &l, action))
-		return -1;
-
-	/*
-	 * TODO finish this macro and start using it!
-	 * #define offsetcheck {if (offset > HOWMANY-1)
-	 *	magwarn("offset too big"); }
-	 */
-
-	/*
-	 * Now get last part - the description
-	 */
-	EATAB;
-	if (l[0] == '\b') {
-		++l;
-		m->flag |= NOSPACE;
-	} else if ((l[0] == '\\') && (l[1] == 'b')) {
-		++l;
-		++l;
-		m->flag |= NOSPACE;
-	}
-	/* Copy up to and including the NUL, or until desc is full. */
-	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
-		continue;
-	if (i == sizeof(m->desc)) {
-		m->desc[sizeof(m->desc) - 1] = '\0';
-		if (ms->flags & MAGIC_CHECK)
-			file_magwarn(ms, "description `%s' truncated", m->desc);
-	}
-
-	/*
-	 * We only do this check while compiling, or if any of the magic
-	 * files were not compiled.
-	 */
-	if (ms->flags & MAGIC_CHECK) {
-		if (check_format(ms, m) == -1)
-			return -1;
-	}
-#ifndef COMPILE_ONLY
-	if (action == FILE_CHECK) {
-		file_mdump(m);
-	}
-#endif
-	m->mimetype[0] = '\0';		/* initialise MIME type to none */
-	if (m->cont_level == 0)
-		++(*nmentryp);		/* make room for next */
-	return 0;
-}
-
-/*
- * Parse a MIME annotation line from a magic file and attach it to the most
- * recently parsed entry (its last continuation line, if it has any).
- *
- * Only ASCII alphanumerics and the characters "-+/." are accepted; copying
- * stops at the first other character or when the buffer is full.
- *
- * Returns 0 if a non-empty MIME type was stored.  Returns -1 if there is no
- * current entry, the entry already has a MIME type, or no valid character
- * was found.
- */
-private int
-parse_mime(struct magic_set *ms, struct magic_entry **mentryp,
-    uint32_t *nmentryp, const char *line)
-{
-	size_t i;
-	const char *l = line;
-	struct magic *m;
-	struct magic_entry *me;
-
-	if (*nmentryp == 0) {
-		file_error(ms, 0, "No current entry for MIME type");
-		return -1;
-	}
-
-	me = &(*mentryp)[*nmentryp - 1];
-	m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
-
-	if (m->mimetype[0] != '\0') {
-		file_error(ms, 0, "Current entry already has a MIME type: %s\n"
-		    "Description: %s\nNew type: %s", m->mimetype, m->desc, l);
-		return -1;
-	}
-
-	EATAB;
-	for (i = 0;
-	    *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
-	    || strchr("-+/.", *l)) && i < sizeof(m->mimetype);
-	    m->mimetype[i++] = *l++)
-		continue;
-	if (i == sizeof(m->mimetype)) {
-		/*
-		 * The buffer is full: terminate the MIME type itself (not
-		 * the description), dropping its last character.
-		 */
-		m->mimetype[sizeof(m->mimetype) - 1] = '\0';
-		if (ms->flags & MAGIC_CHECK)
-			file_magwarn(ms, "MIME type `%s' truncated %zu",
-			    m->mimetype, i);
-	} else
-		m->mimetype[i] = '\0';
-
-	if (i > 0)
-		return 0;
-	else
-		return -1;
-}
-
-/* Return 0 if c is one of the integer conversions i, d, u, x, X; else -1. */
-private int
-check_int_conv(int c)
-{
-	switch (c) {
-	case 'i':
-	case 'd':
-	case 'u':
-	case 'x':
-	case 'X':
-		return 0;
-	default:
-		return -1;
-	}
-}
-
-/*
- * Skip an optional '-', an optional '.', digits, an optional '.', and more
- * digits; return a pointer to the first character after them.
- */
-private const char *
-skip_num_width(const char *ptr)
-{
-	if (*ptr == '-')
-		ptr++;
-	if (*ptr == '.')
-		ptr++;
-	while (isdigit((unsigned char)*ptr))
-		ptr++;
-	if (*ptr == '.')
-		ptr++;
-	while (isdigit((unsigned char)*ptr))
-		ptr++;
-	return ptr;
-}
-
-/*
- * Check that the printf conversion starting at ptr (the text just after the
- * '%') is acceptable for format class `type' (one of the FILE_FMT_* values
- * handled below).
- *
- * Returns 0 if the conversion is acceptable and -1 if it is not, or if ptr
- * is empty.  Aborts on a format class that is not handled.
- */
-private int
-check_format_type(const char *ptr, int type)
-{
-	int quad = 0;
-	int c;
-
-	if (*ptr == '\0') {
-		/* Missing format string; bad */
-		return -1;
-	}
-
-	switch (type) {
-	case FILE_FMT_QUAD:
-		quad = 1;
-		/*FALLTHROUGH*/
-	case FILE_FMT_NUM:
-		ptr = skip_num_width(ptr);
-		if (quad) {
-			/* A quad needs the "ll" length modifier. */
-			if (*ptr++ != 'l')
-				return -1;
-			if (*ptr++ != 'l')
-				return -1;
-		}
-
-		c = *ptr++;
-		if (c == 'l')
-			return check_int_conv(*ptr);
-		if (c == 'h') {
-			c = *ptr++;
-			if (c == 'h')
-				return check_int_conv(*ptr);
-			return c == 'd' ? 0 : -1;
-		}
-		if (c == 'c')
-			return 0;
-		return check_int_conv(c);
-
-	case FILE_FMT_FLOAT:
-	case FILE_FMT_DOUBLE:
-		ptr = skip_num_width(ptr);
-
-		switch (*ptr) {
-		case 'e':
-		case 'E':
-		case 'f':
-		case 'F':
-		case 'g':
-		case 'G':
-			return 0;
-
-		default:
-			return -1;
-		}
-
-	case FILE_FMT_STR:
-		if (*ptr == '-')
-			ptr++;
-		while (isdigit((unsigned char)*ptr))
-			ptr++;
-		if (*ptr == '.') {
-			ptr++;
-			while (isdigit((unsigned char)*ptr))
-				ptr++;
-		}
-
-		return *ptr == 's' ? 0 : -1;
-
-	default:
-		/* internal error */
-		abort();
-	}
-	/*NOTREACHED*/
-	return -1;
-}
-
-/*
- * Check that the optional printf format in the description matches
- * the type of the magic.
- *
- * Returns 1 if the description contains no '%', 0 if it contains exactly
- * one conversion that is valid for m->type, and -1 (after a warning)
- * otherwise.
- */
-private int
-check_format(struct magic_set *ms, struct magic *m)
-{
-	char *ptr;
-
-	for (ptr = m->desc; *ptr; ptr++)
-		if (*ptr == '%')
-			break;
-	if (*ptr == '\0') {
-		/* No format string; ok */
-		return 1;
-	}
-
-	assert(file_nformats == file_nnames);
-
-	if (m->type >= file_nformats) {
-		file_magwarn(ms, "Internal error inconsistency between "
-		    "m->type and format strings");
-		return -1;
-	}
-	if (file_formats[m->type] == FILE_FMT_NONE) {
-		/* Type name first, then description, as the text says. */
-		file_magwarn(ms, "No format string for `%s' with description "
-		    "`%s'", file_names[m->type], m->desc);
-		return -1;
-	}
-
-	/* Step over the '%' to the conversion itself. */
-	ptr++;
-	if (check_format_type(ptr, file_formats[m->type]) == -1) {
-		/*
-		 * TODO: this error message is unhelpful if the format
-		 * string is not one character long
-		 */
-		file_magwarn(ms, "Printf format `%c' is not valid for type "
-		    "`%s' in description `%s'",
-		    *ptr ? *ptr : '?',
-		    file_names[m->type], m->desc);
-		return -1;
-	}
-
-	/* Any further '%' means more than one conversion. */
-	for (; *ptr; ptr++) {
-		if (*ptr == '%') {
-			file_magwarn(ms,
-			    "Too many format strings (should have at most one) "
-			    "for `%s' with description `%s'",
-			    file_names[m->type], m->desc);
-			return -1;
-		}
-	}
-	return 0;
-}
-
-/*
- * Read the comparison value for a magic entry from *p into m->value,
- * interpreting it according to m->type, and advance *p past what was read.
- * String-like types go through getstr() and also set m->vallen; numeric
- * types are left untouched when the relation is 'x'.
- *
- * Returns 0 on success and -1 if a string value could not be read.
- */
-private int
-getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
-{
-	char *end;
-	int len;
-
-	switch (m->type) {
-	case FILE_BESTRING16:
-	case FILE_LESTRING16:
-	case FILE_STRING:
-	case FILE_PSTRING:
-	case FILE_REGEX:
-	case FILE_SEARCH:
-		*p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &len, action);
-		if (*p == NULL) {
-			if (ms->flags & MAGIC_CHECK)
-				file_magwarn(ms, "cannot get string from `%s'",
-				    m->value.s);
-			return -1;
-		}
-		m->vallen = len;
-		if (m->type == FILE_PSTRING)
-			m->vallen++;
-		return 0;
-
-	case FILE_FLOAT:
-	case FILE_BEFLOAT:
-	case FILE_LEFLOAT:
-		if (m->reln == 'x')
-			return 0;
-#ifdef HAVE_STRTOF
-		m->value.f = strtof(*p, &end);
-#else
-		m->value.f = (float)strtod(*p, &end);
-#endif
-		*p = end;
-		return 0;
-
-	case FILE_DOUBLE:
-	case FILE_BEDOUBLE:
-	case FILE_LEDOUBLE:
-		if (m->reln == 'x')
-			return 0;
-		m->value.d = strtod(*p, &end);
-		*p = end;
-		return 0;
-
-	default:
-		if (m->reln == 'x')
-			return 0;
-		m->value.q = file_signextend(ms, m,
-		    (uint64_t)strtoull(*p, &end, 0));
-		*p = end;
-		/* Swallow a trailing size suffix. */
-		eatsize(p);
-		return 0;
-	}
-}
-
-/*
- * Convert a string containing C character escapes.  Stop at an unescaped
- * whitespace character or at the end of the string.
- *
- * ms:     magic set, used for error and warning reporting.
- * s:      NUL-terminated input text.
- * p:      output buffer of plen bytes; always NUL-terminated on success.
- * slen:   receives the length of the converted string.
- * action: warnings about escapes are issued only for FILE_COMPILE.
- *
- * Returns the updated scan pointer: just past the terminating whitespace,
- * or at the NUL if the input ended first, so that callers never scan beyond
- * the end of the string.  Returns NULL if the converted string does not fit
- * in the output buffer.
- */
-private const char *
-getstr(struct magic_set *ms, const char *s, char *p, int plen, int *slen, int action)
-{
-	const char *origs = s;
-	char	*origp = p;
-	char	*pmax = p + plen - 1;
-	int	c;
-	int	val;
-
-	while ((c = *s++) != '\0') {
-		if (isspace((unsigned char) c))
-			break;
-		if (p >= pmax) {
-			file_error(ms, 0, "string too long: `%s'", origs);
-			return NULL;
-		}
-		if (c == '\\') {
-			switch(c = *s++) {
-
-			case '\0':
-				if (action == FILE_COMPILE)
-					file_magwarn(ms, "incomplete escape");
-				goto out;
-
-			case '\t':
-				if (action == FILE_COMPILE) {
-					file_magwarn(ms,
-					    "escaped tab found, use \\t instead");
-					/* No longer FILE_COMPILE, so later
-					 * escape warnings are skipped. */
-					action++;
-				}
-				/*FALLTHROUGH*/
-			default:
-				if (action == FILE_COMPILE) {
-					if (isprint((unsigned char)c))
-					    file_magwarn(ms,
-						"no need to escape `%c'", c);
-					else
-					    file_magwarn(ms,
-						"unknown escape sequence: \\%03o", c);
-				}
-				/*FALLTHROUGH*/
-			/* space, perhaps force people to use \040? */
-			case ' ':
-#if 0
-			/*
-			 * Other things people escape, but shouldn't need to,
-			 * so we disallow them
-			 */
-			case '\'':
-			case '"':
-			case '?':
-#endif
-			/* Relations */
-			case '>':
-			case '<':
-			case '&':
-			case '^':
-			case '=':
-			case '!':
-			/* and backslash itself */
-			case '\\':
-				*p++ = (char) c;
-				break;
-
-			case 'a':
-				*p++ = '\a';
-				break;
-
-			case 'b':
-				*p++ = '\b';
-				break;
-
-			case 'f':
-				*p++ = '\f';
-				break;
-
-			case 'n':
-				*p++ = '\n';
-				break;
-
-			case 'r':
-				*p++ = '\r';
-				break;
-
-			case 't':
-				*p++ = '\t';
-				break;
-
-			case 'v':
-				*p++ = '\v';
-				break;
-
-			/* \ and up to 3 octal digits */
-			case '0':
-			case '1':
-			case '2':
-			case '3':
-			case '4':
-			case '5':
-			case '6':
-			case '7':
-				val = c - '0';
-				c = *s++;  /* try for 2 */
-				if (c >= '0' && c <= '7') {
-					val = (val << 3) | (c - '0');
-					c = *s++;  /* try for 3 */
-					if (c >= '0' && c <= '7')
-						val = (val << 3) | (c-'0');
-					else
-						--s;
-				}
-				else
-					--s;
-				*p++ = (char)val;
-				break;
-
-			/* \x and up to 2 hex digits */
-			case 'x':
-				val = 'x';	/* Default if no digits */
-				c = hextoint(*s++);	/* Get next char */
-				if (c >= 0) {
-					val = c;
-					c = hextoint(*s++);
-					if (c >= 0)
-						val = (val << 4) + c;
-					else
-						--s;
-				} else
-					--s;
-				*p++ = (char)val;
-				break;
-			}
-		} else
-			*p++ = (char)c;
-	}
-out:
-	/*
-	 * c is '\0' here only when the scan consumed the terminator (either
-	 * at the end of the string or after a trailing backslash).  Step
-	 * back so the returned pointer refers to the NUL, not past it.
-	 */
-	if (c == '\0')
-		--s;
-	*p = '\0';
-	*slen = p - origp;
-	return s;
-}
-
-
-/*
- * Convert one hexadecimal digit character to its value (0-15).
- * Returns -1 if c is not a hexadecimal digit.
- */
-private int
-hextoint(int c)
-{
-	unsigned char uc = (unsigned char) c;
-
-	if (!isascii(uc))
-		return -1;
-	if (isdigit(uc))
-		return c - '0';
-	if (c >= 'a' && c <= 'f')
-		return 10 + (c - 'a');
-	if (c >= 'A' && c <= 'F')
-		return 10 + (c - 'A');
-	return -1;
-}
-
-
-/*
- * Print a string to fp, writing non-printable characters as C escapes.
- *
- * If len is ~0U the string is taken to be NUL-terminated and printing stops
- * at the NUL; otherwise exactly len bytes of s are printed.  In the counted
- * case no byte beyond s[len - 1] is read.
- */
-protected void
-file_showstr(FILE *fp, const char *s, size_t len)
-{
-	/* The mode is decided once, before len starts counting down. */
-	const int nul_terminated = (len == ~0U);
-	char	c;
-
-	for (;;) {
-		if (!nul_terminated) {
-			/* Check the count before touching the next byte. */
-			if (len == 0)
-				break;
-			len--;
-		}
-		c = *s++;
-		if (nul_terminated && c == '\0')
-			break;
-		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
-			(void) fputc(c, fp);
-		else {
-			(void) fputc('\\', fp);
-			switch (c) {
-			case '\a':
-				(void) fputc('a', fp);
-				break;
-
-			case '\b':
-				(void) fputc('b', fp);
-				break;
-
-			case '\f':
-				(void) fputc('f', fp);
-				break;
-
-			case '\n':
-				(void) fputc('n', fp);
-				break;
-
-			case '\r':
-				(void) fputc('r', fp);
-				break;
-
-			case '\t':
-				(void) fputc('t', fp);
-				break;
-
-			case '\v':
-				(void) fputc('v', fp);
-				break;
-
-			default:
-				/* Anything else as three octal digits. */
-				(void) fprintf(fp, "%.3o", c & 0377);
-				break;
-			}
-		}
-	}
-}
-
-/*
- * eatsize(): Skip the size suffix that may follow a number [eg. 10UL].
- * An optional 'u' and then an optional one of l, s, h, b, c are consumed,
- * in either case; *p is advanced past them.
- */
-private void
-eatsize(const char **p)
-{
-	const char *cur = *p;
-	int ch;
-
-	if (LOWCASE(*cur) == 'u')
-		cur++;
-
-	ch = LOWCASE(*cur);
-	if (ch == 'l' ||	/* long */
-	    ch == 's' ||	/* short */
-	    ch == 'h' ||	/* short */
-	    ch == 'b' ||	/* char/byte */
-	    ch == 'c')		/* char/byte */
-		cur++;
-
-	*p = cur;
-}
-
-/*
- * Load a compiled magic file.
- *
- * The database name is derived from fn with mkdbname().  The file is either
- * mapped (QUICK) or read into malloc'd memory, its magic number and version
- * are checked, and its entries are byte-swapped if the magic number only
- * matches after swapping.
- *
- * On success *magicp points at the first entry after the header,
- * *nmagicp holds the number of entries, and RET is returned (2 when mapped,
- * 1 when read into malloc'd memory).  On failure -1 is returned; once the
- * file has been opened, *magicp and *nmagicp are also reset to NULL and 0.
- */
-private int
-apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
-    const char *fn)
-{
-	int fd;
-	struct stat st;
-	uint32_t *ptr;
-	uint32_t version;
-	int needsbyteswap;
-	char *dbname = NULL;
-	void *mm = NULL;
-
-	mkdbname(fn, &dbname, 0);
-	if (dbname == NULL)
-		goto error2;
-
-	if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
-		goto error2;
-
-	if (fstat(fd, &st) == -1) {
-		file_error(ms, errno, "cannot stat `%s'", dbname);
-		goto error1;
-	}
-	if (st.st_size < 8 || st.st_size > SIZE_MAX) {
-		file_error(ms, 0, "file `%s' is too %s", dbname,
-		    st.st_size > SIZE_MAX ? "large" : "small");
-		goto error1;
-	}
-
-#ifdef QUICK
-	if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
-	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
-		file_error(ms, errno, "cannot map `%s'", dbname);
-		/* Nothing was mapped, so there is nothing to unmap. */
-		mm = NULL;
-		goto error1;
-	}
-#define RET	2
-#else
-	if ((mm = malloc((size_t)st.st_size)) == NULL) {
-		file_oomem(ms, (size_t)st.st_size);
-		goto error1;
-	}
-	if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) {
-		file_badread(ms);
-		goto error1;
-	}
-#define RET	1
-#endif
-	*magicp = mm;
-	(void)close(fd);
-	fd = -1;
-	ptr = (uint32_t *)(void *)*magicp;
-	if (*ptr != MAGICNO) {
-		if (swap4(*ptr) != MAGICNO) {
-			file_error(ms, 0, "bad magic in `%s'", dbname);
-			goto error1;
-		}
-		needsbyteswap = 1;
-	} else
-		needsbyteswap = 0;
-	if (needsbyteswap)
-		version = swap4(ptr[1]);
-	else
-		version = ptr[1];
-	if (version != VERSIONNO) {
-		file_error(ms, 0, "File %d.%d supports only %d version magic "
-		    "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
-		    VERSIONNO, dbname, version);
-		goto error1;
-	}
-	/* The first struct-sized slot holds the header, not an entry. */
-	*nmagicp = (uint32_t)(st.st_size / sizeof(struct magic));
-	if (*nmagicp > 0)
-		(*nmagicp)--;
-	(*magicp)++;
-	if (needsbyteswap)
-		byteswap(*magicp, *nmagicp);
-	free(dbname);
-	return RET;
-
-error1:
-	if (fd != -1)
-		(void)close(fd);
-	if (mm) {
-#ifdef QUICK
-		(void)munmap((void *)mm, (size_t)st.st_size);
-#else
-		free(mm);
-#endif
-	}
-	/* Never leave the caller with a pointer into released memory. */
-	*magicp = NULL;
-	*nmagicp = 0;
-error2:
-	free(dbname);
-	return -1;
-}
-
-/* Header words written at the start of a compiled magic file. */
-private const uint32_t ar[] = {
-    MAGICNO, VERSIONNO
-};
-/*
- * Write a compiled magic file.
- *
- * The output name is derived from fn with mkdbname() (directory part
- * stripped).  The header words are written first, then the *nmagicp entries
- * of *magicp starting at offset sizeof(struct magic).
- *
- * Returns 0 on success and -1 on failure; the file descriptor is closed on
- * every path.
- */
-private int
-apprentice_compile(struct magic_set *ms, struct magic **magicp,
-    uint32_t *nmagicp, const char *fn)
-{
-	int fd = -1;
-	char *dbname = NULL;
-	int rv = -1;
-
-	mkdbname(fn, &dbname, 1);
-
-	if (dbname == NULL)
-		goto out;
-
-	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) {
-		file_error(ms, errno, "cannot open `%s'", dbname);
-		goto out;
-	}
-
-	if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
-		file_error(ms, errno, "error writing `%s'", dbname);
-		goto out;
-	}
-
-	/* Entries start one struct past the beginning of the file. */
-	if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
-	    != (off_t)sizeof(struct magic)) {
-		file_error(ms, errno, "error seeking `%s'", dbname);
-		goto out;
-	}
-
-	if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp))
-	    != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
-		file_error(ms, errno, "error writing `%s'", dbname);
-		goto out;
-	}
-
-	rv = 0;
-out:
-	if (fd != -1)
-		(void)close(fd);
-	free(dbname);
-	return rv;
-}
-
-/* Suffix appended to a magic file name to form its compiled name. */
-private const char ext[] = ".mgc";
-/*
- * Build the name of the compiled magic file for fn.
- *
- * If strip is non-zero, everything up to and including the last '/' in fn
- * is dropped first.  On success *buf points to a newly allocated string
- * that the caller must free.  *buf is set to NULL if the allocation fails
- * or the resulting name is longer than PATH_MAX.
- */
-private void
-mkdbname(const char *fn, char **buf, int strip)
-{
-	if (strip) {
-		const char *p;
-		if ((p = strrchr(fn, '/')) != NULL)
-			fn = ++p;
-	}
-
-	/* On failure the value asprintf leaves in *buf is not portable. */
-	if (asprintf(buf, "%s%s", fn, ext) == -1) {
-		*buf = NULL;
-		return;
-	}
-	if (strlen(*buf) > PATH_MAX) {
-		free(*buf);
-		*buf = NULL;
-	}
-}
-
-/*
- * Byte-swap every entry of a loaded magic array in place.
- */
-private void
-byteswap(struct magic *magic, uint32_t nmagic)
-{
-	struct magic *cur = magic;
-	uint32_t left = nmagic;
-
-	while (left > 0) {
-		bs1(cur);
-		cur++;
-		left--;
-	}
-}
-
-/*
- * Return sv with its two bytes exchanged.
- */
-private uint16_t
-swap2(uint16_t sv)
-{
-	return (uint16_t)((sv << 8) | (sv >> 8));
-}
-
-/*
- * Return sv with the order of its four bytes reversed.
- */
-private uint32_t
-swap4(uint32_t sv)
-{
-	uint32_t rv;
-
-	rv  = (sv & 0x000000ffU) << 24;
-	rv |= (sv & 0x0000ff00U) << 8;
-	rv |= (sv & 0x00ff0000U) >> 8;
-	rv |= (sv & 0xff000000U) >> 24;
-	return rv;
-}
-
-/*
- * Return sv with the order of its eight bytes reversed.
- */
-private uint64_t
-swap8(uint64_t sv)
-{
-	uint64_t rv = 0;
-	int k;
-
-	/* Move the low byte of sv onto the low end of rv, eight times. */
-	for (k = 0; k < 8; k++) {
-		rv = (rv << 8) | (sv & 0xff);
-		sv >>= 8;
-	}
-	return rv;
-}
-
-/*
- * Byte-swap the multi-byte fields of a single magic entry in place.
- * String-like entries have their string range and flags swapped; all
- * others have their numeric value and mask swapped.
- */
-private void
-bs1(struct magic *m)
-{
-	m->cont_level = swap2(m->cont_level);
-	m->offset = swap4((uint32_t)m->offset);
-	m->in_offset = swap4((uint32_t)m->in_offset);
-	m->lineno = swap4((uint32_t)m->lineno);
-
-	if (IS_STRING(m->type)) {
-		m->str_range = swap4(m->str_range);
-		m->str_flags = swap4(m->str_flags);
-		return;
-	}
-
-	m->value.q = swap8(m->value.q);
-	m->num_mask = swap8(m->num_mask);
-}
diff --git a/usr.bin/file/ascmagic.c b/usr.bin/file/ascmagic.c
deleted file mode 100644
index a5d09dc4931..00000000000
--- a/usr.bin/file/ascmagic.c
+++ /dev/null
@@ -1,789 +0,0 @@
-/* $OpenBSD: ascmagic.c,v 1.12 2014/05/18 17:50:11 espie Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * ASCII magic -- file types that we know based on keywords
- * that can appear anywhere in the file.
- *
- * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
- * to handle character codes other than ASCII on a unified basis.
- *
- * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
- * international characters, now subsumed into this file.
- */
-
-#include "file.h"
-#include "magic.h"
-#include <stdio.h>
-#include <string.h>
-#include <memory.h>
-#include <ctype.h>
-#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include "names.h"
-
-#define MAXLINELEN 300 /* longest sane line length */
-#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
- || (x) == 0x85 || (x) == '\f')
-
-private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
- size_t *);
-protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
-private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
-private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
-private int ascmatch(const unsigned char *, const unichar *, size_t);
-private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t);
-
-
-protected int
-file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
-{
- size_t i;
- unsigned char *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
- unichar *ubuf = NULL;
- size_t ulen, mlen;
- const struct names *p;
- int rv = -1;
- int mime = ms->flags & MAGIC_MIME;
-
- const char *code = NULL;
- const char *code_mime = NULL;
- const char *type = NULL;
- const char *subtype = NULL;
- const char *subtype_mime = NULL;
-
- int has_escapes = 0;
- int has_backspace = 0;
- int seen_cr = 0;
-
- int n_crlf = 0;
- int n_lf = 0;
- int n_cr = 0;
- int n_nel = 0;
-
- size_t last_line_end = (size_t)-1;
- int has_long_lines = 0;
-
- /*
- * Undo the NUL-termination kindly provided by process()
- * but leave at least one byte to look at
- */
- while (nbytes > 1 && buf[nbytes - 1] == '\0')
- nbytes--;
-
- if ((nbuf = calloc((nbytes + 1), sizeof(nbuf[0]))) == NULL)
- goto done;
- if ((ubuf = calloc((nbytes + 1), sizeof(ubuf[0]))) == NULL)
- goto done;
-
- /*
- * Then try to determine whether it's any character code we can
- * identify. Each of these tests, if it succeeds, will leave
- * the text converted into one-unichar-per-character Unicode in
- * ubuf, and the number of characters converted in ulen.
- */
- if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
- code = "ASCII";
- code_mime = "us-ascii";
- type = "text";
- } else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
- code = "UTF-8 Unicode (with BOM)";
- code_mime = "utf-8";
- type = "text";
- } else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
- code = "UTF-8 Unicode";
- code_mime = "utf-8";
- type = "text";
- } else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
- if (i == 1)
- code = "Little-endian UTF-16 Unicode";
- else
- code = "Big-endian UTF-16 Unicode";
-
- type = "character data";
- code_mime = "utf-16"; /* is this defined? */
- } else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
- code = "ISO-8859";
- type = "text";
- code_mime = "iso-8859-1";
- } else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
- code = "Non-ISO extended-ASCII";
- type = "text";
- code_mime = "unknown";
- } else {
- from_ebcdic(buf, nbytes, nbuf);
-
- if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
- code = "EBCDIC";
- type = "character data";
- code_mime = "ebcdic";
- } else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
- code = "International EBCDIC";
- type = "character data";
- code_mime = "ebcdic";
- } else {
- rv = 0;
- goto done; /* doesn't look like text at all */
- }
- }
-
- if (nbytes <= 1) {
- rv = 0;
- goto done;
- }
-
- /* Convert ubuf to UTF-8 and try text soft magic */
- /* If original was ASCII or UTF-8, could use nbuf instead of
- re-converting. */
- /* malloc size is a conservative overestimate; could be
- re-converting improved, or at least realloced after
- re-converting conversion. */
- mlen = ulen * 6;
- if ((utf8_buf = malloc(mlen)) == NULL) {
- file_oomem(ms, mlen);
- goto done;
- }
- if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
- goto done;
- if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
- rv = 1;
- goto done;
- }
-
- /* look for tokens from names.h - this is expensive! */
- if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
- goto subtype_identified;
-
- i = 0;
- while (i < ulen) {
- size_t end;
-
- /* skip past any leading space */
- while (i < ulen && ISSPC(ubuf[i]))
- i++;
- if (i >= ulen)
- break;
-
- /* find the next whitespace */
- for (end = i + 1; end < nbytes; end++)
- if (ISSPC(ubuf[end]))
- break;
-
- /* compare the word thus isolated against the token list */
- for (p = names; p < names + NNAMES; p++) {
- if (ascmatch((const unsigned char *)p->name, ubuf + i,
- end - i)) {
- subtype = types[p->type].human;
- subtype_mime = types[p->type].mime;
- goto subtype_identified;
- }
- }
-
- i = end;
- }
-
-subtype_identified:
-
- /* Now try to discover other details about the file. */
- for (i = 0; i < ulen; i++) {
- if (ubuf[i] == '\n') {
- if (seen_cr)
- n_crlf++;
- else
- n_lf++;
- last_line_end = i;
- } else if (seen_cr)
- n_cr++;
-
- seen_cr = (ubuf[i] == '\r');
- if (seen_cr)
- last_line_end = i;
-
- if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
- n_nel++;
- last_line_end = i;
- }
-
- /* If this line is _longer_ than MAXLINELEN, remember it. */
- if (i > last_line_end + MAXLINELEN)
- has_long_lines = 1;
-
- if (ubuf[i] == '\033')
- has_escapes = 1;
- if (ubuf[i] == '\b')
- has_backspace = 1;
- }
-
- /* Beware, if the data has been truncated, the final CR could have
- been followed by a LF. If we have HOWMANY bytes, it indicates
- that the data might have been truncated, probably even before
- this function was called. */
- if (seen_cr && nbytes < HOWMANY)
- n_cr++;
-
- if (mime) {
- if (mime & MAGIC_MIME_TYPE) {
- if (subtype_mime) {
- if (file_printf(ms, subtype_mime) == -1)
- goto done;
- } else {
- if (file_printf(ms, "text/plain") == -1)
- goto done;
- }
- }
-
- if ((mime == 0 || mime == MAGIC_MIME) && code_mime) {
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, " charset=") == -1)
- goto done;
- if (file_printf(ms, code_mime) == -1)
- goto done;
- }
-
- if (mime == MAGIC_MIME_ENCODING)
- file_printf(ms, "binary");
- } else {
- if (file_printf(ms, code) == -1)
- goto done;
-
- if (subtype) {
- if (file_printf(ms, " ") == -1)
- goto done;
- if (file_printf(ms, subtype) == -1)
- goto done;
- }
-
- if (file_printf(ms, " ") == -1)
- goto done;
- if (file_printf(ms, type) == -1)
- goto done;
-
- if (has_long_lines)
- if (file_printf(ms, ", with very long lines") == -1)
- goto done;
-
- /*
- * Only report line terminators if we find one other than LF,
- * or if we find none at all.
- */
- if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
- (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
- if (file_printf(ms, ", with") == -1)
- goto done;
-
- if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
- if (file_printf(ms, " no") == -1)
- goto done;
- } else {
- if (n_crlf) {
- if (file_printf(ms, " CRLF") == -1)
- goto done;
- if (n_cr || n_lf || n_nel)
- if (file_printf(ms, ",") == -1)
- goto done;
- }
- if (n_cr) {
- if (file_printf(ms, " CR") == -1)
- goto done;
- if (n_lf || n_nel)
- if (file_printf(ms, ",") == -1)
- goto done;
- }
- if (n_lf) {
- if (file_printf(ms, " LF") == -1)
- goto done;
- if (n_nel)
- if (file_printf(ms, ",") == -1)
- goto done;
- }
- if (n_nel)
- if (file_printf(ms, " NEL") == -1)
- goto done;
- }
-
- if (file_printf(ms, " line terminators") == -1)
- goto done;
- }
-
- if (has_escapes)
- if (file_printf(ms, ", with escape sequences") == -1)
- goto done;
- if (has_backspace)
- if (file_printf(ms, ", with overstriking") == -1)
- goto done;
- }
- rv = 1;
-done:
- if (nbuf)
- free(nbuf);
- if (ubuf)
- free(ubuf);
- if (utf8_buf)
- free(utf8_buf);
-
- return rv;
-}
-
-private int
-ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
-{
- size_t i;
-
- for (i = 0; i < ulen; i++) {
- if (s[i] != us[i])
- return 0;
- }
-
- if (s[i])
- return 0;
- else
- return 1;
-}
-
-/*
- * This table reflects a particular philosophy about what constitutes
- * "text," and there is room for disagreement about it.
- *
- * Version 3.31 of the file command considered a file to be ASCII if
- * each of its characters was approved by either the isascii() or
- * isalpha() function. On most systems, this would mean that any
- * file consisting only of characters in the range 0x00 ... 0x7F
- * would be called ASCII text, but many systems might reasonably
- * consider some characters outside this range to be alphabetic,
- * so the file command would call such characters ASCII. It might
- * have been more accurate to call this "considered textual on the
- * local system" than "ASCII."
- *
- * It considered a file to be "International language text" if each
- * of its characters was either an ASCII printing character (according
- * to the real ASCII standard, not the above test), a character in
- * the range 0x80 ... 0xFF, or one of the following control characters:
- * backspace, tab, line feed, vertical tab, form feed, carriage return,
- * escape. No attempt was made to determine the language in which files
- * of this type were written.
- *
- *
- * The table below considers a file to be ASCII if all of its characters
- * are either ASCII printing characters (again, according to the X3.4
- * standard, not isascii()) or any of the following controls: bell,
- * backspace, tab, line feed, form feed, carriage return, esc, nextline.
- *
- * I include bell because some programs (particularly shell scripts)
- * use it literally, even though it is rare in normal text. I exclude
- * vertical tab because it never seems to be used in real text. I also
- * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
- * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
- * character to. It might be more appropriate to include it in the 8859
- * set instead of the ASCII set, but it's got to be included in *something*
- * we recognize or EBCDIC files aren't going to be considered textual.
- * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
- * and Latin characters, so these should possibly be allowed. But they
- * make a real mess on VT100-style displays if they're not paired properly,
- * so we are probably better off not calling them text.
- *
- * A file is considered to be ISO-8859 text if its characters are all
- * either ASCII, according to the above definition, or printing characters
- * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
- *
- * Finally, a file is considered to be international text from some other
- * character code if its characters are all either ISO-8859 (according to
- * the above definition) or characters in the range 0x80 ... 0x9F, which
- * ISO-8859 considers to be control characters but the IBM PC and Macintosh
- * consider to be printing characters.
- */
-
-#define F 0 /* character never appears in text */
-#define T 1 /* character appears in plain ASCII text */
-#define I 2 /* character appears in ISO-8859 text */
-#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
-
-private char text_chars[256] = {
- /* BEL BS HT LF FF CR */
- F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
- /* ESC */
- F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
- T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
- /* NEL */
- X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
- X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
- I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
-};
-
-private int
-looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- size_t *ulen)
-{
- size_t i;
-
- *ulen = 0;
-
- for (i = 0; i < nbytes; i++) {
- int t = text_chars[buf[i]];
-
- if (t != T)
- return 0;
-
- ubuf[(*ulen)++] = buf[i];
- }
-
- return 1;
-}
-
-private int
-looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
-{
- size_t i;
-
- *ulen = 0;
-
- for (i = 0; i < nbytes; i++) {
- int t = text_chars[buf[i]];
-
- if (t != T && t != I)
- return 0;
-
- ubuf[(*ulen)++] = buf[i];
- }
-
- return 1;
-}
-
-private int
-looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- size_t *ulen)
-{
- size_t i;
-
- *ulen = 0;
-
- for (i = 0; i < nbytes; i++) {
- int t = text_chars[buf[i]];
-
- if (t != T && t != I && t != X)
- return 0;
-
- ubuf[(*ulen)++] = buf[i];
- }
-
- return 1;
-}
-
-/*
- * Encode Unicode string as UTF-8, returning pointer to character
- * after end of string, or NULL if an invalid character is found.
- */
-private unsigned char *
-encode_utf8(unsigned char *buf, size_t len, unichar *ubuf, size_t ulen)
-{
- size_t i;
- unsigned char *end = buf + len;
-
- for (i = 0; i < ulen; i++) {
- if (ubuf[i] <= 0x7f) {
- if (end - buf < 1)
- return NULL;
- *buf++ = (unsigned char)ubuf[i];
- } else if (ubuf[i] <= 0x7ff) {
- if (end - buf < 2)
- return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 6) + 0xc0);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
- } else if (ubuf[i] <= 0xffff) {
- if (end - buf < 3)
- return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 12) + 0xe0);
- *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
- } else if (ubuf[i] <= 0x1fffff) {
- if (end - buf < 4)
- return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 18) + 0xf0);
- *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
- } else if (ubuf[i] <= 0x3ffffff) {
- if (end - buf < 5)
- return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 24) + 0xf8);
- *buf++ = (unsigned char)(((ubuf[i] >> 18) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
- } else if (ubuf[i] <= 0x7fffffff) {
- if (end - buf < 6)
- return NULL;
- *buf++ = (unsigned char)((ubuf[i] >> 30) + 0xfc);
- *buf++ = (unsigned char)(((ubuf[i] >> 24) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 18) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80);
- *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80);
- *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80);
- } else /* Invalid character */
- return NULL;
- }
-
- return buf;
-}
-
-/*
- * Decide whether some text looks like UTF-8. Returns:
- *
- * -1: invalid UTF-8
- * 0: uses odd control characters, so doesn't look like text
- * 1: 7-bit text
- * 2: definitely UTF-8 text (valid high-bit set bytes)
- *
- * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
- * ubuf must be big enough!
- */
-protected int
-file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
-{
- size_t i;
- int n;
- unichar c;
- int gotone = 0, ctrl = 0;
-
- if (ubuf)
- *ulen = 0;
-
- for (i = 0; i < nbytes; i++) {
- if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
- /*
- * Even if the whole file is valid UTF-8 sequences,
- * still reject it if it uses weird control characters.
- */
-
- if (text_chars[buf[i]] != T)
- ctrl = 1;
-
- if (ubuf)
- ubuf[(*ulen)++] = buf[i];
- } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
- return -1;
- } else { /* 11xxxxxx begins UTF-8 */
- int following;
-
- if ((buf[i] & 0x20) == 0) { /* 110xxxxx */
- c = buf[i] & 0x1f;
- following = 1;
- } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */
- c = buf[i] & 0x0f;
- following = 2;
- } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */
- c = buf[i] & 0x07;
- following = 3;
- } else if ((buf[i] & 0x04) == 0) { /* 111110xx */
- c = buf[i] & 0x03;
- following = 4;
- } else if ((buf[i] & 0x02) == 0) { /* 1111110x */
- c = buf[i] & 0x01;
- following = 5;
- } else
- return -1;
-
- for (n = 0; n < following; n++) {
- i++;
- if (i >= nbytes)
- goto done;
-
- if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
- return -1;
-
- c = (c << 6) + (buf[i] & 0x3f);
- }
-
- if (ubuf)
- ubuf[(*ulen)++] = c;
- gotone = 1;
- }
- }
-done:
- return ctrl ? 0 : (gotone ? 2 : 1);
-}
-
-/*
- * Decide whether some text looks like UTF-8 with BOM. If there is no
- * BOM, return -1; otherwise return the result of looks_utf8 on the
- * rest of the text.
- */
-private int
-looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- size_t *ulen)
-{
- if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
- return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
- else
- return -1;
-}
-
-private int
-looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
- size_t *ulen)
-{
- int bigend;
- size_t i;
-
- if (nbytes < 2)
- return 0;
-
- if (buf[0] == 0xff && buf[1] == 0xfe)
- bigend = 0;
- else if (buf[0] == 0xfe && buf[1] == 0xff)
- bigend = 1;
- else
- return 0;
-
- *ulen = 0;
-
- for (i = 2; i + 1 < nbytes; i += 2) {
- /* XXX fix to properly handle chars > 65536 */
-
- if (bigend)
- ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
- else
- ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
-
- if (ubuf[*ulen - 1] == 0xfffe)
- return 0;
- if (ubuf[*ulen - 1] < 128 &&
- text_chars[(size_t)ubuf[*ulen - 1]] != T)
- return 0;
- }
-
- return 1 + bigend;
-}
-
-#undef F
-#undef T
-#undef I
-#undef X
-
-/*
- * This table maps each EBCDIC character to an (8-bit extended) ASCII
- * character, as specified in the rationale for the dd(1) command in
- * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
- *
- * Unfortunately it does not seem to correspond exactly to any of the
- * five variants of EBCDIC documented in IBM's _Enterprise Systems
- * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
- * Edition, July, 1999, pp. I-1 - I-4.
- *
- * Fortunately, though, all versions of EBCDIC, including this one, agree
- * on most of the printing characters that also appear in (7-bit) ASCII.
- * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
- *
- * Fortunately too, there is general agreement that codes 0x00 through
- * 0x3F represent control characters, 0x41 a nonbreaking space, and the
- * remainder printing characters.
- *
- * This is sufficient to allow us to identify EBCDIC text and to distinguish
- * between old-style and internationalized examples of text.
- */
-
-private unsigned char ebcdic_to_ascii[] = {
- 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
-128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
-144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
-' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
-'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
-'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
-186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
-195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
-202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
-209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
-216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
-'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
-'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
-'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
-'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
-};
-
-#ifdef notdef
-/*
- * The following EBCDIC-to-ASCII table may relate more closely to reality,
- * or at least to modern reality. It comes from
- *
- * http://ftp.s390.ibm.com/products/oe/bpxqp9.html
- *
- * and maps the characters of EBCDIC code page 1047 (the code used for
- * Unix-derived software on IBM's 390 systems) to the corresponding
- * characters from ISO 8859-1.
- *
- * If this table is used instead of the above one, some of the special
- * cases for the NEL character can be taken out of the code.
- */
-
-private unsigned char ebcdic_1047_to_8859[] = {
-0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
-0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
-0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
-0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
-0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
-0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
-0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
-0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
-0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
-0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
-0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
-0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
-0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
-0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
-0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
-0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
-};
-#endif
-
-/*
- * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
- */
-private void
-from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
-{
- size_t i;
-
- for (i = 0; i < nbytes; i++) {
- out[i] = ebcdic_to_ascii[buf[i]];
- }
-}
diff --git a/usr.bin/file/compress.c b/usr.bin/file/compress.c
deleted file mode 100644
index 773d49746c1..00000000000
--- a/usr.bin/file/compress.c
+++ /dev/null
@@ -1,479 +0,0 @@
-/* $OpenBSD: compress.c,v 1.16 2013/04/20 19:02:57 deraadt Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * compress routines:
- * zmagic() - returns 0 if not recognized, uncompresses and prints
- * information if recognized
- * uncompress(method, old, n, newch) - uncompress old into new,
- * using method, return sizeof new
- */
-#include "file.h"
-#include "magic.h"
-#include <stdio.h>
-#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <string.h>
-#include <errno.h>
-#include <poll.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#ifdef HAVE_SYS_WAIT_H
-#include <sys/wait.h>
-#endif
-#if defined(HAVE_SYS_TIME_H)
-#include <sys/time.h>
-#endif
-#if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
-#define BUILTIN_DECOMPRESS
-#include <zlib.h>
-#endif
-
-
-private const struct {
- const char magic[8];
- size_t maglen;
- const char *argv[3];
- int silent;
-} compr[] = {
- { "\037\235", 2, { "gzip", "-cdq", NULL }, 1 }, /* compressed */
- /* Uncompress can get stuck; so use gzip first if we have it
- * Idea from Damien Clark, thanks! */
- { "\037\235", 2, { "uncompress", "-c", NULL }, 1 }, /* compressed */
- { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */
- { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */
- { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */
- /* the standard pack utilities do not accept standard input */
- { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */
- { "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */
- /* ...only first file examined */
- { "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */
-};
-
-private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
-
-#define NODATA ((size_t)~0)
-
-
-private ssize_t swrite(int, const void *, size_t);
-private size_t uncompressbuf(struct magic_set *, int, size_t,
- const unsigned char *, unsigned char **, size_t);
-#ifdef BUILTIN_DECOMPRESS
-private size_t uncompressgzipped(struct magic_set *, const unsigned char *,
- unsigned char **, size_t);
-#endif
-
-protected int
-file_zmagic(struct magic_set *ms, int fd, const char *name,
- const unsigned char *buf, size_t nbytes)
-{
- unsigned char *newbuf = NULL;
- size_t i, nsz;
- int rv = 0;
- int mime = ms->flags & MAGIC_MIME;
-
- if ((ms->flags & MAGIC_COMPRESS) == 0)
- return 0;
-
- for (i = 0; i < ncompr; i++) {
- if (nbytes < compr[i].maglen)
- continue;
- if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
- (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
- nbytes)) != NODATA) {
- ms->flags &= ~MAGIC_COMPRESS;
- rv = -1;
- if (file_buffer(ms, -1, name, newbuf, nsz) == -1)
- goto error;
-
- if (mime == MAGIC_MIME || mime == 0) {
- if (file_printf(ms, mime ?
- " compressed-encoding=" : " (") == -1)
- goto error;
- }
-
- if ((mime == 0 || mime & MAGIC_MIME_ENCODING) &&
- file_buffer(ms, -1, NULL, buf, nbytes) == -1)
- goto error;
-
- if (!mime && file_printf(ms, ")") == -1)
- goto error;
- rv = 1;
- break;
- }
- }
-error:
- if (newbuf)
- free(newbuf);
- ms->flags |= MAGIC_COMPRESS;
- return rv;
-}
-
-/*
- * `safe' write for sockets and pipes.
- */
-private ssize_t
-swrite(int fd, const void *buf, size_t n)
-{
- int rv;
- size_t rn = n;
-
- do
- switch (rv = write(fd, buf, n)) {
- case -1:
- if (errno == EINTR)
- continue;
- return -1;
- default:
- n -= rv;
- buf = ((const char *)buf) + rv;
- break;
- }
- while (n > 0);
- return rn;
-}
-
-
-/*
- * `safe' read for sockets and pipes.
- */
-protected ssize_t
-sread(int fd, void *buf, size_t n, int canbepipe)
-{
- int rv, cnt;
-#ifdef FIONREAD
- int t = 0;
-#endif
- size_t rn = n;
-
- if (fd == STDIN_FILENO)
- goto nocheck;
-
-#ifdef FIONREAD
- if ((canbepipe && (ioctl(fd, FIONREAD, &t) == -1)) || (t == 0)) {
-#ifdef FD_ZERO
- for (cnt = 0;; cnt++) {
- struct pollfd pfd[1];
- int rv;
-
- pfd[0].fd = fd;
- pfd[0].events = POLLIN;
-
- /*
- * Avoid soft deadlock: do not read if there
- * is nothing to read from sockets and pipes.
- */
- rv = poll(pfd, 1, 100);
- if (rv == -1) {
- if (errno == EINTR || errno == EAGAIN)
- continue;
- } else if (rv == 0 && cnt >= 5) {
- return 0;
- } else
- break;
- }
-#endif
- (void)ioctl(fd, FIONREAD, &t);
- }
-
- if (t > 0 && (size_t)t < n) {
- n = t;
- rn = n;
- }
-#endif
-
-nocheck:
- do
- switch ((rv = read(fd, buf, n))) {
- case -1:
- if (errno == EINTR)
- continue;
- return -1;
- case 0:
- return rn - n;
- default:
- n -= rv;
- buf = ((char *)buf) + rv;
- break;
- }
- while (n > 0);
- return rn;
-}
-
-protected int
-file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
- size_t nbytes)
-{
- char buf[4096];
- int r, tfd;
-
- (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
- tfd = mkstemp(buf);
- r = errno;
- (void)unlink(buf);
- errno = r;
- if (tfd == -1) {
- file_error(ms, errno,
- "cannot create temporary file for pipe copy");
- return -1;
- }
-
- if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
- r = 1;
- else {
- while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
- if (swrite(tfd, buf, (size_t)r) != r)
- break;
- }
-
- switch (r) {
- case -1:
- file_error(ms, errno, "error copying from pipe to temp file");
- return -1;
- case 0:
- break;
- default:
- file_error(ms, errno, "error while writing to temp file");
- return -1;
- }
-
- /*
- * We duplicate the file descriptor, because fclose on a
- * tmpfile will delete the file, but any open descriptors
- * can still access the phantom inode.
- */
- if ((fd = dup2(tfd, fd)) == -1) {
- file_error(ms, errno, "could not dup descriptor for temp file");
- return -1;
- }
- (void)close(tfd);
- if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
- return fd;
-}
-
-#ifdef BUILTIN_DECOMPRESS
-
-#define FHCRC (1 << 1)
-#define FEXTRA (1 << 2)
-#define FNAME (1 << 3)
-#define FCOMMENT (1 << 4)
-
-private size_t
-uncompressgzipped(struct magic_set *ms, const unsigned char *old,
- unsigned char **newch, size_t n)
-{
- unsigned char flg = old[3];
- size_t data_start = 10;
- z_stream z;
- int rc;
-
- if (flg & FEXTRA) {
- if (data_start+1 >= n)
- return 0;
- data_start += 2 + old[data_start] + old[data_start + 1] * 256;
- }
- if (flg & FNAME) {
- while(data_start < n && old[data_start])
- data_start++;
- data_start++;
- }
- if(flg & FCOMMENT) {
- while(data_start < n && old[data_start])
- data_start++;
- data_start++;
- }
- if(flg & FHCRC)
- data_start += 2;
-
- if (data_start >= n)
- return 0;
- if ((*newch = (unsigned char *)malloc(HOWMANY + 1)) == NULL) {
- return 0;
- }
-
- /* XXX: const castaway, via strchr */
- z.next_in = (Bytef *)strchr((const char *)old + data_start,
- old[data_start]);
- z.avail_in = n - data_start;
- z.next_out = *newch;
- z.avail_out = HOWMANY;
- z.zalloc = Z_NULL;
- z.zfree = Z_NULL;
- z.opaque = Z_NULL;
-
- rc = inflateInit2(&z, -15);
- if (rc != Z_OK) {
- file_error(ms, 0, "zlib: %s", z.msg);
- return 0;
- }
-
- rc = inflate(&z, Z_SYNC_FLUSH);
- if (rc != Z_OK && rc != Z_STREAM_END) {
- file_error(ms, 0, "zlib: %s", z.msg);
- return 0;
- }
-
- n = (size_t)z.total_out;
- (void)inflateEnd(&z);
-
- /* let's keep the nul-terminate tradition */
- (*newch)[n] = '\0';
-
- return n;
-}
-#endif
-
-private size_t
-uncompressbuf(struct magic_set *ms, int fd, size_t method,
- const unsigned char *old, unsigned char **newch, size_t n)
-{
- int fdin[2], fdout[2];
- int r;
-
-#ifdef BUILTIN_DECOMPRESS
- /* FIXME: This doesn't cope with bzip2 */
- if (method == 2)
- return uncompressgzipped(ms, old, newch, n);
-#endif
- (void)fflush(stdout);
- (void)fflush(stderr);
-
- if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
- file_error(ms, errno, "cannot create pipe");
- return NODATA;
- }
- switch (fork()) {
- case 0: /* child */
- (void) close(0);
- if (fd != -1) {
- (void) dup(fd);
- (void) lseek(0, (off_t)0, SEEK_SET);
- } else {
- (void) dup(fdin[0]);
- (void) close(fdin[0]);
- (void) close(fdin[1]);
- }
-
- (void) close(1);
- (void) dup(fdout[1]);
- (void) close(fdout[0]);
- (void) close(fdout[1]);
-#ifndef DEBUG
- if (compr[method].silent)
- (void)close(2);
-#endif
-
- (void)execvp(compr[method].argv[0],
- (char *const *)(intptr_t)compr[method].argv);
-#ifdef DEBUG
- (void)fprintf(stderr, "exec `%s' failed (%s)\n",
- compr[method].argv[0], strerror(errno));
-#endif
- exit(1);
- /*NOTREACHED*/
- case -1:
- file_error(ms, errno, "could not fork");
- return NODATA;
-
- default: /* parent */
- (void) close(fdout[1]);
- if (fd == -1) {
- (void) close(fdin[0]);
- /*
- * fork again, to avoid blocking because both
- * pipes filled
- */
- switch (fork()) {
- case 0: /* child */
- (void)close(fdout[0]);
- if (swrite(fdin[1], old, n) != (ssize_t)n) {
-#ifdef DEBUG
- (void)fprintf(stderr,
- "Write failed (%s)\n",
- strerror(errno));
-#endif
- exit(1);
- }
- exit(0);
- /*NOTREACHED*/
-
- case -1:
-#ifdef DEBUG
- (void)fprintf(stderr, "Fork failed (%s)\n",
- strerror(errno));
-#endif
- exit(1);
- /*NOTREACHED*/
-
- default: /* parent */
- break;
- }
- (void) close(fdin[1]);
- fdin[1] = -1;
- }
-
- if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) {
-#ifdef DEBUG
- (void)fprintf(stderr, "Malloc failed (%s)\n",
- strerror(errno));
-#endif
- n = 0;
- goto err;
- }
- if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
-#ifdef DEBUG
- (void)fprintf(stderr, "Read failed (%s)\n",
- strerror(errno));
-#endif
- free(*newch);
- n = 0;
- newch[0] = '\0';
- goto err;
- } else {
- n = r;
- }
- /* NUL terminate, as every buffer is handled here. */
- (*newch)[n] = '\0';
-err:
- if (fdin[1] != -1)
- (void) close(fdin[1]);
- (void) close(fdout[0]);
-#ifdef WNOHANG
- while (waitpid(-1, NULL, WNOHANG) != -1)
- continue;
-#else
- (void)wait(NULL);
-#endif
- return n;
- }
-}
diff --git a/usr.bin/file/config.h b/usr.bin/file/config.h
deleted file mode 100644
index 1cf2668d168..00000000000
--- a/usr.bin/file/config.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Hand-made config.h file for OpenBSD, so we don't have to run
- * the dratted configure script every time we build this puppy,
- * but can still carefully import stuff from Christos' version.
- *
- * This file is in the public domain. Original Author Ian F. Darwin.
- * $OpenBSD: config.h,v 1.7 2011/07/25 16:21:22 martynas Exp $
- */
-
-/* header file issues. */
-#define HAVE_UNISTD_H 1
-#define HAVE_FCNTL_H 1
-#define HAVE_SYS_WAIT_H 1
-#define HAVE_LOCALE_H 1
-#define HAVE_SYS_STAT_H 1
-#define HAVE_INTTYPES_H 1
-#define HAVE_GETOPT_H 1
-#define HAVE_LIMITS_H 1
-/* #define HAVE_ZLIB_H 1 DO NOT ENABLE YET -- chl */
-/* #define HAVE_LIBZ 1 DO NOT ENABLE YET -- ian */
-
-#define HAVE_STRTOUL
-#define HAVE_STRERROR
-#define HAVE_VSNPRINTF
-#define HAVE_SNPRINTF
-#define HAVE_STRNDUP
-#define HAVE_STRTOF
-
-/* Compiler issues */
-#define SIZEOF_LONG_LONG 8
-
-/* Library issues */
-#define HAVE_GETOPT_LONG 1 /* in-tree as of 3.2 */
-#define HAVE_ST_RDEV 1
-
-/* ELF support */
-#define BUILTIN_ELF 1
-#define ELFCORE 1
diff --git a/usr.bin/file/elfclass.h b/usr.bin/file/elfclass.h
deleted file mode 100644
index 87c9813b43b..00000000000
--- a/usr.bin/file/elfclass.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* $OpenBSD: elfclass.h,v 1.2 2009/04/26 14:17:45 chl Exp $ */
-/*
- * Copyright (c) Christos Zoulas 2008.
- * All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- if (nbytes <= sizeof(elfhdr))
- return 0;
-
- u.l = 1;
- (void)memcpy(&elfhdr, buf, sizeof elfhdr);
- swap = (u.c[sizeof(int32_t) - 1] + 1) != elfhdr.e_ident[EI_DATA];
-
- type = elf_getu16(swap, elfhdr.e_type);
- switch (type) {
-#ifdef ELFCORE
- case ET_CORE:
- if (dophn_core(ms, class, swap, fd,
- (off_t)elf_getu(swap, elfhdr.e_phoff),
- elf_getu16(swap, elfhdr.e_phnum),
- (size_t)elf_getu16(swap, elfhdr.e_phentsize),
- fsize, &flags) == -1)
- return -1;
- break;
-#endif
- case ET_EXEC:
- case ET_DYN:
- if (dophn_exec(ms, class, swap, fd,
- (off_t)elf_getu(swap, elfhdr.e_phoff),
- elf_getu16(swap, elfhdr.e_phnum),
- (size_t)elf_getu16(swap, elfhdr.e_phentsize),
- fsize, &flags) == -1)
- return -1;
- /*FALLTHROUGH*/
- case ET_REL:
- if (doshn(ms, class, swap, fd,
- (off_t)elf_getu(swap, elfhdr.e_shoff),
- elf_getu16(swap, elfhdr.e_shnum),
- (size_t)elf_getu16(swap, elfhdr.e_shentsize),
- &flags) == -1)
- return -1;
- break;
-
- default:
- break;
- }
- return 1;
diff --git a/usr.bin/file/file.1 b/usr.bin/file/file.1
index a92a3ecc6c0..be305b2be69 100644
--- a/usr.bin/file/file.1
+++ b/usr.bin/file/file.1
@@ -1,6 +1,7 @@
-.\" $OpenBSD: file.1,v 1.35 2015/02/15 22:26:45 bentley Exp $
+.\" $OpenBSD: file.1,v 1.36 2015/04/24 16:24:11 nicm Exp $
.\" $FreeBSD: src/usr.bin/file/file.1,v 1.16 2000/03/01 12:19:39 sheldonh Exp $
.\"
+.\" Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
.\" Copyright (c) Ian F. Darwin 1986-1995.
.\" Software written by Ian F. Darwin and others;
.\" maintained 1995-present by Christos Zoulas and others.
@@ -27,7 +28,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd $Mdocdate: February 15 2015 $
+.Dd $Mdocdate: April 24 2015 $
.Dt FILE 1
.Os
.Sh NAME
@@ -36,464 +37,83 @@
.Sh SYNOPSIS
.Nm
.Bk -words
-.Op Fl 0bCcehikLNnprsvz
-.Op Fl -help
-.Op Fl -mime-encoding
-.Op Fl -mime-type
-.Op Fl F Ar separator
-.Op Fl f Ar namefile
-.Op Fl m Ar magicfiles
-.Ar file
+.Op Fl bciLsW
+.Ar
.Ek
.Sh DESCRIPTION
The
.Nm
-utility tests each argument in an attempt to classify it.
-There are three sets of tests, performed in this order:
-filesystem tests, magic tests, and language tests.
-The first test that succeeds causes the file type to be printed.
+utility tests each argument and attempts to determine its type.
+Three sets of tests are performed:
+.Bl -enum -offset Ds
+.It
+Filesystem tests, for example if a file is empty, or a special file such as a
+socket or named pipe (FIFO).
+.It
+.Dq Magic
+tests for data in particular fixed formats.
+These are loaded from the
+.Pa /etc/magic
+file (or
+.Pa ~/.magic
+instead if it exists).
+The file format is described in
+.Xr magic 5 .
+.It
+Tests for text files such as plain ASCII or C programming language files.
+.El
.Pp
-The type printed will usually contain one of the words
+The first test which succeeds causes the file type to be printed.
+The type will often contain one of the words
.Em text
-(the file contains only
-printing characters and a few common control
-characters and is probably safe to read on an
-ASCII terminal),
+(contains only printing characters and is probably safe to read on an ASCII
+terminal),
.Em executable
-(the file contains the result of compiling a program
-in a form understandable to some
-.Ux
-kernel or another),
+(the file contains a compiled executable program)
or
.Em data
-meaning anything else (data is usually
-.Dq binary
-or non-printable).
-Exceptions are well-known file formats (core files, tar archives)
-that are known to contain binary data.
-When modifying magic files or the program itself, make sure to
-.Em preserve these keywords .
-Users depend on knowing that all the readable files in a directory
-have the word
-.Dq text
-printed.
-Don't do as Berkeley did and change
-.Dq shell commands text
-to
-.Dq shell script .
-.Pp
-The filesystem tests are based on examining the return from a
-.Xr stat 2
-system call.
-The program checks to see if the file is empty,
-or if it's some sort of special file.
-Any known file types,
-such as sockets, symbolic links, and named pipes (FIFOs),
-are intuited if they are defined in
-the system header file
-.In sys/stat.h .
-.Pp
-The magic tests are used to check for files with data in
-particular fixed formats.
-The canonical example of this is a binary executable (compiled program)
-a.out file, whose format is defined in
-.In elf.h ,
-.In a.out.h ,
-and possibly
-.In exec.h
-in the standard include directory.
-These files have a
-.Dq magic number
-stored in a particular place
-near the beginning of the file that tells the
-.Ux
-operating system
-that the file is a binary executable, and which of several types thereof.
-The concept of a
-.Dq magic
-has been applied by extension to data files.
-Any file with some invariant identifier at a small fixed
-offset into the file can usually be described in this way.
-The information identifying these files is read from the magic file
-.Pa /etc/magic .
-In addition, if
-.Pa $HOME/.magic.mgc
-or
-.Pa $HOME/.magic
-exists, it will be used in preference to the system magic files.
-.Pp
-If a file does not match any of the entries in the magic file,
-it is examined to see if it seems to be a text file.
-ASCII, ISO-8859-x, non-ISO 8-bit extended-ASCII character sets
-(such as those used on Macintosh and IBM PC systems),
-UTF-8-encoded Unicode, UTF-16-encoded Unicode, and EBCDIC
-character sets can be distinguished by the different
-ranges and sequences of bytes that constitute printable text
-in each set.
-If a file passes any of these tests, its character set is reported.
-ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified
-as
-.Dq text
-because they will be mostly readable on nearly any terminal;
-UTF-16 and EBCDIC are only
-.Dq character data
-because, while
-they contain text, it is text that will require translation
-before it can be read.
-In addition,
-.Nm
-will attempt to determine other characteristics of text-type files.
-If the lines of a file are terminated by CR, CRLF, or NEL, instead
-of the Unix-standard LF, this will be reported.
-Files that contain embedded escape sequences or overstriking
-will also be identified.
-.Pp
-Once
-.Nm
-has determined the character set used in a text-type file,
-it will
-attempt to determine in what language the file is written.
-The language tests look for particular strings (cf.\&
-.In names.h )
-that can appear anywhere in the first few blocks of a file.
-For example, the keyword
-.Em .br
-indicates that the file is most likely a
-troff input file, just as the keyword
-.Em struct
-indicates a C program.
-These tests are less reliable than the previous
-two groups, so they are performed last.
-The language test routines also test for some miscellany
-(such as
-.Xr tar 1
-archives).
-.Pp
-Any file that cannot be identified as having been written
-in any of the character sets listed above is simply said to be
-.Dq data .
+meaning anything else.
.Sh OPTIONS
.Bl -tag -width indent
-.It Fl 0 , -print0
-Output a null character
-.Sq \e0
-after the end of the filename.
-Nice to
-.Xr cut 1
-the output.
-This does not affect the separator which is still printed.
-.It Fl b , -brief
-Do not prepend filenames to output lines (brief mode).
-.It Fl C , -compile
-Write a
-.Pa magic.mgc
-output file that contains a pre-parsed version of the magic file or directory.
-.It Fl c , -checking-printout
-Cause a checking printout of the parsed form of the magic file.
-This is usually used in conjunction with the
-.Fl m
-flag to debug a new magic file before installing it.
-.It Fl e , -exclude Ar testname
-Exclude the test named in
-.Ar testname
-from the list of tests made to determine the file type.
-Valid test names are:
-.Bl -tag -width compress
-.It apptype
-Check for
-.Dv EMX
-application type (only on EMX).
-.It ascii
-Check for various types of ASCII files.
-.It compress
-Don't look for, or inside, compressed files.
-.It elf
-Don't print elf details.
-.It fortran
-Don't look for fortran sequences inside ASCII files.
-.It soft
-Don't consult magic files.
-.It tar
-Don't examine tar files.
-.It token
-Don't look for known tokens inside ASCII files.
-.It troff
-Don't look for troff sequences inside ASCII files.
-.El
-.It Fl F , -separator Ar separator
-Use the specified string as the separator between the filename and the
-file result returned.
-Defaults to
-.Sq \&: .
-.It Fl f , -files-from Ar namefile
-Read the names of the files to be examined from
-.Ar namefile
-(one per line)
-before the argument list.
-Either
-.Ar namefile
-or at least one filename argument must be present;
-to test the standard input, use
-.Sq -
-as a filename argument.
-.It Fl h , -no-dereference
-Causes symlinks not to be followed.
-This is the default if the environment variable
-.Dv POSIXLY_CORRECT
-is not defined.
-.It Fl -help
-Print a help message and exit.
-.It Fl i , -mime
-Causes the file command to output mime type strings rather than the more
-traditional human readable ones.
+.It Fl b
+Do not prepend filenames to output lines.
+.It Fl c
+Print a summary of the parsed magic file, usually used for debugging.
+.It Fl h
+Causes symlinks not to be followed.
+This is the default.
+.It Fl i , -mime , -mime-type
+Causes the file command to output MIME type strings rather than the more
+traditional human-readable ones.
Thus it may say
-.Dq text/plain charset=us-ascii
+.Dq text/plain
rather than
.Dq ASCII text .
-In order for this option to work,
+.It Fl L
+Causes symlinks to be followed.
+.It Fl s
+Instructs
.Nm
-changes the way it handles files recognized by the command itself
-(such as many of the text file types, directories etc.),
-and makes use of an alternative
-.Dq magic
-file.
-See also
-.Sx FILES ,
-below.
-.It Fl -mime-encoding , -mime-type
-Like
-.Fl i ,
-but print only the specified element(s).
-.It Fl k , -keep-going
-Don't stop at the first match, keep going.
-Subsequent matches will have the string
-.Dq "\[rs]012\- "
-prepended.
-(If a newline is required, see the
-.Fl r
-option.)
-.It Fl L , -dereference
-Causes symlinks to be followed;
-analogous to the option of the same name in
-.Xr ls 1 .
-This is the default if the environment variable
-.Dv POSIXLY_CORRECT
-is defined.
-.It Fl m , -magic-file Ar magicfiles
-Specify an alternate list of files and directories containing magic.
-This can be a single item, or a colon-separated list.
-If a compiled magic file is found alongside a file or directory,
-it will be used instead.
-.It Fl N , -no-pad
-Don't pad filenames so that they align in the output.
-.It Fl n , -no-buffer
-Force stdout to be flushed after checking each file.
-This is only useful if checking a list of files.
-It is intended to be used by programs that want filetype output from a pipe.
-.It Fl p , -preserve-date
-On systems that support
-.Xr utime 3
-or
-.Xr utimes 2 ,
-attempt to preserve the access time of files analyzed, to pretend that
-.Nm
-never read them.
-.It Fl r , -raw
-Don't translate unprintable characters to \eooo.
-Normally
-.Nm
-translates unprintable characters to their octal representation.
-.It Fl s , -special-files
-Normally,
-.Nm
-only attempts to read and determine the type of argument files which
+to attempt to read all files, not only those which
.Xr stat 2
reports are ordinary files.
-This prevents problems, because reading special files may have peculiar
-consequences.
-Specifying the
-.Fl s
-option causes
-.Nm
-to also read argument files which are block or character special files.
-This is useful for determining the filesystem types of the data in raw
-disk partitions, which are block special files.
-This option also causes
-.Nm
-to disregard the file size as reported by
-.Xr stat 2
-since on some systems it reports a zero size for raw disk partitions.
-.It Fl v , -version
-Print the version of the program and exit.
-.It Fl z , -uncompress
-Try to look inside compressed files.
+.It Fl W
+Display warnings when parsing the magic file or applying its tests.
+Usually used for debugging.
.El
-.Sh ENVIRONMENT
-The environment variable
-.Dv MAGIC
-can be used to set the default magic file name.
-If that variable is set, then
-.Nm
-will not attempt to open
-.Pa $HOME/.magic .
-.Nm
-adds
-.Dq .mgc
-to the value of this variable as appropriate.
-The environment variable
-.Dv POSIXLY_CORRECT
-controls whether
-.Nm
-will attempt to follow symlinks or not.
-If set, then
-.Nm
-follows symlinks; otherwise it does not.
-This is also controlled by the
-.Fl L
-and
-.Fl h
-options.
.Sh FILES
.Bl -tag -width /etc/magic -compact
.It Pa /etc/magic
-default list of magic numbers
+default magic file
.El
.Sh EXIT STATUS
.Ex -std file
.Sh SEE ALSO
-.Xr hexdump 1 ,
-.Xr od 1 ,
-.Xr strings 1 ,
.Xr magic 5
-.Sh STANDARDS CONFORMANCE
-This program is believed to exceed the System V Interface Definition
-of FILE(CMD), as near as one can determine from the vague language
-contained therein.
-Its behavior is mostly compatible with the System V program of the same name.
-This version knows more magic, however, so it will produce
-different (albeit more accurate) output in many cases.
-.\" URL: http://www.opengroup.org/onlinepubs/009695399/utilities/file.html
-.Pp
-The one significant difference
-between this version and System V
-is that this version treats any whitespace
-as a delimiter, so that spaces in pattern strings must be escaped.
-For example,
-.Bd -literal -offset indent
-\*(Gt10 string language impress\ (imPRESS data)
-.Ed
-.Pp
-in an existing magic file would have to be changed to
-.Bd -literal -offset indent
-\*(Gt10 string language\e impress (imPRESS data)
-.Ed
-.Pp
-In addition, in this version, if a pattern string contains a backslash,
-it must be escaped.
-For example
-.Bd -literal -offset indent
-0 string \ebegindata Andrew Toolkit document
-.Ed
-.Pp
-in an existing magic file would have to be changed to
-.Bd -literal -offset indent
-0 string \e\ebegindata Andrew Toolkit document
-.Ed
-.Pp
-SunOS releases 3.2 and later from Sun Microsystems include a
+.Sh AUTHORS
.Nm
-command derived from the System V one, but with some extensions.
-This version differs from Sun's only in minor ways.
-It includes the extension of the
-.Sq &
-operator, used as,
-for example,
-.Bd -literal -offset indent
-\*(Gt16 long&0x7fffffff \*(Gt0 not stripped
-.Ed
-.Sh HISTORY
-There has been a
-.Nm
-command in every
-.Ux
-since at least Research Version 4
-(man page dated November, 1973).
-The System V version introduced one significant major change:
-the external list of magic types.
-This slowed the program down slightly but made it a lot more flexible.
-.Pp
-This program, based on the System V version,
-was written by Ian Darwin
-without looking at anybody else's source code.
-.Pp
-John Gilmore revised the code extensively, making it better than
-the first version.
-Geoff Collyer found several inadequacies
-and provided some magic file entries.
-Contributions by the `&' operator by Rob McMahon, 1989.
-.Pp
-Guy Harris, made many changes from 1993 to the present.
-.Pp
-Primary development and maintenance from 1990 to the present by
-Christos Zoulas.
-.Pp
-Altered by Chris Lowth, 2000:
-Handle the
-.Fl i
-option to output mime type strings, using an alternative
-magic file and internal logic.
-.Pp
-Altered by Eric Fischer, July, 2000,
-to identify character codes and attempt to identify the languages
-of non-ASCII files.
-.Pp
-Altered by Reuben Thomas, 2007 to 2008, to improve MIME
-support and merge MIME and non-MIME magic, support directories as well
-as files of magic, apply many bug fixes and improve the build system.
-.Pp
-The list of contributors to the
-.Dq magic
-directory (magic files)
-is too long to include here.
-You know who you are; thank you.
-Many contributors are listed in the source files.
-.Sh BUGS
-There must be a better way to automate the construction of the Magic
-file from all the glop in Magdir.
-What is it?
-.Pp
-.Nm
-uses several algorithms that favor speed over accuracy,
-thus it can be misled about the contents of
-text
-files.
-.Pp
-The support for text files (primarily for programming languages)
-is simplistic, inefficient and requires recompilation to update.
-.Pp
-The list of keywords in
-.Pa ascmagic
-probably belongs in the Magic file.
-This could be done by using some keyword like
-.Sq *
-for the offset value.
-.Pp
-Complain about conflicts in the magic file entries.
-Make a rule that the magic entries sort based on file offset rather
-than position within the magic file?
-.Pp
-The program should provide a way to give an estimate
-of
-.Dq how good
-a guess is.
-We end up removing guesses (e.g.
-.Dq From\
-as first 5 chars of file) because
-they are not as good as other guesses (e.g.\&
-.Dq Newsgroups:
-versus
-.Dq Return-Path: ) .
-Still, if the others don't pan out, it should be possible to use the
-first guess.
+commands have appeared in many previous versions of
+.Ux .
+This version was written by Nicholas Marriott for
+.Ox 5.8
+to replace the previous version originally written by Ian Darwin.
.Pp
-This manual page, and particularly this section, is too long.
+There is a large number of contributors to the magic files; many are listed in
+the source files.
diff --git a/usr.bin/file/file.c b/usr.bin/file/file.c
index fcd6c34cd5f..d83a1f4515f 100644
--- a/usr.bin/file/file.c
+++ b/usr.bin/file/file.c
@@ -1,475 +1,513 @@
-/* $OpenBSD: file.c,v 1.26 2015/01/16 18:08:15 millert Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
+/* $OpenBSD: file.c,v 1.27 2015/04/24 16:24:11 nicm Exp $ */
+
/*
- * file - find type of a file or files - main program.
+ * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "file.h"
-#include "magic.h"
+#include <sys/ioctl.h>
+#include <sys/mman.h>
-#include <stdio.h>
+#include <errno.h>
+#include <libgen.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <pwd.h>
#include <stdlib.h>
#include <unistd.h>
-#include <limits.h>
-#include <string.h>
-#ifdef RESTORE_TIME
-# if (__COHERENT__ >= 0x420)
-# include <sys/utime.h>
-# else
-# ifdef USE_UTIMES
-# include <sys/time.h>
-# else
-# include <utime.h>
-# endif
-# endif
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h> /* for read() */
-#endif
-#ifdef HAVE_LOCALE_H
-#include <locale.h>
-#endif
-#ifdef HAVE_WCHAR_H
-#include <wchar.h>
-#endif
-#include <getopt.h>
-#ifndef HAVE_GETOPT_LONG
-int getopt_long(int argc, char * const *argv, const char *optstring, const struct option *longopts, int *longindex);
-#endif
+#include "file.h"
+#include "magic.h"
+#include "xmalloc.h"
-#include <netinet/in.h> /* for byte swapping */
+struct input_file
+{
+ struct magic *m;
-#include "patchlevel.h"
+ const char *path;
+ const char *label;
+ int fd;
+ struct stat sb;
+ const char *error;
-#ifdef S_IFLNK
-#define SYMLINKFLAG "Lh"
-#else
-#define SYMLINKFLAG ""
-#endif
+ void *base;
+ size_t size;
+ int mapped;
+ char *result;
-# define USAGE "Usage: %s [-bcik" SYMLINKFLAG "nNprsvz0] [-e test] [-f namefile] [-F separator] [-m magicfiles] file...\n" \
- " %s -C -m magicfiles\n"
+ char link_path[PATH_MAX];
+ const char *link_error;
+ int link_target;
+};
-private int /* Global command-line options */
- bflag = 0, /* brief output format */
- nopad = 0, /* Don't pad output */
- nobuffer = 0, /* Do not buffer stdout */
- nulsep = 0; /* Append '\0' to the separator */
+extern char *__progname;
-private const char *magicfile = 0; /* where the magic is */
-private const char *default_magicfile = MAGIC;
-private const char *separator = ":"; /* Default field separator */
+__dead void usage(void);
-extern char *__progname; /* used throughout */
+static void open_file(struct input_file *, const char *, int *);
+static void read_link(struct input_file *);
+static void test_file(struct magic *, struct input_file *, int);
-private struct magic_set *magic;
+static int try_stat(struct input_file *);
+static int try_empty(struct input_file *);
+static int try_access(struct input_file *);
+static int try_text(struct input_file *);
+static int try_magic(struct input_file *);
+static int try_unknown(struct input_file *);
-private void unwrap(char *);
-private void usage(void);
-private void help(void);
+static int bflag;
+static int cflag;
+static int iflag;
+static int Lflag;
+static int sflag;
+static int Wflag;
-int main(int, char *[]);
-private void process(const char *, int);
-private void load(const char *, int);
+static struct option longopts[] = {
+ { "mime", no_argument, NULL, 'i' },
+ { "mime-type", no_argument, NULL, 'i' },
+ { NULL, 0, NULL, 0 }
+};
+__dead void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-bchiLsW] [file ...]\n", __progname);
+ exit(1);
+}
-/*
- * main - parse arguments and handle options
- */
int
-main(int argc, char *argv[])
+main(int argc, char **argv)
{
- int c;
- size_t i;
- int action = 0, didsomefiles = 0, errflg = 0;
- int flags = 0;
- char *home, *usermagic;
- struct stat sb;
- static const char hmagic[] = "/.magic";
-#define OPTSTRING "bcCde:f:F:hikLm:nNprsvz0"
- int longindex;
- static const struct option long_options[] =
- {
-#define OPT(shortname, longname, opt, doc) \
- {longname, opt, NULL, shortname},
-#define OPT_LONGONLY(longname, opt, doc) \
- {longname, opt, NULL, 0},
-#include "file_opts.h"
-#undef OPT
-#undef OPT_LONGONLY
- {0, 0, NULL, 0}
-};
-
- static const struct {
- const char *name;
- int value;
- } nv[] = {
- { "apptype", MAGIC_NO_CHECK_APPTYPE },
- { "ascii", MAGIC_NO_CHECK_ASCII },
- { "compress", MAGIC_NO_CHECK_COMPRESS },
- { "elf", MAGIC_NO_CHECK_ELF },
- { "soft", MAGIC_NO_CHECK_SOFT },
- { "tar", MAGIC_NO_CHECK_TAR },
- { "tokens", MAGIC_NO_CHECK_TOKENS },
- };
-
- /* makes islower etc work for other langs */
- (void)setlocale(LC_CTYPE, "");
-
-#ifdef __EMX__
- /* sh-like wildcard expansion! Shouldn't hurt at least ... */
- _wildcard(&argc, &argv);
-#endif
-
- magicfile = default_magicfile;
- if ((usermagic = getenv("MAGIC")) != NULL)
- magicfile = usermagic;
- else
- if ((home = getenv("HOME")) != NULL) {
- size_t len = strlen(home) + sizeof(hmagic);
- if ((usermagic = malloc(len)) != NULL) {
- (void)strlcpy(usermagic, home, len);
- (void)strlcat(usermagic, hmagic, len);
- if (stat(usermagic, &sb)<0)
- free(usermagic);
- else
- magicfile = usermagic;
- }
- }
-
-#ifdef S_IFLNK
- flags |= getenv("POSIXLY_CORRECT") ? MAGIC_SYMLINK : 0;
-#endif
- while ((c = getopt_long(argc, argv, OPTSTRING, long_options,
- &longindex)) != -1)
- switch (c) {
- case 0 :
- switch (longindex) {
- case 0:
- help();
- break;
- case 10:
- flags |= MAGIC_MIME_TYPE;
- break;
- case 11:
- flags |= MAGIC_MIME_ENCODING;
- break;
- }
- break;
- case '0':
- nulsep = 1;
+ struct input_file *files = NULL;
+ int nfiles, opt, i, width = 0;
+ FILE *f;
+ struct magic *m;
+ char *home, *path;
+ struct passwd *pw;
+
+ for (;;) {
+ opt = getopt_long(argc, argv, "bchiLsW", longopts, NULL);
+ if (opt == -1)
break;
+ switch (opt) {
case 'b':
- bflag++;
+ bflag = 1;
break;
case 'c':
- action = FILE_CHECK;
- break;
- case 'C':
- action = FILE_COMPILE;
- break;
- case 'd':
- flags |= MAGIC_DEBUG|MAGIC_CHECK;
- break;
- case 'e':
- for (i = 0; i < sizeof(nv) / sizeof(nv[0]); i++)
- if (strcmp(nv[i].name, optarg) == 0)
- break;
-
- if (i == sizeof(nv) / sizeof(nv[0]))
- errflg++;
- else
- flags |= nv[i].value;
- break;
-
- case 'f':
- if(action)
- usage();
- load(magicfile, flags);
- unwrap(optarg);
- ++didsomefiles;
+ cflag = 1;
break;
- case 'F':
- separator = optarg;
+ case 'h':
+ Lflag = 0;
break;
case 'i':
- flags |= MAGIC_MIME;
- break;
- case 'k':
- flags |= MAGIC_CONTINUE;
- break;
- case 'm':
- magicfile = optarg;
- break;
- case 'n':
- ++nobuffer;
+ iflag = 1;
break;
- case 'N':
- ++nopad;
- break;
-#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
- case 'p':
- flags |= MAGIC_PRESERVE_ATIME;
- break;
-#endif
- case 'r':
- flags |= MAGIC_RAW;
+ case 'L':
+ Lflag = 1;
break;
case 's':
- flags |= MAGIC_DEVICES;
- break;
- case 'v':
- (void)fprintf(stderr, "%s-%d.%.2d\n", __progname,
- FILE_VERSION_MAJOR, patchlevel);
- (void)fprintf(stderr, "magic file from %s\n",
- magicfile);
- return 1;
- case 'z':
- flags |= MAGIC_COMPRESS;
+ sflag = 1;
break;
-#ifdef S_IFLNK
- case 'L':
- flags |= MAGIC_SYMLINK;
- break;
- case 'h':
- flags &= ~MAGIC_SYMLINK;
+ case 'W':
+ Wflag = 1;
break;
-#endif
- case '?':
default:
- errflg++;
- break;
+ usage();
}
-
- if (errflg) {
- usage();
}
+ argc -= optind;
+ argv += optind;
+ if (cflag) {
+ if (argc != 0)
+ usage();
+ } else if (argc == 0)
+ usage();
- switch(action) {
- case FILE_CHECK:
- case FILE_COMPILE:
- magic = magic_open(flags|MAGIC_CHECK);
- if (magic == NULL) {
- (void)fprintf(stderr, "%s: %s\n", __progname,
- strerror(errno));
- return 1;
- }
- c = action == FILE_CHECK ? magic_check(magic, magicfile) :
- magic_compile(magic, magicfile);
- if (c == -1) {
- (void)fprintf(stderr, "%s: %s\n", __progname,
- magic_error(magic));
- return -1;
- }
- return 0;
- default:
- load(magicfile, flags);
- break;
+ nfiles = argc;
+ if (nfiles != 0) {
+ files = xcalloc(nfiles, sizeof *files);
+ for (i = 0; i < argc; i++)
+ open_file(&files[i], argv[i], &width);
}
- if (optind == argc) {
- if (!didsomefiles) {
- usage();
- }
+ home = getenv("HOME");
+ if (home == NULL || *home == '\0') {
+ pw = getpwuid(getuid());
+ if (pw != NULL)
+ home = pw->pw_dir;
+ else
+ home = NULL;
}
- else {
- size_t j, wid, nw;
- for (wid = 0, j = (size_t)optind; j < (size_t)argc; j++) {
- nw = file_mbswidth(argv[j]);
- if (nw > wid)
- wid = nw;
- }
- /*
- * If bflag is only set twice, set it depending on
- * number of files [this is undocumented, and subject to change]
- */
- if (bflag == 2) {
- bflag = optind >= argc - 1;
- }
- for (; optind < argc; optind++)
- process(argv[optind], wid);
+ if (home != NULL) {
+ xasprintf(&path, "%s/.magic", home);
+ f = fopen(path, "r");
+ } else
+ f = NULL;
+ if (f == NULL) {
+ path = xstrdup("/etc/magic");
+ f = fopen(path, "r");
+ }
+ if (f == NULL)
+ err(1, "%s", path);
+
+ if (geteuid() == 0) {
+ pw = getpwnam(FILE_USER);
+ if (pw == NULL)
+ errx(1, "unknown user %s", FILE_USER);
+ if (setgroups(1, &pw->pw_gid) != 0)
+ err(1, "setgroups");
+ if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) != 0)
+ err(1, "setresgid");
+ if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) != 0)
+ err(1, "setresuid");
+ }
+
+ m = magic_load(f, path, cflag || Wflag);
+ if (cflag) {
+ magic_dump(m);
+ exit(0);
}
- c = magic->haderr ? 1 : 0;
- magic_close(magic);
- return c;
+ for (i = 0; i < nfiles; i++)
+ test_file(m, &files[i], width);
+ exit(0);
}
+static void
+open_file(struct input_file *inf, const char *path, int *width)
+{
+ char *label;
+ int n, retval;
+
+ inf->path = xstrdup(path);
-private void
-/*ARGSUSED*/
-load(const char *m, int flags)
+ n = xasprintf(&label, "%s:", inf->path);
+ if (n > *width)
+ *width = n;
+ inf->label = label;
+
+ retval = lstat(inf->path, &inf->sb);
+ if (retval == -1) {
+ inf->error = strerror(errno);
+ return;
+ }
+
+ if (S_ISLNK(inf->sb.st_mode))
+ read_link(inf);
+ inf->fd = open(inf->path, O_RDONLY|O_NONBLOCK);
+}
+
+/*
+ * Read the symbolic link named by inf->path and record its target in
+ * inf->link_path. An absolute target is copied as is; a relative target is
+ * prefixed with the directory part of inf->path unless that directory is
+ * empty, "." or "/". If readlink(2) fails or the joined path does not fit,
+ * inf->link_error is set and the function returns without the checks below.
+ *
+ * With Lflag set, inf->sb is overwritten with the stat(2) result for
+ * inf->path (the link is followed) and inf->error is set on failure.
+ * Otherwise the target is only probed with stat(2) and a failing errno is
+ * saved in inf->link_target.
+ */
+static void
+read_link(struct input_file *inf)
{
- if (magic || m == NULL)
+ struct stat sb;
+ char path[PATH_MAX];
+ char *copy, *root;
+ int used;
+ ssize_t size;
+
+ /*
+ * readlink(2) does not NUL terminate and may fill the whole buffer,
+ * so keep one byte back for the terminator written below.
+ */
+ size = readlink(inf->path, path, sizeof path - 1);
+ if (size == -1) {
+ inf->link_error = strerror(errno);
return;
- magic = magic_open(flags);
- if (magic == NULL) {
- (void)fprintf(stderr, "%s: %s\n", __progname, strerror(errno));
- exit(1);
}
- if (magic_load(magic, magicfile) == -1) {
- (void)fprintf(stderr, "%s: %s\n",
- __progname, magic_error(magic));
- exit(1);
+ path[size] = '\0';
+
+ if (*path == '/')
+ strlcpy(inf->link_path, path, sizeof inf->link_path);
+ else {
+ copy = xstrdup(inf->path);
+
+ root = dirname(copy);
+ if (*root == '\0' || strcmp(root, ".") == 0 ||
+ strcmp (root, "/") == 0)
+ strlcpy(inf->link_path, path, sizeof inf->link_path);
+ else {
+ used = snprintf(inf->link_path, sizeof inf->link_path,
+ "%s/%s", root, path);
+ if (used < 0 || (size_t)used >= sizeof inf->link_path) {
+ inf->link_error = strerror(ENAMETOOLONG);
+ /* Do not leak the scratch copy on the error path. */
+ free(copy);
+ return;
+ }
+ }
+
+ free(copy);
+ }
+
+ if (Lflag) {
+ if (stat(inf->path, &inf->sb) == -1)
+ inf->error = strerror(errno);
+ } else {
+ if (stat(inf->link_path, &sb) == -1)
+ inf->link_target = errno;
}
}
-/*
- * unwrap -- read a file of filenames, do each one.
- */
-private void
-unwrap(char *fn)
+/*
+ * Read the start of inf->fd into a static buffer of FILE_READ_SIZE bytes that
+ * is allocated on first use and shared by every call. At most inf->size
+ * bytes (capped at the buffer size) are requested; on return inf->size is
+ * the number of bytes actually read, so callers never look at bytes left
+ * over from an earlier file. Returns the buffer, or NULL if read(2) fails
+ * with anything other than EINTR.
+ */
+static void *
+fill_buffer(struct input_file *inf)
{
- char buf[PATH_MAX];
- FILE *f;
- int wid = 0, cwid;
+ static void *buffer;
+ ssize_t got;
+ size_t left;
+ void *next;
+
+ if (buffer == NULL)
+ buffer = xmalloc(FILE_READ_SIZE);
+
+ /* Never ask for more than the buffer can hold. */
+ if (inf->size > FILE_READ_SIZE)
+ inf->size = FILE_READ_SIZE;
+
+ next = buffer;
+ left = inf->size;
+ while (left != 0) {
+ got = read(inf->fd, next, left);
+ if (got == -1) {
+ if (errno == EINTR)
+ continue;
+ return NULL;
+ }
+ if (got == 0)
+ break;
+ next = (char*)next + got;
+ left -= got;
+ }
- if (strcmp("-", fn) == 0) {
- f = stdin;
- wid = 1;
- } else {
- if ((f = fopen(fn, "r")) == NULL) {
- (void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
- __progname, fn, strerror(errno));
- exit(1);
+ /* End of file came early: shrink the size to what was really read. */
+ inf->size -= left;
+ return buffer;
+}
+
+/*
+ * Make the start of the file available at inf->base. The amount examined is
+ * the file size capped at FILE_READ_SIZE; for a FIFO it is the number of
+ * bytes currently readable (FIONREAD), and for any other non-regular file
+ * reporting size zero it is FILE_READ_SIZE. The data is mmap(2)ed when
+ * possible (inf->mapped is then set) and otherwise read with fill_buffer().
+ * Returns 1 with an error message in inf->result on failure, 0 otherwise -
+ * including when there is nothing to load (inf->size == 0).
+ */
+static int
+load_file(struct input_file *inf)
+{
+ int available;
+
+ inf->size = inf->sb.st_size;
+ if (inf->size > FILE_READ_SIZE)
+ inf->size = FILE_READ_SIZE;
+ if (S_ISFIFO(inf->sb.st_mode)) {
+ if (ioctl(inf->fd, FIONREAD, &available) == -1) {
+ xasprintf(&inf->result, "cannot read '%s' (%s)",
+ inf->path, strerror(errno));
+ return (1);
+ }
+ inf->size = available;
+ /* FIONREAD may report more than fill_buffer()'s buffer holds. */
+ if (inf->size > FILE_READ_SIZE)
+ inf->size = FILE_READ_SIZE;
+ } else if (!S_ISREG(inf->sb.st_mode) && inf->size == 0)
+ inf->size = FILE_READ_SIZE;
+ if (inf->size == 0)
+ return (0);
+
+ inf->base = mmap(NULL, inf->size, PROT_READ, MAP_PRIVATE, inf->fd, 0);
+ if (inf->base == MAP_FAILED) {
+ /* Not mappable: fall back to reading. */
+ inf->base = fill_buffer(inf);
+ if (inf->base == NULL) {
+ xasprintf(&inf->result, "cannot read '%s' (%s)",
+ inf->path, strerror(errno));
+ return (1);
}
+ } else
+ inf->mapped = 1;
+ return (0);
+}
- while (fgets(buf, sizeof(buf), f) != NULL) {
- buf[strcspn(buf, "\n")] = '\0';
- cwid = file_mbswidth(buf);
- if (cwid > wid)
- wid = cwid;
+/*
+ * Describe the file from its stat data alone. Returns 1 once a description
+ * (or a "cannot stat" message) has been stored in inf->result, and 0 if the
+ * file's contents still have to be examined.
+ */
+static int
+try_stat(struct input_file *inf)
+{
+ if (inf->error != NULL) {
+ xasprintf(&inf->result, "cannot stat '%s' (%s)", inf->path,
+ inf->error);
+ return (1);
+ }
+ /* With sflag, devices and FIFOs are not described here; like regular */
+ /* files they return 0 so that their contents are looked at. */
+ if (sflag) {
+ switch (inf->sb.st_mode & S_IFMT) {
+ case S_IFBLK:
+ case S_IFCHR:
+ case S_IFIFO:
+ case S_IFREG:
+ return (0);
}
+ }
- rewind(f);
+ if (iflag && (inf->sb.st_mode & S_IFMT) != S_IFREG) {
+ xasprintf(&inf->result, "application/x-not-regular-file");
+ return (1);
}
- while (fgets(buf, sizeof(buf), f) != NULL) {
- buf[strcspn(buf, "\n")] = '\0';
- process(buf, wid);
- if(nobuffer)
- (void)fflush(stdout);
+
+ switch (inf->sb.st_mode & S_IFMT) {
+ case S_IFDIR:
+ xasprintf(&inf->result, "directory");
+ return (1);
+ case S_IFLNK:
+ if (inf->link_error != NULL) {
+ xasprintf(&inf->result, "unreadable symlink '%s' (%s)",
+ inf->path, inf->link_error);
+ return (1);
+ }
+ /* link_target is the errno saved by read_link(), 0 if none. */
+ if (inf->link_target == ELOOP)
+ xasprintf(&inf->result, "symbolic link in a loop");
+ else if (inf->link_target != 0) {
+ xasprintf(&inf->result, "broken symbolic link to '%s'",
+ inf->link_path);
+ } else {
+ xasprintf(&inf->result, "symbolic link to '%s'",
+ inf->link_path);
+ }
+ return (1);
+ case S_IFSOCK:
+ xasprintf(&inf->result, "socket");
+ return (1);
+ case S_IFBLK:
+ xasprintf(&inf->result, "block special (%ld/%ld)",
+ (long)major(inf->sb.st_rdev), (long)minor(inf->sb.st_rdev));
+ return (1);
+ case S_IFCHR:
+ xasprintf(&inf->result, "character special (%ld/%ld)",
+ (long)major(inf->sb.st_rdev), (long)minor(inf->sb.st_rdev));
+ return (1);
+ case S_IFIFO:
+ xasprintf(&inf->result, "fifo (named pipe)");
+ return (1);
}
+ return (0);
+}
+
+/*
+ * Report a file with nothing to examine (inf->size == 0) as "empty", or as
+ * "application/x-empty" when iflag is set. Returns 1 if it did so, else 0.
+ */
+static int
+try_empty(struct input_file *inf)
+{
+ if (inf->size != 0)
+ return (0);
- (void)fclose(f);
+ if (iflag)
+ xasprintf(&inf->result, "application/x-empty");
+ else
+ xasprintf(&inf->result, "empty");
+ return (1);
}
-/*
- * Called for each input file on the command line (or in a list of files)
- */
-private void
-process(const char *inname, int wid)
+/*
+ * Handle a file that could not be opened (inf->fd == -1): since its contents
+ * cannot be read, describe it from its mode bits instead. Returns 1 after
+ * storing that description in inf->result, or 0 if the file is open.
+ */
+static int
+try_access(struct input_file *inf)
{
- const char *type;
- int std_in = strcmp(inname, "-") == 0;
+ char tmp[256] = "";
+
+ if (inf->fd != -1)
+ return (0);
+
+ /* 0222 / 0111: any of the user, group or other write / execute bits. */
+ if (inf->sb.st_mode & 0222)
+ strlcat(tmp, "writable, ", sizeof tmp);
+ if (inf->sb.st_mode & 0111)
+ strlcat(tmp, "executable, ", sizeof tmp);
+ if (S_ISREG(inf->sb.st_mode))
+ strlcat(tmp, "regular file, ", sizeof tmp);
+ strlcat(tmp, "no read permission", sizeof tmp);
+
+ inf->result = xstrdup(tmp);
+ return (1);
+}
+
+/*
+ * Describe the data as text. Returns 0 without a result if text_get_type()
+ * does not recognise it. Otherwise the result is, in order of preference:
+ * a magic match made with MAGIC_TEST_TEXT set, the answer of
+ * text_try_words() (prefixed with the text type unless iflag is set), or a
+ * plain "<type> text" ("text/plain" with iflag). Returns 1 in those cases.
+ */
+static int
+try_text(struct input_file *inf)
+{
+ const char *type, *s;
+ int flags;
+
+ flags = MAGIC_TEST_TEXT;
+ if (iflag)
+ flags |= MAGIC_TEST_MIME;
+
+ type = text_get_type(inf->base, inf->size);
+ if (type == NULL)
+ return (0);
+
+ s = magic_test(inf->m, inf->base, inf->size, flags);
+ if (s != NULL) {
+ inf->result = xstrdup(s);
+ return (1);
+ }
- if (wid > 0 && !bflag) {
- (void)printf("%s", std_in ? "/dev/stdin" : inname);
- if (nulsep)
- (void)putc('\0', stdout);
+ s = text_try_words(inf->base, inf->size, flags);
+ if (s != NULL) {
+ if (iflag)
+ inf->result = xstrdup(s);
else
- (void)printf("%s", separator);
- (void)printf("%*s ",
- (int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
+ xasprintf(&inf->result, "%s %s text", type, s);
+ return (1);
}
- type = magic_file(magic, std_in ? NULL : inname);
- if (type == NULL)
- (void)printf("ERROR: %s\n", magic_error(magic));
+ if (iflag)
+ inf->result = xstrdup("text/plain");
else
- (void)printf("%s\n", type);
+ xasprintf(&inf->result, "%s text", type);
+ return (1);
}
-size_t
-file_mbswidth(const char *s)
+/*
+ * Run magic_test() on the loaded data without MAGIC_TEST_TEXT (adding
+ * MAGIC_TEST_MIME when iflag is set). Returns 1 with a copy of the match in
+ * inf->result, or 0 if magic_test() returned NULL.
+ */
+static int
+try_magic(struct input_file *inf)
{
-#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
- size_t bytesconsumed, old_n, n, width = 0;
- mbstate_t state;
- wchar_t nextchar;
- (void)memset(&state, 0, sizeof(mbstate_t));
- old_n = n = strlen(s);
- int w;
-
- while (n > 0) {
- bytesconsumed = mbrtowc(&nextchar, s, n, &state);
- if (bytesconsumed == (size_t)(-1) ||
- bytesconsumed == (size_t)(-2)) {
- /* Something went wrong, return something reasonable */
- return old_n;
- }
- if (s[0] == '\n') {
- /*
- * do what strlen() would do, so that caller
- * is always right
- */
- width++;
- } else {
- w = wcwidth(nextchar);
- if (w > 0)
- width += w;
- }
+ const char *s;
+ int flags;
+
+ flags = 0;
+ if (iflag)
+ flags |= MAGIC_TEST_MIME;
- s += bytesconsumed, n -= bytesconsumed;
+ s = magic_test(inf->m, inf->base, inf->size, flags);
+ if (s != NULL) {
+ inf->result = xstrdup(s);
+ return (1);
}
- return width;
-#else
- return strlen(s);
-#endif
+ return (0);
}
-private void
-usage(void)
+/*
+ * Last resort when no other test produced a result: report the contents as
+ * unidentified binary data - "data", or the generic MIME type
+ * "application/octet-stream" when iflag is set. Always returns 1.
+ */
+static int
+try_unknown(struct input_file *inf)
{
- (void)fprintf(stderr, USAGE, __progname, __progname);
- (void)fputs("Try `file --help' for more information.\n", stderr);
- exit(1);
+ /*
+ * "application/x-not-regular-file" is what try_stat() uses for
+ * non-regular files; data reaching this point was read from the file
+ * and simply was not recognised.
+ */
+ if (iflag)
+ xasprintf(&inf->result, "application/octet-stream");
+ else
+ xasprintf(&inf->result, "data");
+ return (1);
}
-private void
-help(void)
+/*
+ * Identify one file and print the result. The helpers are tried in a fixed
+ * order and the first one to return non-zero ends the chain, having stored
+ * the description in inf->result. The line is printed without the label
+ * when bflag is set, otherwise with the label padded to width. Afterwards
+ * the mapping (if any) and the result string are released.
+ */
+static void
+test_file(struct magic *m, struct input_file *inf, int width)
{
- (void)fputs(
-"Usage: file [OPTION...] [FILE...]\n"
-"Determine type of FILEs.\n"
-"\n", stderr);
-#define OPT(shortname, longname, opt, doc) \
- fprintf(stderr, " -%c, --" longname doc, shortname);
-#define OPT_LONGONLY(longname, opt, doc) \
- fprintf(stderr, " --" longname doc);
-#include "file_opts.h"
-#undef OPT
-#undef OPT_LONGONLY
- exit(0);
+ int stop;
+
+ inf->m = m;
+
+ stop = 0;
+ if (!stop)
+ stop = try_stat(inf);
+ if (!stop)
+ stop = try_access(inf);
+ if (!stop)
+ stop = load_file(inf);
+ if (!stop)
+ stop = try_empty(inf);
+ if (!stop)
+ stop = try_magic(inf);
+ if (!stop)
+ stop = try_text(inf);
+ /* try_unknown() always returns 1, so a result is set by this point. */
+ if (!stop)
+ stop = try_unknown(inf);
+
+ if (bflag)
+ printf("%s\n", inf->result);
+ else
+ printf("%-*s %s\n", width, inf->label, inf->result);
+
+ /* Only unmap what load_file() mmap(2)ed (it sets inf->mapped then). */
+ if (inf->mapped && inf->base != NULL)
+ munmap(inf->base, inf->size);
+ inf->base = NULL;
+
+ free(inf->result);
}
diff --git a/usr.bin/file/file.h b/usr.bin/file/file.h
index 27229b1a6c3..77b6e85da3c 100644
--- a/usr.bin/file/file.h
+++ b/usr.bin/file/file.h
@@ -1,386 +1,32 @@
-/* $OpenBSD: file.h,v 1.24 2014/05/18 17:50:11 espie Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
+/* $OpenBSD: file.h,v 1.25 2015/04/24 16:24:11 nicm Exp $ */
+
/*
- * file.h - definitions for file(1) program
- * @(#)$Id: file.h,v 1.24 2014/05/18 17:50:11 espie Exp $
+ * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#ifndef __file_h__
-#define __file_h__
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h> /* Include that here, to make sure __P gets defined */
-#include <errno.h>
-#include <fcntl.h> /* For open and flags */
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-#endif
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-#include <regex.h>
-#include <sys/types.h>
-/* Do this here and now, because struct stat gets re-defined on solaris */
-#include <sys/stat.h>
-#include <stdarg.h>
-
-#define ENABLE_CONDITIONALS
-
-#ifndef MAGIC
-#define MAGIC "/etc/magic"
-#endif
-
-#ifdef __EMX__
-#define PATHSEP ';'
-#else
-#define PATHSEP ':'
-#endif
-
-#define private static
-#ifndef protected
-#define protected
-#endif
-#define public
-
-#ifndef __GNUC_PREREQ__
-#ifdef __GNUC__
-#define __GNUC_PREREQ__(x, y) \
- ((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) || \
- (__GNUC__ > (x)))
-#else
-#define __GNUC_PREREQ__(x, y) 0
-#endif
-#endif
-
-#ifndef MIN
-#define MIN(a,b) (((a) < (b)) ? (a) : (b))
-#endif
-
-#ifndef MAX
-#define MAX(a,b) (((a) > (b)) ? (a) : (b))
-#endif
-
-#ifndef HOWMANY
-# define HOWMANY (256 * 1024) /* how much of the file to look at */
-#endif
-#define MAXMAGIS 8192 /* max entries in any one magic file
- or directory */
-#define MAXDESC 64 /* max leng of text description/MIME type */
-#define MAXstring 32 /* max leng of "string" types */
-
-#define MAGICNO 0xF11E041C
-#define VERSIONNO 5
-#define FILE_MAGICSIZE (32 * 6)
-
-#define FILE_LOAD 0
-#define FILE_CHECK 1
-#define FILE_COMPILE 2
-
-struct magic {
- /* Word 1 */
- uint16_t cont_level; /* level of ">" */
- uint8_t flag;
-#define INDIR 0x01 /* if '(...)' appears */
-#define OFFADD 0x02 /* if '>&' or '>...(&' appears */
-#define INDIROFFADD 0x04 /* if '>&(' appears */
-#define UNSIGNED 0x08 /* comparison is unsigned */
-#define NOSPACE 0x10 /* suppress space character before output */
-#define BINTEST 0x20 /* test is for a binary type (set only
- for top-level tests) */
-#define TEXTTEST 0 /* for passing to file_softmagic */
-
- uint8_t dummy1;
-
- /* Word 2 */
- uint8_t reln; /* relation (0=eq, '>'=gt, etc) */
- uint8_t vallen; /* length of string value, if any */
- uint8_t type; /* comparison type (FILE_*) */
- uint8_t in_type; /* type of indirection */
-#define FILE_INVALID 0
-#define FILE_BYTE 1
-#define FILE_SHORT 2
-#define FILE_DEFAULT 3
-#define FILE_LONG 4
-#define FILE_STRING 5
-#define FILE_DATE 6
-#define FILE_BESHORT 7
-#define FILE_BELONG 8
-#define FILE_BEDATE 9
-#define FILE_LESHORT 10
-#define FILE_LELONG 11
-#define FILE_LEDATE 12
-#define FILE_PSTRING 13
-#define FILE_LDATE 14
-#define FILE_BELDATE 15
-#define FILE_LELDATE 16
-#define FILE_REGEX 17
-#define FILE_BESTRING16 18
-#define FILE_LESTRING16 19
-#define FILE_SEARCH 20
-#define FILE_MEDATE 21
-#define FILE_MELDATE 22
-#define FILE_MELONG 23
-#define FILE_QUAD 24
-#define FILE_LEQUAD 25
-#define FILE_BEQUAD 26
-#define FILE_QDATE 27
-#define FILE_LEQDATE 28
-#define FILE_BEQDATE 29
-#define FILE_QLDATE 30
-#define FILE_LEQLDATE 31
-#define FILE_BEQLDATE 32
-#define FILE_FLOAT 33
-#define FILE_BEFLOAT 34
-#define FILE_LEFLOAT 35
-#define FILE_DOUBLE 36
-#define FILE_BEDOUBLE 37
-#define FILE_LEDOUBLE 38
-#define FILE_NAMES_SIZE 39/* size of array to contain all names */
-
-#define IS_STRING(t) \
- ((t) == FILE_STRING || \
- (t) == FILE_PSTRING || \
- (t) == FILE_BESTRING16 || \
- (t) == FILE_LESTRING16 || \
- (t) == FILE_REGEX || \
- (t) == FILE_SEARCH || \
- (t) == FILE_DEFAULT)
-
-#define FILE_FMT_NONE 0
-#define FILE_FMT_NUM 1 /* "cduxXi" */
-#define FILE_FMT_STR 2 /* "s" */
-#define FILE_FMT_QUAD 3 /* "ll" */
-#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
-#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
-
- /* Word 3 */
- uint8_t in_op; /* operator for indirection */
- uint8_t mask_op; /* operator for mask */
-#ifdef ENABLE_CONDITIONALS
- uint8_t cond; /* conditional type */
- uint8_t dummy2;
-#else
- uint8_t dummy2;
- uint8_t dummy3;
-#endif
-
-#define FILE_OPS "&|^+-*/%"
-#define FILE_OPAND 0
-#define FILE_OPOR 1
-#define FILE_OPXOR 2
-#define FILE_OPADD 3
-#define FILE_OPMINUS 4
-#define FILE_OPMULTIPLY 5
-#define FILE_OPDIVIDE 6
-#define FILE_OPMODULO 7
-#define FILE_OPS_MASK 0x07 /* mask for above ops */
-#define FILE_UNUSED_1 0x08
-#define FILE_UNUSED_2 0x10
-#define FILE_UNUSED_3 0x20
-#define FILE_OPINVERSE 0x40
-#define FILE_OPINDIRECT 0x80
-
-#ifdef ENABLE_CONDITIONALS
-#define COND_NONE 0
-#define COND_IF 1
-#define COND_ELIF 2
-#define COND_ELSE 3
-#endif /* ENABLE_CONDITIONALS */
-
- /* Word 4 */
- uint32_t offset; /* offset to magic number */
- /* Word 5 */
- int32_t in_offset; /* offset from indirection */
- /* Word 6 */
- uint32_t lineno; /* line number in magic file */
- /* Word 7,8 */
- union {
- uint64_t _mask; /* for use with numeric and date types */
- struct {
- uint32_t _count; /* repeat/line count */
- uint32_t _flags; /* modifier flags */
- } _s; /* for use with string types */
- } _u;
-#define num_mask _u._mask
-#define str_range _u._s._count
-#define str_flags _u._s._flags
-
- /* Words 9-16 */
- union VALUETYPE {
- uint8_t b;
- uint16_t h;
- uint32_t l;
- uint64_t q;
- uint8_t hs[2]; /* 2 bytes of a fixed-endian "short" */
- uint8_t hl[4]; /* 4 bytes of a fixed-endian "long" */
- uint8_t hq[8]; /* 8 bytes of a fixed-endian "quad" */
- char s[MAXstring]; /* the search string or regex pattern */
- float f;
- double d;
- } value; /* either number or string */
- /* Words 17..31 */
- char desc[MAXDESC]; /* description */
- /* Words 32..47 */
- char mimetype[MAXDESC]; /* MIME type */
-};
-
-#define BIT(A) (1 << (A))
-#define STRING_COMPACT_BLANK BIT(0)
-#define STRING_COMPACT_OPTIONAL_BLANK BIT(1)
-#define STRING_IGNORE_LOWERCASE BIT(2)
-#define STRING_IGNORE_UPPERCASE BIT(3)
-#define REGEX_OFFSET_START BIT(4)
-#define CHAR_COMPACT_BLANK 'B'
-#define CHAR_COMPACT_OPTIONAL_BLANK 'b'
-#define CHAR_IGNORE_LOWERCASE 'c'
-#define CHAR_IGNORE_UPPERCASE 'C'
-#define CHAR_REGEX_OFFSET_START 's'
-#define STRING_IGNORE_CASE (STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
-#define STRING_DEFAULT_RANGE 100
-
-
-/* list of magic entries */
-struct mlist {
- struct magic *magic; /* array of magic entries */
- uint32_t nmagic; /* number of entries in array */
- int mapped; /* allocation type: 0 => apprentice_file
- * 1 => apprentice_map + malloc
- * 2 => apprentice_map + mmap */
- struct mlist *next, *prev;
-};
-
-struct magic_set {
- struct mlist *mlist;
- struct cont {
- size_t len;
- struct level_info {
- int32_t off;
- int got_match;
-#ifdef ENABLE_CONDITIONALS
- int last_match;
- int last_cond; /* used for error checking by parse() */
-#endif
- } *li;
- } c;
- struct out {
- char *buf; /* Accumulation buffer */
- char *pbuf; /* Printable buffer */
- } o;
- uint32_t offset;
- int error;
- int flags;
- int haderr;
- const char *file;
- size_t line; /* current magic line number */
-
- /* data for searches */
- struct {
- const char *s; /* start of search in original source */
- size_t s_len; /* length of search region */
- size_t offset; /* starting offset in source: XXX - should this be off_t? */
- size_t rm_len; /* match length */
- } search;
-
- /* FIXME: Make the string dynamically allocated so that e.g.
- strings matched in files can be longer than MAXstring */
- union VALUETYPE ms_value; /* either number or string */
-};
-
-/* Type for Unicode characters */
-typedef unsigned long unichar;
-
-struct stat;
-protected const char *file_fmttime(uint64_t, int);
-protected int file_buffer(struct magic_set *, int, const char *, const void *,
- size_t);
-protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
-protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
-protected int file_printf(struct magic_set *, const char *, ...);
-protected int file_reset(struct magic_set *);
-protected int file_tryelf(struct magic_set *, int, const unsigned char *,
- size_t);
-protected int file_zmagic(struct magic_set *, int, const char *,
- const unsigned char *, size_t);
-protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
-protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
-protected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int);
-protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
-protected uint64_t file_signextend(struct magic_set *, struct magic *,
- uint64_t);
-protected void file_delmagic(struct magic *, int type, size_t entries);
-protected void file_badread(struct magic_set *);
-protected void file_badseek(struct magic_set *);
-protected void file_oomem(struct magic_set *, size_t);
-protected void file_oomem2(struct magic_set *, size_t, size_t);
-protected void file_error(struct magic_set *, int, const char *, ...);
-protected void file_magerror(struct magic_set *, const char *, ...);
-protected void file_magwarn(struct magic_set *, const char *, ...);
-protected void file_mdump(struct magic *);
-protected void file_showstr(FILE *, const char *, size_t);
-protected size_t file_mbswidth(const char *);
-protected const char *file_getbuffer(struct magic_set *);
-protected ssize_t sread(int, void *, size_t, int);
-protected int file_check_mem(struct magic_set *, unsigned int);
-protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
-
-#ifndef COMPILE_ONLY
-extern const char *file_names[];
-extern const size_t file_nnames;
-#endif
-
-#ifndef HAVE_STRERROR
-extern int sys_nerr;
-extern char *sys_errlist[];
-#define strerror(e) \
- (((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
-#endif
-
-#ifndef HAVE_STRTOUL
-#define strtoul(a, b, c) strtol(a, b, c)
-#endif
+#ifndef FILE_H
+#define FILE_H
-#ifndef HAVE_VASPRINTF
-int vasprintf(char **ptr, const char *format_string, va_list vargs);
-#endif
-#ifndef HAVE_ASPRINTF
-int asprintf(char **ptr, const char *format_string, ...);
-#endif
+/* Bytes to read if can't use the whole file. */
+#define FILE_READ_SIZE (256 * 1024)
-#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
-#define QUICK
-#endif
+/* User to drop to if run as root. */
+#define FILE_USER "nobody"
-#ifndef O_BINARY
-#define O_BINARY 0
-#endif
+/* text.c */
+const char *text_get_type(const void *, size_t);
+const char *text_try_words(const void *, size_t, int);
-#endif /* __file_h__ */
+#endif /* FILE_H */
diff --git a/usr.bin/file/file_opts.h b/usr.bin/file/file_opts.h
deleted file mode 100644
index 593bc2f7661..00000000000
--- a/usr.bin/file/file_opts.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* $OpenBSD: file_opts.h,v 1.2 2009/04/26 14:17:45 chl Exp $ */
-/*
- * Table of command-line options
- *
- * The first column specifies the short name, if any, or 0 if none.
- * The second column specifies the long name.
- * The third column specifies whether it takes a parameter.
- * The fourth column is the documentation.
- *
- * N.B. The long options' order must correspond to the code in file.c,
- * and OPTSTRING must be kept up-to-date with the short options.
- * Pay particular attention to the numbers of long-only options in the
- * switch statement!
- */
-
-OPT_LONGONLY("help", 0, " display this help and exit\n")
-OPT('v', "version", 0, " output version information and exit\n")
-OPT('m', "magic-file", 1, " LIST use LIST as a colon-separated list of magic\n"
- " number files\n")
-OPT('z', "uncompress", 0, " try to look inside compressed files\n")
-OPT('b', "brief", 0, " do not prepend filenames to output lines\n")
-OPT('c', "checking-printout", 0, " print the parsed form of the magic file, use in\n"
- " conjunction with -m to debug a new magic file\n"
- " before installing it\n")
-OPT('e', "exclude", 1, " TEST exclude TEST from the list of test to be\n"
- " performed for file. Valid tests are:\n"
- " ascii, apptype, compress, elf, soft, tar, tokens, troff\n")
-OPT('f', "files-from", 1, " FILE read the filenames to be examined from FILE\n")
-OPT('F', "separator", 1, " STRING use string as separator instead of `:'\n")
-OPT('i', "mime", 0, " output MIME type strings (--mime-type and\n"
- " --mime-encoding)\n")
-OPT_LONGONLY("mime-type", 0, " output the MIME type\n")
-OPT_LONGONLY("mime-encoding", 0, " output the MIME encoding\n")
-OPT('k', "keep-going", 0, " don't stop at the first match\n")
-#ifdef S_IFLNK
-OPT('L', "dereference", 0, " follow symlinks (default)\n")
-OPT('h', "no-dereference", 0, " don't follow symlinks\n")
-#endif
-OPT('n', "no-buffer", 0, " do not buffer output\n")
-OPT('N', "no-pad", 0, " do not pad output\n")
-OPT('0', "print0", 0, " terminate filenames with ASCII NUL\n")
-#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
-OPT('p', "preserve-date", 0, " preserve access times on files\n")
-#endif
-OPT('r', "raw", 0, " don't translate unprintable chars to \\ooo\n")
-OPT('s', "special-files", 0, " treat special (block/char devices) files as\n"
- " ordinary ones\n")
-OPT('C', "compile", 0, " compile file specified by -m\n")
-OPT('d', "debug", 0, " print debugging messages\n")
diff --git a/usr.bin/file/fsmagic.c b/usr.bin/file/fsmagic.c
deleted file mode 100644
index 80ccf6e46d0..00000000000
--- a/usr.bin/file/fsmagic.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/* $OpenBSD: fsmagic.c,v 1.14 2009/10/27 23:59:37 deraadt Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * fsmagic - magic based on filesystem info - directory, special files, etc.
- */
-
-#include "file.h"
-#include "magic.h"
-#include <string.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <stdlib.h>
-#include <sys/stat.h>
-/* Since major is a function on SVR4, we cannot use `ifndef major'. */
-#ifdef MAJOR_IN_MKDEV
-# include <sys/mkdev.h>
-# define HAVE_MAJOR
-#endif
-#ifdef MAJOR_IN_SYSMACROS
-# include <sys/sysmacros.h>
-# define HAVE_MAJOR
-#endif
-#ifdef major /* Might be defined in sys/types.h. */
-# define HAVE_MAJOR
-#endif
-
-#ifndef HAVE_MAJOR
-# define major(dev) (((dev) >> 8) & 0xff)
-# define minor(dev) ((dev) & 0xff)
-#endif
-#undef HAVE_MAJOR
-
-private int
-bad_link(struct magic_set *ms, int err, char *buf)
-{
- char *errfmt;
- if (err == ELOOP)
- errfmt = "symbolic link in a loop";
- else
- errfmt = "broken symbolic link to `%s'";
- if (ms->flags & MAGIC_ERROR) {
- file_error(ms, err, errfmt, buf);
- return -1;
- }
- if (file_printf(ms, errfmt, buf) == -1)
- return -1;
- return 1;
-}
-
-protected int
-file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb)
-{
- int ret = 0;
- int mime = ms->flags & MAGIC_MIME;
-#ifdef S_IFLNK
- char buf[BUFSIZ+4];
- int nch;
- struct stat tstatbuf;
-#endif
-
- if (fn == NULL)
- return 0;
-
- /*
- * Fstat is cheaper but fails for files you don't have read perms on.
- * On 4.2BSD and similar systems, use lstat() to identify symlinks.
- */
-#ifdef S_IFLNK
- if ((ms->flags & MAGIC_SYMLINK) == 0)
- ret = lstat(fn, sb);
- else
-#endif
- ret = stat(fn, sb); /* don't merge into if; see "ret =" above */
-
- if (ret) {
- if (ms->flags & MAGIC_ERROR) {
- file_error(ms, errno, "cannot stat `%s'", fn);
- return -1;
- }
- if (file_printf(ms, "cannot open `%s' (%s)",
- fn, strerror(errno)) == -1)
- return -1;
- return 1;
- }
-
- if (mime) {
- if ((sb->st_mode & S_IFMT) != S_IFREG) {
- if ((mime & MAGIC_MIME_TYPE) &&
- file_printf(ms, "application/x-not-regular-file")
- == -1)
- return -1;
- return 1;
- }
- }
- else {
-#ifdef S_ISUID
- if (sb->st_mode & S_ISUID)
- if (file_printf(ms, "setuid ") == -1)
- return -1;
-#endif
-#ifdef S_ISGID
- if (sb->st_mode & S_ISGID)
- if (file_printf(ms, "setgid ") == -1)
- return -1;
-#endif
-#ifdef S_ISVTX
- if (sb->st_mode & S_ISVTX)
- if (file_printf(ms, "sticky ") == -1)
- return -1;
-#endif
- }
-
- switch (sb->st_mode & S_IFMT) {
- case S_IFDIR:
- if (file_printf(ms, "directory") == -1)
- return -1;
- return 1;
-#ifdef S_IFCHR
- case S_IFCHR:
- /*
- * If -s has been specified, treat character special files
- * like ordinary files. Otherwise, just report that they
- * are block special files and go on to the next file.
- */
- if ((ms->flags & MAGIC_DEVICES) != 0)
- break;
-#ifdef HAVE_STAT_ST_RDEV
-# ifdef dv_unit
- if (file_printf(ms, "character special (%d/%d/%d)",
- major(sb->st_rdev), dv_unit(sb->st_rdev),
- dv_subunit(sb->st_rdev)) == -1)
- return -1;
-# else
- if (file_printf(ms, "character special (%ld/%ld)",
- (long) major(sb->st_rdev), (long) minor(sb->st_rdev)) == -1)
- return -1;
-# endif
-#else
- if (file_printf(ms, "character special") == -1)
- return -1;
-#endif
- return 1;
-#endif
-#ifdef S_IFBLK
- case S_IFBLK:
- /*
- * If -s has been specified, treat block special files
- * like ordinary files. Otherwise, just report that they
- * are block special files and go on to the next file.
- */
- if ((ms->flags & MAGIC_DEVICES) != 0)
- break;
-#ifdef HAVE_STAT_ST_RDEV
-# ifdef dv_unit
- if (file_printf(ms, "block special (%d/%d/%d)",
- major(sb->st_rdev), dv_unit(sb->st_rdev),
- dv_subunit(sb->st_rdev)) == -1)
- return -1;
-# else
- if (file_printf(ms, "block special (%ld/%ld)",
- (long)major(sb->st_rdev), (long)minor(sb->st_rdev)) == -1)
- return -1;
-# endif
-#else
- if (file_printf(ms, "block special") == -1)
- return -1;
-#endif
- return 1;
-#endif
- /* TODO add code to handle V7 MUX and Blit MUX files */
-#ifdef S_IFIFO
- case S_IFIFO:
- if((ms->flags & MAGIC_DEVICES) != 0)
- break;
- if (file_printf(ms, "fifo (named pipe)") == -1)
- return -1;
- return 1;
-#endif
-#ifdef S_IFDOOR
- case S_IFDOOR:
- if (file_printf(ms, "door") == -1)
- return -1;
- return 1;
-#endif
-#ifdef S_IFLNK
- case S_IFLNK:
- if ((nch = readlink(fn, buf, BUFSIZ-1)) <= 0) {
- if (ms->flags & MAGIC_ERROR) {
- file_error(ms, errno, "unreadable symlink `%s'",
- fn);
- return -1;
- }
- if (file_printf(ms,
- "unreadable symlink `%s' (%s)", fn,
- strerror(errno)) == -1)
- return -1;
- return 1;
- }
- buf[nch] = '\0'; /* readlink(2) does not do this */
-
- /* If broken symlink, say so and quit early. */
- if (*buf == '/') {
- if (stat(buf, &tstatbuf) < 0)
- return bad_link(ms, errno, buf);
- } else {
- char *tmp;
- char buf2[BUFSIZ+BUFSIZ+4];
-
- if ((tmp = strrchr(fn, '/')) == NULL) {
- tmp = buf; /* in current directory anyway */
- } else {
- if (tmp - fn + 1 > BUFSIZ) {
- if (ms->flags & MAGIC_ERROR) {
- file_error(ms, 0,
- "path too long: `%s'", buf);
- return -1;
- }
- if (file_printf(ms,
- "path too long: `%s'", fn) == -1)
- return -1;
- return 1;
- }
- (void)strlcpy(buf2, fn, sizeof buf2); /* take dir part */
- buf2[tmp - fn + 1] = '\0';
- (void)strlcat(buf2, buf, sizeof buf2); /* plus (rel) link */
- tmp = buf2;
- }
- if (stat(tmp, &tstatbuf) < 0)
- return bad_link(ms, errno, buf);
- }
-
- /* Otherwise, handle it. */
- if ((ms->flags & MAGIC_SYMLINK) != 0) {
- const char *p;
- ms->flags &= MAGIC_SYMLINK;
- p = magic_file(ms, buf);
- ms->flags |= MAGIC_SYMLINK;
- return p != NULL ? 1 : -1;
- } else { /* just print what it points to */
- if (file_printf(ms, "symbolic link to `%s'",
- buf) == -1)
- return -1;
- }
- return 1;
-#endif
-#ifdef S_IFSOCK
-#ifndef __COHERENT__
- case S_IFSOCK:
- if (file_printf(ms, "socket") == -1)
- return -1;
- return 1;
-#endif
-#endif
- case S_IFREG:
- break;
- default:
- file_error(ms, 0, "invalid mode 0%o", sb->st_mode);
- return -1;
- /*NOTREACHED*/
- }
-
- /*
- * regular file, check next possibility
- *
- * If stat() tells us the file has zero length, report here that
- * the file is empty, so we can skip all the work of opening and
- * reading the file.
- * But if the -s option has been given, we skip this optimization,
- * since on some systems, stat() reports zero size for raw disk
- * partitions. (If the block special device really has zero length,
- * the fact that it is empty will be detected and reported correctly
- * when we read the file.)
- */
- if ((ms->flags & MAGIC_DEVICES) == 0 && sb->st_size == 0) {
- if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
- file_printf(ms, mime ? "application/x-empty" :
- "empty") == -1)
- return -1;
- return 1;
- }
- return 0;
-}
diff --git a/usr.bin/file/funcs.c b/usr.bin/file/funcs.c
deleted file mode 100644
index cdc593ee54f..00000000000
--- a/usr.bin/file/funcs.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/* $OpenBSD: funcs.c,v 1.8 2014/05/18 17:50:11 espie Exp $ */
-/*
- * Copyright (c) Christos Zoulas 2003.
- * All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include "file.h"
-#include "magic.h"
-#include <stdarg.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#if defined(HAVE_WCHAR_H)
-#include <wchar.h>
-#endif
-#if defined(HAVE_WCTYPE_H)
-#include <wctype.h>
-#endif
-
-/*
- * Like printf, only the formatted text is appended to the result buffer
- * ms->o.buf, which is (re)allocated as needed.
- *
- * Returns 0 on success.  If memory cannot be allocated the problem is
- * reported through file_error() and -1 is returned.
- */
-protected int
-file_printf(struct magic_set *ms, const char *fmt, ...)
-{
-	va_list ap;
-	int len;
-	char *buf, *newstr;
-
-	va_start(ap, fmt);
-	len = vasprintf(&buf, fmt, ap);
-	/* Every va_start() needs its va_end(), on the failure path too. */
-	va_end(ap);
-	if (len < 0)
-		goto out;
-
-	if (ms->o.buf != NULL) {
-		/* Build "old text" + "new text" as one fresh allocation. */
-		len = asprintf(&newstr, "%s%s", ms->o.buf, buf);
-		free(buf);
-		if (len < 0)
-			goto out;
-		free(ms->o.buf);
-		buf = newstr;
-	}
-	ms->o.buf = buf;
-	return 0;
-out:
-	file_error(ms, errno, "vasprintf failed");
-	return -1;
-}
-
-/*
- * error - print best error message possible
- *
- * Records the first error seen on ms.  The message built from f and va is
- * appended to the result buffer, preceded by "line N: " when lineno is
- * non-zero (the buffer is emptied first in that case) and followed by
- * " (strerror text)" when error is positive.  Once ms->haderr is set,
- * further calls do nothing.
- */
-/*VARARGS*/
-private void
-file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
-    uint32_t lineno)
-{
-	char *msg;
-
-	/* Only the first error is ok */
-	if (ms->haderr)
-		return;
-	/*
-	 * Mark the error before formatting anything: file_printf() reports
-	 * its own failures through file_error(), so leaving haderr clear
-	 * until the end would let a failing allocation recurse without bound.
-	 */
-	ms->haderr++;
-	ms->error = error;
-
-	if (lineno != 0) {
-		free(ms->o.buf);
-		ms->o.buf = NULL;
-		file_printf(ms, "line %u: ", lineno);
-	}
-	/*
-	 * A va_list cannot be handed to a "..." function, so expand the
-	 * caller's format here and append the finished text verbatim.
-	 */
-	if (vasprintf(&msg, f, va) != -1) {
-		file_printf(ms, "%s", msg);
-		free(msg);
-	}
-	if (error > 0)
-		file_printf(ms, " (%s)", strerror(error));
-}
-
-/*
- * Report an error on ms.  "error" is passed on as the errno-style code and
- * f is a printf-style format for the remaining arguments.  No magic line
- * number is attached (lineno 0).
- */
-/*VARARGS*/
-protected void
-file_error(struct magic_set *ms, int error, const char *f, ...)
-{
-	va_list args;
-
-	va_start(args, f);
-	file_error_core(ms, error, f, args, 0);
-	va_end(args);
-}
-
-/*
- * Report an error found in a magic file: like file_error() with no
- * errno-style code, but tagged with the current line number ms->line.
- */
-/*VARARGS*/
-protected void
-file_magerror(struct magic_set *ms, const char *f, ...)
-{
-	va_list args;
-
-	va_start(args, f);
-	file_error_core(ms, 0, f, args, ms->line);
-	va_end(args);
-}
-
-/* Report, with the current errno, that allocating len bytes failed. */
-protected void
-file_oomem(struct magic_set *ms, size_t len)
-{
-	int saved_errno = errno;
-
-	file_error(ms, saved_errno, "cannot allocate %zu bytes", len);
-}
-
-/* Report, with the current errno, that allocating len * l2 bytes failed. */
-protected void
-file_oomem2(struct magic_set *ms, size_t len, size_t l2)
-{
-	int saved_errno = errno;
-
-	file_error(ms, saved_errno, "cannot allocate %zu * %zu bytes", len,
-	    l2);
-}
-/* Report a failed seek, using the current errno. */
-protected void
-file_badseek(struct magic_set *ms)
-{
-	int saved_errno = errno;
-
-	file_error(ms, saved_errno, "error seeking");
-}
-
-/* Report a failed read, using the current errno. */
-protected void
-file_badread(struct magic_set *ms)
-{
-	int saved_errno = errno;
-
-	file_error(ms, saved_errno, "error reading");
-}
-
-#ifndef COMPILE_ONLY
-/*
- * Identify the nb bytes at buf and append a description (or MIME type) to
- * the result buffer of ms.
- *
- * Buffers of zero or one byte are reported directly.  Otherwise the tests
- * are tried in this order: file_zmagic(), file_is_tar(), file_softmagic(),
- * file_ascmagic().  A test is skipped when its MAGIC_NO_CHECK_* flag is
- * set, and the first test returning non-zero ends the search.  If every
- * test returns 0 or is skipped, "data" / "application/octet-stream" is
- * printed instead.
- *
- * Returns -1 if printing failed, otherwise the non-zero value of the test
- * that ended the search (1 when the fallback text was printed).
- */
-protected int
-file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf,
- size_t nb)
-{
- int m;
- int mime = ms->flags & MAGIC_MIME;
-
- /*
- * In MIME mode text is only printed when the MIME type was asked for;
- * the same condition guards every file_printf() call below.
- */
- if (nb == 0) {
- if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
- file_printf(ms, mime ? "application/x-empty" :
- "empty") == -1)
- return -1;
- return 1;
- } else if (nb == 1) {
- if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
- file_printf(ms, mime ? "application/octet-stream" :
- "very short file (no magic)") == -1)
- return -1;
- return 1;
- }
-
-#ifdef __EMX__
- if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
- switch (file_os2_apptype(ms, inname, buf, nb)) {
- case -1:
- return -1;
- case 0:
- break;
- default:
- return 1;
- }
- }
-#endif
-
- /*
- * Each level below is entered when the previous test was disabled by
- * its flag (m is then left unassigned) or ran and returned 0.  m is
- * always assigned before the final return: either by the test that
- * stopped the descent or by the innermost fallback.
- */
- /* try compression stuff */
- if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) != 0 ||
- (m = file_zmagic(ms, fd, inname, buf, nb)) == 0) {
- /* Check if we have a tar file */
- if ((ms->flags & MAGIC_NO_CHECK_TAR) != 0 ||
- (m = file_is_tar(ms, buf, nb)) == 0) {
- /* try tests in /etc/magic (or surrogate magic file) */
- if ((ms->flags & MAGIC_NO_CHECK_SOFT) != 0 ||
- (m = file_softmagic(ms, buf, nb, BINTEST)) == 0) {
- /* try known keywords, check whether it is ASCII */
- if ((ms->flags & MAGIC_NO_CHECK_ASCII) != 0 ||
- (m = file_ascmagic(ms, buf, nb)) == 0) {
- /* abandon hope, all ye who remain here */
- if ((!mime || (mime & MAGIC_MIME_TYPE)) &&
- file_printf(ms, mime ? "application/octet-stream" :
- "data") == -1)
- return -1;
- m = 1;
- }
- }
- }
- }
-#ifdef BUILTIN_ELF
- if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 &&
- nb > 5 && fd != -1) {
- /*
- * We matched something in the file, so this *might*
- * be an ELF file, and the file is at least 5 bytes
- * long, so if it's an ELF file it has at least one
- * byte past the ELF magic number - try extracting
- * information from the ELF headers that cannot easily
- * be extracted with rules in the magic file.
- *
- * NOTE(review): the condition is nb > 5, i.e. at least 6
- * bytes, not the 5 described above - confirm which is
- * intended.
- */
- (void)file_tryelf(ms, fd, buf, nb);
- }
-#endif
- return m;
-}
-#endif
-
-/*
- * Prepare ms for a new query by clearing the result buffer pointer and the
- * error state.  Returns 0, or -1 after reporting an error when no magic
- * list is loaded.
- *
- * NOTE(review): o.buf is set to NULL without being freed here; presumably
- * the previous buffer is released elsewhere - confirm against the callers.
- */
-protected int
-file_reset(struct magic_set *ms)
-{
-	if (ms->mlist != NULL) {
-		ms->o.buf = NULL;
-		ms->haderr = 0;
-		ms->error = -1;
-		return 0;
-	}
-
-	file_error(ms, 0, "no magic files loaded");
-	return -1;
-}
-
-/*
- * Write the four-character escape "\ooo" (a backslash and three octal
- * digits) for the byte at *o to the output at n.  n is advanced by four
- * and o by one.  Both arguments are evaluated several times, so they must
- * be plain pointer variables.
- */
-#define OCTALIFY(n, o) \
- /*LINTED*/ \
- (void)(*(n)++ = '\\', \
- *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \
- *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \
- *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \
- (o)++)
-
-/*
- * Return the accumulated result text in printable form.
- *
- * With MAGIC_RAW the buffer is returned as is.  Otherwise a copy is built
- * in ms->o.pbuf in which every non-printable character is replaced by a
- * "\ooo" octal escape.  When wide character support is compiled in, the
- * text is first decoded as a multibyte string; if that fails it is handled
- * byte by byte instead.
- *
- * Returns NULL if an error has been recorded, if there is no result text,
- * or if memory cannot be allocated.  The returned storage belongs to ms.
- */
-protected const char *
-file_getbuffer(struct magic_set *ms)
-{
-	char *pbuf, *op, *np;
-	size_t psize, len;
-
-	if (ms->haderr)
-		return NULL;
-
-	if (ms->flags & MAGIC_RAW)
-		return ms->o.buf;
-
-	/* Nothing has been printed: there is no string to measure. */
-	if (ms->o.buf == NULL)
-		return NULL;
-
-	/* * 4 is for octal representation, + 1 is for NUL */
-	len = strlen(ms->o.buf);
-	if (len > (SIZE_MAX - 1) / 4) {
-		file_oomem(ms, len);
-		return NULL;
-	}
-	psize = len * 4 + 1;
-	if ((pbuf = realloc(ms->o.pbuf, psize)) == NULL) {
-		file_oomem(ms, psize);
-		return NULL;
-	}
-	ms->o.pbuf = pbuf;
-
-#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
-	{
-		mbstate_t state;
-		wchar_t nextchar;
-		int mb_conv = 1;
-		size_t bytesconsumed;
-		char *eop;
-		(void)memset(&state, 0, sizeof(mbstate_t));
-
-		np = ms->o.pbuf;
-		op = ms->o.buf;
-		eop = op + len;
-
-		while (op < eop) {
-			bytesconsumed = mbrtowc(&nextchar, op,
-			    (size_t)(eop - op), &state);
-			if (bytesconsumed == (size_t)(-1) ||
-			    bytesconsumed == (size_t)(-2)) {
-				/* Invalid or truncated sequence: give up. */
-				mb_conv = 0;
-				break;
-			}
-
-			if (iswprint(nextchar)) {
-				(void)memcpy(np, op, bytesconsumed);
-				op += bytesconsumed;
-				np += bytesconsumed;
-			} else {
-				/* Escape each byte of the character. */
-				while (bytesconsumed-- > 0)
-					OCTALIFY(np, op);
-			}
-		}
-		*np = '\0';
-
-		/* Parsing succeeded as a multi-byte sequence */
-		if (mb_conv != 0)
-			return ms->o.pbuf;
-	}
-#endif
-
-	/*
-	 * OCTALIFY advances op itself, so the loop header must not: stepping
-	 * twice would drop the character after every escape and could walk
-	 * past the terminating NUL.
-	 */
-	for (np = ms->o.pbuf, op = ms->o.buf; *op != '\0';) {
-		if (isprint((unsigned char)*op)) {
-			*np++ = *op++;
-		} else {
-			OCTALIFY(np, op);
-		}
-	}
-	*np = '\0';
-	return ms->o.pbuf;
-}
-
-/*
- * Make sure ms->c.li has a slot for continuation level "level", growing
- * the array when necessary, and reset the match state of that slot.
- *
- * Returns 0 on success.  If memory cannot be allocated the error is
- * reported, -1 is returned and the existing array is left untouched.
- */
-protected int
-file_check_mem(struct magic_set *ms, unsigned int level)
-{
-	size_t len, newlen;
-	void *li;
-
-	if (level >= ms->c.len) {
-		/*
-		 * Grow far enough to cover "level" itself (a fixed step of
-		 * 20 is not enough after a large jump) and only commit the
-		 * new pointer and length once the allocation has succeeded.
-		 */
-		newlen = (size_t)level + 20;
-		if (newlen < level || newlen > SIZE_MAX / sizeof(*ms->c.li)) {
-			errno = ENOMEM;
-			file_oomem(ms, newlen);
-			return -1;
-		}
-		len = newlen * sizeof(*ms->c.li);
-		/* realloc(NULL, len) behaves like malloc(len). */
-		if ((li = realloc(ms->c.li, len)) == NULL) {
-			file_oomem(ms, len);
-			return -1;
-		}
-		ms->c.li = li;
-		ms->c.len = newlen;
-	}
-	ms->c.li[level].got_match = 0;
-#ifdef ENABLE_CONDITIONALS
-	ms->c.li[level].last_match = 0;
-	ms->c.li[level].last_cond = COND_NONE;
-#endif /* ENABLE_CONDITIONALS */
-	return 0;
-}
diff --git a/usr.bin/file/is_tar.c b/usr.bin/file/is_tar.c
deleted file mode 100644
index 6d84bfccd5a..00000000000
--- a/usr.bin/file/is_tar.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/* $OpenBSD: is_tar.c,v 1.10 2009/10/27 23:59:37 deraadt Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * is_tar() -- figure out whether file is a tar archive.
- *
- * Stolen (by the author!) from the public domain tar program:
- * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
- *
- * @(#)list.c 1.18 9/23/86 Public Domain - gnu
- *
- * Comments changed and some code/comments reformatted
- * for file command by Ian Darwin.
- */
-
-#include "file.h"
-#include "magic.h"
-#include <string.h>
-#include <ctype.h>
-#include <sys/types.h>
-#include "tar.h"
-
-/* True if c is an octal digit.  The argument is evaluated twice. */
-#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
-
-private int is_tar(const unsigned char *, size_t);
-private int from_oct(int, const char *); /* Decode octal number */
-
-/* Descriptions for is_tar() results 1, 2 and 3 (indexed by result - 1). */
-static const char tartype[][32] = {
- "tar archive",
- "POSIX tar archive",
- "POSIX tar archive (GNU)",
-};
-
-/*
- * Report tar archives.  Returns 1 if a description was printed, 0 if buf
- * was not recognised as a tar archive (or only the MIME encoding is
- * wanted), and -1 if printing failed.
- */
-protected int
-file_is_tar(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
-{
-	/*
-	 * Do the tar test first, because if the first file in the tar
-	 * archive starts with a dot, we can confuse it with an nroff file.
-	 */
-	int kind = is_tar(buf, nbytes);
-	int mime = ms->flags & MAGIC_MIME;
-	const char *desc;
-
-	if (kind < 1 || kind > 3)
-		return 0;
-
-	if (mime == MAGIC_MIME_ENCODING)
-		return 0;
-
-	desc = mime ? "application/x-tar" : tartype[kind - 1];
-	return file_printf(ms, desc) == -1 ? -1 : 1;
-}
-
-/*
- * Decide whether buf starts with a tar header record.
- *
- * Return
- *	0 if the buffer is too short or the checksum is bad (i.e., probably
- *	  not a tar archive),
- *	1 for old UNIX tar file,
- *	2 for Unix Std (POSIX) tar file,
- *	3 for GNU tar file.
- */
-private int
-is_tar(const unsigned char *buf, size_t nbytes)
-{
-	const union record *header = (const union record *)(const void *)buf;
-	size_t i;
-	int sum, recsum;
-
-	if (nbytes < sizeof(union record))
-		return 0;
-
-	recsum = from_oct(8, header->header.chksum);
-
-	/* Add up every byte of the record; masking keeps each one unsigned. */
-	sum = 0;
-	for (i = 0; i < sizeof(union record); i++)
-		sum += 0xFF & header->charptr[i];
-
-	/* Adjust checksum to count the "chksum" field as blanks. */
-	for (i = 0; i < sizeof(header->header.chksum); i++)
-		sum -= 0xFF & header->header.chksum[i];
-	sum += ' '* sizeof header->header.chksum;
-
-	if (sum != recsum)
-		return 0;	/* Not a tar archive */
-
-	if (strcmp(header->header.magic, GNUTMAGIC) == 0)
-		return 3;	/* GNU Unix Standard tar archive */
-	if (strcmp(header->header.magic, TMAGIC) == 0)
-		return 2;	/* Unix Standard tar archive */
-
-	return 1;		/* Old fashioned tar archive */
-}
-
-
-/*
- * Quick and dirty octal conversion of a field of at most digs characters.
- *
- * Leading white space is skipped, then octal digits are accumulated.
- * Result is -1 if the field is invalid (all blank, or the digits are
- * followed, inside the field, by something other than white space or NUL).
- */
-private int
-from_oct(int digs, const char *where)
-{
-	int value = 0;
-
-	/* Skip spaces; running out of field here means it was all blank. */
-	for (; isspace((unsigned char)*where); where++) {
-		if (--digs <= 0)
-			return -1;
-	}
-
-	/* Scan til nonoctal */
-	for (; digs > 0 && isodigit(*where); where++, digs--)
-		value = (value << 3) | (*where - '0');
-
-	/* Ended on non-space/nul */
-	if (digs > 0 && *where != '\0' && !isspace((unsigned char)*where))
-		return -1;
-
-	return value;
-}
diff --git a/usr.bin/file/magic-common.c b/usr.bin/file/magic-common.c
new file mode 100644
index 00000000000..e84d113b962
--- /dev/null
+++ b/usr.bin/file/magic-common.c
@@ -0,0 +1,83 @@
+/* $OpenBSD: magic-common.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "magic.h"
+
+/*
+ * Parse an unsigned number (base chosen by strtoull(3) from the prefix)
+ * at the start of s and store it in *u.  A single trailing 'L' is
+ * consumed as well.
+ *
+ * Returns a pointer to the first character not consumed, or NULL if s is
+ * empty, begins with '-' or the value is out of range.  *u is written
+ * whenever the conversion is attempted, even if NULL is returned.
+ */
+char *
+magic_strtoull(const char *s, uint64_t *u)
+{
+	char	*end;
+
+	if (s[0] == '-')
+		return (NULL);
+
+	errno = 0;
+	*u = strtoull(s, &end, 0);
+	if (s[0] == '\0' || (errno == ERANGE && *u == ULLONG_MAX))
+		return (NULL);
+
+	return (*end == 'L' ? end + 1 : end);
+}
+
+/*
+ * Parse a signed number (base chosen by strtoll(3) from the prefix) at the
+ * start of s and store it in *i.  A single trailing 'L' is consumed as
+ * well.
+ *
+ * Returns a pointer to the first character not consumed, or NULL if s is
+ * empty or the value does not fit, in either direction.  *i is written
+ * even when NULL is returned.
+ */
+char *
+magic_strtoll(const char *s, int64_t *i)
+{
+	char	*endptr;
+
+	errno = 0;
+	*i = strtoll(s, &endptr, 0);
+	if (*s == '\0')
+		return (NULL);
+	/* Out of range values are clamped to LLONG_MIN as well as LLONG_MAX. */
+	if (errno == ERANGE && (*i == LLONG_MAX || *i == LLONG_MIN))
+		return (NULL);
+	if (*endptr == 'L')
+		endptr++;
+	return (endptr);
+}
+
+/*
+ * Print a warning about magic line ml to stderr as "path:line: message".
+ * Nothing is printed unless warnings are enabled on the owning magic set,
+ * and the warning is dropped silently if it cannot be formatted.
+ */
+void
+magic_warn(struct magic_line *ml, const char *fmt, ...)
+{
+	va_list	 args;
+	char	*text;
+	int	 rc;
+
+	if (!ml->root->warnings)
+		return;
+
+	va_start(args, fmt);
+	rc = vasprintf(&text, fmt, args);
+	va_end(args);
+	if (rc == -1)
+		return;
+
+	fprintf(stderr, "%s:%u: %s\n", ml->root->path, ml->line, text);
+	free(text);
+}
diff --git a/usr.bin/file/magic-dump.c b/usr.bin/file/magic-dump.c
new file mode 100644
index 00000000000..286680b5312
--- /dev/null
+++ b/usr.bin/file/magic-dump.c
@@ -0,0 +1,53 @@
+/* $OpenBSD: magic-dump.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <stdio.h>
+
+#include "magic.h"
+
+/*
+ * Print one magic line and then, recursively, its children.  Each line is
+ * shown as its line number, one '>' per nesting level, the type string,
+ * the result text, the MIME type in brackets (if any), the strength and a
+ * "(text)" marker when the line is flagged as text.
+ */
+static void
+magic_dump_line(struct magic_line *ml, u_int depth)
+{
+	struct magic_line	*child;
+	const char		*result, *lparen, *mime, *rparen, *text;
+	u_int			 left;
+
+	printf("%u", ml->line);
+	for (left = depth; left > 0; left--)
+		printf(">");
+
+	result = (ml->result == NULL) ? "" : ml->result;
+	if (ml->mimetype == NULL) {
+		lparen = mime = rparen = "";
+	} else {
+		lparen = " (";
+		mime = ml->mimetype;
+		rparen = ")";
+	}
+	text = ml->text ? " (text)" : "";
+
+	printf(" %s/%s%s%s%s [%u]%s\n", ml->type_string, result, lparen, mime,
+	    rparen, ml->strength, text);
+
+	TAILQ_FOREACH(child, &ml->children, entry) {
+		magic_dump_line(child, depth + 1);
+	}
+}
+
+/* Print every line in the tree of m, each followed by its children. */
+void
+magic_dump(struct magic *m)
+{
+	struct magic_line	*top;
+
+	RB_FOREACH(top, magic_tree, &m->tree) {
+		magic_dump_line(top, 0);
+	}
+}
diff --git a/usr.bin/file/magic-load.c b/usr.bin/file/magic-load.c
new file mode 100644
index 00000000000..29455141d57
--- /dev/null
+++ b/usr.bin/file/magic-load.c
@@ -0,0 +1,1020 @@
+/* $OpenBSD: magic-load.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "magic.h"
+#include "xmalloc.h"
+
+/* Value of the octal digit c, or -1 if c is not an octal digit. */
+static int
+magic_odigit(u_char c)
+{
+	return ((c >= '0' && c <= '7') ? c - '0' : -1);
+}
+
+/*
+ * Value of the hexadecimal digit c (either case), or -1 if c is not a
+ * hexadecimal digit.
+ */
+static int
+magic_xdigit(u_char c)
+{
+	if (c >= 'a' && c <= 'f')
+		return (c - 'a' + 10);
+	if (c >= 'A' && c <= 'F')
+		return (c - 'A' + 10);
+	if (c >= '0' && c <= '9')
+		return (c - '0');
+	return (-1);
+}
+
+/*
+ * Set the text flag to "text" on ml and on each of its ancestors up to the
+ * top-level line.  ml itself must not be NULL.
+ */
+static void
+magic_mark_text(struct magic_line *ml, int text)
+{
+	struct magic_line	*cur = ml;
+
+	for (;;) {
+		cur->text = text;
+		cur = cur->parent;
+		if (cur == NULL)
+			break;
+	}
+}
+
+/*
+ * Compile the extended regular expression p into re, without subexpression
+ * reporting.  Returns 0 on success; on failure a warning that mentions
+ * "name" is issued for ml and -1 is returned.
+ */
+static int
+magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
+    const char *p)
+{
+	char	message[256];
+	int	rc;
+
+	rc = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
+	if (rc == 0)
+		return (0);
+
+	regerror(rc, re, message, sizeof message);
+	magic_warn(ml, "bad %s pattern: %s", name, message);
+	return (-1);
+}
+
+/*
+ * Store the result (description) text of a magic line and check that any
+ * printf-style conversion in it suits the type of the line.
+ *
+ * Leading white space is skipped and an empty result is stored as NULL.
+ * At most one conversion is allowed; "%%" is a literal percent sign and is
+ * not counted.  A "%s" used with a type that is neither a date nor a
+ * string sets ml->stringify.  The shared validation patterns in ml->root
+ * are compiled the first time they are needed.
+ *
+ * Returns 0 if the result is acceptable and -1, after a warning, if not.
+ */
+static int
+magic_set_result(struct magic_line *ml, const char *s)
+{
+	const char	*fmt;
+	const char	*endfmt;
+	const char	*cp;
+	regex_t		*re = NULL;
+	regmatch_t	 pmatch;
+	size_t		 fmtlen;
+
+	while (isspace((u_char)*s))
+		s++;
+	if (*s == '\0') {
+		ml->result = NULL;
+		return (0);
+	}
+	ml->result = xstrdup(s);
+
+	/* Look for the (single) conversion. */
+	fmt = NULL;
+	for (cp = s; *cp != '\0'; cp++) {
+		if (*cp != '%')
+			continue;
+		if (cp[1] == '%') {
+			/*
+			 * "%%" is an escaped percent sign: step over both
+			 * characters so that the second one is not taken
+			 * for the start of a conversion.
+			 */
+			cp++;
+			continue;
+		}
+		if (fmt != NULL) {
+			magic_warn(ml, "multiple formats");
+			return (-1);
+		}
+		fmt = cp;
+	}
+	if (fmt == NULL)
+		return (0);
+	fmt++;
+
+	/* The conversion runs up to and including its conversion letter. */
+	for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
+		if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
+			break;
+	}
+	if (*endfmt == '\0') {
+		magic_warn(ml, "unterminated format");
+		return (-1);
+	}
+	fmtlen = endfmt + 1 - fmt;
+	if (fmtlen > 32) {
+		magic_warn(ml, "format too long");
+		return (-1);
+	}
+
+	/* %s with a type that is not already a string or a date. */
+	if (*endfmt == 's') {
+		switch (ml->type) {
+		case MAGIC_TYPE_DATE:
+		case MAGIC_TYPE_LDATE:
+		case MAGIC_TYPE_UDATE:
+		case MAGIC_TYPE_ULDATE:
+		case MAGIC_TYPE_BEDATE:
+		case MAGIC_TYPE_BELDATE:
+		case MAGIC_TYPE_UBEDATE:
+		case MAGIC_TYPE_UBELDATE:
+		case MAGIC_TYPE_QDATE:
+		case MAGIC_TYPE_QLDATE:
+		case MAGIC_TYPE_UQDATE:
+		case MAGIC_TYPE_UQLDATE:
+		case MAGIC_TYPE_BEQDATE:
+		case MAGIC_TYPE_BEQLDATE:
+		case MAGIC_TYPE_UBEQDATE:
+		case MAGIC_TYPE_UBEQLDATE:
+		case MAGIC_TYPE_LEQDATE:
+		case MAGIC_TYPE_LEQLDATE:
+		case MAGIC_TYPE_ULEQDATE:
+		case MAGIC_TYPE_ULEQLDATE:
+		case MAGIC_TYPE_LEDATE:
+		case MAGIC_TYPE_LELDATE:
+		case MAGIC_TYPE_ULEDATE:
+		case MAGIC_TYPE_ULELDATE:
+		case MAGIC_TYPE_MEDATE:
+		case MAGIC_TYPE_MELDATE:
+		case MAGIC_TYPE_STRING:
+		case MAGIC_TYPE_PSTRING:
+		case MAGIC_TYPE_BESTRING16:
+		case MAGIC_TYPE_LESTRING16:
+		case MAGIC_TYPE_REGEX:
+		case MAGIC_TYPE_SEARCH:
+			break;
+		default:
+			ml->stringify = 1;
+			break;
+		}
+	}
+
+	if (!ml->root->compiled) {
+		/*
+		 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
+		 * with byte, short, long. We get lucky because our first and
+		 * only argument ends up in a register. Accept it for now.
+		 */
+		if (magic_make_pattern(ml, "short", &ml->root->format_short,
+		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
+			return (-1);
+		if (magic_make_pattern(ml, "long", &ml->root->format_long,
+		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
+			return (-1);
+		if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
+		    "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
+			return (-1);
+		if (magic_make_pattern(ml, "float", &ml->root->format_float,
+		    "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
+			return (-1);
+		if (magic_make_pattern(ml, "string", &ml->root->format_string,
+		    "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
+			return (-1);
+		ml->root->compiled = 1;
+	}
+
+	/* Pick the pattern the conversion has to match for this type. */
+	if (ml->stringify)
+		re = &ml->root->format_string;
+	else {
+		switch (ml->type) {
+		case MAGIC_TYPE_NONE:
+		case MAGIC_TYPE_DEFAULT:
+			return (0); /* don't use result */
+		case MAGIC_TYPE_BYTE:
+		case MAGIC_TYPE_UBYTE:
+		case MAGIC_TYPE_SHORT:
+		case MAGIC_TYPE_USHORT:
+		case MAGIC_TYPE_BESHORT:
+		case MAGIC_TYPE_UBESHORT:
+		case MAGIC_TYPE_LESHORT:
+		case MAGIC_TYPE_ULESHORT:
+			re = &ml->root->format_short;
+			break;
+		case MAGIC_TYPE_LONG:
+		case MAGIC_TYPE_ULONG:
+		case MAGIC_TYPE_BELONG:
+		case MAGIC_TYPE_UBELONG:
+		case MAGIC_TYPE_LELONG:
+		case MAGIC_TYPE_ULELONG:
+		case MAGIC_TYPE_MELONG:
+			re = &ml->root->format_long;
+			break;
+		case MAGIC_TYPE_QUAD:
+		case MAGIC_TYPE_UQUAD:
+		case MAGIC_TYPE_BEQUAD:
+		case MAGIC_TYPE_UBEQUAD:
+		case MAGIC_TYPE_LEQUAD:
+		case MAGIC_TYPE_ULEQUAD:
+			re = &ml->root->format_quad;
+			break;
+		case MAGIC_TYPE_FLOAT:
+		case MAGIC_TYPE_BEFLOAT:
+		case MAGIC_TYPE_LEFLOAT:
+		case MAGIC_TYPE_DOUBLE:
+		case MAGIC_TYPE_BEDOUBLE:
+		case MAGIC_TYPE_LEDOUBLE:
+			re = &ml->root->format_float;
+			break;
+		case MAGIC_TYPE_DATE:
+		case MAGIC_TYPE_LDATE:
+		case MAGIC_TYPE_UDATE:
+		case MAGIC_TYPE_ULDATE:
+		case MAGIC_TYPE_BEDATE:
+		case MAGIC_TYPE_BELDATE:
+		case MAGIC_TYPE_UBEDATE:
+		case MAGIC_TYPE_UBELDATE:
+		case MAGIC_TYPE_QDATE:
+		case MAGIC_TYPE_QLDATE:
+		case MAGIC_TYPE_UQDATE:
+		case MAGIC_TYPE_UQLDATE:
+		case MAGIC_TYPE_BEQDATE:
+		case MAGIC_TYPE_BEQLDATE:
+		case MAGIC_TYPE_UBEQDATE:
+		case MAGIC_TYPE_UBEQLDATE:
+		case MAGIC_TYPE_LEQDATE:
+		case MAGIC_TYPE_LEQLDATE:
+		case MAGIC_TYPE_ULEQDATE:
+		case MAGIC_TYPE_ULEQLDATE:
+		case MAGIC_TYPE_LEDATE:
+		case MAGIC_TYPE_LELDATE:
+		case MAGIC_TYPE_ULEDATE:
+		case MAGIC_TYPE_ULELDATE:
+		case MAGIC_TYPE_MEDATE:
+		case MAGIC_TYPE_MELDATE:
+		case MAGIC_TYPE_STRING:
+		case MAGIC_TYPE_PSTRING:
+		case MAGIC_TYPE_REGEX:
+		case MAGIC_TYPE_SEARCH:
+			re = &ml->root->format_string;
+			break;
+		case MAGIC_TYPE_BESTRING16:
+		case MAGIC_TYPE_LESTRING16:
+			magic_warn(ml, "unsupported type %s", ml->type_string);
+			return (-1);
+		}
+	}
+
+	/* Match only the fmtlen characters of the conversion itself. */
+	pmatch.rm_so = 0;
+	pmatch.rm_eo = fmtlen;
+	if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
+		magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
+		    (int)fmtlen, fmt);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Compute the strength value of a magic line from its type, the length of
+ * its test string and its test operator.
+ *
+ * Negated tests and tests with the 'x' operator get 1, lines of type none
+ * or default get 0.  Everything else starts at 20, gains an amount that
+ * grows with the number of bytes compared, and is then adjusted for the
+ * operator.  The result is never below 1 for those lines.
+ */
+static u_int
+magic_get_strength(struct magic_line *ml)
+{
+	int	n;
+	size_t	size;
+
+	if (ml->test_not || ml->test_operator == 'x')
+		return (1);
+
+	n = 20;
+	switch (ml->type) {
+	case MAGIC_TYPE_NONE:
+	case MAGIC_TYPE_DEFAULT:
+		return (0);
+	case MAGIC_TYPE_BYTE:
+	case MAGIC_TYPE_UBYTE:
+		n += 1 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case MAGIC_TYPE_SHORT:
+	case MAGIC_TYPE_USHORT:
+	case MAGIC_TYPE_BESHORT:
+	case MAGIC_TYPE_UBESHORT:
+	case MAGIC_TYPE_LESHORT:
+	case MAGIC_TYPE_ULESHORT:
+		n += 2 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case MAGIC_TYPE_LONG:
+	case MAGIC_TYPE_ULONG:
+	case MAGIC_TYPE_FLOAT:
+	case MAGIC_TYPE_DATE:
+	case MAGIC_TYPE_LDATE:
+	case MAGIC_TYPE_UDATE:
+	case MAGIC_TYPE_ULDATE:
+	case MAGIC_TYPE_BELONG:
+	case MAGIC_TYPE_UBELONG:
+	case MAGIC_TYPE_BEFLOAT:
+	case MAGIC_TYPE_BEDATE:
+	case MAGIC_TYPE_BELDATE:
+	case MAGIC_TYPE_UBEDATE:
+	case MAGIC_TYPE_UBELDATE:
+		n += 4 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	/*
+	 * NOTE(review): the little-endian and middle-endian long, float and
+	 * date types below share the 8x weight of the quad types, while
+	 * their native and big-endian counterparts get 4x above - confirm
+	 * whether that is intended.
+	 */
+	case MAGIC_TYPE_QUAD:
+	case MAGIC_TYPE_UQUAD:
+	case MAGIC_TYPE_DOUBLE:
+	case MAGIC_TYPE_QDATE:
+	case MAGIC_TYPE_QLDATE:
+	case MAGIC_TYPE_UQDATE:
+	case MAGIC_TYPE_UQLDATE:
+	case MAGIC_TYPE_BEQUAD:
+	case MAGIC_TYPE_UBEQUAD:
+	case MAGIC_TYPE_BEDOUBLE:
+	case MAGIC_TYPE_BEQDATE:
+	case MAGIC_TYPE_BEQLDATE:
+	case MAGIC_TYPE_UBEQDATE:
+	case MAGIC_TYPE_UBEQLDATE:
+	case MAGIC_TYPE_LEQUAD:
+	case MAGIC_TYPE_ULEQUAD:
+	case MAGIC_TYPE_LEDOUBLE:
+	case MAGIC_TYPE_LEQDATE:
+	case MAGIC_TYPE_LEQLDATE:
+	case MAGIC_TYPE_ULEQDATE:
+	case MAGIC_TYPE_ULEQLDATE:
+	case MAGIC_TYPE_LELONG:
+	case MAGIC_TYPE_ULELONG:
+	case MAGIC_TYPE_LEFLOAT:
+	case MAGIC_TYPE_LEDATE:
+	case MAGIC_TYPE_LELDATE:
+	case MAGIC_TYPE_ULEDATE:
+	case MAGIC_TYPE_ULELDATE:
+	case MAGIC_TYPE_MELONG:
+	case MAGIC_TYPE_MEDATE:
+	case MAGIC_TYPE_MELDATE:
+		n += 8 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case MAGIC_TYPE_STRING:
+	case MAGIC_TYPE_PSTRING:
+		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case MAGIC_TYPE_BESTRING16:
+	case MAGIC_TYPE_LESTRING16:
+		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
+		break;
+	case MAGIC_TYPE_REGEX:
+	case MAGIC_TYPE_SEARCH:
+		/*
+		 * An empty pattern contributes nothing and must not be used
+		 * as a divisor.
+		 */
+		if (ml->test_string_size == 0)
+			break;
+		size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
+		if (size < 1)
+			size = 1;
+		n += ml->test_string_size * size;
+		break;
+	}
+	switch (ml->test_operator) {
+	case '=':
+		n += MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case '<':
+	case '>':
+	case '[':
+	case ']':
+		n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case '^':
+	case '&':
+		n -= MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	}
+	return (n <= 0 ? 1 : n);
+}
+
+/*
+ * Copy the token at *line, up to the next white space or the end of the
+ * string, into out while decoding backslash escapes: "\ " (a space), one
+ * to three octal digits, "\x" with one or two hex digits, the usual C
+ * letter escapes, and any other character standing for itself.
+ *
+ * The decoded text is NUL-terminated and its length, not counting the
+ * terminator, is stored in *outlen.  *line is advanced to the first
+ * character after the token.  The output is never longer than the token,
+ * so out needs room for the token length plus one; no bound is checked
+ * here.
+ *
+ * Returns 0 on success and -1 on a malformed escape ("\x" without a hex
+ * digit, or a backslash at the very end of the input).
+ */
+static int
+magic_get_string(char **line, char *out, size_t *outlen)
+{
+	char	*start, *cp, c;
+	int	 d0, d1, d2;
+
+	start = out;
+	for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
+		if (*cp != '\\') {
+			*out++ = *cp;
+			continue;
+		}
+
+		switch (c = *++cp) {
+		case '\0':
+			/*
+			 * Backslash at the end of the input: there is nothing
+			 * to escape, and carrying on would move cp past the
+			 * terminator.
+			 */
+			return (-1);
+		case ' ':
+			*out++ = ' ';
+			break;
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+			/* Up to three octal digits, never reading past NUL. */
+			d0 = magic_odigit(cp[0]);
+			if (cp[0] != '\0')
+				d1 = magic_odigit(cp[1]);
+			else
+				d1 = -1;
+			if (cp[0] != '\0' && cp[1] != '\0')
+				d2 = magic_odigit(cp[2]);
+			else
+				d2 = -1;
+
+			if (d0 != -1 && d1 != -1 && d2 != -1) {
+				*out = d2 | (d1 << 3) | (d0 << 6);
+				cp += 2;
+			} else if (d0 != -1 && d1 != -1) {
+				*out = d1 | (d0 << 3);
+				cp++;
+			} else if (d0 != -1)
+				*out = d0;
+			else
+				return (-1);
+			out++;
+			break;
+		case 'x':
+			/* One or two hex digits after the 'x'. */
+			d0 = magic_xdigit(cp[1]);
+			if (cp[1] != '\0')
+				d1 = magic_xdigit(cp[2]);
+			else
+				d1 = -1;
+
+			if (d0 != -1 && d1 != -1) {
+				*out = d1 | (d0 << 4);
+				cp += 2;
+			} else if (d0 != -1) {
+				*out = d0;
+				cp++;
+			} else
+				return (-1);
+			out++;
+
+			break;
+		case 'a':
+			*out++ = '\a';
+			break;
+		case 'b':
+			*out++ = '\b';
+			break;
+		case 't':
+			*out++ = '\t';
+			break;
+		case 'f':
+			*out++ = '\f';
+			break;
+		case 'n':
+			*out++ = '\n';
+			break;
+		case 'r':
+			*out++ = '\r';
+			break;
+		case '\\':
+			*out++ = '\\';
+			break;
+		case '\'':
+			*out++ = '\'';
+			break;
+		case '\"':
+			*out++ = '\"';
+			break;
+		default:
+			*out++ = c;
+			break;
+		}
+	}
+	*out = '\0';
+	*outlen = out - start;
+
+	*line = cp;
+	return (0);
+}
+
+/*
+ * Parse the offset field of a magic line and advance *line past it.
+ *
+ * Two forms are accepted:
+ *	[&]N				direct offset; '&' sets
+ *					offset_relative, and only then may
+ *					N be negative
+ *	[&]([&]N[.T][OP[V|(V)]])	indirect offset; T is one of
+ *					"bslBSL", OP one of "+-*" and V the
+ *					operand
+ *
+ * The parts are stored in the offset and indirect_* members of ml.
+ * Returns 0 on success and -1, after a warning, on a syntax error.
+ */
+static int
+magic_parse_offset(struct magic_line *ml, char **line)
+{
+	char	*copy, *s, *cp, *endptr;
+
+	/* Work on a private, NUL-terminated copy of the field. */
+	while (isspace((u_char)**line))
+		(*line)++;
+	copy = s = cp = xmalloc(strlen(*line) + 1);
+	while (**line != '\0' && !isspace((u_char)**line))
+		*cp++ = *(*line)++;
+	*cp = '\0';
+
+	ml->offset = 0;
+	ml->offset_relative = 0;
+
+	ml->indirect_type = ' ';
+	ml->indirect_relative = 0;
+	ml->indirect_offset = 0;
+	ml->indirect_operator = ' ';
+	ml->indirect_operand = 0;
+
+	if (*s == '&') {
+		ml->offset_relative = 1;
+		s++;
+	}
+
+	if (*s != '(') {
+		/* Direct offset: the number must be the whole field. */
+		endptr = magic_strtoll(s, &ml->offset);
+		if (endptr == NULL || *endptr != '\0') {
+			magic_warn(ml, "can't parse offset");
+			goto fail;
+		}
+		if (ml->offset < 0 && !ml->offset_relative) {
+			magic_warn(ml, "negative absolute offset");
+			goto fail;
+		}
+		goto done;
+	}
+	s++;
+
+	if (*s == '&') {
+		ml->indirect_relative = 1;
+		s++;
+	}
+
+	endptr = magic_strtoll(s, &ml->indirect_offset);
+	if (endptr == NULL) {
+		magic_warn(ml, "can't parse offset");
+		goto fail;
+	}
+	s = endptr;
+	if (*s == ')')
+		goto done;
+
+	if (*s == '.') {
+		s++;
+		/*
+		 * strchr() also finds the terminating NUL of its first
+		 * argument, so the end of the field has to be rejected
+		 * explicitly or s would be moved past the buffer.
+		 */
+		if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
+			magic_warn(ml, "unknown offset type");
+			goto fail;
+		}
+		ml->indirect_type = *s;
+		s++;
+		if (*s == ')')
+			goto done;
+	}
+
+	/* Same NUL caveat as for the type letter above. */
+	if (*s == '\0' || strchr("+-*", *s) == NULL) {
+		magic_warn(ml, "unknown offset operator");
+		goto fail;
+	}
+	ml->indirect_operator = *s;
+	s++;
+	if (*s == ')')
+		goto done;
+
+	if (*s == '(') {
+		/* Bracketed operand: needs its own ')' and the outer one. */
+		s++;
+		endptr = magic_strtoll(s, &ml->indirect_operand);
+		if (endptr == NULL || *endptr != ')') {
+			magic_warn(ml, "missing closing bracket");
+			goto fail;
+		}
+		if (*++endptr != ')') {
+			magic_warn(ml, "missing closing bracket");
+			goto fail;
+		}
+	} else {
+		endptr = magic_strtoll(s, &ml->indirect_operand);
+		if (endptr == NULL || *endptr != ')') {
+			magic_warn(ml, "missing closing bracket");
+			goto fail;
+		}
+	}
+
+done:
+	free(copy);
+	return (0);
+
+fail:
+	free(copy);
+	return (-1);
+}
+
+static int
+magic_parse_type(struct magic_line *ml, char **line)
+{
+ char *copy, *s, *cp, *endptr;
+
+ while (isspace((u_char)**line))
+ (*line)++;
+ copy = s = cp = xmalloc(strlen(*line) + 1);
+ while (**line != '\0' && !isspace((u_char)**line))
+ *cp++ = *(*line)++;
+ *cp = '\0';
+
+ ml->type = MAGIC_TYPE_NONE;
+ ml->type_string = xstrdup(s);
+
+ ml->type_operator = ' ';
+ ml->type_operand = 0;
+
+ if (strncmp(s, "string", (sizeof "string") - 1) == 0) {
+ ml->type = MAGIC_TYPE_STRING;
+ magic_mark_text(ml, 0);
+ goto done;
+ }
+ if (strncmp(s, "search", (sizeof "search") - 1) == 0) {
+ ml->type = MAGIC_TYPE_SEARCH;
+ goto done;
+ }
+ if (strncmp(s, "regex", (sizeof "regex") - 1) == 0) {
+ ml->type = MAGIC_TYPE_REGEX;
+ goto done;
+ }
+
+ cp = &s[strcspn(s, "-&")];
+ if (*cp != '\0') {
+ ml->type_operator = *cp;
+ endptr = magic_strtoull(cp + 1, &ml->type_operand);
+ if (endptr == NULL || *endptr != '\0') {
+ magic_warn(ml, "can't parse operand");
+ goto fail;
+ }
+ *cp = '\0';
+ }
+
+ if (strcmp(s, "byte") == 0)
+ ml->type = MAGIC_TYPE_BYTE;
+ else if (strcmp(s, "short") == 0)
+ ml->type = MAGIC_TYPE_SHORT;
+ else if (strcmp(s, "long") == 0)
+ ml->type = MAGIC_TYPE_LONG;
+ else if (strcmp(s, "quad") == 0)
+ ml->type = MAGIC_TYPE_QUAD;
+ else if (strcmp(s, "ubyte") == 0)
+ ml->type = MAGIC_TYPE_UBYTE;
+ else if (strcmp(s, "ushort") == 0)
+ ml->type = MAGIC_TYPE_USHORT;
+ else if (strcmp(s, "ulong") == 0)
+ ml->type = MAGIC_TYPE_ULONG;
+ else if (strcmp(s, "uquad") == 0)
+ ml->type = MAGIC_TYPE_UQUAD;
+ else if (strcmp(s, "float") == 0)
+ ml->type = MAGIC_TYPE_FLOAT;
+ else if (strcmp(s, "double") == 0)
+ ml->type = MAGIC_TYPE_DOUBLE;
+ else if (strcmp(s, "pstring") == 0)
+ ml->type = MAGIC_TYPE_PSTRING;
+ else if (strcmp(s, "date") == 0)
+ ml->type = MAGIC_TYPE_DATE;
+ else if (strcmp(s, "qdate") == 0)
+ ml->type = MAGIC_TYPE_QDATE;
+ else if (strcmp(s, "ldate") == 0)
+ ml->type = MAGIC_TYPE_LDATE;
+ else if (strcmp(s, "qldate") == 0)
+ ml->type = MAGIC_TYPE_QLDATE;
+ else if (strcmp(s, "udate") == 0)
+ ml->type = MAGIC_TYPE_UDATE;
+ else if (strcmp(s, "uqdate") == 0)
+ ml->type = MAGIC_TYPE_UQDATE;
+ else if (strcmp(s, "uldate") == 0)
+ ml->type = MAGIC_TYPE_ULDATE;
+ else if (strcmp(s, "uqldate") == 0)
+ ml->type = MAGIC_TYPE_UQLDATE;
+ else if (strcmp(s, "beshort") == 0)
+ ml->type = MAGIC_TYPE_BESHORT;
+ else if (strcmp(s, "belong") == 0)
+ ml->type = MAGIC_TYPE_BELONG;
+ else if (strcmp(s, "bequad") == 0)
+ ml->type = MAGIC_TYPE_BEQUAD;
+ else if (strcmp(s, "ubeshort") == 0)
+ ml->type = MAGIC_TYPE_UBESHORT;
+ else if (strcmp(s, "ubelong") == 0)
+ ml->type = MAGIC_TYPE_UBELONG;
+ else if (strcmp(s, "ubequad") == 0)
+ ml->type = MAGIC_TYPE_UBEQUAD;
+ else if (strcmp(s, "befloat") == 0)
+ ml->type = MAGIC_TYPE_BEFLOAT;
+ else if (strcmp(s, "bedouble") == 0)
+ ml->type = MAGIC_TYPE_BEDOUBLE;
+ else if (strcmp(s, "bedate") == 0)
+ ml->type = MAGIC_TYPE_BEDATE;
+ else if (strcmp(s, "beqdate") == 0)
+ ml->type = MAGIC_TYPE_BEQDATE;
+ else if (strcmp(s, "beldate") == 0)
+ ml->type = MAGIC_TYPE_BELDATE;
+ else if (strcmp(s, "beqldate") == 0)
+ ml->type = MAGIC_TYPE_BEQLDATE;
+ else if (strcmp(s, "ubedate") == 0)
+ ml->type = MAGIC_TYPE_UBEDATE;
+ else if (strcmp(s, "ubeqdate") == 0)
+ ml->type = MAGIC_TYPE_UBEQDATE;
+ else if (strcmp(s, "ubeldate") == 0)
+ ml->type = MAGIC_TYPE_UBELDATE;
+ else if (strcmp(s, "ubeqldate") == 0)
+ ml->type = MAGIC_TYPE_UBEQLDATE;
+ else if (strcmp(s, "bestring16") == 0)
+ ml->type = MAGIC_TYPE_BESTRING16;
+ else if (strcmp(s, "leshort") == 0)
+ ml->type = MAGIC_TYPE_LESHORT;
+ else if (strcmp(s, "lelong") == 0)
+ ml->type = MAGIC_TYPE_LELONG;
+ else if (strcmp(s, "lequad") == 0)
+ ml->type = MAGIC_TYPE_LEQUAD;
+ else if (strcmp(s, "uleshort") == 0)
+ ml->type = MAGIC_TYPE_ULESHORT;
+ else if (strcmp(s, "ulelong") == 0)
+ ml->type = MAGIC_TYPE_ULELONG;
+ else if (strcmp(s, "ulequad") == 0)
+ ml->type = MAGIC_TYPE_ULEQUAD;
+ else if (strcmp(s, "lefloat") == 0)
+ ml->type = MAGIC_TYPE_LEFLOAT;
+ else if (strcmp(s, "ledouble") == 0)
+ ml->type = MAGIC_TYPE_LEDOUBLE;
+ else if (strcmp(s, "ledate") == 0)
+ ml->type = MAGIC_TYPE_LEDATE;
+ else if (strcmp(s, "leqdate") == 0)
+ ml->type = MAGIC_TYPE_LEQDATE;
+ else if (strcmp(s, "leldate") == 0)
+ ml->type = MAGIC_TYPE_LELDATE;
+ else if (strcmp(s, "leqldate") == 0)
+ ml->type = MAGIC_TYPE_LEQLDATE;
+ else if (strcmp(s, "uledate") == 0)
+ ml->type = MAGIC_TYPE_ULEDATE;
+ else if (strcmp(s, "uleqdate") == 0)
+ ml->type = MAGIC_TYPE_ULEQDATE;
+ else if (strcmp(s, "uleldate") == 0)
+ ml->type = MAGIC_TYPE_ULELDATE;
+ else if (strcmp(s, "uleqldate") == 0)
+ ml->type = MAGIC_TYPE_ULEQLDATE;
+ else if (strcmp(s, "lestring16") == 0)
+ ml->type = MAGIC_TYPE_LESTRING16;
+ else if (strcmp(s, "melong") == 0)
+ ml->type = MAGIC_TYPE_MELONG;
+ else if (strcmp(s, "medate") == 0)
+ ml->type = MAGIC_TYPE_MEDATE;
+ else if (strcmp(s, "meldate") == 0)
+ ml->type = MAGIC_TYPE_MELDATE;
+ else if (strcmp(s, "default") == 0)
+ ml->type = MAGIC_TYPE_DEFAULT;
+ else {
+ magic_warn(ml, "unknown type");
+ goto fail;
+ }
+ magic_mark_text(ml, 0);
+
+done:
+ free(copy);
+ return (0);
+
+fail:
+ free(copy);
+ return (-1);
+}
+
+static int /*
+ * Parse the test part of a magic line from *line into ml, advancing *line
+ * past what was consumed. Returns 0 on success, or -1 after a warning if the
+ * string or number cannot be parsed.
+ *
+ * A lone "x" sets test_operator to 'x'; a leading '!' sets test_not. For
+ * string, pstring, search and regex types the text is stored in test_string;
+ * for every other type a number is stored in test_unsigned or test_signed.
+ */
+magic_parse_value(struct magic_line *ml, char **line)
+{
+ char *copy, *s, *cp, *endptr;
+ size_t slen;
+
+ while (isspace((u_char)**line))
+ (*line)++;
+
+ ml->test_operator = '='; /* equality unless an operator is given below */
+ ml->test_not = 0;
+ ml->test_string = NULL;
+ ml->test_string_size = 0;
+ ml->test_unsigned = 0;
+ ml->test_signed = 0;
+
+ s = *line;
+ if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
+ (*line)++;
+ ml->test_operator = 'x';
+ return (0);
+ }
+
+ if (**line == '!') {
+ ml->test_not = 1;
+ (*line)++;
+ }
+
+ switch (ml->type) {
+ case MAGIC_TYPE_STRING:
+ case MAGIC_TYPE_PSTRING:
+ case MAGIC_TYPE_SEARCH:
+ if (**line == '>' || **line == '<' || **line == '=') {
+ ml->test_operator = **line;
+ (*line)++;
+ }
+ /* FALLTHROUGH */
+ case MAGIC_TYPE_REGEX: /* regex takes no operator prefix */
+ copy = s = xmalloc(strlen(*line) + 1);
+ if (magic_get_string(line, s, &slen) != 0) {
+ magic_warn(ml, "can't parse string");
+ goto fail;
+ }
+ ml->test_string_size = slen;
+ ml->test_string = s;
+ return (0); /* do not free: ml->test_string now points at the buffer */
+ default:
+ break;
+ }
+
+ copy = s = cp = xmalloc(strlen(*line) + 1);
+ if ((*line)[0] == '=' && (*line)[1] == ' ') {
+ /*
+ * Extra spaces such as "byte&7 = 0" are accepted, which is
+ * annoying. But it seems to be only for =, so special case it.
+ */
+ *cp++ = '=';
+ (*line) += 2;
+ }
+ while (**line != '\0' && !isspace((u_char)**line))
+ *cp++ = *(*line)++;
+ *cp = '\0';
+
+ if (*s == '\0') /* no value given: keep the defaults set above */
+ goto done;
+
+ if (s[0] == '<' && s[1] == '=') {
+ ml->test_operator = '['; /* '[' stands for "<=" */
+ s += 2;
+ } else if (s[0] == '>' && s[1] == '=') {
+ ml->test_operator = ']'; /* ']' stands for ">=" */
+ s += 2;
+ } else if (strchr("=<>&^", *s) != NULL) {
+ ml->test_operator = *s;
+ s++;
+ }
+
+ if (*ml->type_string == 'u') /* type names starting with 'u' parse as unsigned */
+ endptr = magic_strtoull(s, &ml->test_unsigned);
+ else
+ endptr = magic_strtoll(s, &ml->test_signed);
+ if (endptr == NULL || *endptr != '\0') {
+ magic_warn(ml, "can't parse number");
+ goto fail;
+ }
+
+done:
+ free(copy);
+ return (0);
+
+fail:
+ free(copy);
+ return (-1);
+}
+
+/*
+ * Release a magic line together with the strings it owns: the type name,
+ * the test string (handed over by magic_parse_value() for string-like
+ * types), the MIME type and the result text. Fields that were never set are
+ * NULL, which free(3) ignores.
+ */
+static void
+magic_free_line(struct magic_line *ml)
+{
+	free((void *)ml->type_string);
+	free((void *)ml->test_string);
+
+	free((void *)ml->mimetype);
+	free((void *)ml->result);
+
+	free(ml);
+}
+
+/*
+ * Ordering function for the tree of magic lines: a line with greater
+ * strength sorts before a weaker one, and equally strong lines are ordered
+ * by ascending line number.
+ */
+int
+magic_compare(struct magic_line *ml1, struct magic_line *ml2)
+{
+	if (ml1->strength > ml2->strength)
+		return (-1);
+	if (ml1->strength < ml2->strength)
+		return (1);
+
+	/*
+	 * The original file relies on the (undefined!) qsort(3) ordering of
+	 * equal-strength entries. An RB tree cannot reproduce that, so fall
+	 * back to the line number and hope for the best.
+	 */
+	return ((ml1->line > ml2->line) - (ml1->line < ml2->line));
+}
+RB_GENERATE(magic_tree, magic_line, node, magic_compare);
+
+/*
+ * Handle a "!:mime" line: take the MIME type following the keyword, strip
+ * any '#' comment and trailing whitespace, validate it and attach a copy to
+ * ml. An invalid type, or one with no line to attach to (ml == NULL), is
+ * reported on stderr and ignored. The text at line is modified in place.
+ */
+static void
+magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
+{
+	char	*mimetype, *cp;
+
+	mimetype = line + (sizeof "!:mime") - 1;
+	while (isspace((u_char)*mimetype))
+		mimetype++;
+
+	cp = strchr(mimetype, '#');
+	if (cp != NULL)
+		*cp = '\0';
+
+	if (*mimetype != '\0') {
+		cp = mimetype + strlen(mimetype) - 1;
+		while (cp != mimetype && isspace((u_char)*cp))
+			*cp-- = '\0';
+	}
+
+	/* Only alphanumerics and the characters "/-.+" are accepted. */
+	cp = mimetype;
+	while (*cp != '\0') {
+		if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
+			break;
+		cp++;
+	}
+	if (*mimetype == '\0' || *cp != '\0') {
+		fprintf(stderr, "%s:%u: invalid MIME type: %s\n", m->path, at,
+		    mimetype);
+		return;
+	}
+	if (ml == NULL) {
+		fprintf(stderr, "%s:%u: stray MIME type: %s\n", m->path, at,
+		    mimetype);
+		return;
+	}
+
+	/* A repeated "!:mime" replaces the earlier value; do not leak it. */
+	free((void *)ml->mimetype);
+	ml->mimetype = xstrdup(mimetype);
+}
+
+/*
+ * Read a magic file from f and build the in-memory description.
+ *
+ * Blank lines and lines starting with '#' are skipped, "!:mime" lines set
+ * the MIME type of the most recently parsed line, and every other line is
+ * parsed into a struct magic_line. The number of leading '>' characters
+ * gives the nesting level: level 0 lines go into the tree, deeper lines are
+ * appended to their parent's list of children. Lines that fail to parse are
+ * dropped.
+ *
+ * path is copied and used in diagnostics. f is closed before returning.
+ */
+struct magic *
+magic_load(FILE *f, const char *path, int warnings)
+{
+	struct magic		*m;
+	struct magic_line	*ml = NULL, *parent, *parent0;
+	char			*line, *tmp;
+	size_t			 size;
+	u_int			 at, level, n, i;
+
+	m = xcalloc(1, sizeof *m);
+	m->path = xstrdup(path);
+	m->warnings = warnings;
+	RB_INIT(&m->tree);
+
+	parent = NULL;
+	parent0 = NULL;
+	level = 0;
+
+	at = 0;
+	tmp = NULL;
+	while ((line = fgetln(f, &size))) {
+		if (line[size - 1] == '\n')
+			line[size - 1] = '\0';
+		else {
+			/* No trailing newline: copy to add a terminator. */
+			tmp = xmalloc(size + 1);
+			memcpy(tmp, line, size);
+			tmp[size] = '\0';
+			line = tmp;
+		}
+		at++;
+
+		while (isspace((u_char)*line))
+			line++;
+		if (*line == '\0' || *line == '#')
+			continue;
+
+		if (strncmp (line, "!:mime", (sizeof "!:mime") - 1) == 0) {
+			magic_set_mimetype(m, at, ml, line);
+			continue;
+		}
+
+		n = 0;
+		for (; *line == '>'; line++)
+			n++;
+
+		ml = xcalloc(1, sizeof *ml);
+		ml->root = m;
+		ml->line = at;
+		ml->type = MAGIC_TYPE_NONE;
+		TAILQ_INIT(&ml->children);
+		ml->text = 1;
+
+		if (n == level + 1) {
+			/* One level deeper: child of the last good line. */
+			parent = parent0;
+		} else if (n < level) {
+			/* Shallower: walk back up to the right ancestor. */
+			for (i = n; i < level && parent != NULL; i++)
+				parent = parent->parent;
+		} else if (n != level) {
+			magic_warn(ml, "level skipped (%u->%u)", level, n);
+			free(ml);
+			/*
+			 * Do not leave ml dangling: a following "!:mime"
+			 * line would otherwise write through freed memory.
+			 */
+			ml = NULL;
+			continue;
+		}
+		ml->parent = parent;
+		level = n;
+
+		if (magic_parse_offset(ml, &line) != 0 ||
+		    magic_parse_type(ml, &line) != 0 ||
+		    magic_parse_value(ml, &line) != 0 ||
+		    magic_set_result(ml, line) != 0) {
+			magic_free_line(ml);
+			ml = NULL;
+			continue;
+		}
+
+		ml->strength = magic_get_strength(ml);
+		if (ml->parent == NULL)
+			RB_INSERT(magic_tree, &m->tree, ml);
+		else
+			TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
+		parent0 = ml;
+	}
+	free(tmp);
+
+	fclose(f);
+	return (m);
+}
diff --git a/usr.bin/file/magic-test.c b/usr.bin/file/magic-test.c
new file mode 100644
index 00000000000..2c33b7f5545
--- /dev/null
+++ b/usr.bin/file/magic-test.c
@@ -0,0 +1,1121 @@
+/* $OpenBSD: magic-test.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <vis.h>
+
+#include "magic.h"
+#include "xmalloc.h"
+
+/*
+ * Return 1 if the two characters are equal, or if cflag is set and they are
+ * equal after conversion to lower case; otherwise return 0.
+ */
+static int
+magic_one_eq(char a, char b, int cflag)
+{
+	if (a != b) {
+		if (!cflag)
+			return (0);
+		return (tolower((u_char)a) == tolower((u_char)b));
+	}
+	return (1);
+}
+
+static int /*
+ * Compare the asize bytes at ap with the bsize bytes at bp, stopping as soon
+ * as either side is exhausted. Returns 0 if no difference was found by then,
+ * -1 if the first differing byte of ap is less than that of bp, and 1
+ * otherwise.
+ *
+ * cflag also accepts bytes that are equal after tolower(). bflag skips a
+ * whitespace byte in bp when it does not match ap. Bflag compares runs of
+ * whitespace: when ap is at whitespace, the runs in both inputs are consumed
+ * and the comparison fails with 1 unless bp's run is at least as long.
+ */
+magic_test_eq(const char *ap, size_t asize, const char *bp, size_t bsize,
+ int cflag, int bflag, int Bflag)
+{
+ size_t aoff, boff, aspaces, bspaces;
+
+ aoff = boff = 0;
+ while (aoff != asize && boff != bsize) {
+ if (Bflag && isspace((u_char)ap[aoff])) {
+ aspaces = 0; /* length of the whitespace run in ap */
+ while (aoff != asize && isspace((u_char)ap[aoff])) {
+ aspaces++;
+ aoff++;
+ }
+ bspaces = 0; /* length of the whitespace run in bp */
+ while (boff != bsize && isspace((u_char)bp[boff])) {
+ bspaces++;
+ boff++;
+ }
+ if (bspaces >= aspaces)
+ continue;
+ return (1);
+ }
+ if (magic_one_eq(ap[aoff], bp[boff], cflag)) {
+ aoff++;
+ boff++;
+ continue;
+ }
+ if (bflag && isspace((u_char)bp[boff])) {
+ boff++; /* skip the unmatched whitespace in bp and retry */
+ continue;
+ }
+ if (ap[aoff] < bp[boff]) /* compared as plain char */
+ return (-1);
+ return (1);
+ }
+ return (0); /* one side ran out without a mismatch */
+}
+
+/*
+ * Copy size bytes of the data under test into dst. A negative offset means
+ * "use the current offset". Returns 0 on success and -1, copying nothing,
+ * if the requested range extends past the end of the data.
+ */
+static int
+magic_copy_from(struct magic_state *ms, ssize_t offset, void *dst, size_t size)
+{
+	ssize_t	from = offset;
+
+	if (from < 0)
+		from = ms->offset;
+	if (from + size <= ms->size) {
+		memcpy(dst, ms->base + from, size);
+		return (0);
+	}
+	return (-1);
+}
+
+/*
+ * Format the result text of ml and append it to the output buffer.
+ *
+ * When ml->stringify is set, the arguments are first formatted with fmt and
+ * the resulting string is then substituted into ml->result; otherwise the
+ * arguments are formatted with ml->result directly. A result starting with
+ * a literal "\b" is appended without a separating space. Nothing is added
+ * if formatting fails.
+ */
+static void
+magic_add_result(struct magic_state *ms, struct magic_line *ml,
+    const char *fmt, ...)
+{
+	va_list	 ap;
+	char	*text, *piece;
+	int	 n;
+
+	va_start(ap, fmt);
+	if (!ml->stringify) {
+		n = vasprintf(&text, ml->result, ap);
+		va_end(ap);
+		if (n == -1)
+			return;
+	} else {
+		n = vasprintf(&piece, fmt, ap);
+		va_end(ap);
+		if (n == -1)
+			return;
+		n = asprintf(&text, ml->result, piece);
+		free(piece);
+		if (n == -1)
+			return;
+	}
+
+	piece = text;
+	if (text[0] == '\\' && text[1] == 'b')
+		piece = text + 2;	/* "\b": join to the previous text */
+	else if (*ms->out != '\0')
+		strlcat(ms->out, " ", sizeof ms->out);
+	strlcat(ms->out, piece, sizeof ms->out);
+
+	free(text);
+}
+
+/*
+ * Add a string taken from the data to the output. At most
+ * MAGIC_STRING_SIZE bytes of s are used, cut short at the first NUL or
+ * non-printable byte, and the text is passed through strvisx(3) before being
+ * handed to magic_add_result().
+ */
+static void
+magic_add_string(struct magic_state *ms, struct magic_line *ml,
+    const char* s, size_t slen)
+{
+	char	*encoded;
+	size_t	 limit, n;
+
+	limit = MAGIC_STRING_SIZE;
+	if (slen < limit)
+		limit = slen;
+
+	n = 0;
+	while (n < limit && s[n] != '\0' && isprint((u_char)s[n]))
+		n++;
+
+	/* Up to four output bytes per input byte, plus the terminator. */
+	encoded = xreallocarray(NULL, 4, n + 1);
+	strvisx(encoded, s, n, VIS_TAB|VIS_NL|VIS_CSTYLE|VIS_OCTAL);
+	magic_add_result(ms, ml, "%s", encoded);
+	free(encoded);
+}
+
+/*
+ * Apply the test operator of ml to a signed value. Returns 1 if the test
+ * holds, 0 if it does not, and -1 if the operator is not recognised.
+ * '[' and ']' stand for "<=" and ">="; 'x' always holds.
+ */
+static int
+magic_test_signed(struct magic_line *ml, int64_t value, int64_t wanted)
+{
+	int	verdict = -1;
+
+	switch (ml->test_operator) {
+	case 'x':
+		verdict = 1;
+		break;
+	case '=':
+		verdict = (value == wanted);
+		break;
+	case '<':
+		verdict = (value < wanted);
+		break;
+	case '>':
+		verdict = (value > wanted);
+		break;
+	case '[':
+		verdict = (value <= wanted);
+		break;
+	case ']':
+		verdict = (value >= wanted);
+		break;
+	case '&':
+		/* All wanted bits set. */
+		verdict = ((value & wanted) == wanted);
+		break;
+	case '^':
+		/* All wanted bits clear. */
+		verdict = ((~value & wanted) == wanted);
+		break;
+	}
+	return (verdict);
+}
+
+/*
+ * Apply the test operator of ml to an unsigned value. Returns 1 if the test
+ * holds, 0 if it does not, and -1 if the operator is not recognised.
+ * '[' and ']' stand for "<=" and ">="; 'x' always holds.
+ */
+static int
+magic_test_unsigned(struct magic_line *ml, uint64_t value, uint64_t wanted)
+{
+	int	verdict = -1;
+
+	switch (ml->test_operator) {
+	case 'x':
+		verdict = 1;
+		break;
+	case '=':
+		verdict = (value == wanted);
+		break;
+	case '<':
+		verdict = (value < wanted);
+		break;
+	case '>':
+		verdict = (value > wanted);
+		break;
+	case '[':
+		verdict = (value <= wanted);
+		break;
+	case ']':
+		verdict = (value >= wanted);
+		break;
+	case '&':
+		/* All wanted bits set. */
+		verdict = ((value & wanted) == wanted);
+		break;
+	case '^':
+		/* All wanted bits clear. */
+		verdict = ((~value & wanted) == wanted);
+		break;
+	}
+	return (verdict);
+}
+
+static int /*
+ * Test function for MAGIC_TYPE_NONE: ignores its arguments and always
+ * returns 0.
+ */
+magic_test_type_none(__unused struct magic_line *ml,
+ __unused struct magic_state *ms)
+{
+ return (0);
+}
+
+/*
+ * Test a signed 8-bit value at the current offset. Returns 0 if it cannot
+ * be read, -1 if the type operator is not supported, otherwise the result
+ * of the comparison. When the outcome is accepted and the line has result
+ * text, the value is added to the output and the offset moves past it.
+ */
+static int
+magic_test_type_byte(struct magic_line *ml, struct magic_state *ms)
+{
+	int8_t	v;
+	int	matched;
+
+	if (magic_copy_from(ms, -1, &v, sizeof v) != 0)
+		return (0);
+
+	switch (ml->type_operator) {
+	case '&':
+		v &= (int8_t)ml->type_operand;
+		break;
+	case ' ':
+		break;
+	default:
+		return (-1);
+	}
+
+	matched = magic_test_signed(ml, v, (int8_t)ml->test_signed);
+	if (ml->result != NULL && matched == !ml->test_not) {
+		magic_add_result(ms, ml, "%c", (int)v);
+		ms->offset += sizeof v;
+	}
+	return (matched);
+}
+
+/*
+ * Test a signed 16-bit value at the current offset, converting from big- or
+ * little-endian for the beshort and leshort types. Returns 0 if it cannot be
+ * read, -1 if the type operator is not supported, otherwise the result of
+ * the comparison. When the outcome is accepted and the line has result
+ * text, the value is added to the output and the offset moves past it.
+ */
+static int
+magic_test_type_short(struct magic_line *ml, struct magic_state *ms)
+{
+	int16_t	v;
+	int	matched;
+
+	if (magic_copy_from(ms, -1, &v, sizeof v) != 0)
+		return (0);
+
+	switch (ml->type) {
+	case MAGIC_TYPE_BESHORT:
+		v = betoh16(v);
+		break;
+	case MAGIC_TYPE_LESHORT:
+		v = letoh16(v);
+		break;
+	default:
+		break;
+	}
+
+	switch (ml->type_operator) {
+	case '&':
+		v &= (int16_t)ml->type_operand;
+		break;
+	case ' ':
+		break;
+	default:
+		return (-1);
+	}
+
+	matched = magic_test_signed(ml, v, (int16_t)ml->test_signed);
+	if (ml->result != NULL && matched == !ml->test_not) {
+		magic_add_result(ms, ml, "%hd", (int)v);
+		ms->offset += sizeof v;
+	}
+	return (matched);
+}
+
+/*
+ * Test a signed 32-bit value at the current offset, converting from big- or
+ * little-endian for the belong and lelong types. Returns 0 if it cannot be
+ * read, -1 if the type operator is not supported, otherwise the result of
+ * the comparison. When the outcome is accepted and the line has result
+ * text, the value is added to the output and the offset moves past it.
+ */
+static int
+magic_test_type_long(struct magic_line *ml, struct magic_state *ms)
+{
+	int32_t	v;
+	int	matched;
+
+	if (magic_copy_from(ms, -1, &v, sizeof v) != 0)
+		return (0);
+
+	switch (ml->type) {
+	case MAGIC_TYPE_BELONG:
+		v = betoh32(v);
+		break;
+	case MAGIC_TYPE_LELONG:
+		v = letoh32(v);
+		break;
+	default:
+		break;
+	}
+
+	switch (ml->type_operator) {
+	case '&':
+		v &= (int32_t)ml->type_operand;
+		break;
+	case ' ':
+		break;
+	default:
+		return (-1);
+	}
+
+	matched = magic_test_signed(ml, v, (int32_t)ml->test_signed);
+	if (ml->result != NULL && matched == !ml->test_not) {
+		magic_add_result(ms, ml, "%d", (int)v);
+		ms->offset += sizeof v;
+	}
+	return (matched);
+}
+
+/*
+ * Test a signed 64-bit value at the current offset, converting from big- or
+ * little-endian for the bequad and lequad types. Returns 0 if it cannot be
+ * read, -1 if the type operator is not supported, otherwise the result of
+ * the comparison. When the outcome is accepted and the line has result
+ * text, the value is added to the output and the offset moves past it.
+ */
+static int
+magic_test_type_quad(struct magic_line *ml, struct magic_state *ms)
+{
+	int64_t	v;
+	int	matched;
+
+	if (magic_copy_from(ms, -1, &v, sizeof v) != 0)
+		return (0);
+
+	switch (ml->type) {
+	case MAGIC_TYPE_BEQUAD:
+		v = betoh64(v);
+		break;
+	case MAGIC_TYPE_LEQUAD:
+		v = letoh64(v);
+		break;
+	default:
+		break;
+	}
+
+	switch (ml->type_operator) {
+	case '&':
+		v &= (int64_t)ml->type_operand;
+		break;
+	case ' ':
+		break;
+	default:
+		return (-1);
+	}
+
+	matched = magic_test_signed(ml, v, (int64_t)ml->test_signed);
+	if (ml->result != NULL && matched == !ml->test_not) {
+		magic_add_result(ms, ml, "%lld", (long long)v);
+		ms->offset += sizeof v;
+	}
+	return (matched);
+}
+
+/*
+ * Test an unsigned 8-bit value at the current offset. Returns 0 if it
+ * cannot be read, -1 if the type operator is not supported, otherwise the
+ * result of the comparison. When the outcome is accepted and the line has
+ * result text, the value is added to the output and the offset moves past
+ * it.
+ */
+static int
+magic_test_type_ubyte(struct magic_line *ml, struct magic_state *ms)
+{
+	uint8_t	v;
+	int	matched;
+
+	if (magic_copy_from(ms, -1, &v, sizeof v) != 0)
+		return (0);
+
+	switch (ml->type_operator) {
+	case '&':
+		v &= (uint8_t)ml->type_operand;
+		break;
+	case ' ':
+		break;
+	default:
+		return (-1);
+	}
+
+	matched = magic_test_unsigned(ml, v, (uint8_t)ml->test_unsigned);
+	if (ml->result != NULL && matched == !ml->test_not) {
+		magic_add_result(ms, ml, "%c", (unsigned int)v);
+		ms->offset += sizeof v;
+	}
+	return (matched);
+}
+
+/*
+ * Test an unsigned 16-bit value at the current offset, converting from big-
+ * or little-endian for the ubeshort and uleshort types. Returns 0 if it
+ * cannot be read, -1 if the type operator is not supported, otherwise the
+ * result of the comparison. When the outcome is accepted and the line has
+ * result text, the value is added to the output and the offset moves past
+ * it.
+ */
+static int
+magic_test_type_ushort(struct magic_line *ml, struct magic_state *ms)
+{
+	uint16_t	v;
+	int		matched;
+
+	if (magic_copy_from(ms, -1, &v, sizeof v) != 0)
+		return (0);
+
+	switch (ml->type) {
+	case MAGIC_TYPE_UBESHORT:
+		v = betoh16(v);
+		break;
+	case MAGIC_TYPE_ULESHORT:
+		v = letoh16(v);
+		break;
+	default:
+		break;
+	}
+
+	switch (ml->type_operator) {
+	case '&':
+		v &= (uint16_t)ml->type_operand;
+		break;
+	case ' ':
+		break;
+	default:
+		return (-1);
+	}
+
+	matched = magic_test_unsigned(ml, v, (uint16_t)ml->test_unsigned);
+	if (ml->result != NULL && matched == !ml->test_not) {
+		magic_add_result(ms, ml, "%hu", (unsigned int)v);
+		ms->offset += sizeof v;
+	}
+	return (matched);
+}
+
+/*
+ * Test an unsigned 32-bit value at the current offset, converting from big-
+ * or little-endian for the ubelong and ulelong types. Returns 0 if it
+ * cannot be read, -1 if the type operator is not supported, otherwise the
+ * result of the comparison. When the outcome is accepted and the line has
+ * result text, the value is added to the output and the offset moves past
+ * it.
+ */
+static int
+magic_test_type_ulong(struct magic_line *ml, struct magic_state *ms)
+{
+	uint32_t	v;
+	int		matched;
+
+	if (magic_copy_from(ms, -1, &v, sizeof v) != 0)
+		return (0);
+
+	switch (ml->type) {
+	case MAGIC_TYPE_UBELONG:
+		v = betoh32(v);
+		break;
+	case MAGIC_TYPE_ULELONG:
+		v = letoh32(v);
+		break;
+	default:
+		break;
+	}
+
+	switch (ml->type_operator) {
+	case '&':
+		v &= (uint32_t)ml->type_operand;
+		break;
+	case ' ':
+		break;
+	default:
+		return (-1);
+	}
+
+	matched = magic_test_unsigned(ml, v, (uint32_t)ml->test_unsigned);
+	if (ml->result != NULL && matched == !ml->test_not) {
+		magic_add_result(ms, ml, "%u", (unsigned int)v);
+		ms->offset += sizeof v;
+	}
+	return (matched);
+}
+
+/*
+ * Test an unsigned 64-bit value at the current offset, converting from big-
+ * or little-endian for the ubequad and ulequad types. Returns 0 if it
+ * cannot be read, -1 if the type operator is not supported, otherwise the
+ * result of the comparison. When the outcome is accepted and the line has
+ * result text, the value is added to the output and the offset moves past
+ * it.
+ */
+static int
+magic_test_type_uquad(struct magic_line *ml, struct magic_state *ms)
+{
+	uint64_t	v;
+	int		matched;
+
+	if (magic_copy_from(ms, -1, &v, sizeof v) != 0)
+		return (0);
+
+	switch (ml->type) {
+	case MAGIC_TYPE_UBEQUAD:
+		v = betoh64(v);
+		break;
+	case MAGIC_TYPE_ULEQUAD:
+		v = letoh64(v);
+		break;
+	default:
+		break;
+	}
+
+	switch (ml->type_operator) {
+	case '&':
+		v &= (uint64_t)ml->type_operand;
+		break;
+	case ' ':
+		break;
+	default:
+		return (-1);
+	}
+
+	matched = magic_test_unsigned(ml, v, (uint64_t)ml->test_unsigned);
+	if (ml->result != NULL && matched == !ml->test_not) {
+		magic_add_result(ms, ml, "%llu", (unsigned long long)v);
+		ms->offset += sizeof v;
+	}
+	return (matched);
+}
+
+/*
+ * Print a 32-bit floating point value at the current offset, converting
+ * from big- or little-endian for the befloat and lefloat types. Only the
+ * 'x' test with no type operator is supported; anything else returns -1.
+ * Returns 0 if the value cannot be read and 1 once it has been added to the
+ * output, in which case the offset moves past it.
+ */
+static int
+magic_test_type_float(struct magic_line *ml, struct magic_state *ms)
+{
+	uint32_t	value0;
+	float		value;
+
+	if (magic_copy_from(ms, -1, &value0, sizeof value0) != 0)
+		return (0);
+	if (ml->type == MAGIC_TYPE_BEFLOAT)
+		value0 = betoh32(value0);
+	if (ml->type == MAGIC_TYPE_LEFLOAT)
+		value0 = letoh32(value0);
+	/*
+	 * Reinterpret exactly the four bytes that were read. Copying them
+	 * into a double would read past value0 and give a meaningless
+	 * number. This assumes float is 32 bits wide, as the read does.
+	 */
+	memcpy(&value, &value0, sizeof value0);
+
+	if (ml->type_operator != ' ')
+		return (-1);
+
+	if (ml->test_operator != 'x')
+		return (-1);
+
+	magic_add_result(ms, ml, "%g", (double)value);
+	ms->offset += sizeof value0;
+	return (1);
+}
+
+/*
+ * Print a 64-bit floating point value at the current offset, converting
+ * from big- or little-endian for the bedouble and ledouble types. Only the
+ * 'x' test with no type operator is supported; anything else returns -1.
+ * Returns 0 if the value cannot be read and 1 once it has been added to the
+ * output, in which case the offset moves past it.
+ */
+static int
+magic_test_type_double(struct magic_line *ml, struct magic_state *ms)
+{
+	uint64_t	raw;
+	double		d;
+
+	if (magic_copy_from(ms, -1, &raw, sizeof raw) != 0)
+		return (0);
+
+	switch (ml->type) {
+	case MAGIC_TYPE_BEDOUBLE:
+		raw = betoh64(raw);
+		break;
+	case MAGIC_TYPE_LEDOUBLE:
+		raw = letoh64(raw);
+		break;
+	default:
+		break;
+	}
+	memcpy(&d, &raw, sizeof d);
+
+	if (ml->type_operator != ' ' || ml->test_operator != 'x')
+		return (-1);
+
+	magic_add_result(ms, ml, "%g", d);
+	ms->offset += sizeof raw;
+	return (1);
+}
+
+/*
+ * Compare the data at the current offset with the test string.
+ *
+ * The type name may carry "/flags": 'B', 'b' and 'c' are passed to
+ * magic_test_eq() as Bflag, bflag and cflag; any other flag returns -1.
+ * Returns 0 if fewer bytes remain than the test string is long, -1 for an
+ * unsupported test operator, otherwise whether the '<', '>' or '='
+ * comparison holds ('x' always holds). When the outcome is accepted the data
+ * is added to the output if the line has result text, and after a
+ * successful '=' test the offset moves past the matched string.
+ */
+static int
+magic_test_type_string(struct magic_line *ml, struct magic_state *ms)
+{
+	const char	*data, *flag;
+	size_t		 avail;
+	int		 cmp, result;
+	int		 cflag = 0, bflag = 0, Bflag = 0;
+
+	flag = ml->type_string + (sizeof "string") - 1;
+	if (*flag != '\0') {
+		if (*flag != '/')
+			return (-1);
+		while (*++flag != '\0') {
+			if (*flag == 'B')
+				Bflag = 1;
+			else if (*flag == 'b')
+				bflag = 1;
+			else if (*flag == 'c')
+				cflag = 1;
+			else
+				return (-1);
+		}
+	}
+
+	data = ms->base + ms->offset;
+	avail = ms->size - ms->offset;
+	if (avail < ml->test_string_size)
+		return (0);
+
+	cmp = magic_test_eq(data, avail, ml->test_string,
+	    ml->test_string_size, cflag, bflag, Bflag);
+	if (ml->test_operator == 'x')
+		result = 1;
+	else if (ml->test_operator == '<')
+		result = (cmp < 0);
+	else if (ml->test_operator == '>')
+		result = (cmp > 0);
+	else if (ml->test_operator == '=')
+		result = (cmp == 0);
+	else
+		result = -1;
+
+	if (result != !ml->test_not)
+		return (result);
+
+	if (ml->result != NULL)
+		magic_add_string(ms, ml, data, avail);
+	if (result && ml->test_operator == '=')
+		ms->offset = data - ms->base + ml->test_string_size;
+	return (result);
+}
+
+/*
+ * Compare a length-prefixed string at the current offset with the test
+ * string. The first byte holds the length and the text follows it.
+ *
+ * Returns -1 if the length byte or the text it announces does not fit in
+ * the remaining data, or if the test operator is not supported. Strings of
+ * different length compare by length, equal-length strings by memcmp(3).
+ * When the outcome is accepted the string is added to the output if the line
+ * has result text, and on a match the offset moves past the length byte and
+ * the string.
+ */
+static int
+magic_test_type_pstring(struct magic_line *ml, struct magic_state *ms)
+{
+	const char	*s;
+	size_t		 slen;
+	int		 result;
+
+	s = ms->base + ms->offset;
+	if (ms->size - ms->offset < 1)
+		return (-1);
+	slen = *(u_char *)s;
+	/*
+	 * The length byte itself uses one of the remaining bytes, so only
+	 * (remaining - 1) are available for the text. At least one byte
+	 * remains here, so the subtraction cannot wrap.
+	 */
+	if (slen > ms->size - ms->offset - 1)
+		return (-1);
+	s++;
+
+	if (slen < ml->test_string_size)
+		result = -1;
+	else if (slen > ml->test_string_size)
+		result = 1;
+	else
+		result = memcmp(s, ml->test_string, ml->test_string_size);
+	switch (ml->test_operator) {
+	case 'x':
+		result = 1;
+		break;
+	case '<':
+		result = result < 0;
+		break;
+	case '>':
+		result = result > 0;
+		break;
+	case '=':
+		result = result == 0;
+		break;
+	default:
+		result = -1;
+		break;
+	}
+	if (result == !ml->test_not) {
+		if (ml->result != NULL)
+			magic_add_string(ms, ml, s, slen);
+		if (result)
+			ms->offset += slen + 1;
+	}
+	return (result);
+}
+
+/*
+ * Test a signed 32-bit timestamp at the current offset, converting from
+ * big- or little-endian for the be and le variants. Returns 0 if it cannot
+ * be read, -1 if the type operator is not supported, otherwise the result
+ * of the comparison. When the outcome is accepted and the line has result
+ * text, the formatted date is added to the output and the offset moves past
+ * the value. The ldate variants print local time, the others UTC.
+ */
+static int
+magic_test_type_date(struct magic_line *ml, struct magic_state *ms)
+{
+	int32_t	value;
+	int	result;
+	time_t	t;
+	char	s[64];
+
+	if (magic_copy_from(ms, -1, &value, sizeof value) != 0)
+		return (0);
+	if (ml->type == MAGIC_TYPE_BEDATE ||
+	    ml->type == MAGIC_TYPE_BELDATE)
+		value = betoh32(value);
+	if (ml->type == MAGIC_TYPE_LEDATE ||
+	    ml->type == MAGIC_TYPE_LELDATE)
+		value = letoh32(value);
+
+	if (ml->type_operator == '&')
+		value &= (int32_t)ml->type_operand;
+	else if (ml->type_operator != ' ')
+		return (-1);
+
+	result = magic_test_signed(ml, value, (int32_t)ml->test_signed);
+	if (result == !ml->test_not && ml->result != NULL) {
+		t = value;
+		switch (ml->type) {
+		case MAGIC_TYPE_LDATE:
+		case MAGIC_TYPE_LELDATE:
+		case MAGIC_TYPE_BELDATE:
+			ctime_r(&t, s);
+			break;
+		default:
+			/*
+			 * Plain dates are UTC. Using localtime() here would
+			 * make them print exactly like the ldate variants.
+			 */
+			asctime_r(gmtime(&t), s);
+			break;
+		}
+		s[strcspn(s, "\n")] = '\0';
+		magic_add_result(ms, ml, "%s", s);
+		ms->offset += sizeof value;
+	}
+	return (result);
+}
+
+/*
+ * Test a signed 64-bit timestamp at the current offset, converting from
+ * big- or little-endian for the be and le variants. Returns 0 if it cannot
+ * be read, -1 if the type operator is not supported, otherwise the result
+ * of the comparison. When the outcome is accepted and the line has result
+ * text, the formatted date is added to the output and the offset moves past
+ * the value. The qldate variants print local time, the others UTC; a value
+ * that cannot be converted to a calendar date is printed as a number.
+ */
+static int
+magic_test_type_qdate(struct magic_line *ml, struct magic_state *ms)
+{
+	int64_t		 value;
+	int		 result, ok;
+	time_t		 t;
+	struct tm	*tm;
+	char		 s[64];
+
+	if (magic_copy_from(ms, -1, &value, sizeof value) != 0)
+		return (0);
+	if (ml->type == MAGIC_TYPE_BEQDATE ||
+	    ml->type == MAGIC_TYPE_BEQLDATE)
+		value = betoh64(value);
+	if (ml->type == MAGIC_TYPE_LEQDATE ||
+	    ml->type == MAGIC_TYPE_LEQLDATE)
+		value = letoh64(value);
+
+	if (ml->type_operator == '&')
+		value &= (int64_t)ml->type_operand;
+	else if (ml->type_operator != ' ')
+		return (-1);
+
+	result = magic_test_signed(ml, value, (int64_t)ml->test_signed);
+	if (result == !ml->test_not && ml->result != NULL) {
+		t = value;
+		switch (ml->type) {
+		case MAGIC_TYPE_QLDATE:
+		case MAGIC_TYPE_LEQLDATE:
+		case MAGIC_TYPE_BEQLDATE:
+			ok = (ctime_r(&t, s) != NULL);
+			break;
+		default:
+			/* Plain dates are UTC, not local time. */
+			tm = gmtime(&t);
+			ok = (tm != NULL && asctime_r(tm, s) != NULL);
+			break;
+		}
+		/*
+		 * The value comes straight from the file and may be far
+		 * outside what the time functions can represent; they then
+		 * return NULL and leave s unset.
+		 */
+		if (!ok)
+			snprintf(s, sizeof s, "%lld", (long long)value);
+		s[strcspn(s, "\n")] = '\0';
+		magic_add_result(ms, ml, "%s", s);
+		ms->offset += sizeof value;
+	}
+	return (result);
+}
+
+/*
+ * Test an unsigned 32-bit timestamp at the current offset, converting from
+ * big- or little-endian for the ube and ule variants. Returns 0 if it
+ * cannot be read, -1 if the type operator is not supported, otherwise the
+ * result of the comparison. When the outcome is accepted and the line has
+ * result text, the formatted date is added to the output and the offset
+ * moves past the value. The uldate variants print local time, the others
+ * UTC.
+ */
+static int
+magic_test_type_udate(struct magic_line *ml, struct magic_state *ms)
+{
+	uint32_t	value;
+	int		result;
+	time_t		t;
+	char		s[64];
+
+	if (magic_copy_from(ms, -1, &value, sizeof value) != 0)
+		return (0);
+	/*
+	 * This function serves the unsigned types, so it must test the
+	 * MAGIC_TYPE_U* constants; the signed ones never reach it.
+	 */
+	if (ml->type == MAGIC_TYPE_UBEDATE ||
+	    ml->type == MAGIC_TYPE_UBELDATE)
+		value = betoh32(value);
+	if (ml->type == MAGIC_TYPE_ULEDATE ||
+	    ml->type == MAGIC_TYPE_ULELDATE)
+		value = letoh32(value);
+
+	if (ml->type_operator == '&')
+		value &= (uint32_t)ml->type_operand;
+	else if (ml->type_operator != ' ')
+		return (-1);
+
+	result = magic_test_unsigned(ml, value, (uint32_t)ml->test_unsigned);
+	if (result == !ml->test_not && ml->result != NULL) {
+		t = value;
+		switch (ml->type) {
+		case MAGIC_TYPE_ULDATE:
+		case MAGIC_TYPE_ULELDATE:
+		case MAGIC_TYPE_UBELDATE:
+			ctime_r(&t, s);
+			break;
+		default:
+			asctime_r(gmtime(&t), s);
+			break;
+		}
+		s[strcspn(s, "\n")] = '\0';
+		magic_add_result(ms, ml, "%s", s);
+		ms->offset += sizeof value;
+	}
+	return (result);
+}
+
+/*
+ * Test an unsigned 64-bit timestamp at the current offset, converting from
+ * big- or little-endian for the ube and ule variants. Returns 0 if it
+ * cannot be read, -1 if the type operator is not supported, otherwise the
+ * result of the comparison. When the outcome is accepted and the line has
+ * result text, the formatted date is added to the output and the offset
+ * moves past the value. The uqldate variants print local time, the others
+ * UTC; a value that cannot be converted to a calendar date is printed as a
+ * number.
+ */
+static int
+magic_test_type_uqdate(struct magic_line *ml, struct magic_state *ms)
+{
+	uint64_t	 value;
+	int		 result, ok;
+	time_t		 t;
+	struct tm	*tm;
+	char		 s[64];
+
+	if (magic_copy_from(ms, -1, &value, sizeof value) != 0)
+		return (0);
+	if (ml->type == MAGIC_TYPE_UBEQDATE ||
+	    ml->type == MAGIC_TYPE_UBEQLDATE)
+		value = betoh64(value);
+	if (ml->type == MAGIC_TYPE_ULEQDATE ||
+	    ml->type == MAGIC_TYPE_ULEQLDATE)
+		value = letoh64(value);
+
+	if (ml->type_operator == '&')
+		value &= (uint64_t)ml->type_operand;
+	else if (ml->type_operator != ' ')
+		return (-1);
+
+	result = magic_test_unsigned(ml, value, (uint64_t)ml->test_unsigned);
+	if (result == !ml->test_not && ml->result != NULL) {
+		t = value;
+		switch (ml->type) {
+		case MAGIC_TYPE_UQLDATE:
+		case MAGIC_TYPE_ULEQLDATE:
+		case MAGIC_TYPE_UBEQLDATE:
+			ok = (ctime_r(&t, s) != NULL);
+			break;
+		default:
+			tm = gmtime(&t);
+			ok = (tm != NULL && asctime_r(tm, s) != NULL);
+			break;
+		}
+		/*
+		 * The value comes straight from the file and may be far
+		 * outside what the time functions can represent; they then
+		 * return NULL and leave s unset.
+		 */
+		if (!ok)
+			snprintf(s, sizeof s, "%llu",
+			    (unsigned long long)value);
+		s[strcspn(s, "\n")] = '\0';
+		magic_add_result(ms, ml, "%s", s);
+		ms->offset += sizeof value;
+	}
+	return (result);
+}
+
+static int /*
+ * The bestring16 type is not supported: always returns -2, which
+ * magic_test_line() reports as "not implemented".
+ */
+magic_test_type_bestring16(__unused struct magic_line *ml,
+ __unused struct magic_state *ms)
+{
+ return (-2);
+}
+
+static int /*
+ * The lestring16 type is not supported: always returns -2, which
+ * magic_test_line() reports as "not implemented".
+ */
+magic_test_type_lestring16(__unused struct magic_line *ml,
+ __unused struct magic_state *ms)
+{
+ return (-2);
+}
+
+static int /*
+ * The melong type is not supported: always returns -2, which
+ * magic_test_line() reports as "not implemented".
+ */
+magic_test_type_melong(__unused struct magic_line *ml,
+ __unused struct magic_state *ms)
+{
+ return (-2);
+}
+
+static int /*
+ * The medate type is not supported: always returns -2, which
+ * magic_test_line() reports as "not implemented".
+ */
+magic_test_type_medate(__unused struct magic_line *ml,
+ __unused struct magic_state *ms)
+{
+ return (-2);
+}
+
+static int /*
+ * The meldate type is not supported: always returns -2, which
+ * magic_test_line() reports as "not implemented".
+ */
+magic_test_type_meldate(__unused struct magic_line *ml,
+ __unused struct magic_state *ms)
+{
+ return (-2);
+}
+
+/*
+ * Match the test string, as an extended regular expression, against the
+ * data from the current offset to the end.
+ *
+ * The type name may carry "/flags": 'c' makes the match case-insensitive
+ * and 's' leaves the offset at the start of the match instead of its end.
+ * Any other flag, or a pattern that does not compile, returns -1. Otherwise
+ * returns whether the pattern matched. When the outcome is accepted and the
+ * line has result text, the result is added to the output and, on a match,
+ * the offset is updated.
+ */
+static int
+magic_test_type_regex(struct magic_line *ml, struct magic_state *ms)
+{
+	const char	*cp;
+	regex_t		 re;
+	regmatch_t	 m;
+	int		 result, flags = 0, sflag = 0;
+
+	cp = &ml->type_string[(sizeof "regex") - 1];
+	if (*cp != '\0') {
+		if (*cp != '/')
+			return (-1);
+		cp++;
+		for (; *cp != '\0'; cp++) {
+			switch (*cp) {
+			case 's':
+				sflag = 1;
+				break;
+			case 'c':
+				flags |= REG_ICASE;
+				break;
+			default:
+				return (-1);
+			}
+		}
+	}
+
+	/* Pass the parsed flags on, or "/c" would have no effect. */
+	if (regcomp(&re, ml->test_string, REG_EXTENDED|flags) != 0)
+		return (-1);
+	/* With REG_STARTEND, m gives the range of ms->base to search. */
+	m.rm_so = ms->offset;
+	m.rm_eo = ms->size;
+
+	result = (regexec(&re, ms->base, 1, &m, REG_STARTEND) == 0);
+	if (result == !ml->test_not && ml->result != NULL) {
+		magic_add_result(ms, ml, "%s", "");
+		if (result) {
+			if (sflag)
+				ms->offset = m.rm_so;
+			else
+				ms->offset = m.rm_eo;
+		}
+	}
+	regfree(&re);
+	return (result);
+}
+
+/*
+ * Search for the test string in the data, starting at the current offset.
+ *
+ * The type name may carry "/range" and then "/flags": range limits how many
+ * starting positions are tried (all remaining data if absent), and the
+ * flags 'B', 'b' and 'c' are passed to magic_test_eq(). A malformed range or
+ * unknown flag returns -1. The '<' and '>' operators are only handled for a
+ * range of 1; with any other range only '=' (or 'x') is supported and other
+ * operators return -2. Otherwise returns whether the string was found.
+ *
+ * When the outcome is accepted, the line has result text and something was
+ * found, the data at the match is added to the output and the offset moves
+ * to just past the matched string.
+ */
+static int
+magic_test_type_search(struct magic_line *ml, struct magic_state *ms)
+{
+	const char	*cp, *endptr, *start, *found;
+	size_t		 size, end, i;
+	uint64_t	 range;
+	int		 result, n, cflag = 0, bflag = 0, Bflag = 0;
+
+	cp = &ml->type_string[(sizeof "search") - 1];
+	if (*cp != '\0') {
+		if (*cp != '/')
+			return (-1);
+		cp++;
+
+		endptr = magic_strtoull(cp, &range);
+		if (endptr == NULL || (*endptr != '/' && *endptr != '\0'))
+			return (-1);
+
+		if (*endptr == '/') {
+			for (cp = endptr + 1; *cp != '\0'; cp++) {
+				switch (*cp) {
+				case 'B':
+					Bflag = 1;
+					break;
+				case 'b':
+					bflag = 1;
+					break;
+				case 'c':
+					cflag = 1;
+					break;
+				default:
+					return (-1);
+				}
+			}
+		}
+	} else
+		range = UINT64_MAX;
+	if (range > (uint64_t)ms->size - ms->offset)
+		range = ms->size - ms->offset;
+	size = ml->test_string_size;
+
+	/* Want to search every starting position from up to range + size. */
+	end = range + size;
+	if (end > ms->size - ms->offset) {
+		if (size > ms->size - ms->offset)
+			end = 0;
+		else
+			end = ms->size - ms->offset - size;
+	}
+
+	/*
+	 * < and > and the flags are only in /etc/magic with search/1 so don't
+	 * support them with anything else.
+	 */
+	start = ms->base + ms->offset;
+	if (end == 0)
+		found = NULL;
+	else if (ml->test_operator == 'x')
+		found = start;
+	else if (range == 1) {
+		n = magic_test_eq(start, ms->size - ms->offset, ml->test_string,
+		    size, cflag, bflag, Bflag);
+		if (n == -1 && ml->test_operator == '<')
+			found = start;
+		else if (n == 1 && ml->test_operator == '>')
+			found = start;
+		else if (n == 0 && ml->test_operator == '=')
+			found = start;
+		else
+			found = NULL;
+	} else {
+		if (ml->test_operator != '=')
+			return (-2);
+		for (i = 0; i < end; i++) {
+			n = magic_test_eq(start + i, ms->size - ms->offset - i,
+			    ml->test_string, size, cflag, bflag, Bflag);
+			if (n == 0) {
+				found = start + i;
+				break;
+			}
+		}
+		if (i == end)
+			found = NULL;
+	}
+	result = (found != NULL);
+
+	if (result == !ml->test_not && ml->result != NULL && found != NULL) {
+		/*
+		 * Measure from the start of the data, not from the position
+		 * the search began at: otherwise the string length can run
+		 * past the end of the buffer and the new offset loses the
+		 * offset the search started from.
+		 */
+		magic_add_string(ms, ml, found,
+		    ms->size - (size_t)(found - ms->base));
+		ms->offset = (found - ms->base) + size;
+	}
+	return (result);
+}
+
+static int /*
+ * Test function for the "default" type: ignores its arguments and always
+ * returns 1.
+ */
+magic_test_type_default(__unused struct magic_line *ml,
+ __unused struct magic_state *ms)
+{
+ return (1);
+}
+
+/*
+ * Test function for each magic type; magic_test_line() indexes this table
+ * with ml->type. The entries presumably follow the order of the
+ * MAGIC_TYPE_* constants in magic.h (native, then big-endian, then
+ * little-endian groups, each listing date, qdate, ldate, qldate and their
+ * unsigned forms) -- confirm against magic.h when changing either.
+ */
+static int (*magic_test_functions[])(struct magic_line *,
+    struct magic_state *) = {
+	magic_test_type_none,
+	magic_test_type_byte,
+	magic_test_type_short,
+	magic_test_type_long,
+	magic_test_type_quad,
+	magic_test_type_ubyte,
+	magic_test_type_ushort,
+	magic_test_type_ulong,
+	magic_test_type_uquad,
+	magic_test_type_float,
+	magic_test_type_double,
+	magic_test_type_string,
+	magic_test_type_pstring,
+	magic_test_type_date,
+	magic_test_type_qdate,
+	magic_test_type_date,
+	magic_test_type_qdate,
+	magic_test_type_udate,
+	magic_test_type_uqdate,
+	magic_test_type_udate,
+	magic_test_type_uqdate,	/* unsigned handler, as in the groups below */
+	magic_test_type_short,
+	magic_test_type_long,
+	magic_test_type_quad,
+	magic_test_type_ushort,
+	magic_test_type_ulong,
+	magic_test_type_uquad,
+	magic_test_type_float,
+	magic_test_type_double,
+	magic_test_type_date,
+	magic_test_type_qdate,
+	magic_test_type_date,
+	magic_test_type_qdate,
+	magic_test_type_udate,
+	magic_test_type_uqdate,
+	magic_test_type_udate,
+	magic_test_type_uqdate,
+	magic_test_type_bestring16,
+	magic_test_type_short,
+	magic_test_type_long,
+	magic_test_type_quad,
+	magic_test_type_ushort,
+	magic_test_type_ulong,
+	magic_test_type_uquad,
+	magic_test_type_float,
+	magic_test_type_double,
+	magic_test_type_date,
+	magic_test_type_qdate,
+	magic_test_type_date,
+	magic_test_type_qdate,
+	magic_test_type_udate,
+	magic_test_type_uqdate,
+	magic_test_type_udate,
+	magic_test_type_uqdate,
+	magic_test_type_lestring16,
+	magic_test_type_melong,
+	magic_test_type_medate,
+	magic_test_type_meldate,
+	magic_test_type_regex,
+	magic_test_type_search,
+	magic_test_type_default,
+};
+
+/*
+ * Run one magic line, and its children if it matches, against the data.
+ *
+ * The offset to test at is either the line's fixed offset or, for indirect
+ * offsets, a byte, short or long read from the data (lower-case type
+ * letters are little-endian, upper-case big-endian) and adjusted by the
+ * indirect operator. Relative offsets are added to the current offset.
+ *
+ * Returns 1 if the line matched and 0 otherwise, including when the offset
+ * is out of range or the test failed or is not implemented (both of which
+ * are reported with magic_warn()). Children are tested from the offset the
+ * parent left behind; their outcome does not change the return value.
+ */
+static int
+magic_test_line(struct magic_line *ml, struct magic_state *ms)
+{
+	struct magic_line	*child;
+	int64_t			 offset, wanted, next;
+	int			 result;
+	uint8_t			 b;
+	uint16_t		 s;
+	uint32_t		 l;
+
+	if (ml->indirect_type == ' ')
+		wanted = ml->offset;
+	else {
+		wanted = ml->indirect_offset;
+		if (ml->indirect_relative) {
+			if (wanted < 0 && -wanted > ms->offset)
+				return (0);
+			if (wanted > 0 && ms->offset + wanted > ms->size)
+				return (0);
+			next = ms->offset + ml->indirect_offset;
+		} else
+			next = wanted;
+
+		switch (ml->indirect_type) {
+		case 'b':
+		case 'B':
+			if (magic_copy_from(ms, next, &b, sizeof b) != 0)
+				return (0);
+			wanted = b;
+			break;
+		case 's':
+			if (magic_copy_from(ms, next, &s, sizeof s) != 0)
+				return (0);
+			wanted = letoh16(s);
+			break;
+		case 'S':
+			if (magic_copy_from(ms, next, &s, sizeof s) != 0)
+				return (0);
+			wanted = betoh16(s);
+			break;
+		case 'l':
+			if (magic_copy_from(ms, next, &l, sizeof l) != 0)
+				return (0);
+			/* l is 32 bits wide: convert all of it. */
+			wanted = letoh32(l);
+			break;
+		case 'L':
+			if (magic_copy_from(ms, next, &l, sizeof l) != 0)
+				return (0);
+			/* l is 32 bits wide: convert all of it. */
+			wanted = betoh32(l);
+			break;
+		}
+
+		switch (ml->indirect_operator) {
+		case '+':
+			wanted += ml->indirect_operand;
+			break;
+		case '-':
+			wanted -= ml->indirect_operand;
+			break;
+		case '*':
+			wanted *= ml->indirect_operand;
+			break;
+		}
+	}
+
+	if (ml->offset_relative) {
+		if (wanted < 0 && -wanted > ms->offset)
+			return (0);
+		if (wanted > 0 && ms->offset + wanted > ms->size)
+			return (0);
+		offset = ms->offset + wanted;
+	} else
+		offset = wanted;
+	if (offset < 0 || offset > ms->size)
+		return (0);
+	ms->offset = offset;
+
+	result = magic_test_functions[ml->type](ml, ms);
+	if (result == -1) {
+		magic_warn(ml, "test %s/%c failed", ml->type_string,
+		    ml->test_operator);
+		return (0);
+	}
+	if (result == -2) {
+		magic_warn(ml, "test %s/%c not implemented", ml->type_string,
+		    ml->test_operator);
+		return (0);
+	}
+	/* A negated test matches when the underlying test did not. */
+	if (result == ml->test_not)
+		return (0);
+	if (ml->mimetype != NULL)
+		ms->mimetype = ml->mimetype;
+
+	magic_warn(ml, "test %s/%c matched at offset %llu: '%s'",
+	    ml->type_string, ml->test_operator, ms->offset,
+	    ml->result == NULL ? "" : ml->result);
+
+	/* Every child starts from the offset this line finished at. */
+	offset = ms->offset;
+	TAILQ_FOREACH(child, &ml->children, entry) {
+		ms->offset = offset;
+		magic_test_line(child, ms);
+	}
+	return (1);
+}
+
+/*
+ * Test size bytes at base against the loaded magic m.
+ *
+ * Lines in the tree are tried in order until one matches; only lines whose
+ * text setting agrees with MAGIC_TEST_TEXT in flags are considered. Returns
+ * NULL if no output was produced. Otherwise returns a newly allocated copy
+ * of the output or, with MAGIC_TEST_MIME, of the MIME type (NULL if none
+ * was set).
+ */
+const char *
+magic_test(struct magic *m, const void *base, size_t size, int flags)
+{
+	static struct magic_state	 ms;
+	struct magic_line		*ml;
+
+	memset(&ms, 0, sizeof ms);
+	ms.base = base;
+	ms.size = size;
+	ms.text = !!(flags & MAGIC_TEST_TEXT);
+
+	RB_FOREACH(ml, magic_tree, &m->tree) {
+		ms.offset = 0;
+		if (ml->text != ms.text)
+			continue;
+		if (magic_test_line(ml, &ms))
+			break;
+	}
+
+	if (*ms.out == '\0')
+		return (NULL);
+	if (!(flags & MAGIC_TEST_MIME))
+		return (xstrdup(ms.out));
+	if (ms.mimetype)
+		return (xstrdup(ms.mimetype));
+	return (NULL);
+}
diff --git a/usr.bin/file/magic.c b/usr.bin/file/magic.c
deleted file mode 100644
index 0e9fd1c66c6..00000000000
--- a/usr.bin/file/magic.c
+++ /dev/null
@@ -1,395 +0,0 @@
-/* $OpenBSD: magic.c,v 1.10 2015/01/16 08:24:04 doug Exp $ */
-/*
- * Copyright (c) Christos Zoulas 2003.
- * All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "file.h"
-#include "magic.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#ifdef QUICK
-#include <sys/mman.h>
-#endif
-#include <limits.h> /* for PIPE_BUF */
-
-#if defined(HAVE_UTIMES)
-# include <sys/time.h>
-#elif defined(HAVE_UTIME)
-# if defined(HAVE_SYS_UTIME_H)
-# include <sys/utime.h>
-# elif defined(HAVE_UTIME_H)
-# include <utime.h>
-# endif
-#endif
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h> /* for read() */
-#endif
-
-#ifdef HAVE_LOCALE_H
-#include <locale.h>
-#endif
-
-#include <netinet/in.h> /* for byte swapping */
-
-#include "patchlevel.h"
-
-#ifndef PIPE_BUF
-/* Get the PIPE_BUF from pathconf */
-#ifdef _PC_PIPE_BUF
-#define PIPE_BUF pathconf(".", _PC_PIPE_BUF)
-#else
-#define PIPE_BUF 512
-#endif
-#endif
-
-#ifdef __EMX__
-private char *apptypeName = NULL;
-protected int file_os2_apptype(struct magic_set *ms, const char *fn,
- const void *buf, size_t nb);
-#endif /* __EMX__ */
-
-private void free_mlist(struct mlist *);
-private void close_and_restore(const struct magic_set *, const char *, int,
- const struct stat *);
-private int info_from_stat(struct magic_set *, mode_t);
-#ifndef COMPILE_ONLY
-private const char *file_or_fd(struct magic_set *, const char *, int);
-#endif
-
-#ifndef STDIN_FILENO
-#define STDIN_FILENO 0
-#endif
-
-public struct magic_set *
-magic_open(int flags)
-{
- struct magic_set *ms;
-
- if ((ms = calloc((size_t)1, sizeof(struct magic_set))) == NULL)
- return NULL;
-
- if (magic_setflags(ms, flags) == -1) {
- errno = EINVAL;
- goto free;
- }
-
- ms->o.buf = ms->o.pbuf = NULL;
-
- ms->c.len = 10;
- ms->c.li = reallocarray(NULL, ms->c.len, sizeof(*ms->c.li));
- if (ms->c.li == NULL)
- goto free;
-
- ms->haderr = 0;
- ms->error = -1;
- ms->mlist = NULL;
- ms->file = "unknown";
- ms->line = 0;
- return ms;
-free:
- free(ms);
- return NULL;
-}
-
-private void
-free_mlist(struct mlist *mlist)
-{
- struct mlist *ml;
-
- if (mlist == NULL)
- return;
-
- for (ml = mlist->next; ml != mlist;) {
- struct mlist *next = ml->next;
- struct magic *mg = ml->magic;
- file_delmagic(mg, ml->mapped, ml->nmagic);
- free(ml);
- ml = next;
- }
- free(ml);
-}
-
-private int
-info_from_stat(struct magic_set *ms, mode_t md)
-{
- /* We cannot open it, but we were able to stat it. */
- if (md & 0222)
- if (file_printf(ms, "writable, ") == -1)
- return -1;
- if (md & 0111)
- if (file_printf(ms, "executable, ") == -1)
- return -1;
- if (S_ISREG(md))
- if (file_printf(ms, "regular file, ") == -1)
- return -1;
- if (file_printf(ms, "no read permission") == -1)
- return -1;
- return 0;
-}
-
-public void
-magic_close(struct magic_set *ms)
-{
- free_mlist(ms->mlist);
- free(ms->o.pbuf);
- free(ms->o.buf);
- free(ms->c.li);
- free(ms);
-}
-
-/*
- * load a magic file
- */
-public int
-magic_load(struct magic_set *ms, const char *magicfile)
-{
- struct mlist *ml = file_apprentice(ms, magicfile, FILE_LOAD);
- if (ml) {
- free_mlist(ms->mlist);
- ms->mlist = ml;
- return 0;
- }
- return -1;
-}
-
-public int
-magic_compile(struct magic_set *ms, const char *magicfile)
-{
- struct mlist *ml = file_apprentice(ms, magicfile, FILE_COMPILE);
- free_mlist(ml);
- return ml ? 0 : -1;
-}
-
-public int
-magic_check(struct magic_set *ms, const char *magicfile)
-{
- struct mlist *ml = file_apprentice(ms, magicfile, FILE_CHECK);
- free_mlist(ml);
- return ml ? 0 : -1;
-}
-
-private void
-close_and_restore(const struct magic_set *ms, const char *name, int fd,
- const struct stat *sb)
-{
- if (fd == STDIN_FILENO)
- return;
- (void) close(fd);
-
- if ((ms->flags & MAGIC_PRESERVE_ATIME) != 0) {
- /*
- * Try to restore access, modification times if read it.
- * This is really *bad* because it will modify the status
- * time of the file... And of course this will affect
- * backup programs
- */
-#ifdef HAVE_UTIMES
- struct timeval utsbuf[2];
- (void)memset(utsbuf, 0, sizeof(utsbuf));
- utsbuf[0].tv_sec = sb->st_atime;
- utsbuf[1].tv_sec = sb->st_mtime;
-
- (void) utimes(name, utsbuf); /* don't care if loses */
-#elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H)
- struct utimbuf utbuf;
-
- (void)memset(utbuf, 0, sizeof(utbuf));
- utbuf.actime = sb->st_atime;
- utbuf.modtime = sb->st_mtime;
- (void) utime(name, &utbuf); /* don't care if loses */
-#endif
- }
-}
-
-#ifndef COMPILE_ONLY
-
-/*
- * find type of descriptor
- */
-public const char *
-magic_descriptor(struct magic_set *ms, int fd)
-{
- return file_or_fd(ms, NULL, fd);
-}
-
-/*
- * find type of named file
- */
-public const char *
-magic_file(struct magic_set *ms, const char *inname)
-{
- return file_or_fd(ms, inname, STDIN_FILENO);
-}
-
-private const char *
-file_or_fd(struct magic_set *ms, const char *inname, int fd)
-{
- int rv = -1;
- unsigned char *buf;
- struct stat sb;
- ssize_t nbytes = 0; /* number of bytes read from a datafile */
- int ispipe = 0;
-
- /*
- * one extra for terminating '\0', and
- * some overlapping space for matches near EOF
- */
-#define SLOP (1 + sizeof(union VALUETYPE))
- if ((buf = malloc(HOWMANY + SLOP)) == NULL)
- return NULL;
-
- if (file_reset(ms) == -1)
- goto done;
-
- switch (file_fsmagic(ms, inname, &sb)) {
- case -1: /* error */
- goto done;
- case 0: /* nothing found */
- break;
- default: /* matched it and printed type */
- rv = 0;
- goto done;
- }
-
- if (inname == NULL) {
- if (fstat(fd, &sb) == 0 && S_ISFIFO(sb.st_mode))
- ispipe = 1;
- } else {
- int flags = O_RDONLY|O_BINARY;
-
- if (stat(inname, &sb) == 0 && S_ISFIFO(sb.st_mode)) {
- flags |= O_NONBLOCK;
- ispipe = 1;
- }
-
- errno = 0;
- if ((fd = open(inname, flags)) < 0) {
-#ifdef __CYGWIN__
- /* FIXME: Do this with EXEEXT from autotools */
- char *tmp = alloca(strlen(inname) + 5);
- (void)strcat(strcpy(tmp, inname), ".exe");
- if ((fd = open(tmp, flags)) < 0) {
-#endif
- fprintf(stderr, "couldn't open file\n");
- if (info_from_stat(ms, sb.st_mode) == -1)
- goto done;
- rv = 0;
- goto done;
-#ifdef __CYGWIN__
- }
-#endif
- }
-#ifdef O_NONBLOCK
- if ((flags = fcntl(fd, F_GETFL)) != -1) {
- flags &= ~O_NONBLOCK;
- (void)fcntl(fd, F_SETFL, flags);
- }
-#endif
- }
-
- /*
- * try looking at the first HOWMANY bytes
- */
- if (ispipe) {
- ssize_t r = 0;
-
- while ((r = sread(fd, (void *)&buf[nbytes],
- (size_t)(HOWMANY - nbytes), 1)) > 0) {
- nbytes += r;
- if (r < PIPE_BUF) break;
- }
-
- if (nbytes == 0) {
- /* We can not read it, but we were able to stat it. */
- if (info_from_stat(ms, sb.st_mode) == -1)
- goto done;
- rv = 0;
- goto done;
- }
-
- } else {
- if ((nbytes = read(fd, (char *)buf, HOWMANY)) == -1) {
- file_error(ms, errno, "cannot read `%s'", inname);
- goto done;
- }
- }
-
- (void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */
- if (file_buffer(ms, fd, inname, buf, (size_t)nbytes) == -1)
- goto done;
- rv = 0;
-done:
- free(buf);
- close_and_restore(ms, inname, fd, &sb);
- return rv == 0 ? file_getbuffer(ms) : NULL;
-}
-
-
-public const char *
-magic_buffer(struct magic_set *ms, const void *buf, size_t nb)
-{
- if (file_reset(ms) == -1)
- return NULL;
- /*
- * The main work is done here!
- * We have the file name and/or the data buffer to be identified.
- */
- if (file_buffer(ms, -1, NULL, buf, nb) == -1) {
- return NULL;
- }
- return file_getbuffer(ms);
-}
-#endif
-
-public const char *
-magic_error(struct magic_set *ms)
-{
- return ms->haderr ? ms->o.buf : NULL;
-}
-
-public int
-magic_errno(struct magic_set *ms)
-{
- return ms->haderr ? ms->error : 0;
-}
-
-public int
-magic_setflags(struct magic_set *ms, int flags)
-{
-#if !defined(HAVE_UTIME) && !defined(HAVE_UTIMES)
- if (flags & MAGIC_PRESERVE_ATIME)
- return -1;
-#endif
- ms->flags = flags;
- return 0;
-}
diff --git a/usr.bin/file/magic.h b/usr.bin/file/magic.h
index 03d0b1b0d6c..3ee8331e0f5 100644
--- a/usr.bin/file/magic.h
+++ b/usr.bin/file/magic.h
@@ -1,83 +1,180 @@
-/* $OpenBSD: magic.h,v 1.4 2009/04/24 18:54:34 chl Exp $ */
+/* $OpenBSD: magic.h,v 1.5 2015/04/24 16:24:11 nicm Exp $ */
+
/*
- * Copyright (c) Christos Zoulas 2003.
- * All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#ifndef _MAGIC_H
-#define _MAGIC_H
-
-#include <sys/types.h>
-
-#define MAGIC_NONE 0x000000 /* No flags */
-#define MAGIC_DEBUG 0x000001 /* Turn on debugging */
-#define MAGIC_SYMLINK 0x000002 /* Follow symlinks */
-#define MAGIC_COMPRESS 0x000004 /* Check inside compressed files */
-#define MAGIC_DEVICES 0x000008 /* Look at the contents of devices */
-#define MAGIC_MIME_TYPE 0x000010 /* Return only the MIME type */
-#define MAGIC_CONTINUE 0x000020 /* Return all matches */
-#define MAGIC_CHECK 0x000040 /* Print warnings to stderr */
-#define MAGIC_PRESERVE_ATIME 0x000080 /* Restore access time on exit */
-#define MAGIC_RAW 0x000100 /* Don't translate unprint chars */
-#define MAGIC_ERROR 0x000200 /* Handle ENOENT etc as real errors */
-#define MAGIC_MIME_ENCODING 0x000400 /* Return only the MIME encoding */
-#define MAGIC_MIME (MAGIC_MIME_TYPE|MAGIC_MIME_ENCODING)
-#define MAGIC_NO_CHECK_COMPRESS 0x001000 /* Don't check for compressed files */
-#define MAGIC_NO_CHECK_TAR 0x002000 /* Don't check for tar files */
-#define MAGIC_NO_CHECK_SOFT 0x004000 /* Don't check magic entries */
-#define MAGIC_NO_CHECK_APPTYPE 0x008000 /* Don't check application type */
-#define MAGIC_NO_CHECK_ELF 0x010000 /* Don't check for elf details */
-#define MAGIC_NO_CHECK_ASCII 0x020000 /* Don't check for ascii files */
-#define MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check ascii/tokens */
-
-/* Defined for backwards compatibility; do nothing */
-#define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */
-#define MAGIC_NO_CHECK_TROFF 0x000000 /* Don't check ascii/troff */
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct magic_set *magic_t;
-magic_t magic_open(int);
-void magic_close(magic_t);
-
-const char *magic_file(magic_t, const char *);
-const char *magic_descriptor(magic_t, int);
-const char *magic_buffer(magic_t, const void *, size_t);
-
-const char *magic_error(magic_t);
-int magic_setflags(magic_t, int);
-
-int magic_load(magic_t, const char *);
-int magic_compile(magic_t, const char *);
-int magic_check(magic_t, const char *);
-int magic_errno(magic_t);
-
-#ifdef __cplusplus
+
+#ifndef MAGIC_H
+#define MAGIC_H
+
+#include <sys/param.h>
+#include <sys/tree.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAGIC_STRING_SIZE 31
+#define MAGIC_STRENGTH_MULTIPLIER 20
+
+enum magic_type { /* value indexes magic_test_functions[] in magic_test_line() */
+ MAGIC_TYPE_NONE = 0,
+ MAGIC_TYPE_BYTE,
+ MAGIC_TYPE_SHORT,
+ MAGIC_TYPE_LONG,
+ MAGIC_TYPE_QUAD,
+ MAGIC_TYPE_UBYTE,
+ MAGIC_TYPE_USHORT,
+ MAGIC_TYPE_ULONG,
+ MAGIC_TYPE_UQUAD,
+ MAGIC_TYPE_FLOAT,
+ MAGIC_TYPE_DOUBLE,
+ MAGIC_TYPE_STRING,
+ MAGIC_TYPE_PSTRING,
+ MAGIC_TYPE_DATE,
+ MAGIC_TYPE_QDATE,
+ MAGIC_TYPE_LDATE,
+ MAGIC_TYPE_QLDATE,
+ MAGIC_TYPE_UDATE,
+ MAGIC_TYPE_UQDATE,
+ MAGIC_TYPE_ULDATE,
+ MAGIC_TYPE_UQLDATE,
+ MAGIC_TYPE_BESHORT,
+ MAGIC_TYPE_BELONG,
+ MAGIC_TYPE_BEQUAD,
+ MAGIC_TYPE_UBESHORT,
+ MAGIC_TYPE_UBELONG,
+ MAGIC_TYPE_UBEQUAD,
+ MAGIC_TYPE_BEFLOAT,
+ MAGIC_TYPE_BEDOUBLE,
+ MAGIC_TYPE_BEDATE,
+ MAGIC_TYPE_BEQDATE,
+ MAGIC_TYPE_BELDATE,
+ MAGIC_TYPE_BEQLDATE,
+ MAGIC_TYPE_UBEDATE,
+ MAGIC_TYPE_UBEQDATE,
+ MAGIC_TYPE_UBELDATE,
+ MAGIC_TYPE_UBEQLDATE,
+ MAGIC_TYPE_BESTRING16,
+ MAGIC_TYPE_LESHORT,
+ MAGIC_TYPE_LELONG,
+ MAGIC_TYPE_LEQUAD,
+ MAGIC_TYPE_ULESHORT,
+ MAGIC_TYPE_ULELONG,
+ MAGIC_TYPE_ULEQUAD,
+ MAGIC_TYPE_LEFLOAT,
+ MAGIC_TYPE_LEDOUBLE,
+ MAGIC_TYPE_LEDATE,
+ MAGIC_TYPE_LEQDATE,
+ MAGIC_TYPE_LELDATE,
+ MAGIC_TYPE_LEQLDATE,
+ MAGIC_TYPE_ULEDATE,
+ MAGIC_TYPE_ULEQDATE,
+ MAGIC_TYPE_ULELDATE,
+ MAGIC_TYPE_ULEQLDATE,
+ MAGIC_TYPE_LESTRING16,
+ MAGIC_TYPE_MELONG,
+ MAGIC_TYPE_MEDATE,
+ MAGIC_TYPE_MELDATE,
+ MAGIC_TYPE_REGEX,
+ MAGIC_TYPE_SEARCH,
+ MAGIC_TYPE_DEFAULT,
};
-#endif
-#endif /* _MAGIC_H */
+TAILQ_HEAD(magic_lines, magic_line);
+RB_HEAD(magic_tree, magic_line);
+
+struct magic_line {
+ struct magic *root;
+ u_int line;
+ u_int strength;
+ struct magic_line *parent;
+
+ int text; /* must equal magic_state.text for magic_test() to try the line */
+
+ int64_t offset;
+ int offset_relative; /* nonzero: wanted offset is added to ms->offset */
+
+ char indirect_type;
+ int indirect_relative;
+ int64_t indirect_offset;
+ char indirect_operator; /* '+', '-' or '*' applied to the wanted offset */
+ int64_t indirect_operand; /* right-hand side for indirect_operator */
+
+ enum magic_type type; /* selects the entry of magic_test_functions[] */
+ const char *type_string; /* printed in magic_warn() messages */
+ char type_operator;
+ int64_t type_operand;
+
+ char test_operator; /* printed as %c in magic_warn() messages */
+ int test_not; /* line fails when the test result equals this */
+ const char *test_string;
+ size_t test_string_size;
+ uint64_t test_unsigned;
+ int64_t test_signed;
+
+ int stringify;
+ const char *result; /* may be NULL; logged as "" in the match message */
+ const char *mimetype; /* if non-NULL, copied to ms->mimetype on a match */
+
+ struct magic_lines children; /* each is tested after this line matches */
+ TAILQ_ENTRY(magic_line) entry; /* linkage used to walk a children list */
+ RB_ENTRY(magic_line) node; /* linkage for magic_tree (see RB_PROTOTYPE) */
+};
+
+struct magic {
+ const char *path;
+ int warnings;
+
+ struct magic_tree tree; /* lines walked with RB_FOREACH by magic_test() */
+
+ int compiled; /* NOTE(review): presumably guards the format_* regexes - confirm */
+ regex_t format_short;
+ regex_t format_long;
+ regex_t format_quad;
+ regex_t format_float;
+ regex_t format_string;
+};
+
+struct magic_state {
+ char out[4096]; /* magic_test() returns a copy of this when non-empty */
+ const char *mimetype; /* taken from a matching line's mimetype, if set */
+ int text; /* 1 when MAGIC_TEST_TEXT was passed to magic_test() */
+
+ const char *base; /* buffer handed to magic_test() */
+ size_t size; /* size of that buffer; offsets beyond it are rejected */
+ int64_t offset; /* current offset; reset to 0 for each tree entry */
+};
+
+#define MAGIC_TEST_TEXT 0x1 /* magic_test(): only try lines whose text flag is set */
+#define MAGIC_TEST_MIME 0x2 /* magic_test(): return the MIME type instead of out */
+
+int magic_compare(struct magic_line *, struct magic_line *);
+RB_PROTOTYPE(magic_tree, magic_line, node, magic_compare);
+
+char *magic_strtoull(const char *, uint64_t *);
+char *magic_strtoll(const char *, int64_t *);
+void magic_warn(struct magic_line *, const char *, ...)
+ __attribute__ ((format (printf, 2, 3)));
+
+void magic_dump(struct magic *);
+struct magic *magic_load(FILE *, const char *, int);
+const char *magic_test(struct magic *, const void *, size_t, int);
+
+#endif /* MAGIC_H */
diff --git a/usr.bin/file/names.h b/usr.bin/file/names.h
deleted file mode 100644
index ef5fdf86d98..00000000000
--- a/usr.bin/file/names.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/* $OpenBSD: names.h,v 1.8 2009/04/24 18:54:34 chl Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * Names.h - names and types used by ascmagic in file(1).
- * These tokens are here because they can appear anywhere in
- * the first HOWMANY bytes, while tokens in MAGIC must
- * appear at fixed offsets into the file. Don't make HOWMANY
- * too high unless you have a very fast CPU.
- *
- * $Id: names.h,v 1.8 2009/04/24 18:54:34 chl Exp $
- */
-
-/*
- modified by Chris Lowth - 9 April 2000
- to add mime type strings to the types table.
-*/
-
-/* these types are used to index the table 'types': keep em in sync! */
-#define L_C 0 /* first and foremost on UNIX */
-#define L_CC 1 /* Bjarne's postincrement */
-#define L_MAKE 2 /* Makefiles */
-#define L_PLI 3 /* PL/1 */
-#define L_MACH 4 /* some kinda assembler */
-#define L_ENG 5 /* English */
-#define L_PAS 6 /* Pascal */
-#define L_MAIL 7 /* Electronic mail */
-#define L_NEWS 8 /* Usenet Netnews */
-#define L_JAVA 9 /* Java code */
-#define L_HTML 10 /* HTML */
-#define L_BCPL 11 /* BCPL */
-#define L_M4 12 /* M4 */
-#define L_PO 13 /* PO */
-
-static const struct {
- char human[48];
- char mime[16];
-} types[] = {
- { "C program", "text/x-c", },
- { "C++ program", "text/x-c++" },
- { "make commands", "text/x-makefile" },
- { "PL/1 program", "text/x-pl1" },
- { "assembler program", "text/x-asm" },
- { "English", "text/plain" },
- { "Pascal program", "text/x-pascal" },
- { "mail", "text/x-mail" },
- { "news", "text/x-news" },
- { "Java program", "text/x-java" },
- { "HTML document", "text/html", },
- { "BCPL program", "text/x-bcpl" },
- { "M4 macro language pre-processor", "text/x-m4" },
- { "PO (gettext message catalogue)", "text/x-po" },
- { "cannot happen error on names.h/types", "error/x-error" }
-};
-
-/*
- * XXX - how should we distinguish Java from C++?
- * The trick used in a Debian snapshot, of having "extends" or "implements"
- * as tags for Java, doesn't work very well, given that those keywords
- * are often preceded by "class", which flags it as C++.
- *
- * Perhaps we need to be able to say
- *
- * If "class" then
- *
- * if "extends" or "implements" then
- * Java
- * else
- * C++
- * endif
- *
- * Or should we use other keywords, such as "package" or "import"?
- * Unfortunately, Ada95 uses "package", and Modula-3 uses "import",
- * although I infer from the language spec at
- *
- * http://www.research.digital.com/SRC/m3defn/html/m3.html
- *
- * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be
- * in all caps.
- *
- * So, for now, we go with "import". We must put it before the C++
- * stuff, so that we don't misidentify Java as C++. Not using "package"
- * means we won't identify stuff that defines a package but imports
- * nothing; hopefully, very little Java code imports nothing (one of the
- * reasons for doing OO programming is to import as much as possible
- * and write only what you need to, right?).
- *
- * Unfortunately, "import" may cause us to misidentify English text
- * as Java, as it comes after "the" and "The". Perhaps we need a fancier
- * heuristic to identify Java?
- */
-static const struct names {
- char name[14];
- short type;
-} names[] = {
- /* These must be sorted by eye for optimal hit rate */
- /* Add to this list only after substantial meditation */
- {"msgid", L_PO},
- {"dnl", L_M4},
- {"import", L_JAVA},
- {"\"libhdr\"", L_BCPL},
- {"\"LIBHDR\"", L_BCPL},
- {"//", L_CC},
- {"template", L_CC},
- {"virtual", L_CC},
- {"class", L_CC},
- {"public:", L_CC},
- {"private:", L_CC},
- {"/*", L_C}, /* must precede "The", "the", etc. */
- {"#include", L_C},
- {"char", L_C},
- {"The", L_ENG},
- {"the", L_ENG},
- {"double", L_C},
- {"extern", L_C},
- {"float", L_C},
- {"struct", L_C},
- {"union", L_C},
- {"CFLAGS", L_MAKE},
- {"LDFLAGS", L_MAKE},
- {"all:", L_MAKE},
- {".PRECIOUS", L_MAKE},
- {".ascii", L_MACH},
- {".asciiz", L_MACH},
- {".byte", L_MACH},
- {".even", L_MACH},
- {".globl", L_MACH},
- {".text", L_MACH},
- {"clr", L_MACH},
- {"(input,", L_PAS},
- {"program", L_PAS},
- {"record", L_PAS},
- {"dcl", L_PLI},
- {"Received:", L_MAIL},
- {">From", L_MAIL},
- {"Return-Path:",L_MAIL},
- {"Cc:", L_MAIL},
- {"Newsgroups:", L_NEWS},
- {"Path:", L_NEWS},
- {"Organization:",L_NEWS},
- {"href=", L_HTML},
- {"HREF=", L_HTML},
- {"<body", L_HTML},
- {"<BODY", L_HTML},
- {"<html", L_HTML},
- {"<HTML", L_HTML},
- {"<!--", L_HTML},
-};
-#define NNAMES (sizeof(names)/sizeof(struct names))
diff --git a/usr.bin/file/patchlevel.h b/usr.bin/file/patchlevel.h
deleted file mode 100644
index 4e540831195..00000000000
--- a/usr.bin/file/patchlevel.h
+++ /dev/null
@@ -1,348 +0,0 @@
-/* $OpenBSD: patchlevel.h,v 1.9 2009/04/24 18:54:34 chl Exp $ */
-
-#define FILE_VERSION_MAJOR 4
-#define patchlevel 24
-
-/*
- * Patchlevel file for Ian Darwin's MAGIC command.
- * $File: patchlevel.h,v 1.68 2008/03/22 21:39:43 christos Exp $
- *
- * $Log: patchlevel.h,v $
- * Revision 1.9 2009/04/24 18:54:34 chl
- * file update to 4.24
- *
- * The '-i' switch is now enabled so file(1) can output mime type strings.
- *
- * ok ian@
- * builk ports build test on amd64 by jasper@
- *
- * ok ray@ gilles@ on a almost identical diff
- * builk ports build test on sparc64 on this almost identical diff by ajacoutot@
- * also tested by landry@
- *
- * Revision 1.68 2008/03/22 21:39:43 christos
- * file 4.24
- *
- * Revision 1.67 2007/12/28 20:08:40 christos
- * welcome to 4.23.
- *
- * Revision 1.66 2007/12/27 16:38:24 christos
- * welcome to 4.22
- *
- * Revision 1.65 2007/05/24 17:22:27 christos
- * Welcome to 4.21
- *
- * Revision 1.64 2007/03/01 22:14:55 christos
- * welcome to 4.20
- *
- * Revision 1.63 2007/01/12 17:38:28 christos
- * Use File id.
- *
- * Revision 1.62 2006/12/11 21:49:58 christos
- * time for 4.19
- *
- * Revision 1.61 2006/10/31 21:18:09 christos
- * bump
- *
- * Revision 1.60 2006/03/02 22:15:12 christos
- * welcome to 4.17
- *
- * Revision 1.59 2005/10/17 17:15:21 christos
- * welcome to 4.16
- *
- * Revision 1.58 2005/08/18 15:52:56 christos
- * welcome to 4.15
- *
- * Revision 1.57 2005/06/25 15:52:14 christos
- * Welcome to 4.14
- *
- * Revision 1.56 2005/02/09 19:25:13 christos
- * Welcome to 4.13
- *
- * Revision 1.55 2004/11/24 18:57:47 christos
- * Re-do the autoconf stuff once more; passes make dist now.
- *
- * Revision 1.54 2004/11/21 05:52:05 christos
- * ready for 4.11
- *
- * Revision 1.53 2004/07/24 20:40:46 christos
- * welcome to 4.10
- *
- * Revision 1.52 2004/04/07 00:32:25 christos
- * welcome to 4.09
- *
- * Revision 1.51 2004/03/22 21:17:11 christos
- * welcome to 4.08.
- *
- * Revision 1.50 2003/12/23 17:34:04 christos
- * 4.07
- *
- * Revision 1.49 2003/10/15 02:08:27 christos
- * welcome to 4.06
- *
- * Revision 1.48 2003/09/12 19:41:14 christos
- * this is 4.04
- *
- * Revision 1.47 2003/05/23 21:38:21 christos
- * welcome to 4.03
- *
- * Revision 1.46 2003/04/02 18:57:43 christos
- * prepare for 4.02
- *
- * Revision 1.45 2003/03/26 15:37:25 christos
- * - Pass lint
- * - make NULL in magic_file mean stdin
- * - Fix "-" argument to file to pass NULL to magic_file
- * - avoid pointer casts by using memcpy
- * - rename magic_buf -> magic_buffer
- * - keep only the first error
- * - manual page: new sentence, new line
- * - fix typo in api function (magic_buf -> magic_buffer)
- *
- * Revision 1.44 2003/03/23 22:23:31 christos
- * finish librarification.
- *
- * Revision 1.43 2003/03/23 21:16:26 christos
- * update copyrights.
- *
- * Revision 1.42 2003/03/23 04:06:05 christos
- * Library re-organization
- *
- * Revision 1.41 2003/02/27 20:53:45 christos
- * - fix memory allocation problem (Jeff Johnson)
- * - fix stack overflow corruption (David Endler)
- * - fixes from NetBSD source (Antti Kantee)
- * - magic fixes
- *
- * Revision 1.40 2003/02/08 18:33:53 christos
- * - detect inttypes.h too (Dave Love <d.love@dl.ac.uk>)
- * - eliminate unsigned char warnings (Petter Reinholdtsen <pere@hungry.com>)
- * - better elf PT_NOTE handling (Nalin Dahyabhai <nalin@redhat.com>)
- * - add options to format the output differently
- * - much more magic.
- *
- * Revision 1.39 2002/07/03 18:57:52 christos
- * - ansify/c99ize
- * - more magic
- * - better COMPILE_ONLY support.
- * - new magic files.
- * - fix solaris compilation problems.
- *
- * Revision 1.38 2002/05/16 18:45:56 christos
- * - pt_note elf additions from NetBSD
- * - EMX os specific changes (Alexander Mai)
- * - stdint.h detection, acconfig.h fixes (Maciej W. Rozycki, Franz Korntner)
- * - regex file additions (Kim Cromie)
- * - getopt_long support and misc cleanups (Michael Piefel)
- * - many magic fixes and additions
- *
- * Revision 1.37 2001/09/03 14:44:22 christos
- * daylight/tm_isdst detection
- * magic fixes
- * don't eat the whole file if it has only nulls
- *
- * Revision 1.36 2001/07/22 21:04:15 christos
- * - magic fixes
- * - add new operators, pascal strings, UTC date printing, $HOME/.magic
- * [from "Tom N Harris" <telliamed@mac.com>]
- *
- * Revision 1.35 2001/04/24 14:40:25 christos
- * - rename magic file sgi to mips and fix it
- * - add support for building magic.mgc
- * - portability fixes for mmap()
- * - try gzip before uncompress, because uncompress sometimes hangs
- * - be more conservative about pipe reads and writes
- * - many magic fixes
- *
- * Revision 1.34 2001/03/12 05:05:57 christos
- * - new compiled magic format
- * - lots of magic additions
- *
- * Revision 1.33 2000/11/13 00:30:50 christos
- * - wordperfect magic fix: freebsd pr 9388
- * - more msdos fixes from freebsd pr's 20131 and 20812
- * - sas and spss magic [Bruce Foster]
- * - mkinstalldirs [John Fremlin]
- * - sgi opengl fixes [Michael Pruett]
- * - netbsd magic fixes [Ignatios Souvatzis]
- * - audio additions [Michael Pruett]
- * - fix problem with non ansi RCSID [Andreas Ley]
- * - oggs magic [Felix von Leitner]
- * - gmon magic [Eugen Dedu]
- * - TNEF magic [Joomy]
- * - netpbm magic and misc other image stuff [Bryan Henderson]
- *
- * Revision 1.32 2000/08/05 18:24:18 christos
- * Correct indianness detection in elf (Charles Hannum)
- * FreeBSD elf core support (Guy Harris)
- * Use gzip in systems that don't have uncompress (Anthon van der Neut)
- * Internationalization/EBCDIC support (Eric Fisher)
- * Many many magic changes
- *
- * Revision 1.31 2000/05/14 17:58:36 christos
- * - new magic for claris files
- * - new magic for mathematica and maple files
- * - new magic for msvc files
- * - new -k flag to keep going matching all possible entries
- * - add the word executable on #! magic files, and fix the usage of
- * the word script
- * - lots of other magic fixes
- * - fix typo test -> text
- *
- * Revision 1.30 2000/04/11 02:41:17 christos
- * - add support for mime output (-i)
- * - make sure we free memory in case realloc fails
- * - magic fixes
- *
- * Revision 1.29 1999/11/28 20:02:29 christos
- * new string/[Bcb] magic from anthon, and adjustments to the magic files to
- * use it.
- *
- * Revision 1.28 1999/10/31 22:11:48 christos
- * - add "char" type for compatibility with HP/UX
- * - recognize HP/UX syntax &=n etc.
- * - include errno.h for CYGWIN
- * - conditionalize the S_IS* macros
- * - revert the SHT_DYNSYM test that broke the linux stripped binaries test
- * - lots of Magdir changes
- *
- * Revision 1.27 1999/02/14 17:21:41 christos
- * Automake support and misc cleanups from Rainer Orth
- * Enable reading character and block special files from Dale R. Worley
- *
- * Revision 1.26 1998/09/12 13:19:39 christos
- * - add support for bi-endian indirect offsets (Richard Verhoeven)
- * - add recognition for bcpl (Joseph Myers)
- * - remove non magic files from Magdir to avoid difficulties building
- * on os2 where files are case independent
- * - magic fixes.
- *
- * Revision 1.25 1998/06/27 14:04:04 christos
- * OLF patch Guy Harris
- * Recognize java/html (debian linux)
- * Const poisoning (debian linux)
- * More magic!
- *
- * Revision 1.24 1998/02/15 23:20:38 christos
- * Autoconf patch: Felix von Leitner <leitner@math.fu-berlin.de>
- * More magic fixes
- * Elf64 fixes
- *
- * Revision 1.23 1997/11/05 16:03:37 christos
- * - correct elf prps offset for SunOS-2.5.1 [guy@netapp.com]
- * - handle 64 bit time_t's correctly [ewt@redhat.com]
- * - new mime style magic [clarosse@netvista.net]
- * - new TI calculator magic [rmcguire@freenet.columbus.oh.us]
- * - new figlet fonts [obrien@freebsd.org]
- * - new cisco magic, and elf fixes [jhawk@bbnplanet.com]
- * - -b flag addition, and x86 filesystem magic [vax@linkhead.paranoia.com]
- * - s/Mpeg/MPEG, header and elf typo fixes [guy@netapp.com]
- * - Windows/NT registry files, audio code [guy@netapp.com]
- * - libGrx graphics lib fonts [guy@netapp.com]
- * - PNG fixes [guy@netapp.com]
- * - more m$ document magic [guy@netapp.com]
- * - PPD files [guy@netapp.com]
- * - archive magic cleanup [guy@netapp.com]
- * - linux kernel magic cleanup [guy@netapp.com]
- * - lecter magic [guy@netapp.com]
- * - vgetty magic [guy@netapp.com]
- * - sniffer additions [guy@netapp.com]
- *
- * Revision 1.22 1997/01/15 17:23:24 christos
- * - add support for elf core files: find the program name under SVR4 [Ken Pizzini]
- * - print strings only up to the first carriage return [various]
- * - freebsd international ascii support [J Wunsch]
- * - magic fixes and additions [Guy Harris]
- * - 64 bit fixes [Larry Schwimmer]
- * - support for both utime and utimes, but don't restore file access times
- * by default [various]
- * - \xXX only takes 2 hex digits, not 3.
- * - re-implement support for core files [Guy Harris]
- *
- * Revision 1.21 1996/10/05 18:15:29 christos
- * Segregate elf stuff and conditionally enable it with -DBUILTIN_ELF
- * More magic fixes
- *
- * Revision 1.20 1996/06/22 22:15:52 christos
- * - support relative offsets of the form >&
- * - fix bug with truncating magic strings that contain \n
- * - file -f - did not read from stdin as documented
- * - support elf file parsing using our own elf support.
- * - as always magdir fixes and additions.
- *
- * Revision 1.19 1995/10/27 23:14:46 christos
- * Ability to parse colon separated list of magic files
- * New LEGAL.NOTICE
- * Various magic file changes
- *
- * Revision 1.18 1995/05/20 22:09:21 christos
- * Passed incorrect argument to eatsize().
- * Use %ld and %lx where appropriate.
- * Remove unused variables
- * ELF support for both big and little endian
- * Fixes for small files again.
- *
- * Revision 1.17 1995/04/28 17:29:13 christos
- * - Incorrect nroff detection fix from der Mouse
- * - Lost and incorrect magic entries.
- * - Added ELF stripped binary detection [in C; ugh]
- * - Look for $MAGIC to find the magic file.
- * - Eat trailing size specifications from numbers i.e. ignore 10L
- * - More fixes for very short files
- *
- * Revision 1.16 1995/03/25 22:06:45 christos
- * - use strtoul() where it exists.
- * - fix sign-extend bug
- * - try to detect tar archives before nroff files, otherwise
- * tar files where the first file starts with a . will not work
- *
- * Revision 1.15 1995/01/21 21:03:35 christos
- * Added CSECTION for the file man page
- * Added version flag -v
- * Fixed bug with -f input flag (from iorio@violet.berkeley.edu)
- * Lots of magic fixes and reorganization...
- *
- * Revision 1.14 1994/05/03 17:58:23 christos
- * changes from mycroft@gnu.ai.mit.edu (Charles Hannum) for unsigned
- *
- * Revision 1.13 1994/01/21 01:27:01 christos
- * Fixed null termination bug from Don Seeley at BSDI in ascmagic.c
- *
- * Revision 1.12 1993/10/27 20:59:05 christos
- * Changed -z flag to understand gzip format too.
- * Moved builtin compression detection to a table, and move
- * the compress magic entry out of the source.
- * Made printing of numbers unsigned, and added the mask to it.
- * Changed the buffer size to 8k, because gzip will refuse to
- * unzip just a few bytes.
- *
- * Revision 1.11 1993/09/24 18:49:06 christos
- * Fixed small bug in softmagic.c introduced by
- * copying the data to be examined out of the input
- * buffer. Changed the Makefile to use sed to create
- * the correct man pages.
- *
- * Revision 1.10 1993/09/23 21:56:23 christos
- * Passed purify. Fixed indirections. Fixed byte order printing.
- * Fixed segmentation faults caused by referencing past the end
- * of the magic buffer. Fixed bus errors caused by referencing
- * unaligned shorts or longs.
- *
- * Revision 1.9 1993/03/24 14:23:40 ian
- * Batch of minor changes from several contributors.
- *
- * Revision 1.8 93/02/19 15:01:26 ian
- * Numerous changes from Guy Harris too numerous to mention but including
- * byte-order independance, fixing "old-style masking", etc. etc. A bugfix
- * for broken symlinks from martin@@d255s004.zfe.siemens.de.
- *
- * Revision 1.7 93/01/05 14:57:27 ian
- * Couple of nits picked by Christos (again, thanks).
- *
- * Revision 1.6 93/01/05 13:51:09 ian
- * Lotsa work on the Magic directory.
- *
- * Revision 1.5 92/09/14 14:54:51 ian
- * Fix a tiny null-pointer bug in previous fix for tar archive + uncompress.
- *
- */
diff --git a/usr.bin/file/print.c b/usr.bin/file/print.c
deleted file mode 100644
index 8b5b5d63b90..00000000000
--- a/usr.bin/file/print.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/* $OpenBSD: print.c,v 1.17 2013/04/17 15:01:26 deraadt Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * print.c - debugging printout routines
- */
-
-#include "file.h"
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <time.h>
-
-#define SZOF(a) (sizeof(a) / sizeof(a[0]))
-
-#ifndef COMPILE_ONLY
-protected void
-file_mdump(struct magic *m)
-{
- private const char optyp[] = { FILE_OPS };
-
- (void) fprintf(stderr, "[%u", m->lineno);
- (void) fprintf(stderr, ">>>>>>>> %u" + 8 - (m->cont_level & 7),
- m->offset);
-
- if (m->flag & INDIR) {
- (void) fprintf(stderr, "(%s,",
- /* Note: type is unsigned */
- (m->in_type < file_nnames) ?
- file_names[m->in_type] : "*bad*");
- if (m->in_op & FILE_OPINVERSE)
- (void) fputc('~', stderr);
- (void) fprintf(stderr, "%c%u),",
- ((m->in_op & FILE_OPS_MASK) < SZOF(optyp)) ?
- optyp[m->in_op & FILE_OPS_MASK] : '?',
- m->in_offset);
- }
- (void) fprintf(stderr, " %s%s", (m->flag & UNSIGNED) ? "u" : "",
- /* Note: type is unsigned */
- (m->type < file_nnames) ? file_names[m->type] : "*bad*");
- if (m->mask_op & FILE_OPINVERSE)
- (void) fputc('~', stderr);
-
- if (IS_STRING(m->type)) {
- if (m->str_flags) {
- (void) fputc('/', stderr);
- if (m->str_flags & STRING_COMPACT_BLANK)
- (void) fputc(CHAR_COMPACT_BLANK, stderr);
- if (m->str_flags & STRING_COMPACT_OPTIONAL_BLANK)
- (void) fputc(CHAR_COMPACT_OPTIONAL_BLANK,
- stderr);
- if (m->str_flags & STRING_IGNORE_LOWERCASE)
- (void) fputc(CHAR_IGNORE_LOWERCASE, stderr);
- if (m->str_flags & STRING_IGNORE_UPPERCASE)
- (void) fputc(CHAR_IGNORE_UPPERCASE, stderr);
- if (m->str_flags & REGEX_OFFSET_START)
- (void) fputc(CHAR_REGEX_OFFSET_START, stderr);
- }
- if (m->str_range)
- (void) fprintf(stderr, "/%u", m->str_range);
- }
- else {
- if ((m->mask_op & FILE_OPS_MASK) < SZOF(optyp))
- (void) fputc(optyp[m->mask_op & FILE_OPS_MASK], stderr);
- else
- (void) fputc('?', stderr);
-
- if (m->num_mask) {
- (void) fprintf(stderr, "%.8llx",
- (unsigned long long)m->num_mask);
- }
- }
- (void) fprintf(stderr, ",%c", m->reln);
-
- if (m->reln != 'x') {
- switch (m->type) {
- case FILE_BYTE:
- case FILE_SHORT:
- case FILE_LONG:
- case FILE_LESHORT:
- case FILE_LELONG:
- case FILE_MELONG:
- case FILE_BESHORT:
- case FILE_BELONG:
- (void) fprintf(stderr, "%d", m->value.l);
- break;
- case FILE_BEQUAD:
- case FILE_LEQUAD:
- case FILE_QUAD:
- (void) fprintf(stderr, "%lld",
- (unsigned long long)m->value.q);
- break;
- case FILE_PSTRING:
- case FILE_STRING:
- case FILE_REGEX:
- case FILE_BESTRING16:
- case FILE_LESTRING16:
- case FILE_SEARCH:
- file_showstr(stderr, m->value.s, (size_t)m->vallen);
- break;
- case FILE_DATE:
- case FILE_LEDATE:
- case FILE_BEDATE:
- case FILE_MEDATE:
- (void)fprintf(stderr, "%s,",
- file_fmttime(m->value.l, 1));
- break;
- case FILE_LDATE:
- case FILE_LELDATE:
- case FILE_BELDATE:
- case FILE_MELDATE:
- (void)fprintf(stderr, "%s,",
- file_fmttime(m->value.l, 0));
- break;
- case FILE_QDATE:
- case FILE_LEQDATE:
- case FILE_BEQDATE:
- (void)fprintf(stderr, "%s,",
- file_fmttime(m->value.q, 1));
- break;
- case FILE_QLDATE:
- case FILE_LEQLDATE:
- case FILE_BEQLDATE:
- (void)fprintf(stderr, "%s,",
- file_fmttime(m->value.q, 0));
- break;
- case FILE_FLOAT:
- case FILE_BEFLOAT:
- case FILE_LEFLOAT:
- (void) fprintf(stderr, "%G", m->value.f);
- break;
- case FILE_DOUBLE:
- case FILE_BEDOUBLE:
- case FILE_LEDOUBLE:
- (void) fprintf(stderr, "%G", m->value.d);
- break;
- case FILE_DEFAULT:
- /* XXX - do anything here? */
- break;
- default:
- (void) fputs("*bad*", stderr);
- break;
- }
- }
- (void) fprintf(stderr, ",\"%s\"]\n", m->desc);
-}
-#endif
-
-/*VARARGS*/
-protected void
-file_magwarn(struct magic_set *ms, const char *f, ...)
-{
- va_list va;
-
- /* cuz we use stdout for most, stderr here */
- (void) fflush(stdout);
-
- if (ms->file)
- (void) fprintf(stderr, "%s, %lu: ", ms->file,
- (unsigned long)ms->line);
- (void) fprintf(stderr, "Warning: ");
- va_start(va, f);
- (void) vfprintf(stderr, f, va);
- va_end(va);
- (void) fputc('\n', stderr);
-}
-
-protected const char *
-file_fmttime(uint64_t v, int local)
-{
- char *pp;
- time_t t = (time_t)v;
- struct tm *tm;
-
- if (local) {
- pp = ctime(&t);
- } else {
-#ifndef HAVE_DAYLIGHT
- private int daylight = 0;
-#ifdef HAVE_TM_ISDST
- private time_t now = (time_t)0;
-
- if (now == (time_t)0) {
- struct tm *tm1;
- (void)time(&now);
- tm1 = localtime(&now);
- if (tm1 == NULL)
- return "*Invalid time*";
- daylight = tm1->tm_isdst;
- }
-#endif /* HAVE_TM_ISDST */
-#endif /* HAVE_DAYLIGHT */
- if (daylight)
- t += 3600;
- tm = gmtime(&t);
- if (tm == NULL)
- return "*Invalid time*";
- pp = asctime(tm);
- }
-
- pp[strcspn(pp, "\n")] = '\0';
- return pp;
-}
diff --git a/usr.bin/file/readelf.c b/usr.bin/file/readelf.c
deleted file mode 100644
index f328873f73e..00000000000
--- a/usr.bin/file/readelf.c
+++ /dev/null
@@ -1,1020 +0,0 @@
-/* $OpenBSD: readelf.c,v 1.12 2014/11/04 16:18:54 deraadt Exp $ */
-/*
- * Copyright (c) Christos Zoulas 2003.
- * All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include "file.h"
-
-#ifdef BUILTIN_ELF
-#include <string.h>
-#include <ctype.h>
-#include <stdlib.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include "readelf.h"
-#include "magic.h"
-
-#ifdef ELFCORE
-private int dophn_core(struct magic_set *, int, int, int, off_t, int, size_t,
- off_t, int *);
-#endif
-private int dophn_exec(struct magic_set *, int, int, int, off_t, int, size_t,
- off_t, int *);
-private int doshn(struct magic_set *, int, int, int, off_t, int, size_t, int *);
-private size_t donote(struct magic_set *, unsigned char *, size_t, size_t, int,
- int, size_t, int *);
-
-#define ELF_ALIGN(a) ((((a) + align - 1) / align) * align)
-
-#define isquote(c) (strchr("'\"`", (c)) != NULL)
-
-private uint16_t getu16(int, uint16_t);
-private uint32_t getu32(int, uint32_t);
-private uint64_t getu64(int, uint64_t);
-
-private uint16_t
-getu16(int swap, uint16_t value)
-{
- union {
- uint16_t ui;
- char c[2];
- } retval, tmpval;
-
- if (swap) {
- tmpval.ui = value;
-
- retval.c[0] = tmpval.c[1];
- retval.c[1] = tmpval.c[0];
-
- return retval.ui;
- } else
- return value;
-}
-
-private uint32_t
-getu32(int swap, uint32_t value)
-{
- union {
- uint32_t ui;
- char c[4];
- } retval, tmpval;
-
- if (swap) {
- tmpval.ui = value;
-
- retval.c[0] = tmpval.c[3];
- retval.c[1] = tmpval.c[2];
- retval.c[2] = tmpval.c[1];
- retval.c[3] = tmpval.c[0];
-
- return retval.ui;
- } else
- return value;
-}
-
-private uint64_t
-getu64(int swap, uint64_t value)
-{
- union {
- uint64_t ui;
- char c[8];
- } retval, tmpval;
-
- if (swap) {
- tmpval.ui = value;
-
- retval.c[0] = tmpval.c[7];
- retval.c[1] = tmpval.c[6];
- retval.c[2] = tmpval.c[5];
- retval.c[3] = tmpval.c[4];
- retval.c[4] = tmpval.c[3];
- retval.c[5] = tmpval.c[2];
- retval.c[6] = tmpval.c[1];
- retval.c[7] = tmpval.c[0];
-
- return retval.ui;
- } else
- return value;
-}
-
-#define elf_getu16(swap, value) getu16(swap, value)
-#define elf_getu32(swap, value) getu32(swap, value)
-#ifdef USE_ARRAY_FOR_64BIT_TYPES
-# define elf_getu64(swap, array) \
- ((swap ? ((uint64_t)elf_getu32(swap, array[0])) << 32 : elf_getu32(swap, array[0])) + \
- (swap ? elf_getu32(swap, array[1]) : ((uint64_t)elf_getu32(swap, array[1]) << 32)))
-#else
-# define elf_getu64(swap, value) getu64(swap, value)
-#endif
-
-#define xsh_addr (class == ELFCLASS32 \
- ? (void *) &sh32 \
- : (void *) &sh64)
-#define xsh_sizeof (class == ELFCLASS32 \
- ? sizeof sh32 \
- : sizeof sh64)
-#define xsh_size (class == ELFCLASS32 \
- ? elf_getu32(swap, sh32.sh_size) \
- : elf_getu64(swap, sh64.sh_size))
-#define xsh_offset (class == ELFCLASS32 \
- ? elf_getu32(swap, sh32.sh_offset) \
- : elf_getu64(swap, sh64.sh_offset))
-#define xsh_type (class == ELFCLASS32 \
- ? elf_getu32(swap, sh32.sh_type) \
- : elf_getu32(swap, sh64.sh_type))
-#define xph_addr (class == ELFCLASS32 \
- ? (void *) &ph32 \
- : (void *) &ph64)
-#define xph_sizeof (class == ELFCLASS32 \
- ? sizeof ph32 \
- : sizeof ph64)
-#define xph_type (class == ELFCLASS32 \
- ? elf_getu32(swap, ph32.p_type) \
- : elf_getu32(swap, ph64.p_type))
-#define xph_offset (off_t)(class == ELFCLASS32 \
- ? elf_getu32(swap, ph32.p_offset) \
- : elf_getu64(swap, ph64.p_offset))
-#define xph_align (size_t)((class == ELFCLASS32 \
- ? (off_t) (ph32.p_align ? \
- elf_getu32(swap, ph32.p_align) : 4) \
- : (off_t) (ph64.p_align ? \
- elf_getu64(swap, ph64.p_align) : 4)))
-#define xph_filesz (size_t)((class == ELFCLASS32 \
- ? elf_getu32(swap, ph32.p_filesz) \
- : elf_getu64(swap, ph64.p_filesz)))
-#define xnh_addr (class == ELFCLASS32 \
- ? (void *) &nh32 \
- : (void *) &nh64)
-#define xph_memsz (size_t)((class == ELFCLASS32 \
- ? elf_getu32(swap, ph32.p_memsz) \
- : elf_getu64(swap, ph64.p_memsz)))
-#define xnh_sizeof (class == ELFCLASS32 \
- ? sizeof nh32 \
- : sizeof nh64)
-#define xnh_type (class == ELFCLASS32 \
- ? elf_getu32(swap, nh32.n_type) \
- : elf_getu32(swap, nh64.n_type))
-#define xnh_namesz (class == ELFCLASS32 \
- ? elf_getu32(swap, nh32.n_namesz) \
- : elf_getu32(swap, nh64.n_namesz))
-#define xnh_descsz (class == ELFCLASS32 \
- ? elf_getu32(swap, nh32.n_descsz) \
- : elf_getu32(swap, nh64.n_descsz))
-#define prpsoffsets(i) (class == ELFCLASS32 \
- ? prpsoffsets32[i] \
- : prpsoffsets64[i])
-
-#ifdef ELFCORE
-/*
- * Try larger offsets first to avoid false matches
- * from earlier data that happen to look like strings.
- */
-static const size_t prpsoffsets32[] = {
-#ifdef USE_NT_PSINFO
- 104, /* SunOS 5.x (command line) */
- 88, /* SunOS 5.x (short name) */
-#endif /* USE_NT_PSINFO */
-
- 100, /* SunOS 5.x (command line) */
- 84, /* SunOS 5.x (short name) */
-
- 44, /* Linux (command line) */
- 28, /* Linux 2.0.36 (short name) */
-
- 8, /* FreeBSD */
-};
-
-static const size_t prpsoffsets64[] = {
-#ifdef USE_NT_PSINFO
- 152, /* SunOS 5.x (command line) */
- 136, /* SunOS 5.x (short name) */
-#endif /* USE_NT_PSINFO */
-
- 136, /* SunOS 5.x, 64-bit (command line) */
- 120, /* SunOS 5.x, 64-bit (short name) */
-
- 56, /* Linux (command line) */
- 40, /* Linux (tested on core from 2.4.x, short name) */
-
- 16, /* FreeBSD, 64-bit */
-};
-
-#define NOFFSETS32 (sizeof prpsoffsets32 / sizeof prpsoffsets32[0])
-#define NOFFSETS64 (sizeof prpsoffsets64 / sizeof prpsoffsets64[0])
-
-#define NOFFSETS (class == ELFCLASS32 ? NOFFSETS32 : NOFFSETS64)
-
-/*
- * Look through the program headers of an executable image, searching
- * for a PT_NOTE section of type NT_PRPSINFO, with a name "CORE" or
- * "FreeBSD"; if one is found, try looking in various places in its
- * contents for a 16-character string containing only printable
- * characters - if found, that string should be the name of the program
- * that dropped core. Note: right after that 16-character string is,
- * at least in SunOS 5.x (and possibly other SVR4-flavored systems) and
- * Linux, a longer string (80 characters, in 5.x, probably other
- * SVR4-flavored systems, and Linux) containing the start of the
- * command line for that program.
- *
- * SunOS 5.x core files contain two PT_NOTE sections, with the types
- * NT_PRPSINFO (old) and NT_PSINFO (new). These structs contain the
- * same info about the command name and command line, so it probably
- * isn't worthwhile to look for NT_PSINFO, but the offsets are provided
- * above (see USE_NT_PSINFO), in case we ever decide to do so. The
- * NT_PRPSINFO and NT_PSINFO sections are always in order and adjacent;
- * the SunOS 5.x file command relies on this (and prefers the latter).
- *
- * The signal number probably appears in a section of type NT_PRSTATUS,
- * but that's also rather OS-dependent, in ways that are harder to
- * dissect with heuristics, so I'm not bothering with the signal number.
- * (I suppose the signal number could be of interest in situations where
- * you don't have the binary of the program that dropped core; if you
- * *do* have that binary, the debugger will probably tell you what
- * signal it was.)
- */
-
-#define OS_STYLE_SVR4 0
-#define OS_STYLE_FREEBSD 1
-#define OS_STYLE_NETBSD 2
-
-private const char os_style_names[][8] = {
- "SVR4",
- "FreeBSD",
- "NetBSD",
-};
-
-#define FLAGS_DID_CORE 1
-#define FLAGS_DID_NOTE 2
-#define FLAGS_DID_CORE_STYLE 4
-
-private int
-dophn_core(struct magic_set *ms, int class, int swap, int fd, off_t off,
- int num, size_t size, off_t fsize, int *flags)
-{
- Elf32_Phdr ph32;
- Elf64_Phdr ph64;
- size_t offset;
- unsigned char nbuf[BUFSIZ];
- ssize_t bufsize;
- off_t savedoffset;
- struct stat st;
-
- if (fstat(fd, &st) < 0) {
- file_badread(ms);
- return -1;
- }
-
- if (size != xph_sizeof) {
- if (file_printf(ms, ", corrupted program header size") == -1)
- return -1;
- return 0;
- }
-
- /*
- * Loop through all the program headers.
- */
- for ( ; num; num--) {
- if ((savedoffset = lseek(fd, off, SEEK_SET)) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
- if (read(fd, xph_addr, xph_sizeof) == -1) {
- file_badread(ms);
- return -1;
- }
- if (xph_offset > fsize) {
- if (lseek(fd, savedoffset, SEEK_SET) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
- continue;
- }
-
- off += size;
- if (xph_type != PT_NOTE)
- continue;
-
- /*
- * This is a PT_NOTE section; loop through all the notes
- * in the section.
- */
- if (lseek(fd, xph_offset, SEEK_SET) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
- bufsize = read(fd, nbuf,
- ((xph_filesz < sizeof(nbuf)) ? xph_filesz : sizeof(nbuf)));
- if (bufsize == -1) {
- file_badread(ms);
- return -1;
- }
- offset = 0;
- for (;;) {
- if (offset >= (size_t)bufsize)
- break;
- offset = donote(ms, nbuf, offset, (size_t)bufsize,
- class, swap, 4, flags);
- if (offset == 0)
- break;
-
- }
- }
- return 0;
-}
-#endif
-
-private size_t
-donote(struct magic_set *ms, unsigned char *nbuf, size_t offset, size_t size,
- int class, int swap, size_t align, int *flags)
-{
- Elf32_Nhdr nh32;
- Elf64_Nhdr nh64;
- size_t noff, doff;
-#ifdef ELFCORE
- int os_style = -1;
-#endif
- uint32_t namesz, descsz;
-
- if (xnh_sizeof + offset > size) {
- /*
- * We're out of note headers.
- */
- return xnh_sizeof + offset;
- }
-
- (void)memcpy(xnh_addr, &nbuf[offset], xnh_sizeof);
- offset += xnh_sizeof;
-
- namesz = xnh_namesz;
- descsz = xnh_descsz;
- if ((namesz == 0) && (descsz == 0)) {
- /*
- * We're out of note headers.
- */
- return (offset >= size) ? offset : size;
- }
-
- if (namesz & 0x80000000) {
- (void)file_printf(ms, ", bad note name size 0x%lx",
- (unsigned long)namesz);
- return offset;
- }
-
- if (descsz & 0x80000000) {
- (void)file_printf(ms, ", bad note description size 0x%lx",
- (unsigned long)descsz);
- return offset;
- }
-
-
- noff = offset;
- doff = ELF_ALIGN(offset + namesz);
-
- if (offset + namesz > size) {
- /*
- * We're past the end of the buffer.
- */
- return doff;
- }
-
- offset = ELF_ALIGN(doff + descsz);
- if (doff + descsz > size) {
- /*
- * We're past the end of the buffer.
- */
- return (offset >= size) ? offset : size;
- }
-
- if (*flags & FLAGS_DID_NOTE)
- goto core;
-
- if (namesz == 4 && strcmp((char *)&nbuf[noff], "GNU") == 0 &&
- xnh_type == NT_GNU_VERSION && descsz == 16) {
- uint32_t desc[4];
- (void)memcpy(desc, &nbuf[doff], sizeof(desc));
-
- if (file_printf(ms, ", for GNU/") == -1)
- return size;
- switch (elf_getu32(swap, desc[0])) {
- case GNU_OS_LINUX:
- if (file_printf(ms, "Linux") == -1)
- return size;
- break;
- case GNU_OS_HURD:
- if (file_printf(ms, "Hurd") == -1)
- return size;
- break;
- case GNU_OS_SOLARIS:
- if (file_printf(ms, "Solaris") == -1)
- return size;
- break;
- case GNU_OS_KFREEBSD:
- if (file_printf(ms, "kFreeBSD") == -1)
- return size;
- break;
- case GNU_OS_KNETBSD:
- if (file_printf(ms, "kNetBSD") == -1)
- return size;
- break;
- default:
- if (file_printf(ms, "<unknown>") == -1)
- return size;
- }
- if (file_printf(ms, " %d.%d.%d", elf_getu32(swap, desc[1]),
- elf_getu32(swap, desc[2]), elf_getu32(swap, desc[3])) == -1)
- return size;
- *flags |= FLAGS_DID_NOTE;
- return size;
- }
-
- if (namesz == 7 && strcmp((char *)&nbuf[noff], "NetBSD") == 0 &&
- xnh_type == NT_NETBSD_VERSION && descsz == 4) {
- uint32_t desc;
- (void)memcpy(&desc, &nbuf[doff], sizeof(desc));
- desc = elf_getu32(swap, desc);
-
- if (file_printf(ms, ", for NetBSD") == -1)
- return size;
- /*
- * The version number used to be stuck as 199905, and was thus
- * basically content-free. Newer versions of NetBSD have fixed
- * this and now use the encoding of __NetBSD_Version__:
- *
- * MMmmrrpp00
- *
- * M = major version
- * m = minor version
- * r = release ["",A-Z,Z[A-Z] but numeric]
- * p = patchlevel
- */
- if (desc > 100000000U) {
- uint32_t ver_patch = (desc / 100) % 100;
- uint32_t ver_rel = (desc / 10000) % 100;
- uint32_t ver_min = (desc / 1000000) % 100;
- uint32_t ver_maj = desc / 100000000;
-
- if (file_printf(ms, " %u.%u", ver_maj, ver_min) == -1)
- return size;
- if (ver_rel == 0 && ver_patch != 0) {
- if (file_printf(ms, ".%u", ver_patch) == -1)
- return size;
- } else if (ver_rel != 0) {
- while (ver_rel > 26) {
- if (file_printf(ms, "Z") == -1)
- return size;
- ver_rel -= 26;
- }
- if (file_printf(ms, "%c", 'A' + ver_rel - 1)
- == -1)
- return size;
- }
- }
- *flags |= FLAGS_DID_NOTE;
- return size;
- }
-
- if (namesz == 8 && strcmp((char *)&nbuf[noff], "FreeBSD") == 0 &&
- xnh_type == NT_FREEBSD_VERSION && descsz == 4) {
- uint32_t desc;
- (void)memcpy(&desc, &nbuf[doff], sizeof(desc));
- desc = elf_getu32(swap, desc);
- if (file_printf(ms, ", for FreeBSD") == -1)
- return size;
-
- /*
- * Contents is __FreeBSD_version, whose relation to OS
- * versions is defined by a huge table in the Porter's
- * Handbook. This is the general scheme:
- *
- * Releases:
- * Mmp000 (before 4.10)
- * Mmi0p0 (before 5.0)
- * Mmm0p0
- *
- * Development branches:
- * Mmpxxx (before 4.6)
- * Mmp1xx (before 4.10)
- * Mmi1xx (before 5.0)
- * M000xx (pre-M.0)
- * Mmm1xx
- *
- * M = major version
- * m = minor version
- * i = minor version increment (491000 -> 4.10)
- * p = patchlevel
- * x = revision
- *
- * The first release of FreeBSD to use ELF by default
- * was version 3.0.
- */
- if (desc == 460002) {
- if (file_printf(ms, " 4.6.2") == -1)
- return size;
- } else if (desc < 460100) {
- if (file_printf(ms, " %d.%d", desc / 100000,
- desc / 10000 % 10) == -1)
- return size;
- if (desc / 1000 % 10 > 0)
- if (file_printf(ms, ".%d", desc / 1000 % 10)
- == -1)
- return size;
- if ((desc % 1000 > 0) || (desc % 100000 == 0))
- if (file_printf(ms, " (%d)", desc) == -1)
- return size;
- } else if (desc < 500000) {
- if (file_printf(ms, " %d.%d", desc / 100000,
- desc / 10000 % 10 + desc / 1000 % 10) == -1)
- return size;
- if (desc / 100 % 10 > 0) {
- if (file_printf(ms, " (%d)", desc) == -1)
- return size;
- } else if (desc / 10 % 10 > 0) {
- if (file_printf(ms, ".%d", desc / 10 % 10)
- == -1)
- return size;
- }
- } else {
- if (file_printf(ms, " %d.%d", desc / 100000,
- desc / 1000 % 100) == -1)
- return size;
- if ((desc / 100 % 10 > 0) ||
- (desc % 100000 / 100 == 0)) {
- if (file_printf(ms, " (%d)", desc) == -1)
- return size;
- } else if (desc / 10 % 10 > 0) {
- if (file_printf(ms, ".%d", desc / 10 % 10)
- == -1)
- return size;
- }
- }
- *flags |= FLAGS_DID_NOTE;
- return size;
- }
-
- if (namesz == 8 && strcmp((char *)&nbuf[noff], "OpenBSD") == 0 &&
- xnh_type == NT_OPENBSD_VERSION && descsz == 4) {
- if (file_printf(ms, ", for OpenBSD") == -1)
- return size;
- /* Content of note is always 0 */
- *flags |= FLAGS_DID_NOTE;
- return size;
- }
-
- if (namesz == 10 && strcmp((char *)&nbuf[noff], "DragonFly") == 0 &&
- xnh_type == NT_DRAGONFLY_VERSION && descsz == 4) {
- uint32_t desc;
- if (file_printf(ms, ", for DragonFly") == -1)
- return size;
- (void)memcpy(&desc, &nbuf[doff], sizeof(desc));
- desc = elf_getu32(swap, desc);
- if (file_printf(ms, " %d.%d.%d", desc / 100000,
- desc / 10000 % 10, desc % 10000) == -1)
- return size;
- *flags |= FLAGS_DID_NOTE;
- return size;
- }
-
-core:
- /*
- * Sigh. The 2.0.36 kernel in Debian 2.1, at
- * least, doesn't correctly implement name
- * sections, in core dumps, as specified by
- * the "Program Linking" section of "UNIX(R) System
- * V Release 4 Programmer's Guide: ANSI C and
- * Programming Support Tools", because my copy
- * clearly says "The first 'namesz' bytes in 'name'
- * contain a *null-terminated* [emphasis mine]
- * character representation of the entry's owner
- * or originator", but the 2.0.36 kernel code
- * doesn't include the terminating null in the
- * name....
- */
- if ((namesz == 4 && strncmp((char *)&nbuf[noff], "CORE", 4) == 0) ||
- (namesz == 5 && strcmp((char *)&nbuf[noff], "CORE") == 0)) {
- os_style = OS_STYLE_SVR4;
- }
-
- if ((namesz == 8 && strcmp((char *)&nbuf[noff], "FreeBSD") == 0)) {
- os_style = OS_STYLE_FREEBSD;
- }
-
- if ((namesz >= 11 && strncmp((char *)&nbuf[noff], "NetBSD-CORE", 11)
- == 0)) {
- os_style = OS_STYLE_NETBSD;
- }
-
-#ifdef ELFCORE
- if ((*flags & FLAGS_DID_CORE) != 0)
- return size;
-
- if (os_style != -1 && (*flags & FLAGS_DID_CORE_STYLE) == 0) {
- if (file_printf(ms, ", %s-style", os_style_names[os_style])
- == -1)
- return size;
- *flags |= FLAGS_DID_CORE_STYLE;
- }
-
- switch (os_style) {
- case OS_STYLE_NETBSD:
- if (xnh_type == NT_NETBSD_CORE_PROCINFO) {
- uint32_t signo;
- /*
- * Extract the program name. It is at
- * offset 0x7c, and is up to 32-bytes,
- * including the terminating NUL.
- */
- if (file_printf(ms, ", from '%.31s'",
- &nbuf[doff + 0x7c]) == -1)
- return size;
-
- /*
- * Extract the signal number. It is at
- * offset 0x08.
- */
- (void)memcpy(&signo, &nbuf[doff + 0x08],
- sizeof(signo));
- if (file_printf(ms, " (signal %u)",
- elf_getu32(swap, signo)) == -1)
- return size;
- *flags |= FLAGS_DID_CORE;
- return size;
- }
- break;
-
- default:
- if (xnh_type == NT_PRPSINFO) {
- size_t i, j;
- unsigned char c;
- /*
- * Extract the program name. We assume
- * it to be 16 characters (that's what it
- * is in SunOS 5.x and Linux).
- *
- * Unfortunately, it's at a different offset
- * in various OSes, so try multiple offsets.
- * If the characters aren't all printable,
- * reject it.
- */
- for (i = 0; i < NOFFSETS; i++) {
- unsigned char *cname, *cp;
- size_t reloffset = prpsoffsets(i);
- size_t noffset = doff + reloffset;
- for (j = 0; j < 16; j++, noffset++,
- reloffset++) {
- /*
- * Make sure we're not past
- * the end of the buffer; if
- * we are, just give up.
- */
- if (noffset >= size)
- goto tryanother;
-
- /*
- * Make sure we're not past
- * the end of the contents;
- * if we are, this obviously
- * isn't the right offset.
- */
- if (reloffset >= descsz)
- goto tryanother;
-
- c = nbuf[noffset];
- if (c == '\0') {
- /*
- * A '\0' at the
- * beginning is
- * obviously wrong.
- * Any other '\0'
- * means we're done.
- */
- if (j == 0)
- goto tryanother;
- else
- break;
- } else {
- /*
- * A nonprintable
- * character is also
- * wrong.
- */
- if (!isprint(c) || isquote(c))
- goto tryanother;
- }
- }
- /*
- * Well, that worked.
- */
- cname = (unsigned char *)
- &nbuf[doff + prpsoffsets(i)];
- for (cp = cname; *cp && isprint(*cp); cp++)
- continue;
- /*
- * Linux apparently appends a space at the end
- * of the command line: remove it.
- */
- while (cp > cname && isspace(cp[-1]))
- cp--;
- if (file_printf(ms, ", from '%.*s'",
- (int)(cp - cname), cname) == -1)
- return size;
- *flags |= FLAGS_DID_CORE;
- return size;
-
- tryanother:
- ;
- }
- }
- break;
- }
-#endif
- return offset;
-}
-
-private int
-doshn(struct magic_set *ms, int class, int swap, int fd, off_t off, int num,
- size_t size, int *flags)
-{
- Elf32_Shdr sh32;
- Elf64_Shdr sh64;
- int stripped = 1;
- void *nbuf;
- off_t noff;
-
- if (size != xsh_sizeof) {
- if (file_printf(ms, ", corrupted section header size") == -1)
- return -1;
- return 0;
- }
-
- if (lseek(fd, off, SEEK_SET) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
-
- for ( ; num; num--) {
- if (read(fd, xsh_addr, xsh_sizeof) == -1) {
- file_badread(ms);
- return -1;
- }
- switch (xsh_type) {
- case SHT_SYMTAB:
-#if 0
- case SHT_DYNSYM:
-#endif
- stripped = 0;
- break;
- case SHT_NOTE:
- if ((off = lseek(fd, (off_t)0, SEEK_CUR)) ==
- (off_t)-1) {
- file_badread(ms);
- return -1;
- }
- if ((nbuf = malloc((size_t)xsh_size)) == NULL) {
- file_error(ms, errno, "Cannot allocate memory"
- " for note");
- return -1;
- }
- if ((noff = lseek(fd, (off_t)xsh_offset, SEEK_SET)) ==
- (off_t)-1) {
- file_badread(ms);
- free(nbuf);
- return -1;
- }
- if (read(fd, nbuf, (size_t)xsh_size) !=
- (ssize_t)xsh_size) {
- free(nbuf);
- file_badread(ms);
- return -1;
- }
-
- noff = 0;
- for (;;) {
- if (noff >= (size_t)xsh_size)
- break;
- noff = donote(ms, nbuf, (size_t)noff,
- (size_t)xsh_size, class, swap, 4,
- flags);
- if (noff == 0)
- break;
- }
- if ((lseek(fd, off, SEEK_SET)) == (off_t)-1) {
- free(nbuf);
- file_badread(ms);
- return -1;
- }
- free(nbuf);
- break;
- }
- }
- if (file_printf(ms, ", %sstripped", stripped ? "" : "not ") == -1)
- return -1;
- return 0;
-}
-
-/*
- * Look through the program headers of an executable image, searching
- * for a PT_INTERP section; if one is found, it's dynamically linked,
- * otherwise it's statically linked.
- */
-private int
-dophn_exec(struct magic_set *ms, int class, int swap, int fd, off_t off,
- int num, size_t size, off_t fsize, int *flags)
-{
- Elf32_Phdr ph32;
- Elf64_Phdr ph64;
- const char *linking_style = "statically";
- const char *shared_libraries = "";
- unsigned char nbuf[BUFSIZ];
- int bufsize;
- size_t offset, align;
- off_t savedoffset = (off_t)-1;
- struct stat st;
-
- if (fstat(fd, &st) < 0) {
- file_badread(ms);
- return -1;
- }
-
- if (size != xph_sizeof) {
- if (file_printf(ms, ", corrupted program header size") == -1)
- return -1;
- return 0;
- }
-
- if (lseek(fd, off, SEEK_SET) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
-
- for ( ; num; num--) {
- if (read(fd, xph_addr, xph_sizeof) == -1) {
- file_badread(ms);
- return -1;
- }
- if (xph_offset > st.st_size && savedoffset != (off_t)-1) {
- if (lseek(fd, savedoffset, SEEK_SET) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
- continue;
- }
-
- if ((savedoffset = lseek(fd, (off_t)0, SEEK_CUR)) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
-
- if (xph_offset > fsize) {
- if (lseek(fd, savedoffset, SEEK_SET) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
- continue;
- }
-
- switch (xph_type) {
- case PT_DYNAMIC:
- linking_style = "dynamically";
- break;
- case PT_INTERP:
- shared_libraries = " (uses shared libs)";
- break;
- case PT_NOTE:
- if ((align = xph_align) & 0x80000000) {
- if (file_printf(ms,
- ", invalid note alignment 0x%lx",
- (unsigned long)align) == -1)
- return -1;
- align = 4;
- }
- /*
- * This is a PT_NOTE section; loop through all the notes
- * in the section.
- */
- if (lseek(fd, xph_offset, SEEK_SET)
- == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
- bufsize = read(fd, nbuf, ((xph_filesz < sizeof(nbuf)) ?
- xph_filesz : sizeof(nbuf)));
- if (bufsize == -1) {
- file_badread(ms);
- return -1;
- }
- offset = 0;
- for (;;) {
- if (offset >= (size_t)bufsize)
- break;
- offset = donote(ms, nbuf, offset,
- (size_t)bufsize, class, swap, align,
- flags);
- if (offset == 0)
- break;
- }
- if (lseek(fd, savedoffset, SEEK_SET) == (off_t)-1) {
- file_badseek(ms);
- return -1;
- }
- break;
- default:
- break;
- }
- }
- if (file_printf(ms, ", %s linked%s", linking_style, shared_libraries)
- == -1)
- return -1;
- return 0;
-}
-
-
-protected int
-file_tryelf(struct magic_set *ms, int fd, const unsigned char *buf,
- size_t nbytes)
-{
- union {
- int32_t l;
- char c[sizeof (int32_t)];
- } u;
- int class;
- int swap;
- struct stat st;
- off_t fsize;
- int flags = 0;
- Elf32_Ehdr elf32hdr;
- Elf64_Ehdr elf64hdr;
- uint16_t type;
-
- if (ms->flags & MAGIC_MIME)
- return 0;
- /*
- * ELF executables have multiple section headers in arbitrary
- * file locations and thus file(1) cannot determine it from easily.
- * Instead we traverse thru all section headers until a symbol table
- * one is found or else the binary is stripped.
- * Return immediately if it's not ELF (so we avoid pipe2file unless needed).
- */
- if (buf[EI_MAG0] != ELFMAG0
- || (buf[EI_MAG1] != ELFMAG1 && buf[EI_MAG1] != OLFMAG1)
- || buf[EI_MAG2] != ELFMAG2 || buf[EI_MAG3] != ELFMAG3)
- return 0;
-
- /*
- * If we cannot seek, it must be a pipe, socket or fifo.
- */
- if((lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) && (errno == ESPIPE))
- fd = file_pipe2file(ms, fd, buf, nbytes);
-
- if (fstat(fd, &st) == -1) {
- file_badread(ms);
- return -1;
- }
- fsize = st.st_size;
-
- class = buf[EI_CLASS];
-
- switch (class) {
- case ELFCLASS32:
-#undef elf_getu
-#define elf_getu(a, b) elf_getu32(a, b)
-#undef elfhdr
-#define elfhdr elf32hdr
-#include "elfclass.h"
- case ELFCLASS64:
-#undef elf_getu
-#define elf_getu(a, b) elf_getu64(a, b)
-#undef elfhdr
-#define elfhdr elf64hdr
-#include "elfclass.h"
- default:
- if (file_printf(ms, ", unknown class %d", class) == -1)
- return -1;
- break;
- }
- return 0;
-}
-#endif
diff --git a/usr.bin/file/readelf.h b/usr.bin/file/readelf.h
deleted file mode 100644
index 6d5bc30158f..00000000000
--- a/usr.bin/file/readelf.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* $OpenBSD: readelf.h,v 1.7 2009/04/24 18:54:34 chl Exp $ */
-/*
- * Copyright (c) Christos Zoulas 2003.
- * All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * @(#)Id: readelf.h,v 1.9 2002/05/16 18:45:56 christos Exp
- *
- * Provide elf data structures for non-elf machines, allowing file
- * non-elf hosts to determine if an elf binary is stripped.
- * Note: cobbled from the linux header file, with modifications
- */
-#ifndef __fake_elf_h__
-#define __fake_elf_h__
-
-#if HAVE_STDINT_H
-#include <stdint.h>
-#endif
-
-typedef uint32_t Elf32_Addr;
-typedef uint32_t Elf32_Off;
-typedef uint16_t Elf32_Half;
-typedef uint32_t Elf32_Word;
-typedef uint8_t Elf32_Char;
-
-#if SIZEOF_LONG_LONG != 8
-#define USE_ARRAY_FOR_64BIT_TYPES
-typedef uint32_t Elf64_Addr[2];
-typedef uint32_t Elf64_Off[2];
-typedef uint32_t Elf64_Xword[2];
-#else
-#undef USE_ARRAY_FOR_64BIT_TYPES
-typedef uint64_t Elf64_Addr;
-typedef uint64_t Elf64_Off;
-typedef uint64_t Elf64_Xword;
-#endif
-typedef uint16_t Elf64_Half;
-typedef uint32_t Elf64_Word;
-typedef uint8_t Elf64_Char;
-
-#define EI_NIDENT 16
-
-typedef struct {
- Elf32_Char e_ident[EI_NIDENT];
- Elf32_Half e_type;
- Elf32_Half e_machine;
- Elf32_Word e_version;
- Elf32_Addr e_entry; /* Entry point */
- Elf32_Off e_phoff;
- Elf32_Off e_shoff;
- Elf32_Word e_flags;
- Elf32_Half e_ehsize;
- Elf32_Half e_phentsize;
- Elf32_Half e_phnum;
- Elf32_Half e_shentsize;
- Elf32_Half e_shnum;
- Elf32_Half e_shstrndx;
-} Elf32_Ehdr;
-
-typedef struct {
- Elf64_Char e_ident[EI_NIDENT];
- Elf64_Half e_type;
- Elf64_Half e_machine;
- Elf64_Word e_version;
- Elf64_Addr e_entry; /* Entry point */
- Elf64_Off e_phoff;
- Elf64_Off e_shoff;
- Elf64_Word e_flags;
- Elf64_Half e_ehsize;
- Elf64_Half e_phentsize;
- Elf64_Half e_phnum;
- Elf64_Half e_shentsize;
- Elf64_Half e_shnum;
- Elf64_Half e_shstrndx;
-} Elf64_Ehdr;
-
-/* e_type */
-#define ET_REL 1
-#define ET_EXEC 2
-#define ET_DYN 3
-#define ET_CORE 4
-
-/* sh_type */
-#define SHT_SYMTAB 2
-#define SHT_NOTE 7
-#define SHT_DYNSYM 11
-
-/* elf type */
-#define ELFDATANONE 0 /* e_ident[EI_DATA] */
-#define ELFDATA2LSB 1
-#define ELFDATA2MSB 2
-
-/* elf class */
-#define ELFCLASSNONE 0
-#define ELFCLASS32 1
-#define ELFCLASS64 2
-
-/* magic number */
-#define EI_MAG0 0 /* e_ident[] indexes */
-#define EI_MAG1 1
-#define EI_MAG2 2
-#define EI_MAG3 3
-#define EI_CLASS 4
-#define EI_DATA 5
-#define EI_VERSION 6
-#define EI_PAD 7
-
-#define ELFMAG0 0x7f /* EI_MAG */
-#define ELFMAG1 'E'
-#define ELFMAG2 'L'
-#define ELFMAG3 'F'
-#define ELFMAG "\177ELF"
-
-#define OLFMAG1 'O'
-#define OLFMAG "\177OLF"
-
-typedef struct {
- Elf32_Word p_type;
- Elf32_Off p_offset;
- Elf32_Addr p_vaddr;
- Elf32_Addr p_paddr;
- Elf32_Word p_filesz;
- Elf32_Word p_memsz;
- Elf32_Word p_flags;
- Elf32_Word p_align;
-} Elf32_Phdr;
-
-typedef struct {
- Elf64_Word p_type;
- Elf64_Word p_flags;
- Elf64_Off p_offset;
- Elf64_Addr p_vaddr;
- Elf64_Addr p_paddr;
- Elf64_Xword p_filesz;
- Elf64_Xword p_memsz;
- Elf64_Xword p_align;
-} Elf64_Phdr;
-
-#define PT_NULL 0 /* p_type */
-#define PT_LOAD 1
-#define PT_DYNAMIC 2
-#define PT_INTERP 3
-#define PT_NOTE 4
-#define PT_SHLIB 5
-#define PT_PHDR 6
-#define PT_NUM 7
-
-typedef struct {
- Elf32_Word sh_name;
- Elf32_Word sh_type;
- Elf32_Word sh_flags;
- Elf32_Addr sh_addr;
- Elf32_Off sh_offset;
- Elf32_Word sh_size;
- Elf32_Word sh_link;
- Elf32_Word sh_info;
- Elf32_Word sh_addralign;
- Elf32_Word sh_entsize;
-} Elf32_Shdr;
-
-typedef struct {
- Elf64_Word sh_name;
- Elf64_Word sh_type;
- Elf64_Off sh_flags;
- Elf64_Addr sh_addr;
- Elf64_Off sh_offset;
- Elf64_Off sh_size;
- Elf64_Word sh_link;
- Elf64_Word sh_info;
- Elf64_Off sh_addralign;
- Elf64_Off sh_entsize;
-} Elf64_Shdr;
-
-#define NT_NETBSD_CORE_PROCINFO 1
-
-/* Note header in a PT_NOTE section */
-typedef struct elf_note {
- Elf32_Word n_namesz; /* Name size */
- Elf32_Word n_descsz; /* Content size */
- Elf32_Word n_type; /* Content type */
-} Elf32_Nhdr;
-
-typedef struct {
- Elf64_Word n_namesz;
- Elf64_Word n_descsz;
- Elf64_Word n_type;
-} Elf64_Nhdr;
-
-/* Notes used in ET_CORE */
-#define NT_PRSTATUS 1
-#define NT_PRFPREG 2
-#define NT_PRPSINFO 3
-#define NT_PRXREG 4
-#define NT_TASKSTRUCT 4
-#define NT_PLATFORM 5
-#define NT_AUXV 6
-
-/* Note types used in executables */
-/* NetBSD executables (name = "NetBSD") */
-#define NT_NETBSD_VERSION 1
-#define NT_NETBSD_EMULATION 2
-#define NT_FREEBSD_VERSION 1
-#define NT_OPENBSD_VERSION 1
-#define NT_DRAGONFLY_VERSION 1
-/* GNU executables (name = "GNU") */
-#define NT_GNU_VERSION 1
-
-/* GNU OS tags */
-#define GNU_OS_LINUX 0
-#define GNU_OS_HURD 1
-#define GNU_OS_SOLARIS 2
-#define GNU_OS_KFREEBSD 3
-#define GNU_OS_KNETBSD 4
-
-#endif
diff --git a/usr.bin/file/softmagic.c b/usr.bin/file/softmagic.c
deleted file mode 100644
index 6aa16d2725f..00000000000
--- a/usr.bin/file/softmagic.c
+++ /dev/null
@@ -1,1821 +0,0 @@
-/* $OpenBSD: softmagic.c,v 1.17 2013/04/17 15:01:26 deraadt Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * softmagic - interpret variable magic from MAGIC
- */
-
-#include "file.h"
-#include "magic.h"
-#include <string.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <time.h>
-
-
-private int match(struct magic_set *, struct magic *, uint32_t,
- const unsigned char *, size_t, int);
-private int mget(struct magic_set *, const unsigned char *,
- struct magic *, size_t, unsigned int);
-private int magiccheck(struct magic_set *, struct magic *);
-private int32_t mprint(struct magic_set *, struct magic *);
-private void mdebug(uint32_t, const char *, size_t);
-private int mcopy(struct magic_set *, union VALUETYPE *, int, int,
- const unsigned char *, uint32_t, size_t, size_t);
-private int mconvert(struct magic_set *, struct magic *);
-private int print_sep(struct magic_set *, int);
-private void cvt_8(union VALUETYPE *, const struct magic *);
-private void cvt_16(union VALUETYPE *, const struct magic *);
-private void cvt_32(union VALUETYPE *, const struct magic *);
-private void cvt_64(union VALUETYPE *, const struct magic *);
-
-/*
- * Macro to give description string according to whether we want plain
- * text or MIME type
- */
-#define MAGIC_DESC ((ms->flags & MAGIC_MIME) ? m->mimetype : m->desc)
-
-/*
- * softmagic - lookup one file in parsed, in-memory copy of database
- * Passed the name and FILE * of one file to be typed.
- */
-/*ARGSUSED1*/ /* nbytes passed for regularity, maybe need later */
-protected int
-file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes, int mode)
-{
- struct mlist *ml;
- int rv;
- for (ml = ms->mlist->next; ml != ms->mlist; ml = ml->next)
- if ((rv = match(ms, ml->magic, ml->nmagic, buf, nbytes, mode)) != 0)
- return rv;
-
- return 0;
-}
-
-/*
- * Go through the whole list, stopping if you find a match. Process all
- * the continuations of that match before returning.
- *
- * We support multi-level continuations:
- *
- * At any time when processing a successful top-level match, there is a
- * current continuation level; it represents the level of the last
- * successfully matched continuation.
- *
- * Continuations above that level are skipped as, if we see one, it
- * means that the continuation that controls them - i.e, the
- * lower-level continuation preceding them - failed to match.
- *
- * Continuations below that level are processed as, if we see one,
- * it means we've finished processing or skipping higher-level
- * continuations under the control of a successful or unsuccessful
- * lower-level continuation, and are now seeing the next lower-level
- * continuation and should process it. The current continuation
- * level reverts to the level of the one we're seeing.
- *
- * Continuations at the current level are processed as, if we see
- * one, there's no lower-level continuation that may have failed.
- *
- * If a continuation matches, we bump the current continuation level
- * so that higher-level continuations are processed.
- */
-private int
-match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
- const unsigned char *s, size_t nbytes, int mode)
-{
- uint32_t magindex = 0;
- unsigned int cont_level = 0;
- int need_separator = 0;
- int returnval = 0; /* if a match is found it is set to 1*/
- int firstline = 1; /* a flag to print X\n X\n- X */
- int printed_something = 0;
-
- if (file_check_mem(ms, cont_level) == -1)
- return -1;
-
- for (magindex = 0; magindex < nmagic; magindex++) {
- int flush;
- struct magic *m = &magic[magindex];
-
- if ((m->flag & BINTEST) != mode) {
- /* Skip sub-tests */
- while (magic[magindex + 1].cont_level != 0 &&
- ++magindex < nmagic)
- continue;
- continue; /* Skip to next top-level test*/
- }
-
- ms->offset = m->offset;
- ms->line = m->lineno;
-
- /* if main entry matches, print it... */
- flush = !mget(ms, s, m, nbytes, cont_level);
- if (flush) {
- if (m->reln == '!')
- flush = 0;
- } else {
- switch (magiccheck(ms, m)) {
- case -1:
- return -1;
- case 0:
- flush++;
- break;
- default:
- break;
- }
- }
- if (flush) {
- /*
- * main entry didn't match,
- * flush its continuations
- */
- while (magindex < nmagic - 1 &&
- magic[magindex + 1].cont_level != 0)
- magindex++;
- continue;
- }
-
- /*
- * If we are going to print something, we'll need to print
- * a blank before we print something else.
- */
- if (*MAGIC_DESC) {
- need_separator = 1;
- printed_something = 1;
- if (print_sep(ms, firstline) == -1)
- return -1;
- }
-
- if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1)
- return -1;
-
- /* and any continuations that match */
- if (file_check_mem(ms, ++cont_level) == -1)
- return -1;
-
- while (magic[magindex+1].cont_level != 0 &&
- ++magindex < nmagic) {
- m = &magic[magindex];
- ms->line = m->lineno; /* for messages */
-
- if (cont_level < m->cont_level)
- continue;
- if (cont_level > m->cont_level) {
- /*
- * We're at the end of the level
- * "cont_level" continuations.
- */
- cont_level = m->cont_level;
- }
- ms->offset = m->offset;
- if (m->flag & OFFADD) {
- ms->offset +=
- ms->c.li[cont_level - 1].off;
- }
-
-#ifdef ENABLE_CONDITIONALS
- if (m->cond == COND_ELSE ||
- m->cond == COND_ELIF) {
- if (ms->c.li[cont_level].last_match == 1)
- continue;
- }
-#endif
- flush = !mget(ms, s, m, nbytes, cont_level);
- if (flush && m->reln != '!')
- continue;
-
- switch (flush ? 1 : magiccheck(ms, m)) {
- case -1:
- return -1;
- case 0:
-#ifdef ENABLE_CONDITIONALS
- ms->c.li[cont_level].last_match = 0;
-#endif
- break;
- default:
-#ifdef ENABLE_CONDITIONALS
- ms->c.li[cont_level].last_match = 1;
-#endif
- if (m->type != FILE_DEFAULT)
- ms->c.li[cont_level].got_match = 1;
- else if (ms->c.li[cont_level].got_match) {
- ms->c.li[cont_level].got_match = 0;
- break;
- }
- /*
- * If we are going to print something,
- * make sure that we have a separator first.
- */
- if (*MAGIC_DESC) {
- printed_something = 1;
- if (print_sep(ms, firstline) == -1)
- return -1;
- }
- /*
- * This continuation matched. Print
- * its message, with a blank before it
- * if the previous item printed and
- * this item isn't empty.
- */
- /* space if previous printed */
- if (need_separator
- && ((m->flag & NOSPACE) == 0)
- && *MAGIC_DESC) {
- if (file_printf(ms, " ") == -1)
- return -1;
- need_separator = 0;
- }
- if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1)
- return -1;
- if (*MAGIC_DESC)
- need_separator = 1;
-
- /*
- * If we see any continuations
- * at a higher level,
- * process them.
- */
- if (file_check_mem(ms, ++cont_level) == -1)
- return -1;
- break;
- }
- }
- if (printed_something) {
- firstline = 0;
- returnval = 1;
- }
- if ((ms->flags & MAGIC_CONTINUE) == 0 && printed_something) {
- return 1; /* don't keep searching */
- }
- }
- return returnval; /* This is hit if -k is set or there is no match */
-}
-
-private int
-check_fmt(struct magic_set *ms, struct magic *m)
-{
- regex_t rx;
- int rc;
-
- if (strchr(MAGIC_DESC, '%') == NULL)
- return 0;
-
- rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB);
- if (rc) {
- char errmsg[512];
- (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
- file_magerror(ms, "regex error %d, (%s)", rc, errmsg);
- return -1;
- } else {
- rc = regexec(&rx, MAGIC_DESC, 0, 0, 0);
- regfree(&rx);
- return !rc;
- }
-}
-
-#ifndef HAVE_STRNDUP
-char * strndup(const char *, size_t);
-
-char *
-strndup(const char *str, size_t n)
-{
- size_t len;
- char *copy;
-
- for (len = 0; len < n && str[len]; len++)
- continue;
- if ((copy = malloc(len + 1)) == NULL)
- return NULL;
- (void)memcpy(copy, str, len);
- copy[len] = '\0';
- return copy;
-}
-#endif /* HAVE_STRNDUP */
-
-private int32_t
-mprint(struct magic_set *ms, struct magic *m)
-{
- uint64_t v;
- float vf;
- double vd;
- int64_t t = 0;
- char *buf;
- union VALUETYPE *p = &ms->ms_value;
-
- switch (m->type) {
- case FILE_BYTE:
- v = file_signextend(ms, m, (uint64_t)p->b);
- switch (check_fmt(ms, m)) {
- case -1:
- return -1;
- case 1:
- if (asprintf(&buf, "%c", (unsigned char)v) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
- return -1;
- break;
- default:
- if (file_printf(ms, MAGIC_DESC, (unsigned char) v) == -1)
- return -1;
- break;
- }
- t = ms->offset + sizeof(char);
- break;
-
- case FILE_SHORT:
- case FILE_BESHORT:
- case FILE_LESHORT:
- v = file_signextend(ms, m, (uint64_t)p->h);
- switch (check_fmt(ms, m)) {
- case -1:
- return -1;
- case 1:
- if (asprintf(&buf, "%hu", (unsigned short)v) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
- return -1;
- break;
- default:
- if (file_printf(ms, MAGIC_DESC, (unsigned short) v) == -1)
- return -1;
- break;
- }
- t = ms->offset + sizeof(short);
- break;
-
- case FILE_LONG:
- case FILE_BELONG:
- case FILE_LELONG:
- case FILE_MELONG:
- v = file_signextend(ms, m, (uint64_t)p->l);
- switch (check_fmt(ms, m)) {
- case -1:
- return -1;
- case 1:
- if (asprintf(&buf, "%u", (uint32_t)v) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
- return -1;
- break;
- default:
- if (file_printf(ms, MAGIC_DESC, (uint32_t) v) == -1)
- return -1;
- break;
- }
- t = ms->offset + sizeof(int32_t);
- break;
-
- case FILE_QUAD:
- case FILE_BEQUAD:
- case FILE_LEQUAD:
- v = file_signextend(ms, m, p->q);
- if (file_printf(ms, MAGIC_DESC, (uint64_t) v) == -1)
- return -1;
- t = ms->offset + sizeof(int64_t);
- break;
-
- case FILE_STRING:
- case FILE_PSTRING:
- case FILE_BESTRING16:
- case FILE_LESTRING16:
- if (m->reln == '=' || m->reln == '!') {
- if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
- return -1;
- t = ms->offset + m->vallen;
- }
- else {
- if (*m->value.s == '\0')
- p->s[strcspn(p->s, "\n")] = '\0';
- if (file_printf(ms, MAGIC_DESC, p->s) == -1)
- return -1;
- t = ms->offset + strlen(p->s);
- if (m->type == FILE_PSTRING)
- t++;
- }
- break;
-
- case FILE_DATE:
- case FILE_BEDATE:
- case FILE_LEDATE:
- case FILE_MEDATE:
- if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 1)) == -1)
- return -1;
- t = ms->offset + sizeof(int32_t);
- break;
-
- case FILE_LDATE:
- case FILE_BELDATE:
- case FILE_LELDATE:
- case FILE_MELDATE:
- if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 0)) == -1)
- return -1;
- t = ms->offset + sizeof(int32_t);
- break;
-
- case FILE_QDATE:
- case FILE_BEQDATE:
- case FILE_LEQDATE:
- if (file_printf(ms, MAGIC_DESC, file_fmttime(p->q, 1))
- == -1)
- return -1;
- t = ms->offset + sizeof(uint64_t);
- break;
-
- case FILE_QLDATE:
- case FILE_BEQLDATE:
- case FILE_LEQLDATE:
- if (file_printf(ms, MAGIC_DESC, file_fmttime(p->q, 0))
- == -1)
- return -1;
- t = ms->offset + sizeof(uint64_t);
- break;
-
- case FILE_FLOAT:
- case FILE_BEFLOAT:
- case FILE_LEFLOAT:
- vf = p->f;
- switch (check_fmt(ms, m)) {
- case -1:
- return -1;
- case 1:
- if (asprintf(&buf, "%g", vf) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
- return -1;
- break;
- default:
- if (file_printf(ms, MAGIC_DESC, vf) == -1)
- return -1;
- break;
- }
- t = ms->offset + sizeof(float);
- break;
-
- case FILE_DOUBLE:
- case FILE_BEDOUBLE:
- case FILE_LEDOUBLE:
- vd = p->d;
- switch (check_fmt(ms, m)) {
- case -1:
- return -1;
- case 1:
- if (asprintf(&buf, "%g", vd) < 0)
- return -1;
- if (file_printf(ms, MAGIC_DESC, buf) == -1)
- return -1;
- break;
- default:
- if (file_printf(ms, MAGIC_DESC, vd) == -1)
- return -1;
- break;
- }
- t = ms->offset + sizeof(double);
- break;
-
- case FILE_REGEX: {
- char *cp;
- int rval;
-
- cp = strndup((const char *)ms->search.s, ms->search.rm_len);
- if (cp == NULL) {
- file_oomem(ms, ms->search.rm_len);
- return -1;
- }
- rval = file_printf(ms, MAGIC_DESC, cp);
- free(cp);
-
- if (rval == -1)
- return -1;
-
- if ((m->str_flags & REGEX_OFFSET_START))
- t = ms->search.offset;
- else
- t = ms->search.offset + ms->search.rm_len;
- break;
- }
-
- case FILE_SEARCH:
- if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
- return -1;
- if ((m->str_flags & REGEX_OFFSET_START))
- t = ms->search.offset;
- else
- t = ms->search.offset + m->vallen;
- break;
-
- case FILE_DEFAULT:
- if (file_printf(ms, MAGIC_DESC, m->value.s) == -1)
- return -1;
- t = ms->offset;
- break;
-
- default:
- file_magerror(ms, "invalid m->type (%d) in mprint()", m->type);
- return -1;
- }
- return(t);
-}
-
-
-#define DO_CVT(fld, cast) \
- if (m->num_mask) \
- switch (m->mask_op & FILE_OPS_MASK) { \
- case FILE_OPAND: \
- p->fld &= cast m->num_mask; \
- break; \
- case FILE_OPOR: \
- p->fld |= cast m->num_mask; \
- break; \
- case FILE_OPXOR: \
- p->fld ^= cast m->num_mask; \
- break; \
- case FILE_OPADD: \
- p->fld += cast m->num_mask; \
- break; \
- case FILE_OPMINUS: \
- p->fld -= cast m->num_mask; \
- break; \
- case FILE_OPMULTIPLY: \
- p->fld *= cast m->num_mask; \
- break; \
- case FILE_OPDIVIDE: \
- p->fld /= cast m->num_mask; \
- break; \
- case FILE_OPMODULO: \
- p->fld %= cast m->num_mask; \
- break; \
- } \
- if (m->mask_op & FILE_OPINVERSE) \
- p->fld = ~p->fld \
-
-private void
-cvt_8(union VALUETYPE *p, const struct magic *m)
-{
- DO_CVT(b, (uint8_t));
-}
-
-private void
-cvt_16(union VALUETYPE *p, const struct magic *m)
-{
- DO_CVT(h, (uint16_t));
-}
-
-private void
-cvt_32(union VALUETYPE *p, const struct magic *m)
-{
- DO_CVT(l, (uint32_t));
-}
-
-private void
-cvt_64(union VALUETYPE *p, const struct magic *m)
-{
- DO_CVT(q, (uint64_t));
-}
-
-#define DO_CVT2(fld, cast) \
- if (m->num_mask) \
- switch (m->mask_op & FILE_OPS_MASK) { \
- case FILE_OPADD: \
- p->fld += cast m->num_mask; \
- break; \
- case FILE_OPMINUS: \
- p->fld -= cast m->num_mask; \
- break; \
- case FILE_OPMULTIPLY: \
- p->fld *= cast m->num_mask; \
- break; \
- case FILE_OPDIVIDE: \
- p->fld /= cast m->num_mask; \
- break; \
- } \
-
-private void
-cvt_float(union VALUETYPE *p, const struct magic *m)
-{
- DO_CVT2(f, (float));
-}
-
-private void
-cvt_double(union VALUETYPE *p, const struct magic *m)
-{
- DO_CVT2(d, (double));
-}
-
-/*
- * Convert the byte order of the data we are looking at
- * While we're here, let's apply the mask operation
- * (unless you have a better idea)
- */
-private int
-mconvert(struct magic_set *ms, struct magic *m)
-{
- union VALUETYPE *p = &ms->ms_value;
-
- switch (m->type) {
- case FILE_BYTE:
- cvt_8(p, m);
- return 1;
- case FILE_SHORT:
- cvt_16(p, m);
- return 1;
- case FILE_LONG:
- case FILE_DATE:
- case FILE_LDATE:
- cvt_32(p, m);
- return 1;
- case FILE_QUAD:
- case FILE_QDATE:
- case FILE_QLDATE:
- cvt_64(p, m);
- return 1;
- case FILE_STRING:
- case FILE_BESTRING16:
- case FILE_LESTRING16: {
- size_t len;
-
- /* Null terminate and eat *trailing* return */
- p->s[sizeof(p->s) - 1] = '\0';
- len = strlen(p->s);
- if (len-- && p->s[len] == '\n')
- p->s[len] = '\0';
- return 1;
- }
- case FILE_PSTRING: {
- char *ptr1 = p->s, *ptr2 = ptr1 + 1;
- size_t len = *p->s;
- if (len >= sizeof(p->s))
- len = sizeof(p->s) - 1;
- while (len--)
- *ptr1++ = *ptr2++;
- *ptr1 = '\0';
- len = strlen(p->s);
- if (len-- && p->s[len] == '\n')
- p->s[len] = '\0';
- return 1;
- }
- case FILE_BESHORT:
- p->h = (short)((p->hs[0]<<8)|(p->hs[1]));
- cvt_16(p, m);
- return 1;
- case FILE_BELONG:
- case FILE_BEDATE:
- case FILE_BELDATE:
- p->l = (int32_t)
- ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3]));
- cvt_32(p, m);
- return 1;
- case FILE_BEQUAD:
- case FILE_BEQDATE:
- case FILE_BEQLDATE:
- p->q = (uint64_t)
- (((uint64_t)p->hq[0]<<56)|((uint64_t)p->hq[1]<<48)|
- ((uint64_t)p->hq[2]<<40)|((uint64_t)p->hq[3]<<32)|
- ((uint64_t)p->hq[4]<<24)|((uint64_t)p->hq[5]<<16)|
- ((uint64_t)p->hq[6]<<8)|((uint64_t)p->hq[7]));
- cvt_64(p, m);
- return 1;
- case FILE_LESHORT:
- p->h = (short)((p->hs[1]<<8)|(p->hs[0]));
- cvt_16(p, m);
- return 1;
- case FILE_LELONG:
- case FILE_LEDATE:
- case FILE_LELDATE:
- p->l = (int32_t)
- ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0]));
- cvt_32(p, m);
- return 1;
- case FILE_LEQUAD:
- case FILE_LEQDATE:
- case FILE_LEQLDATE:
- p->q = (uint64_t)
- (((uint64_t)p->hq[7]<<56)|((uint64_t)p->hq[6]<<48)|
- ((uint64_t)p->hq[5]<<40)|((uint64_t)p->hq[4]<<32)|
- ((uint64_t)p->hq[3]<<24)|((uint64_t)p->hq[2]<<16)|
- ((uint64_t)p->hq[1]<<8)|((uint64_t)p->hq[0]));
- cvt_64(p, m);
- return 1;
- case FILE_MELONG:
- case FILE_MEDATE:
- case FILE_MELDATE:
- p->l = (int32_t)
- ((p->hl[1]<<24)|(p->hl[0]<<16)|(p->hl[3]<<8)|(p->hl[2]));
- cvt_32(p, m);
- return 1;
- case FILE_FLOAT:
- cvt_float(p, m);
- return 1;
- case FILE_BEFLOAT:
- p->l = ((uint32_t)p->hl[0]<<24)|((uint32_t)p->hl[1]<<16)|
- ((uint32_t)p->hl[2]<<8) |((uint32_t)p->hl[3]);
- cvt_float(p, m);
- return 1;
- case FILE_LEFLOAT:
- p->l = ((uint32_t)p->hl[3]<<24)|((uint32_t)p->hl[2]<<16)|
- ((uint32_t)p->hl[1]<<8) |((uint32_t)p->hl[0]);
- cvt_float(p, m);
- return 1;
- case FILE_DOUBLE:
- cvt_double(p, m);
- return 1;
- case FILE_BEDOUBLE:
- p->q = ((uint64_t)p->hq[0]<<56)|((uint64_t)p->hq[1]<<48)|
- ((uint64_t)p->hq[2]<<40)|((uint64_t)p->hq[3]<<32)|
- ((uint64_t)p->hq[4]<<24)|((uint64_t)p->hq[5]<<16)|
- ((uint64_t)p->hq[6]<<8) |((uint64_t)p->hq[7]);
- cvt_double(p, m);
- return 1;
- case FILE_LEDOUBLE:
- p->q = ((uint64_t)p->hq[7]<<56)|((uint64_t)p->hq[6]<<48)|
- ((uint64_t)p->hq[5]<<40)|((uint64_t)p->hq[4]<<32)|
- ((uint64_t)p->hq[3]<<24)|((uint64_t)p->hq[2]<<16)|
- ((uint64_t)p->hq[1]<<8) |((uint64_t)p->hq[0]);
- cvt_double(p, m);
- return 1;
- case FILE_REGEX:
- case FILE_SEARCH:
- case FILE_DEFAULT:
- return 1;
- default:
- file_magerror(ms, "invalid type %d in mconvert()", m->type);
- return 0;
- }
-}
-
-
-private void
-mdebug(uint32_t offset, const char *str, size_t len)
-{
- (void) fprintf(stderr, "mget @%d: ", offset);
- file_showstr(stderr, str, len);
- (void) fputc('\n', stderr);
- (void) fputc('\n', stderr);
-}
-
-private int
-mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
- const unsigned char *s, uint32_t offset, size_t nbytes, size_t linecnt)
-{
- /*
- * Note: FILE_SEARCH and FILE_REGEX do not actually copy
- * anything, but setup pointers into the source
- */
- if (indir == 0) {
- switch (type) {
- case FILE_SEARCH:
- ms->search.s = (const char *)s + offset;
- ms->search.s_len = nbytes - offset;
- ms->search.offset = offset;
- return 0;
-
- case FILE_REGEX: {
- const char *b;
- const char *c;
- const char *last; /* end of search region */
- const char *buf; /* start of search region */
- size_t lines;
-
- if (s == NULL) {
- ms->search.s_len = 0;
- ms->search.s = NULL;
- return 0;
- }
- buf = (const char *)s + offset;
- last = (const char *)s + nbytes;
- /* mget() guarantees buf <= last */
- for (lines = linecnt, b = buf;
- lines && ((b = strchr(c = b, '\n')) || (b = strchr(c, '\r')));
- lines--, b++) {
- last = b;
- if (b[0] == '\r' && b[1] == '\n')
- b++;
- }
- if (lines)
- last = (const char *)s + nbytes;
-
- ms->search.s = buf;
- ms->search.s_len = last - buf;
- ms->search.offset = offset;
- ms->search.rm_len = 0;
- return 0;
- }
- case FILE_BESTRING16:
- case FILE_LESTRING16: {
- const unsigned char *src = s + offset;
- const unsigned char *esrc = s + nbytes;
- char *dst = p->s;
- char *edst = &p->s[sizeof(p->s) - 1];
-
- if (type == FILE_BESTRING16)
- src++;
-
- /* check for pointer overflow */
- if (src < s) {
- file_magerror(ms, "invalid offset %zu in mcopy()",
- offset);
- return -1;
- }
- for (/*EMPTY*/; src < esrc; src += 2, dst++) {
- if (dst < edst)
- *dst = *src;
- else
- break;
- if (*dst == '\0') {
- if (type == FILE_BESTRING16 ?
- *(src - 1) != '\0' :
- *(src + 1) != '\0')
- *dst = ' ';
- }
- }
- *edst = '\0';
- return 0;
- }
- case FILE_STRING: /* XXX - these two should not need */
- case FILE_PSTRING: /* to copy anything, but do anyway. */
- default:
- break;
- }
- }
-
- if (offset >= nbytes) {
- (void)memset(p, '\0', sizeof(*p));
- return 0;
- }
- if (nbytes - offset < sizeof(*p))
- nbytes = nbytes - offset;
- else
- nbytes = sizeof(*p);
-
- (void)memcpy(p, s + offset, nbytes);
-
- /*
- * the usefulness of padding with zeroes eludes me, it
- * might even cause problems
- */
- if (nbytes < sizeof(*p))
- (void)memset(((char *)(void *)p) + nbytes, '\0',
- sizeof(*p) - nbytes);
- return 0;
-}
-
-private int
-mget(struct magic_set *ms, const unsigned char *s,
- struct magic *m, size_t nbytes, unsigned int cont_level)
-{
- uint32_t offset = ms->offset;
- uint32_t count = m->str_range;
- union VALUETYPE *p = &ms->ms_value;
-
- if (mcopy(ms, p, m->type, m->flag & INDIR, s, offset, nbytes, count) == -1)
- return -1;
-
- if ((ms->flags & MAGIC_DEBUG) != 0) {
- mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE));
- file_mdump(m);
- }
-
- if (m->flag & INDIR) {
- int off = m->in_offset;
- if (m->in_op & FILE_OPINDIRECT) {
- const union VALUETYPE *q =
- ((const void *)(s + offset + off));
- switch (m->in_type) {
- case FILE_BYTE:
- off = q->b;
- break;
- case FILE_SHORT:
- off = q->h;
- break;
- case FILE_BESHORT:
- off = (short)((q->hs[0]<<8)|(q->hs[1]));
- break;
- case FILE_LESHORT:
- off = (short)((q->hs[1]<<8)|(q->hs[0]));
- break;
- case FILE_LONG:
- off = q->l;
- break;
- case FILE_BELONG:
- off = (int32_t)((q->hl[0]<<24)|(q->hl[1]<<16)|
- (q->hl[2]<<8)|(q->hl[3]));
- break;
- case FILE_LELONG:
- off = (int32_t)((q->hl[3]<<24)|(q->hl[2]<<16)|
- (q->hl[1]<<8)|(q->hl[0]));
- break;
- case FILE_MELONG:
- off = (int32_t)((q->hl[1]<<24)|(q->hl[0]<<16)|
- (q->hl[3]<<8)|(q->hl[2]));
- break;
- }
- }
- switch (m->in_type) {
- case FILE_BYTE:
- if (nbytes < (offset + 1))
- return 0;
- if (off) {
- switch (m->in_op & FILE_OPS_MASK) {
- case FILE_OPAND:
- offset = p->b & off;
- break;
- case FILE_OPOR:
- offset = p->b | off;
- break;
- case FILE_OPXOR:
- offset = p->b ^ off;
- break;
- case FILE_OPADD:
- offset = p->b + off;
- break;
- case FILE_OPMINUS:
- offset = p->b - off;
- break;
- case FILE_OPMULTIPLY:
- offset = p->b * off;
- break;
- case FILE_OPDIVIDE:
- offset = p->b / off;
- break;
- case FILE_OPMODULO:
- offset = p->b % off;
- break;
- }
- } else
- offset = p->b;
- if (m->in_op & FILE_OPINVERSE)
- offset = ~offset;
- break;
- case FILE_BESHORT:
- if (nbytes < (offset + 2))
- return 0;
- if (off) {
- switch (m->in_op & FILE_OPS_MASK) {
- case FILE_OPAND:
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1])) &
- off;
- break;
- case FILE_OPOR:
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1])) |
- off;
- break;
- case FILE_OPXOR:
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1])) ^
- off;
- break;
- case FILE_OPADD:
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1])) +
- off;
- break;
- case FILE_OPMINUS:
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1])) -
- off;
- break;
- case FILE_OPMULTIPLY:
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1])) *
- off;
- break;
- case FILE_OPDIVIDE:
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1])) /
- off;
- break;
- case FILE_OPMODULO:
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1])) %
- off;
- break;
- }
- } else
- offset = (short)((p->hs[0]<<8)|
- (p->hs[1]));
- if (m->in_op & FILE_OPINVERSE)
- offset = ~offset;
- break;
- case FILE_LESHORT:
- if (nbytes < (offset + 2))
- return 0;
- if (off) {
- switch (m->in_op & FILE_OPS_MASK) {
- case FILE_OPAND:
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0])) &
- off;
- break;
- case FILE_OPOR:
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0])) |
- off;
- break;
- case FILE_OPXOR:
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0])) ^
- off;
- break;
- case FILE_OPADD:
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0])) +
- off;
- break;
- case FILE_OPMINUS:
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0])) -
- off;
- break;
- case FILE_OPMULTIPLY:
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0])) *
- off;
- break;
- case FILE_OPDIVIDE:
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0])) /
- off;
- break;
- case FILE_OPMODULO:
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0])) %
- off;
- break;
- }
- } else
- offset = (short)((p->hs[1]<<8)|
- (p->hs[0]));
- if (m->in_op & FILE_OPINVERSE)
- offset = ~offset;
- break;
- case FILE_SHORT:
- if (nbytes < (offset + 2))
- return 0;
- if (off) {
- switch (m->in_op & FILE_OPS_MASK) {
- case FILE_OPAND:
- offset = p->h & off;
- break;
- case FILE_OPOR:
- offset = p->h | off;
- break;
- case FILE_OPXOR:
- offset = p->h ^ off;
- break;
- case FILE_OPADD:
- offset = p->h + off;
- break;
- case FILE_OPMINUS:
- offset = p->h - off;
- break;
- case FILE_OPMULTIPLY:
- offset = p->h * off;
- break;
- case FILE_OPDIVIDE:
- offset = p->h / off;
- break;
- case FILE_OPMODULO:
- offset = p->h % off;
- break;
- }
- }
- else
- offset = p->h;
- if (m->in_op & FILE_OPINVERSE)
- offset = ~offset;
- break;
- case FILE_BELONG:
- if (nbytes < (offset + 4))
- return 0;
- if (off) {
- switch (m->in_op & FILE_OPS_MASK) {
- case FILE_OPAND:
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3])) &
- off;
- break;
- case FILE_OPOR:
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3])) |
- off;
- break;
- case FILE_OPXOR:
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3])) ^
- off;
- break;
- case FILE_OPADD:
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3])) +
- off;
- break;
- case FILE_OPMINUS:
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3])) -
- off;
- break;
- case FILE_OPMULTIPLY:
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3])) *
- off;
- break;
- case FILE_OPDIVIDE:
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3])) /
- off;
- break;
- case FILE_OPMODULO:
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3])) %
- off;
- break;
- }
- } else
- offset = (int32_t)((p->hl[0]<<24)|
- (p->hl[1]<<16)|
- (p->hl[2]<<8)|
- (p->hl[3]));
- if (m->in_op & FILE_OPINVERSE)
- offset = ~offset;
- break;
- case FILE_LELONG:
- if (nbytes < (offset + 4))
- return 0;
- if (off) {
- switch (m->in_op & FILE_OPS_MASK) {
- case FILE_OPAND:
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0])) &
- off;
- break;
- case FILE_OPOR:
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0])) |
- off;
- break;
- case FILE_OPXOR:
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0])) ^
- off;
- break;
- case FILE_OPADD:
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0])) +
- off;
- break;
- case FILE_OPMINUS:
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0])) -
- off;
- break;
- case FILE_OPMULTIPLY:
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0])) *
- off;
- break;
- case FILE_OPDIVIDE:
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0])) /
- off;
- break;
- case FILE_OPMODULO:
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0])) %
- off;
- break;
- }
- } else
- offset = (int32_t)((p->hl[3]<<24)|
- (p->hl[2]<<16)|
- (p->hl[1]<<8)|
- (p->hl[0]));
- if (m->in_op & FILE_OPINVERSE)
- offset = ~offset;
- break;
- case FILE_MELONG:
- if (nbytes < (offset + 4))
- return 0;
- if (off) {
- switch (m->in_op & FILE_OPS_MASK) {
- case FILE_OPAND:
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2])) &
- off;
- break;
- case FILE_OPOR:
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2])) |
- off;
- break;
- case FILE_OPXOR:
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2])) ^
- off;
- break;
- case FILE_OPADD:
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2])) +
- off;
- break;
- case FILE_OPMINUS:
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2])) -
- off;
- break;
- case FILE_OPMULTIPLY:
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2])) *
- off;
- break;
- case FILE_OPDIVIDE:
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2])) /
- off;
- break;
- case FILE_OPMODULO:
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2])) %
- off;
- break;
- }
- } else
- offset = (int32_t)((p->hl[1]<<24)|
- (p->hl[0]<<16)|
- (p->hl[3]<<8)|
- (p->hl[2]));
- if (m->in_op & FILE_OPINVERSE)
- offset = ~offset;
- break;
- case FILE_LONG:
- if (nbytes < (offset + 4))
- return 0;
- if (off) {
- switch (m->in_op & FILE_OPS_MASK) {
- case FILE_OPAND:
- offset = p->l & off;
- break;
- case FILE_OPOR:
- offset = p->l | off;
- break;
- case FILE_OPXOR:
- offset = p->l ^ off;
- break;
- case FILE_OPADD:
- offset = p->l + off;
- break;
- case FILE_OPMINUS:
- offset = p->l - off;
- break;
- case FILE_OPMULTIPLY:
- offset = p->l * off;
- break;
- case FILE_OPDIVIDE:
- offset = p->l / off;
- break;
- case FILE_OPMODULO:
- offset = p->l % off;
- break;
- }
- } else
- offset = p->l;
- if (m->in_op & FILE_OPINVERSE)
- offset = ~offset;
- break;
- }
-
- if (m->flag & INDIROFFADD)
- offset += ms->c.li[cont_level-1].off;
- if (mcopy(ms, p, m->type, 0, s, offset, nbytes, count) == -1)
- return -1;
- ms->offset = offset;
-
- if ((ms->flags & MAGIC_DEBUG) != 0) {
- mdebug(offset, (char *)(void *)p,
- sizeof(union VALUETYPE));
- file_mdump(m);
- }
- }
-
- /* Verify we have enough data to match magic type */
- switch (m->type) {
- case FILE_BYTE:
- if (nbytes < (offset + 1)) /* should alway be true */
- return 0;
- break;
-
- case FILE_SHORT:
- case FILE_BESHORT:
- case FILE_LESHORT:
- if (nbytes < (offset + 2))
- return 0;
- break;
-
- case FILE_LONG:
- case FILE_BELONG:
- case FILE_LELONG:
- case FILE_MELONG:
- case FILE_DATE:
- case FILE_BEDATE:
- case FILE_LEDATE:
- case FILE_MEDATE:
- case FILE_LDATE:
- case FILE_BELDATE:
- case FILE_LELDATE:
- case FILE_MELDATE:
- case FILE_FLOAT:
- case FILE_BEFLOAT:
- case FILE_LEFLOAT:
- if (nbytes < (offset + 4))
- return 0;
- break;
-
- case FILE_DOUBLE:
- case FILE_BEDOUBLE:
- case FILE_LEDOUBLE:
- if (nbytes < (offset + 8))
- return 0;
- break;
-
- case FILE_STRING:
- case FILE_PSTRING:
- case FILE_SEARCH:
- if (nbytes < (offset + m->vallen))
- return 0;
- break;
-
- case FILE_REGEX:
- if (nbytes < offset)
- return 0;
- break;
-
- case FILE_DEFAULT: /* nothing to check */
- default:
- break;
- }
- if (!mconvert(ms, m))
- return 0;
- return 1;
-}
-
-private uint64_t
-file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags)
-{
- /*
- * Convert the source args to unsigned here so that (1) the
- * compare will be unsigned as it is in strncmp() and (2) so
- * the ctype functions will work correctly without extra
- * casting.
- */
- const unsigned char *a = (const unsigned char *)s1;
- const unsigned char *b = (const unsigned char *)s2;
- uint64_t v;
-
- /*
- * What we want here is v = strncmp(s1, s2, len),
- * but ignoring any nulls.
- */
- v = 0;
- if (0L == flags) { /* normal string: do it fast */
- while (len-- > 0)
- if ((v = *b++ - *a++) != '\0')
- break;
- }
- else { /* combine the others */
- while (len-- > 0) {
- if ((flags & STRING_IGNORE_LOWERCASE) &&
- islower(*a)) {
- if ((v = tolower(*b++) - *a++) != '\0')
- break;
- }
- else if ((flags & STRING_IGNORE_UPPERCASE) &&
- isupper(*a)) {
- if ((v = toupper(*b++) - *a++) != '\0')
- break;
- }
- else if ((flags & STRING_COMPACT_BLANK) &&
- isspace(*a)) {
- a++;
- if (isspace(*b++)) {
- while (isspace(*b))
- b++;
- }
- else {
- v = 1;
- break;
- }
- }
- else if ((flags & STRING_COMPACT_OPTIONAL_BLANK) &&
- isspace(*a)) {
- a++;
- while (isspace(*b))
- b++;
- }
- else {
- if ((v = *b++ - *a++) != '\0')
- break;
- }
- }
- }
- return v;
-}
-
-private uint64_t
-file_strncmp16(const char *a, const char *b, size_t len, uint32_t flags)
-{
- /*
- * XXX - The 16-bit string compare probably needs to be done
- * differently, especially if the flags are to be supported.
- * At the moment, I am unsure.
- */
- flags = 0;
- return file_strncmp(a, b, len, flags);
-}
-
-private int
-magiccheck(struct magic_set *ms, struct magic *m)
-{
- uint64_t l = m->value.q;
- uint64_t v;
- float fl, fv;
- double dl, dv;
- int matched;
- union VALUETYPE *p = &ms->ms_value;
-
- switch (m->type) {
- case FILE_BYTE:
- v = p->b;
- break;
-
- case FILE_SHORT:
- case FILE_BESHORT:
- case FILE_LESHORT:
- v = p->h;
- break;
-
- case FILE_LONG:
- case FILE_BELONG:
- case FILE_LELONG:
- case FILE_MELONG:
- case FILE_DATE:
- case FILE_BEDATE:
- case FILE_LEDATE:
- case FILE_MEDATE:
- case FILE_LDATE:
- case FILE_BELDATE:
- case FILE_LELDATE:
- case FILE_MELDATE:
- v = p->l;
- break;
-
- case FILE_QUAD:
- case FILE_LEQUAD:
- case FILE_BEQUAD:
- case FILE_QDATE:
- case FILE_BEQDATE:
- case FILE_LEQDATE:
- case FILE_QLDATE:
- case FILE_BEQLDATE:
- case FILE_LEQLDATE:
- v = p->q;
- break;
-
- case FILE_FLOAT:
- case FILE_BEFLOAT:
- case FILE_LEFLOAT:
- fl = m->value.f;
- fv = p->f;
- switch (m->reln) {
- case 'x':
- matched = 1;
- break;
-
- case '!':
- matched = fv != fl;
- break;
-
- case '=':
- matched = fv == fl;
- break;
-
- case '>':
- matched = fv > fl;
- break;
-
- case '<':
- matched = fv < fl;
- break;
-
- default:
- matched = 0;
- file_magerror(ms, "cannot happen with float: invalid relation `%c'", m->reln);
- return -1;
- }
- return matched;
-
- case FILE_DOUBLE:
- case FILE_BEDOUBLE:
- case FILE_LEDOUBLE:
- dl = m->value.d;
- dv = p->d;
- switch (m->reln) {
- case 'x':
- matched = 1;
- break;
-
- case '!':
- matched = dv != dl;
- break;
-
- case '=':
- matched = dv == dl;
- break;
-
- case '>':
- matched = dv > dl;
- break;
-
- case '<':
- matched = dv < dl;
- break;
-
- default:
- matched = 0;
- file_magerror(ms, "cannot happen with double: invalid relation `%c'", m->reln);
- return -1;
- }
- return matched;
-
- case FILE_DEFAULT:
- l = 0;
- v = 0;
- break;
-
- case FILE_STRING:
- case FILE_PSTRING:
- l = 0;
- v = file_strncmp(m->value.s, p->s, (size_t)m->vallen, m->str_flags);
- break;
-
- case FILE_BESTRING16:
- case FILE_LESTRING16:
- l = 0;
- v = file_strncmp16(m->value.s, p->s, (size_t)m->vallen, m->str_flags);
- break;
-
- case FILE_SEARCH: { /* search ms->search.s for the string m->value.s */
- size_t slen;
- size_t idx;
-
- if (ms->search.s == NULL)
- return 0;
-
- slen = MIN(m->vallen, sizeof(m->value.s));
- l = 0;
- v = 0;
-
- for (idx = 0; m->str_range == 0 || idx < m->str_range; idx++) {
- if (slen + idx > ms->search.s_len)
- break;
-
- v = file_strncmp(m->value.s, ms->search.s + idx, slen, m->str_flags);
- if (v == 0) { /* found match */
- ms->search.offset += idx;
- break;
- }
- }
- break;
- }
- case FILE_REGEX: {
- int rc;
- regex_t rx;
- char errmsg[512];
-
- if (ms->search.s == NULL)
- return 0;
-
- l = 0;
- rc = regcomp(&rx, m->value.s,
- REG_EXTENDED|REG_NEWLINE|
- ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
- if (rc) {
- (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
- file_magerror(ms, "regex error %d, (%s)",
- rc, errmsg);
- v = (uint64_t)-1;
- }
- else {
- regmatch_t pmatch[1];
-#ifndef REG_STARTEND
-#define REG_STARTEND 0
- size_t l = ms->search.s_len - 1;
- char c = ms->search.s[l];
- ((char *)(intptr_t)ms->search.s)[l] = '\0';
-#else
- pmatch[0].rm_so = 0;
- pmatch[0].rm_eo = ms->search.s_len;
-#endif
- rc = regexec(&rx, (const char *)ms->search.s,
- 1, pmatch, REG_STARTEND);
-#if REG_STARTEND == 0
- ((char *)(intptr_t)ms->search.s)[l] = c;
-#endif
- switch (rc) {
- case 0:
- ms->search.s += (int)pmatch[0].rm_so;
- ms->search.offset += (size_t)pmatch[0].rm_so;
- ms->search.rm_len =
- (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
- v = 0;
- break;
-
- case REG_NOMATCH:
- v = 1;
- break;
-
- default:
- (void)regerror(rc, &rx, errmsg, sizeof(errmsg));
- file_magerror(ms, "regexec error %d, (%s)",
- rc, errmsg);
- v = (uint64_t)-1;
- break;
- }
- regfree(&rx);
- }
- if (v == (uint64_t)-1)
- return -1;
- break;
- }
- default:
- file_magerror(ms, "invalid type %d in magiccheck()", m->type);
- return -1;
- }
-
- v = file_signextend(ms, m, v);
-
- switch (m->reln) {
- case 'x':
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "%llu == *any* = 1\n",
- (unsigned long long)v);
- matched = 1;
- break;
-
- case '!':
- matched = v != l;
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "%llu != %llu = %d\n",
- (unsigned long long)v, (unsigned long long)l,
- matched);
- break;
-
- case '=':
- matched = v == l;
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "%llu == %llu = %d\n",
- (unsigned long long)v, (unsigned long long)l,
- matched);
- break;
-
- case '>':
- if (m->flag & UNSIGNED) {
- matched = v > l;
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "%llu > %llu = %d\n",
- (unsigned long long)v,
- (unsigned long long)l, matched);
- }
- else {
- matched = (int64_t) v > (int64_t) l;
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "%lld > %lld = %d\n",
- (long long)v, (long long)l, matched);
- }
- break;
-
- case '<':
- if (m->flag & UNSIGNED) {
- matched = v < l;
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "%llu < %llu = %d\n",
- (unsigned long long)v,
- (unsigned long long)l, matched);
- }
- else {
- matched = (int64_t) v < (int64_t) l;
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "%lld < %lld = %d\n",
- (long long)v, (long long)l, matched);
- }
- break;
-
- case '&':
- matched = (v & l) == l;
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "((%llx & %llx) == %llx) = %d\n",
- (unsigned long long)v, (unsigned long long)l,
- (unsigned long long)l, matched);
- break;
-
- case '^':
- matched = (v & l) != l;
- if ((ms->flags & MAGIC_DEBUG) != 0)
- (void) fprintf(stderr, "((%llx & %llx) != %llx) = %d\n",
- (unsigned long long)v, (unsigned long long)l,
- (unsigned long long)l, matched);
- break;
-
- default:
- matched = 0;
- file_magerror(ms, "cannot happen: invalid relation `%c'",
- m->reln);
- return -1;
- }
-
- return matched;
-}
-
-private int
-print_sep(struct magic_set *ms, int firstline)
-{
- if (firstline)
- return 0;
- /*
- * we found another match
- * put a newline and '-' to do some simple formatting
- */
- return file_printf(ms, "\n- ");
-}
diff --git a/usr.bin/file/tar.h b/usr.bin/file/tar.h
deleted file mode 100644
index 9e6f3a84f3b..00000000000
--- a/usr.bin/file/tar.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* $OpenBSD: tar.h,v 1.7 2009/04/24 18:54:34 chl Exp $ */
-/*
- * Copyright (c) Ian F. Darwin 1986-1995.
- * Software written by Ian F. Darwin and others;
- * maintained 1995-present by Christos Zoulas and others.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice immediately at the beginning of the file, without modification,
- * this list of conditions, and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * Header file for public domain tar (tape archive) program.
- *
- * @(#)tar.h 1.20 86/10/29 Public Domain.
- *
- * Created 25 August 1985 by John Gilmore, ihnp4!hoptoad!gnu.
- *
- * $Id: tar.h,v 1.7 2009/04/24 18:54:34 chl Exp $ # checkin only
- */
-
-/*
- * Header block on tape.
- *
- * I'm going to use traditional DP naming conventions here.
- * A "block" is a big chunk of stuff that we do I/O on.
- * A "record" is a piece of info that we care about.
- * Typically many "record"s fit into a "block".
- */
-#define RECORDSIZE 512
-#define NAMSIZ 100
-#define TUNMLEN 32
-#define TGNMLEN 32
-
-union record {
- char charptr[RECORDSIZE];
- struct header {
- char name[NAMSIZ];
- char mode[8];
- char uid[8];
- char gid[8];
- char size[12];
- char mtime[12];
- char chksum[8];
- char linkflag;
- char linkname[NAMSIZ];
- char magic[8];
- char uname[TUNMLEN];
- char gname[TGNMLEN];
- char devmajor[8];
- char devminor[8];
- } header;
-};
-
-/* The magic field is filled with this if uname and gname are valid. */
-#define TMAGIC "ustar" /* 5 chars and a null */
-#define GNUTMAGIC "ustar " /* 7 chars and a null */
diff --git a/usr.bin/file/text.c b/usr.bin/file/text.c
new file mode 100644
index 00000000000..f835c50cee0
--- /dev/null
+++ b/usr.bin/file/text.c
@@ -0,0 +1,168 @@
+/* $OpenBSD: text.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+
+#include "file.h"
+#include "magic.h"
+#include "xmalloc.h"
+
+/*
+ * Keyword table scanned by text_try_words().  Each row is
+ * { token, description, MIME type }: a whitespace-delimited token in the
+ * input that is byte-for-byte equal to column 0 selects column 1 or 2.
+ * The all-NULL row terminates the scan.  Nothing writes to the table, so
+ * both the strings and the pointers to them are const.
+ */
+static const char *const text_words[][3] = {
+	{ "msgid", "PO (gettext message catalogue)", "text/x-po" },
+	{ "dnl", "M4 macro language pre-processor", "text/x-m4" },
+	{ "import", "Java program", "text/x-java" },
+	{ "\"libhdr\"", "BCPL program", "text/x-bcpl" },
+	{ "\"LIBHDR\"", "BCPL program", "text/x-bcpl" },
+	{ "//", "C++ program", "text/x-c++" },
+	{ "virtual", "C++ program", "text/x-c++" },
+	{ "class", "C++ program", "text/x-c++" },
+	{ "public:", "C++ program", "text/x-c++" },
+	{ "private:", "C++ program", "text/x-c++" },
+	{ "/*", "C program", "text/x-c" },
+	{ "#include", "C program", "text/x-c" },
+	{ "char", "C program", "text/x-c" },
+	{ "The", "English", "text/plain" },
+	{ "the", "English", "text/plain" },
+	{ "double", "C program", "text/x-c" },
+	{ "extern", "C program", "text/x-c" },
+	{ "float", "C program", "text/x-c" },
+	{ "struct", "C program", "text/x-c" },
+	{ "union", "C program", "text/x-c" },
+	{ "CFLAGS", "make commands", "text/x-makefile" },
+	{ "LDFLAGS", "make commands", "text/x-makefile" },
+	{ "all:", "make commands", "text/x-makefile" },
+	{ ".PRECIOUS", "make commands", "text/x-makefile" },
+	{ ".ascii", "assembler program", "text/x-asm" },
+	{ ".asciiz", "assembler program", "text/x-asm" },
+	{ ".byte", "assembler program", "text/x-asm" },
+	{ ".even", "assembler program", "text/x-asm" },
+	{ ".globl", "assembler program", "text/x-asm" },
+	{ ".text", "assembler program", "text/x-asm" },
+	{ "clr", "assembler program", "text/x-asm" },
+	{ "(input", "Pascal program", "text/x-pascal" },
+	{ "program", "Pascal program", "text/x-pascal" },
+	{ "record", "Pascal program", "text/x-pascal" },
+	{ "dcl", "PL/1 program", "text/x-pl1" },
+	{ "Received:", "mail", "text/x-mail" },
+	{ ">From", "mail", "text/x-mail" },
+	{ "Return-Path:", "mail", "text/x-mail" },
+	{ "Cc:", "mail", "text/x-mail" },
+	{ "Newsgroups:", "news", "text/x-news" },
+	{ "Path:", "news", "text/x-news" },
+	{ "Organization:", "news", "text/x-news" },
+	{ "href=", "HTML document", "text/html" },
+	{ "HREF=", "HTML document", "text/html" },
+	{ "<body", "HTML document", "text/html" },
+	{ "<BODY", "HTML document", "text/html" },
+	{ "<html", "HTML document", "text/html" },
+	{ "<HTML", "HTML document", "text/html" },
+	{ "<!--", "HTML document", "text/html" },
+	{ NULL, NULL, NULL }	/* sentinel: end of table */
+};
+
+/*
+ * Return 1 if c may appear in ASCII text: a printable character (32-126) or
+ * one of the control characters listed below.  Return 0 for anything else,
+ * NUL included.
+ */
+static int
+text_is_ascii(u_char c)
+{
+	switch (c) {
+	case '\007':	/* BEL */
+	case '\010':	/* BS */
+	case '\011':	/* HT */
+	case '\012':	/* LF */
+	case '\014':	/* FF */
+	case '\015':	/* CR */
+	case '\033':	/* ESC */
+		return (1);
+	default:
+		return (c > 31 && c < 127);
+	}
+}
+
+/*
+ * Return nonzero if c may appear in ISO-8859 text: any byte from 160 up, or
+ * anything text_is_ascii() accepts.
+ */
+static int
+text_is_latin1(u_char c)
+{
+	return (c >= 160 ? 1 : text_is_ascii(c));
+}
+
+/*
+ * Return nonzero if c may appear in extended-ASCII text: any byte with the
+ * high bit set (128-255), or anything text_is_ascii() accepts.
+ */
+static int
+text_is_extended(u_char c)
+{
+	return (c >= 128 ? 1 : text_is_ascii(c));
+}
+
+/*
+ * Apply the predicate f to each of the size bytes starting at base.  Return 0
+ * at the first byte f rejects, or 1 if every byte passes.  With size == 0
+ * the result is 1 and neither base nor f is used.
+ */
+static int
+text_try_test(const void *base, size_t size, int (*f)(u_char))
+{
+	const u_char	*p = base;
+	size_t		 left;
+
+	for (left = size; left > 0; left--) {
+		if (!f(*p++))
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * Classify the size bytes at base as text.  The character sets are tried from
+ * most to least restrictive and the name of the first one whose test accepts
+ * every byte is returned; NULL means the data matched none of them.
+ */
+const char *
+text_get_type(const void *base, size_t size)
+{
+	static const struct {
+		int		(*test)(u_char);
+		const char	*name;
+	} sets[] = {
+		{ text_is_ascii, "ASCII" },
+		{ text_is_latin1, "ISO-8859" },
+		{ text_is_extended, "Non-ISO extended-ASCII" }
+	};
+	size_t	i;
+
+	for (i = 0; i < sizeof(sets) / sizeof(sets[0]); i++) {
+		if (text_try_test(base, size, sets[i].test))
+			return (sets[i].name);
+	}
+	return (NULL);
+}
+
+/*
+ * Split the size bytes at base into whitespace-separated tokens and compare
+ * each one, as an exact byte-for-byte match, with the first column of
+ * text_words.  For the first token found in the table, return the row's MIME
+ * type if MAGIC_TEST_MIME is set in flags, otherwise its description.
+ * Return NULL if no token matches.
+ */
+const char *
+text_try_words(const void *base, size_t size, int flags)
+{
+	const char	*tok, *tokend, *limit;
+	size_t		 toklen;
+	u_int		 row, col;
+
+	col = (flags & MAGIC_TEST_MIME) ? 2 : 1;
+	limit = (const char *)base + size;
+	tok = base;
+	while (tok != limit) {
+		/* Step over the blanks in front of the next token. */
+		for (; tok != limit && isspace((u_char)*tok); tok++)
+			continue;
+
+		/* The token runs up to the next blank or the end of data. */
+		for (tokend = tok; tokend != limit; tokend++) {
+			if (isspace((u_char)*tokend))
+				break;
+		}
+		toklen = tokend - tok;
+
+		for (row = 0; text_words[row][0] != NULL; row++) {
+			if (strlen(text_words[row][0]) == toklen &&
+			    memcmp(tok, text_words[row][0], toklen) == 0)
+				return (text_words[row][col]);
+		}
+
+		tok = tokend;
+	}
+	return (NULL);
+}
diff --git a/usr.bin/file/xmalloc.c b/usr.bin/file/xmalloc.c
new file mode 100644
index 00000000000..857bcd91569
--- /dev/null
+++ b/usr.bin/file/xmalloc.c
@@ -0,0 +1,103 @@
+/* $OpenBSD: xmalloc.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+/*
+ * Author: Tatu Ylonen <ylo@cs.hut.fi>
+ * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
+ * All rights reserved
+ * Versions of malloc and friends that check their results, and never return
+ * failure (they call fatal if they encounter an error).
+ *
+ * As far as I am concerned, the code I have written for this software
+ * can be used freely for any purpose. Any derived versions of this
+ * software must be clearly marked as such, and if the derived work is
+ * incompatible with the protocol description in the RFC file, it must be
+ * called by a name other than "ssh" or "Secure Shell".
+ */
+
+#include <err.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "xmalloc.h"
+
+/*
+ * malloc(3) wrapper that never returns NULL: a zero size or an allocation
+ * failure terminates the program via errx(3).
+ */
+void *
+xmalloc(size_t size)
+{
+	void *mem;
+
+	if (size == 0)
+		errx(1, "xmalloc: zero size");
+	if ((mem = malloc(size)) == NULL)
+		errx(1, "xmalloc: out of memory (allocating %zu bytes)", size);
+	return mem;
+}
+
+/*
+ * calloc(3) wrapper that never returns NULL.  The program exits via errx(3)
+ * if either count is zero, if nmemb * size would exceed SIZE_MAX, or if the
+ * allocation itself fails.
+ */
+void *
+xcalloc(size_t nmemb, size_t size)
+{
+	void *mem;
+
+	if (nmemb == 0 || size == 0)
+		errx(1, "xcalloc: zero size");
+	if (size > SIZE_MAX / nmemb)
+		errx(1, "xcalloc: nmemb * size > SIZE_MAX");
+	if ((mem = calloc(nmemb, size)) == NULL)
+		errx(1, "xcalloc: out of memory (allocating %zu bytes)",
+		    (size * nmemb));
+	return mem;
+}
+
+/*
+ * reallocarray(3) wrapper that never returns NULL: if reallocarray() fails
+ * the program exits via errx(3).
+ */
+void *
+xreallocarray(void *ptr, size_t nmemb, size_t size)
+{
+	void *new_ptr;
+
+	new_ptr = reallocarray(ptr, nmemb, size);
+	if (new_ptr == NULL) {
+		/*
+		 * Report the operands, not nmemb * size: when the failure is
+		 * an overflow, the product has wrapped and would be wrong.
+		 */
+		errx(1, "xreallocarray: out of memory (%zu * %zu bytes)",
+		    nmemb, size);
+	}
+	return new_ptr;
+}
+
+/*
+ * free(3) wrapper that treats a NULL argument as a caller bug: it exits via
+ * errx(3) instead of silently doing nothing.
+ */
+void
+xfree(void *ptr)
+{
+	if (ptr != NULL) {
+		free(ptr);
+		return;
+	}
+	errx(1, "xfree: NULL pointer given as argument");
+}
+
+/*
+ * Return a copy of str in memory obtained from xmalloc().  Exits via errx(3)
+ * if strlcpy(3) reports that the copy was truncated.
+ */
+char *
+xstrdup(const char *str)
+{
+	size_t	 need = strlen(str) + 1;	/* room for the NUL too */
+	char	*copy = xmalloc(need);
+
+	if (strlcpy(copy, str, need) >= need)
+		errx(1, "xstrdup: string truncated");
+	return copy;
+}
+
+/*
+ * asprintf(3)-style wrapper: format into a newly allocated string stored in
+ * *ret and return the value vasprintf(3) returned.  Exits via errx(3) if
+ * vasprintf() returns a negative value or leaves *ret NULL.
+ */
+int
+xasprintf(char **ret, const char *fmt, ...)
+{
+	va_list	args;
+	int	len;
+
+	va_start(args, fmt);
+	len = vasprintf(ret, fmt, args);
+	va_end(args);
+
+	/* Test len first so *ret is only read after a successful call. */
+	if (len < 0 || *ret == NULL)
+		errx(1, "xasprintf: could not allocate memory");
+
+	return (len);
+}
diff --git a/usr.bin/file/xmalloc.h b/usr.bin/file/xmalloc.h
new file mode 100644
index 00000000000..8adb3e6f104
--- /dev/null
+++ b/usr.bin/file/xmalloc.h
@@ -0,0 +1,31 @@
+/* $OpenBSD: xmalloc.h,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Author: Tatu Ylonen <ylo@cs.hut.fi>
+ * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
+ * All rights reserved
+ * Created: Mon Mar 20 22:09:17 1995 ylo
+ *
+ * Versions of malloc and friends that check their results, and never return
+ * failure (they call fatal if they encounter an error).
+ *
+ * As far as I am concerned, the code I have written for this software
+ * can be used freely for any purpose. Any derived versions of this
+ * software must be clearly marked as such, and if the derived work is
+ * incompatible with the protocol description in the RFC file, it must be
+ * called by a name other than "ssh" or "Secure Shell".
+ */
+
+#ifndef XMALLOC_H
+#define XMALLOC_H
+
+/*
+ * Checked allocation wrappers implemented in xmalloc.c.  Each one exits via
+ * errx(3) rather than reporting failure to the caller.
+ */
+/* malloc(3); also exits when the size is zero. */
+void *xmalloc(size_t);
+/* calloc(3); also exits on a zero count or when nmemb * size overflows. */
+void *xcalloc(size_t, size_t);
+/* reallocarray(3); exits if it returns NULL. */
+void *xreallocarray(void *, size_t, size_t);
+/* free(3); exits when passed NULL. */
+void xfree(void *);
+/* Copy of the string in xmalloc() memory. */
+char *xstrdup(const char *);
+/*
+ * vasprintf(3) into *ret, returning its result.  The attributes declare the
+ * second argument a printf format (arguments start at 3) that is non-null.
+ */
+int xasprintf(char **, const char *, ...)
+ __attribute__((__format__ (printf, 2, 3)))
+ __attribute__((__nonnull__ (2)));
+
+#endif /* XMALLOC_H */