summaryrefslogtreecommitdiff
path: root/usr.bin/file/magdir/archive
diff options
context:
space:
mode:
Diffstat (limited to 'usr.bin/file/magdir/archive')
-rw-r--r--usr.bin/file/magdir/archive472
1 files changed, 449 insertions, 23 deletions
diff --git a/usr.bin/file/magdir/archive b/usr.bin/file/magdir/archive
index 95fec013ef8..6c9351b516f 100644
--- a/usr.bin/file/magdir/archive
+++ b/usr.bin/file/magdir/archive
@@ -1,4 +1,4 @@
-# $OpenBSD: archive,v 1.4 2004/06/03 03:14:19 tedu Exp $
+# $OpenBSD: archive,v 1.5 2008/05/08 01:40:57 chl Exp $
#------------------------------------------------------------------------------
# archive: file(1) magic for archive formats (see also "msdos" for self-
@@ -30,12 +30,17 @@
# Debian package (needs to go before regular portable archives)
#
-0 string !<arch>\ndebian
+0 string =!<arch>\ndebian
>8 string debian-split part of multipart Debian package
>8 string debian-binary Debian binary package
>68 string >\0 (format %s)
->81 string bz2 \b, uses bzip2 compression
->84 string gz \b, uses gzip compression
+# These next two lines do not work, because a bzip2 Debian archive
+# still uses gzip for the control.tar (first in the archive). Only
+# data.tar varies, and the location of its filename varies too.
+# file/libmagic does not current have support for ascii-string based
+# (offsets) as of 2005-09-15.
+#>81 string bz2 \b, uses bzip2 compression
+#>84 string gz \b, uses gzip compression
#>136 ledate x created: %s
# other archives
@@ -48,7 +53,7 @@
# MIPS archive (needs to go before regular portable archives)
#
-0 string !<arch>\n__________E MIPS archive
+0 string =!<arch>\n__________E MIPS archive
>20 string U with MIPS Ucode members
>21 string L with MIPSEL members
>21 string B with MIPSEB members
@@ -62,7 +67,7 @@
# XXX - why are there multiple <ar> thingies? Note that 0x213c6172 is
# "!<ar", so, for new-style (4.xBSD/SVR2andup) archives, we have:
#
-# 0 string !<arch> current ar archive
+# 0 string =!<arch> current ar archive
# 0 long 0x213c6172 archive file
#
# and for SVR1 archives, we have:
@@ -74,7 +79,7 @@
# and absolute code program modules in the same format as new-style
# "ar" archives?
#
-0 string !<arch> current ar archive
+0 string =!<arch> current ar archive
>8 string __.SYMDEF random library
>0 belong =65538 - pre SR9.5
>0 belong =65539 - post SR9.5
@@ -122,14 +127,312 @@
0 lelong&0x8080ffff 0x0000031a ARC archive data, packed
0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed
0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched
+# [JW] stuff taken from idarc, obviously ARC successors:
+0 lelong&0x8080ffff 0x00000a1a PAK archive data
+0 lelong&0x8080ffff 0x0000141a ARC+ archive data
+0 lelong&0x8080ffff 0x0000481a HYP archive data
# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
# I can't create either SPARK or ArcFS archives so I have not tested this stuff
# [GRR: the original entries collide with ARC, above; replaced with combined
# version (not tested)]
-#0 byte 0x1a RISC OS archive
-#>1 string archive (ArcFS format)
+#0 byte 0x1a RISC OS archive (spark format)
0 string \032archive RISC OS archive (ArcFS format)
+0 string Archive\000 RISC OS archive (ArcFS format)
+
+# All these were taken from idarc, many could not be verified. Unfortunately,
+# there were many low-quality sigs, i.e. easy to trigger false positives.
+# Please notify me of any real-world fishy/ambiguous signatures and I'll try
+# to get my hands on the actual archiver and see if I find something better. [JW]
+# probably many can be enhanced by finding some 0-byte or control char near the start
+
+# idarc calls this Crush/Uncompressed... *shrug*
+0 string CRUSH Crush archive data
+# Squeeze It (.sqz)
+0 string HLSQZ Squeeze It archive data
+# SQWEZ
+0 string SQWEZ SQWEZ archive data
+# HPack (.hpk)
+0 string HPAK HPack archive data
+# HAP
+0 string \x91\x33HF HAP archive data
+# MD/MDCD
+0 string MDmd MDCD archive data
+# LIM
+0 string LIM\x1a LIM archive data
+# SAR
+3 string LH5 SAR archive data
+# BSArc/BS2
+0 string \212\3SB \0 BSArc/BS2 archive data
+# MAR
+2 string =-ah MAR archive data
+# ACB
+0 belong&0x00f800ff 0x00800000 ACB archive data
+# CPZ
+# TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data
+# JRC
+0 string JRchive JRC archive data
+# Quantum
+0 string DS\0 Quantum archive data
+# ReSOF
+0 string PK\3\6 ReSOF archive data
+# QuArk
+0 string 7\4 QuArk archive data
+# YAC
+14 string YC YAC archive data
+# X1
+0 string X1 X1 archive data
+0 string XhDr X1 archive data
+# CDC Codec (.dqt)
+0 belong&0xffffe000 0x76ff2000 CDC Codec archive data
+# AMGC
+0 string \xad6" AMGC archive data
+# NuLIB
+0 string NõFélå NuLIB archive data
+# PakLeo
+0 string LEOLZW PAKLeo archive data
+# ChArc
+0 string SChF ChArc archive data
+# PSA
+0 string PSA PSA archive data
+# CrossePAC
+0 string DSIGDCC CrossePAC archive data
+# Freeze
+0 string \x1f\x9f\x4a\x10\x0a Freeze archive data
+# KBoom
+0 string ¨MP¨ KBoom archive data
+# NSQ, must go after CDC Codec
+0 string \x76\xff NSQ archive data
+# DPA
+0 string Dirk\ Paehl DPA archive data
+# BA
+# TODO: idarc says "bytes 0-2 == bytes 3-5"
+# TTComp
+0 string \0\6 TTComp archive data
+# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
+0 string ESP ESP archive data
+# ZPack
+0 string \1ZPK\1 ZPack archive data
+# Sky
+0 string \xbc\x40 Sky archive data
+# UFA
+0 string UFA UFA archive data
+# Dry
+0 string =-H2O DRY archive data
+# FoxSQZ
+0 string FOXSQZ FoxSQZ archive data
+# AR7
+0 string ,AR7 AR7 archive data
+# PPMZ
+0 string PPMZ PPMZ archive data
+# MS Compress
+4 string \x88\xf0\x27 MS Compress archive data
+# updated by Joerg Jenderek
+>9 string \0
+>>0 string KWAJ
+>>>7 string \321\003 MS Compress archive data
+>>>>14 ulong >0 \b, original size: %ld bytes
+>>>>18 ubyte >0x65
+>>>>>18 string x \b, was %.8s
+>>>>>(10.b-4) string x \b.%.3s
+# MP3 (archiver, not lossy audio compression)
+0 string MP3\x1a MP3-Archiver archive data
+# ZET
+0 string OZÝ ZET archive data
+# TSComp
+0 string \x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
+# ARQ
+0 string gW\4\1 ARQ archive data
+# Squash
+3 string OctSqu Squash archive data
+# Terse
+0 string \5\1\1\0 Terse archive data
+# PUCrunch
+0 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
+# UHarc
+0 string UHA UHarc archive data
+# ABComp
+0 string \2AB ABComp archive data
+0 string \3AB2 ABComp archive data
+# CMP
+0 string CO\0 CMP archive data
+# Splint
+0 string \x93\xb9\x06 Splint archive data
+# InstallShield
+0 string \x13\x5d\x65\x8c InstallShield Z archive Data
+# Gather
+1 string GTH Gather archive data
+# BOA
+0 string BOA BOA archive data
+# RAX
+0 string ULEB\xa RAX archive data
+# Xtreme
+0 string ULEB\0 Xtreme archive data
+# Pack Magic
+0 string @â\1\0 Pack Magic archive data
+# BTS
+0 belong&0xfeffffff 0x1a034465 BTS archive data
+# ELI 5750
+0 string Ora\ ELI 5750 archive data
+# QFC
+0 string \x1aFC\x1a QFC archive data
+0 string \x1aQF\x1a QFC archive data
+# PRO-PACK
+0 string RNC PRO-PACK archive data
+# 777
+0 string 777 777 archive data
+# LZS221
+0 string sTaC LZS221 archive data
+# HPA
+0 string HPA HPA archive data
+# Arhangel
+0 string LG Arhangel archive data
+# EXP1, uses bzip2
+0 string 0123456789012345BZh EXP1 archive data
+# IMP
+0 string IMP\xa IMP archive data
+# NRV
+0 string \x00\x9E\x6E\x72\x76\xFF NRV archive data
+# Squish
+0 string \x73\xb2\x90\xf4 Squish archive data
+# Par
+0 string PHILIPP Par archive data
+0 string PAR Par archive data
+# HIT
+0 string UB HIT archive data
+# SBX
+0 belong&0xfffff000 0x53423000 SBX archive data
+# NaShrink
+0 string NSK NaShrink archive data
+# SAPCAR
+0 string #\ CAR\ archive\ header SAPCAR archive data
+0 string CAR\ 2.00RG SAPCAR archive data
+# Disintegrator
+0 string DST Disintegrator archive data
+# ASD
+0 string ASD ASD archive data
+# InstallShield CAB
+0 string ISc( InstallShield CAB
+# TOP4
+0 string T4\x1a TOP4 archive data
+# BatComp left out: sig looks like COM executable
+# so TODO: get real 4dos batcomp file and find sig
+# BlakHole
+0 string BH\5\7 BlakHole archive data
+# BIX
+0 string BIX0 BIX archive data
+# ChiefLZA
+0 string ChfLZ ChiefLZA archive data
+# Blink
+0 string Blink Blink archive data
+# Logitech Compress
+0 string \xda\xfa Logitech Compress archive data
+# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
+1 string (C)\ STEPANYUK ARS-Sfx archive data
+# AKT/AKT32
+0 string AKT32 AKT32 archive data
+0 string AKT AKT archive data
+# NPack
+0 string MSTSM NPack archive data
+# PFT
+0 string \0\x50\0\x14 PFT archive data
+# SemOne
+0 string SEM SemOne archive data
+# PPMD
+0 string \x8f\xaf\xac\x84 PPMD archive data
+# FIZ
+0 string FIZ FIZ archive data
+# MSXiE
+0 belong&0xfffff0f0 0x4d530000 MSXiE archive data
+# DeepFreezer
+0 belong&0xfffffff0 0x797a3030 DeepFreezer archive data
+# DC
+0 string =<DC- DC archive data
+# TPac
+0 string \4TPAC\3 TPac archive data
+# Ai
+0 string Ai\1\1\0 Ai archive data
+0 string Ai\1\0\0 Ai archive data
+# Ai32
+0 string Ai\2\0 Ai32 archive data
+0 string Ai\2\1 Ai32 archive data
+# SBC
+0 string SBC SBC archive data
+# Ybs
+0 string YBS Ybs archive data
+# DitPack
+0 string \x9e\0\0 DitPack archive data
+# DMS
+0 string DMS! DMS archive data
+# EPC
+0 string \x8f\xaf\xac\x8c EPC archive data
+# VSARC
+0 string VS\x1a VSARC archive data
+# PDZ
+0 string PDZ PDZ archive data
+# ReDuq
+0 string rdqx ReDuq archive data
+# GCA
+0 string GCAX GCA archive data
+# PPMN
+0 string pN PPMN archive data
+# WinImage
+3 string WINIMAGE WinImage archive data
+# Compressia
+0 string CMP0CMP Compressia archive data
+# UHBC
+0 string UHB UHBC archive data
+# WinHKI
+0 string \x61\x5C\x04\x05 WinHKI archive data
+# WWPack data file
+0 string WWP WWPack archive data
+# BSN (BSA, PTS-DOS)
+0 string \xffBSG BSN archive data
+1 string \xffBSG BSN archive data
+3 string \xffBSG BSN archive data
+1 string \0\xae\2 BSN archive data
+1 string \0\xae\3 BSN archive data
+1 string \0\xae\7 BSN archive data
+# AIN
+0 string \x33\x18 AIN archive data
+0 string \x33\x17 AIN archive data
+# XPA32
+0 string xpa\0\1 XPA32 archive data
+# SZip (TODO: doesn't catch all versions)
+0 string SZ\x0a\4 SZip archive data
+# XPack DiskImage
+0 string jm XPack DiskImage archive data
+# XPack Data
+0 string xpa XPack archive data
+# XPack Single Data
+0 string Í\ jm XPack single archive data
+
+# TODO: missing due to unknown magic/magic at end of file:
+#DWC
+#ARG
+#ZAR
+#PC/3270
+#InstallIt
+#RKive
+#RK
+#XPack Diskimage
+
+# These were inspired by idarc, but actually verified
+# Dzip archiver (.dz)
+0 string DZ Dzip archive data
+>2 byte x \b, version %i
+>3 byte x \b.%i
+# ZZip archiver (.zz)
+0 string ZZ\ \0\0 ZZip archive data
+0 string ZZ0 ZZip archive data
+# PAQ archiver (.paq)
+0 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
+0 string PAQ PAQ archive data
+>3 byte&0xf0 0x30
+>>3 byte x (v%c)
+# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
+0xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data
+0 string JARCS JAR (ARJ Software, Inc.) archive data
# ARJ archiver (jason@jarthur.Claremont.EDU)
0 leshort 0xea60 ARJ archive data
@@ -149,6 +452,8 @@
>7 byte 8 os: NeXT
>7 byte 9 os: VAX/VMS
>3 byte >0 %d]
+# [JW] idarc says this is also possible
+2 leshort 0xea60 ARJ archive data
# HA archiver (Greg Roelofs, newt@uchicago.edu)
# This is a really bad format. A file containing HAWAII will match this...
@@ -160,12 +465,21 @@
#>4 byte&0x0f =2 first is type HSC
#>4 byte&0x0f =0x0e first is type DIR
#>4 byte&0x0f =0x0f first is type SPECIAL
+# suggestion: at least identify small archives (<1024 files)
+0 belong&0xffff00fc 0x48410000 HA archive data
+>2 leshort =1 1 file,
+>2 leshort >1 %u files,
+>4 byte&0x0f =0 first is type CPY
+>4 byte&0x0f =1 first is type ASC
+>4 byte&0x0f =2 first is type HSC
+>4 byte&0x0f =0x0e first is type DIR
+>4 byte&0x0f =0x0f first is type SPECIAL
# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
0 string HPAK HPACK archive data
# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
-0 string \351,\001JAM\ JAM archive,
+0 string \351,\001JAM\ JAM archive,
>7 string >\0 version %.4s
>0x26 byte =0x27 -
>>0x2b string >\0 label %.11s,
@@ -173,12 +487,12 @@
>>0x36 string >\0 fstype %.8s
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
-2 string -lh0- LHarc 1.x archive data [lh0]
-2 string -lh1- LHarc 1.x archive data [lh1]
+2 string -lh0- LHarc 1.x/ARX archive data [lh0]
+2 string -lh1- LHarc 1.x/ARX archive data [lh1]
2 string -lz4- LHarc 1.x archive data [lz4]
2 string -lz5- LHarc 1.x archive data [lz5]
# [never seen any but the last; -lh4- reported in comp.compression:]
-2 string -lzs- LHa 2.x? archive data [lzs]
+2 string -lzs- LHa/LZS archive data [lzs]
2 string -lh\40- LHa 2.x? archive data [lh ]
2 string -lhd- LHa 2.x? archive data [lhd]
2 string -lh2- LHa 2.x? archive data [lh2]
@@ -186,25 +500,99 @@
2 string -lh4- LHa (2.x) archive data [lh4]
2 string -lh5- LHa (2.x) archive data [lh5]
2 string -lh6- LHa (2.x) archive data [lh6]
-2 string -lh7- LHa (2.x) archive data [lh7]
+2 string -lh7- LHa (2.x)/LHark archive data [lh7]
>20 byte x - header level %d
+# taken from idarc [JW]
+2 string -lZ PUT archive data
+2 string -lz LZS archive data
+2 string -sw1- Swag archive data
# RAR archiver (Greg Roelofs, newt@uchicago.edu)
-0 string Rar! RAR archive data
+0 string Rar! RAR archive data,
+>44 byte x v%0x,
+>10 byte >0 flags:
+>>10 byte &0x01 Archive volume,
+>>10 byte &0x02 Commented,
+>>10 byte &0x04 Locked,
+>>10 byte &0x08 Solid,
+>>10 byte &0x20 Authenticated,
+>35 byte 0 os: MS-DOS
+>35 byte 1 os: OS/2
+>35 byte 2 os: Win32
+>35 byte 3 os: Unix
+# some old version? idarc says:
+0 string RE\x7e\x5e RAR archive data
# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
0 string SQSH squished archive data (Acorn RISCOS)
# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
-# I can't figure out the self-extracting form of these buggers...
+# [JW] see exe section for self-extracting version
0 string UC2\x1a UC2 archive data
# ZIP archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
-0 string PK\003\004 Zip archive data
->4 byte 0x09 \b, at least v0.9 to extract
->4 byte 0x0a \b, at least v1.0 to extract
->4 byte 0x0b \b, at least v1.1 to extract
->4 byte 0x14 \b, at least v2.0 to extract
+0 string PK\003\004
+>4 byte 0x09 Zip archive data, at least v0.9 to extract
+>4 byte 0x0a Zip archive data, at least v1.0 to extract
+>4 byte 0x0b Zip archive data, at least v1.1 to extract
+>4 byte 0x14
+>>30 ubelong !0x6d696d65 Zip archive data, at least v2.0 to extract
+>0x161 string WINZIP Zip archive data, WinZIP self-extracting
+
+
+# OpenOffice.org / KOffice / StarOffice documents
+# From: Abel Cheung <abel@oaka.org>
+# Listed here because they are basically zip files
+>>30 string mimetype
+
+# KOffice (1.2 or above) formats
+>>>50 string vnd.kde. KOffice (>=1.2)
+>>>>58 string karbon Karbon document
+>>>>58 string kchart KChart document
+>>>>58 string kformula KFormula document
+>>>>58 string kivio Kivio document
+>>>>58 string kontour Kontour document
+>>>>58 string kpresenter KPresenter document
+>>>>58 string kspread KSpread document
+>>>>58 string kword KWord document
+
+# OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
+>>>50 string vnd.sun.xml. OpenOffice.org 1.x
+>>>>62 string writer Writer
+>>>>>68 byte !0x2e document
+>>>>>68 string .template template
+>>>>>68 string .global global document
+>>>>62 string calc Calc
+>>>>>66 byte !0x2e spreadsheet
+>>>>>66 string .template template
+>>>>62 string draw Draw
+>>>>>66 byte !0x2e document
+>>>>>66 string .template template
+>>>>62 string impress Impress
+>>>>>69 byte !0x2e presentation
+>>>>>69 string .template template
+>>>>62 string math Math document
+
+# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
+# http://lists.oasis-open.org/archives/office/200505/msg00006.html
+>>>50 string vnd.oasis.opendocument. OpenDocument
+>>>>73 string text
+>>>>>77 byte !0x2d Text
+>>>>>77 string -template Text Template
+>>>>>77 string -web HTML Document Template
+>>>>>77 string -master Master Document
+>>>>73 string graphics Drawing
+>>>>>81 string -template Template
+>>>>73 string presentation Presentation
+>>>>>85 string -template Template
+>>>>73 string spreadsheet Spreadsheet
+>>>>>84 string -template Template
+>>>>73 string chart Chart
+>>>>>78 string -template Template
+>>>>73 string formula Formula
+>>>>>80 string -template Template
+>>>>73 string database Database
+>>>>73 string image Image
# Zoo archiver
20 lelong 0xfdc4a7dc Zoo archive data
@@ -278,7 +666,7 @@
# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
# by Stefan `Sec` Zehl <sec@42.org>
-7 string **ACE** ACE compressed archive
+7 string **ACE** ACE archive data
>15 byte >0 version %d
>16 byte =0x00 \b, from MS-DOS
>16 byte =0x01 \b, from OS/2
@@ -300,9 +688,47 @@
>5 leshort &0x0400 \b, small dictionary
>5 leshort &0x0800 \b, multi-volume
>5 leshort &0x1000 \b, contains AV-String
->>30 string\x16*UNREGISTERED\x20VERSION* (unregistered)
+>>30 string \x16*UNREGISTERED\x20VERSION* (unregistered)
>5 leshort &0x2000 \b, with recovery record
>5 leshort &0x4000 \b, locked
>5 leshort &0x8000 \b, solid
# Date in MS-DOS format (whatever that is)
#>18 lelong x Created on
+
+# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
+# <doj@cubic.org>
+0x1A string sfArk sfArk compressed Soundfont
+>0x15 string 2
+>>0x1 string >\0 Version %s
+>>0x2A string >\0 : %s
+
+# DR-DOS 7.03 Packed File *.??_
+0 string Packed\ File\ Personal NetWare Packed File
+>12 string x \b, was "%.12s"
+
+# EET archive
+# From: Tilman Sauerbeck <tilman@code-monkey.de>
+0 belong 0x1ee7ff00 EET archive
+
+# rzip archives
+0 string RZIP rzip compressed data
+>4 byte x - version %d
+>5 byte x \b.%d
+>6 belong x (%d bytes)
+
+# From: "Robert Dale" <robdale@gmail.com>
+0 belong 123 dar archive,
+>4 belong x label "%.8x
+>>8 belong x %.8x
+>>>12 beshort x %.4x"
+>14 byte 0x54 end slice
+>14 beshort 0x4e4e multi-part
+>14 beshort 0x4e53 multi-part, with -S
+
+# Symbian installation files
+# http://www.thouky.co.uk/software/psifs/sis.html
+# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
+8 lelong 0x10000419 Symbian installation file
+>4 lelong 0x1000006D (EPOC release 3/4/5)
+>4 lelong 0x10003A12 (EPOC release 6)
+0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x)