summaryrefslogtreecommitdiff
path: root/usr.bin/file/magdir/archive
blob: bd400815444a4fabdad2bcfba6351da1949df6b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215

#------------------------------------------------------------------------------
# archive:  file(1) magic for archive formats (see also "msdos" for self-
#           extracting compressed archives)
#
# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
# pre-POSIX "tar" archives are handled in the C code.

# POSIX tar archives
257	string		ustar\0		POSIX tar archive
257	string		ustar\040\040\0	GNU tar archive

# cpio archives
#
# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
# The idea is to indicate archives produced on machines with the same
# byte order as the machine running "file" with "cpio archive", and
# to indicate archives produced on machines with the opposite byte order
# from the machine running "file" with "byte-swapped cpio archive".
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
0	short		070707		cpio archive
0	short		0143561		byte-swapped cpio archive
0	string		070707		ASCII cpio archive (pre-SVR4 or odc)
0	string		070701		ASCII cpio archive (SVR4 with no CRC)
0	string		070702		ASCII cpio archive (SVR4 with CRC)

# other archives
0	long		0177555		very old archive
0	short		0177555		very old PDP-11 archive
0	long		0177545		old archive
0	short		0177545		old PDP-11 archive
0	long		0100554		apl workspace
0	string		=<ar>		archive

# MIPS archive (needs to go first)
#
0	string	!<arch>\n__________E	MIPS archive
>20	string	U			with MIPS Ucode members
>21	string	L			with MIPSEL members
>21	string	B			with MIPSEB members
>19	string	L			and an EL hash table
>19	string	B			and an EB hash table
>22	string	X			-- out of date

0	string		!<arch>		archive
>8	string		__.SYMDEF	random library
0	string		-h-		Software Tools format archive text

#
# XXX - why are there multiple <ar> thingies?  Note that 0x213c6172 is
# "!<ar", so, for new-style (4.xBSD/SVR2andup) archives, we have:
#
# 0	string		!<arch>		current ar archive
# 0	long		0x213c6172	archive file
#
# and for SVR3.1 archives, we have:
#
# 0	string		\<ar>		System V Release 1 ar archive
# 0	string		=<ar>		archive
# 0	string		=<ar>		archive
#
# XXX - did Aegis really store shared libraries, breakpointed modules,
# and absolute code program modules in the same format as new-style
# "ar" archives?
#
0	string		!<arch>		current ar archive
>8	string		__.SYMDEF	random library
>0	belong		=65538		- pre SR9.5
>0	belong		=65539		- post SR9.5
>0	beshort		2		- object archive
>0	beshort		3		- shared library module
>0	beshort		4		- debug break-pointed module
>0	beshort		5		- absolute code program module
0	string		\<ar>		System V Release 1 ar archive
0	string		=<ar>		archive
#
# XXX - from "vax", which appears to collect a bunch of byte-swapped
# thingies, to help you recognize VAX files on big-endian machines;
# with "leshort", "lelong", and "string", that's no longer necessary....
#
0	belong		0x65ff0000	VAX 3.0 archive
0	belong		0x3c61723e	VAX 5.0 archive
#
0	long		0x213c6172	archive file
0	lelong		0177555		very old VAX archive
0	leshort		0177555		very old PDP-11 archive
#
# XXX - "pdp" claims that 0177545 can have an __.SYMDEF member and thus
# be a random library (it said 0xff65 rather than 0177545).
#
0	lelong		0177545		old VAX archive
>8	string		__.SYMDEF	random library
0	leshort		0177545		old PDP-11 archive
>8	string		__.SYMDEF	random library
#
0	string		=<ar>		archive
#
# From "pdp":
#
0	lelong		0x39bed		PDP-11 old archive
0	lelong		0x39bee		PDP-11 4.0 archive
#
0	string		-h-		Software Tools format archive text

# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
#
# The first byte is the magic (0x1a), byte 2 is the compression type for
# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
# filename of the first file (null terminated).  Since some types collide
# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
0	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
0	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
0	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
0	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
0	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
0	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched

# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
# I can't create either SPARK or ArcFS archives so I have not tested this stuff
# [GRR:  the original entries collide with ARC, above; replaced with combined
#  version (not tested)]
#0	byte		0x1a		RISC OS archive
#>1	string		archive		(ArcFS format)
0	string		\032archive	RISC OS archive (ArcFS format)

# ARJ archiver (jason@jarthur.Claremont.EDU)
0	leshort		0xea60		ARJ archive data
>5	byte		x		\b, v%d,
>8	byte		&0x04		multi-volume,
>8	byte		&0x10		slash-switched,
>8	byte		&0x20		backup,
>34	string		x		original name: %s,
>7	byte		0		os: MS-DOS
>7	byte		1		os: PRIMOS
>7	byte		2		os: Unix
>7	byte		3		os: Amiga
>7	byte		4		os: Macintosh
>7	byte		5		os: OS/2
>7	byte		6		os: Apple ][ GS
>7	byte		7		os: Atari ST
>7	byte		8		os: NeXT
>7	byte		9		os: VAX/VMS
>3	byte		>0		%d]

# HA archiver (Greg Roelofs, newt@uchicago.edu)
# This is a really bad format. A file containing HAWAII will match this...
#0	string		HA		HA archive data,
#>2	leshort		=1		1 file,
#>2	leshort		>1		%u files,
#>4	byte&0x0f	=0		first is type CPY
#>4	byte&0x0f	=1		first is type ASC
#>4	byte&0x0f	=2		first is type HSC
#>4	byte&0x0f	=0x0e		first is type DIR
#>4	byte&0x0f	=0x0f		first is type SPECIAL

# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
0	string		HPAK		HPACK archive data

# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
0	string		\351,\001JAM\		JAM archive,
>7	string		>\0			version %.4s
>0x26	byte		=0x27			-
>>0x2b	string          >\0			label %.11s,
>>0x27	lelong		x			serial %08x,
>>0x36	string		>\0			fstype %.8s

# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
2	string		-lh0-		LHarc 1.x archive data [lh0]
2	string		-lh1-		LHarc 1.x archive data [lh1]
2	string		-lz4-		LHarc 1.x archive data [lz4]
2	string		-lz5-		LHarc 1.x archive data [lz5]
#	[never seen any but the last; -lh4- reported in comp.compression:]
2	string		-lzs-		LHa 2.x? archive data [lzs]
2	string		-lh -		LHa 2.x? archive data [lh ]
2	string		-lhd-		LHa 2.x? archive data [lhd]
2	string		-lh2-		LHa 2.x? archive data [lh2]
2	string		-lh3-		LHa 2.x? archive data [lh3]
2	string		-lh4-		LHa (2.x) archive data [lh4]
2	string		-lh5-		LHa (2.x) archive data [lh5]
>20	byte		x		- header level %d

# RAR archiver (Greg Roelofs, newt@uchicago.edu)
0	string		Rar!		RAR archive data

# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
0	string		SQSH		squished archive data (Acorn RISCOS)

# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
# I can't figure out the self-extracting form of these buggers...
0	string		UC2\x1a		UC2 archive data

# ZIP archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
0	string		PK\003\004	Zip archive data
>4	byte		0x09		\b, at least v0.9 to extract
>4	byte		0x0a		\b, at least v1.0 to extract
>4	byte		0x0b		\b, at least v1.1 to extract
>4	byte		0x14		\b, at least v2.0 to extract

# Zoo archiver
20	lelong		0xfdc4a7dc	Zoo archive data
>4	byte		>48		\b, v%c.
>>6	byte		>47		\b%c
>>>7	byte		>47		\b%c
>32	byte		>0		\b, modify: v%d
>>33	byte		x		\b.%d+
>42	lelong		0xfdc4a7dc	\b,
>>70	byte		>0		extract: v%d
>>>71	byte		x		\b.%d+

# Shell archives
10	string		#\ This\ is\ a\ shell\ archive	shell archive text