1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# ----------------------------Archive Formats--------------------------------------
# POSIX tar archives
# Matches the "ustar" + NUL magic string. {offset-adjust:-257} moves the reported
# offset 257 bytes back from where the magic string was found.
# NOTE(review): presumably 257 is the position of the magic field inside the tar
# header, so the adjusted offset is the start of the header -- confirm against the spec.
0 string ustar\000 POSIX tar archive{offset-adjust:-257}
# Print the owner user name only when the field at +8 is non-empty (first byte != 0).
>8 byte !0
>>8 string x \b, owner user name: "%.32s"
# Print the owner group name only when the field at +40 is non-empty.
>40 byte !0
>>40 string x \b, owner group name: "%.32s"
# GNU variant: "ustar" followed by two spaces (\040) and a NUL. Same sub-rules as above.
0 string ustar\040\040\000 POSIX tar archive (GNU){offset-adjust:-257}
>8 byte !0
>>8 string x \b, owner user name: "%.32s"
>40 byte !0
>>40 string x \b, owner group name: "%.32s"
# Incremental snapshot gnu-tar format from:
# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
# Matches the literal prefix "GNU tar-" (\x20 is the space character).
0 string GNU\x20tar- GNU tar incremental snapshot data,
# The matched prefix "GNU tar-" is exactly 8 bytes long, so the version text
# starts at offset 8. Reading from offset 8 (rather than 0) keeps the prefix
# from being repeated inside the reported version string.
>8 string x version: "%s"
# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
# Two alternative magics map to the same description. The first one is reported
# 14 bytes before the matched string ({offset-adjust:-14}); the second is reported
# at the match position itself.
0 string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data{offset-adjust:-14}
0 string JARCS JAR (ARJ Software, Inc.) archive data
# PKZIP multi-volume archive
# Matches the 4-byte marker PK\x07\x08 immediately followed by PK\x03\x04, the same
# four bytes that the plain "Zip" signature in this file matches on.
0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract
# ZIP compression (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# Matches the "PK\003\004" signature. The description is assembled piece by piece
# by the sub-rules below, so their order determines the order of the printed text.
0 string PK\003\004 Zip
# Bit 0 of the little-endian 16-bit field at +6 set => report "encrypted".
>6 leshort &0x01 encrypted
# Always matches; just appends the fixed text.
>0 byte x archive data,
# The byte at +4 selects the "version needed to extract" text (0x09 => v0.9, 0x0a => v1.0, ...).
>4 byte 0x00 v0.0
>4 byte 0x09 at least v0.9 to extract,
>4 byte 0x0a at least v1.0 to extract,
>4 byte 0x0b at least v1.1 to extract,
>0x161 string WINZIP WinZIP self-extracting,
# 0x14 => v2.0, but the text is printed only when the 4 bytes at +30 (where the
# file name is read below) are not 0x6d696d65, i.e. ASCII "mime".
# NOTE(review): presumably meant to skip entries named "mimetype" -- confirm intent.
>4 byte 0x14
>>30 ubelong !0x6d696d65 at least v2.0 to extract,
# Compressed size (signed little-endian long at +18): a negative value is tagged
# "invalid"; otherwise it is printed and also used as the {jump-to-offset} value.
>18 lelong !0
>>18 lelong <0 invalid
>>18 lelong x compressed size: %d,
>>18 lelong x {jump-to-offset:%d}
# Uncompressed size (at +22): same negative check; {extract-delay} names the
# "End of Zip archive" signature defined for the Zip footer.
>22 lelong !0
>>22 lelong <0 invalid
>>22 lelong x uncompressed size: %d,{extract-delay:End of Zip archive}
# The first byte of the file name must lie within 0x2D ('-') .. 0x7A ('z').
>30 byte <0x2D invalid file name,
>30 byte >0x7A invalid file name,
# File name output: the 16-bit value at +26 is passed as {raw-string-length}, and the
# {raw-string:...} body is built from the strings at +30, +61, +92, +123 and +154
# (31-byte steps), joined without separating spaces via "\b".
>30 string x name: {raw-replace}
>26 leshort x {raw-string-length:%d}
>30 string x {raw-string:%s
>61 string x \b%s
>92 string x \b%s
>123 string x \b%s
>154 string x \b%s}
# ZIP footer
# Matches the "PK\x05\x06" signature. The description text "End of Zip archive" is
# also the name referenced by the {extract-delay:...} keyword in the Zip signature.
0 string PK\x05\x06 End of Zip archive
# The following three sub-rules are disabled (commented out) and kept for reference.
#>10 leshort x number of records: %d,
#>12 leshort x size of central directory: %d
#>20 leshort x {offset-adjust:22+%d}
# If the 16-bit value at +20 is positive, treat it as the comment length and print
# that many bytes starting at +22 as the comment.
>20 leshort >0
>>20 leshort x \b, comment: {raw-replace}
>>20 leshort x {raw-string-length:%d}
>>22 string x {raw-string:%s}
# ARJ archiver (jason@jarthur.Claremont.EDU)
# Matches the little-endian 16-bit value 0xea60 and then decodes header fields.
# Range checks that fail emit the word "invalid" into the description.
0 leshort 0xea60 ARJ archive data,
>2 leshort x header size: %d,
# Version (byte at +5) must be within 1..16.
>5 byte <1 invalid
>5 byte >16 invalid
>5 byte x version %d,
# Minimum version to extract (byte at +6) must be within 1..16.
>6 byte <1 invalid
>6 byte >16 invalid
>6 byte x minimum version to extract: %d,
# Flag byte at +8: a negative (signed) value is rejected; individual bits are decoded below.
>8 byte <0 invalid flags,
>8 byte &0x04 multi-volume,
>8 byte &0x10 slash-switched,
>8 byte &0x20 backup,
# Compression method (byte at +9), valid values 0..4.
>9 byte <0 invalid compression method,
>9 byte >4 invalid compression method,
>9 byte 0 compression method: stored,
>9 byte 1 compression method: compressed most,
>9 byte 2 compression method: compressed,
>9 byte 3 compression method: compressed faster,
>9 byte 4 compression method: compressed fastest,
# File type (byte at +10), valid values 0..4.
>10 byte <0 invalid file type
>10 byte >4 invalid file type
>10 byte 0 file type: binary,
>10 byte 1 file type: 7-bit text,
>10 byte 2 file type: comment header,
>10 byte 3 file type: directory,
>10 byte 4 file type: volume label,
# Original file name at +34, only when non-empty; it is passed to the {file-name}
# keyword and also printed.
>34 byte !0
>>34 string x {file-name:%s}
>>34 string x original name: "%s",
>0xC ledate x original file date: %s,
# Sizes are signed little-endian longs; negative values are rejected.
>0x10 lelong <0 invalid
>0x10 lelong x compressed file size: %d,
>0x14 lelong <0 invalid
>0x14 lelong x uncompressed file size: %d,
# Host OS (byte at +7), valid values 0..9.
>7 byte 0 os: MS-DOS
>7 byte 1 os: PRIMOS
>7 byte 2 os: Unix
>7 byte 3 os: Amiga
>7 byte 4 os: Macintosh
>7 byte 5 os: OS/2
>7 byte 6 os: Apple ][ GS
>7 byte 7 os: Atari ST
>7 byte 8 os: NeXT
>7 byte 9 os: VAX/VMS
>7 byte >9 invalid os
>7 byte <0 invalid os
# RAR archiver (Greg Roelofs, newt@uchicago.edu)
# Matches the 4-character magic "Rar!" only; no further fields are decoded.
0 string Rar! RAR archive data
# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
# Matches the 4-character magic "HPAK" only.
0 string HPAK HPACK archive data
# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
# Magic bytes: octal \351 (0xE9), ',', octal \001, followed by the text "JAM".
0 string \351,\001JAM JAM archive
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
# One rule per 5-character method string ("-lzs-", "-lh -", "-lhd-", "-lh2-" .. "-lh7-");
# \40 in the second rule is an octal-escaped space. Each hit is reported 2 bytes
# before the matched string ({offset-adjust:-2}).
# NOTE(review): presumably the method string sits at offset 2 of the LHA header,
# so the adjusted offset is the header start -- confirm.
0 string -lzs- LHa 2.x? archive data [lzs] [NSRL|LHA2]{offset-adjust:-2}
0 string -lh\40- LHa 2.x? archive data [lh ] [NSRL|LHA2]{offset-adjust:-2}
0 string -lhd- LHa 2.x? archive data [lhd] [NSRL|LHA2]{offset-adjust:-2}
0 string -lh2- LHa 2.x? archive data [lh2] [NSRL|LHA2]{offset-adjust:-2}
0 string -lh3- LHa 2.x? archive data [lh3] [NSRL|LHA2]{offset-adjust:-2}
0 string -lh4- LHa (2.x) archive data [lh4] [NSRL|LHA2]{offset-adjust:-2}
0 string -lh5- LHa (2.x) archive data [lh5] [NSRL|LHA2]{offset-adjust:-2}
0 string -lh6- LHa (2.x) archive data [lh6] [NSRL|LHA2]{offset-adjust:-2}
0 string -lh7- LHa (2.x) archive data [lh7] [NSRL|LHA2]{offset-adjust:-2}
# cpio archives
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
#0 string 070707 ASCII cpio archive (pre-SVR4 or odc)
# WARNING: The jump-to-offset value in the ASCII cpio signatures below is a terrible hack.
# This keyword is not intended to be passed a string (%s), and doing so can open
# up the possibility of keyword injection by a malicious file. This works here though, because:
#
# 1) It would result in an invalid CPIO file (invalid size)
# 2) All valid keywords require more than 8 bytes, so a valid one can't be
# injected in the %.8s field.
0 string 070701 ASCII cpio archive (SVR4 with no CRC),
# The file name starts at +110 and must not be empty.
>110 byte 0 invalid
#>110 byte !0x2F
#>>110 string !TRAILER!!! invalid
# The first character of the 8-character fields at +94 (name length) and +54
# (file size) must lie within 0x30 ('0') .. 0x66 ('f'). Only that first byte is checked.
>94 byte <0x30 invalid
>94 byte >0x66 invalid
>54 byte <0x30 invalid
>54 byte >0x66 invalid
>110 string x file name: "%s",
>94 string x file name length: "0x%.8s",
>54 string x file size: "0x%.8s"
# Builds {jump-to-offset:0x<size>+110+0x<namelen>} from the two ASCII hex fields;
# the second rule uses "\b" to append its text without a separating space.
>54 string x {jump-to-offset:0x%.8s+110+
>94 string x \b0x%.8s}
# ASCII cpio archive, SVR4 variant with CRC (magic "070702").
# The description ends with a comma so that the "file name:" text appended by the
# sub-rules is separated the same way as in the "070701" (no CRC) signature.
0 string 070702 ASCII cpio archive (SVR4 with CRC),
# The file name starts at +110 and must not be empty.
>110 byte 0 invalid
#>110 byte !0x2F
#>>110 string !TRAILER!!! invalid
# The first character of the 8-character fields at +94 (name length) and +54
# (file size) must lie within 0x30 ('0') .. 0x66 ('f'). Only that first byte is checked.
>94 byte <0x30 invalid
>94 byte >0x66 invalid
>54 byte <0x30 invalid
>54 byte >0x66 invalid
>110 string x file name: "%s",
>94 string x file name length: "0x%.8s",
>54 string x file size: "0x%.8s"
# Builds {jump-to-offset:0x<size>+110+0x<namelen>} from the two ASCII hex fields.
# Passing a string (%.8s) to this keyword is a known hack; it is limited to 8
# characters so that no other keyword can be injected through it.
>54 string x {jump-to-offset:0x%.8s+110+
>94 string x \b0x%.8s}
# HP Printer Job Language
# The header found on Win95 HP plot files is the "Silliest Thing possible"
# (TM)
# Every driver puts the language at some random position, with random case
# (LANGUAGE and Language)
# For example the LaserJet 5L driver puts the "PJL ENTER LANGUAGE" in line 10
# From: Uwe Bonnes <bon@elektron.ikp.physik.th-darmstadt.de>
#
# Matches ESC (\033) followed by "%-12345X@PJL". The nested sub-rules use relative
# offsets (&0) and each prints one further non-empty string, up to four in total.
# NOTE(review): in magic(5), "&" offsets are relative to the end of the previous
# higher-level match -- confirm that the scanner in use follows the same rule.
0 string \033%-12345X@PJL HP Printer Job Language data
>&0 string >\0 "%s"
>>&0 string >\0 "%s"
>>>&0 string >\0 "%s"
>>>>&0 string >\0 "%s"
#------------------------------------------------------------------------------
#
# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt@redhat.com)
#
# Matches the big-endian 32-bit magic 0xedabeedb.
0 belong 0xedabeedb RPM
# Byte at +4 is printed as the version number.
>4 byte x v%d
# Big-endian 16-bit value at +6: 0 => "bin", 1 => "src".
>6 beshort 0 bin
>6 beshort 1 src
# Big-endian 16-bit value at +8 selects the architecture name; values outside
# 1..12 print nothing.
>8 beshort 1 i386
>8 beshort 2 Alpha
>8 beshort 3 Sparc
>8 beshort 4 MIPS
>8 beshort 5 PowerPC
>8 beshort 6 68000
>8 beshort 7 SGI
>8 beshort 8 RS6000
>8 beshort 9 IA64
>8 beshort 10 Sparc64
>8 beshort 11 MIPSel
>8 beshort 12 ARM
# String at +10 is printed in quotes (presumably the package name -- confirm).
>10 string x "%s"
# IBM AIX Backup File Format header and entry signatures
# Volume header: matches the little-endian 32-bit value 0xea6b0009.
0 lelong 0xea6b0009 BFF volume header,
>4 leshort x checksum: 0x%.4X,
# Volume number (signed 16-bit at +6) must be strictly positive.
>6 leshort <0 invalid
>6 leshort 0 invalid
>6 leshort x volume number: %d,
>8 ledate x current date: %s,
>12 ledate x starting date: %s,
>20 string x disk name: "%s",
>36 string x file system name: "%s",
>52 string x user name: "%s"
# BFF volume entries. Three 16-bit magics (0xea6b, 0xea6c, 0xea6d) share an identical
# set of sub-rules and differ only in the description text. Each hit is reported
# 2 bytes before the matched value ({offset-adjust:-2}).
#
# Shared sub-rules:
#   +22  signed long: <0 => invalid, 0 => "directory,", >0 => printed as file size;
#        in the >0 case the long at +54 must also be >0 and is printed as compressed size
#   +58  long must be 0
#   +62  file name: must be non-empty and its first byte must be 0x2e ('.') or 0x2f ('/');
#        it is printed as two concatenated chunks read at +62 and +92
0 leshort 0xea6b BFF volume entry,{offset-adjust:-2}
>22 lelong <0 invalid
>22 lelong 0 directory,
>22 lelong >0
>>22 lelong x file size: %d,
>>54 lelong <0 invalid
>>54 lelong 0 invalid
>>54 lelong x compressed size: %d,
>58 lelong !0 invalid
>62 byte 0 invalid
>62 byte !0x2e
>>62 byte !0x2f invalid
>62 string x file name: "%s
>92 string x \b%s"
# Same checks as above for magic 0xea6c ("compressed" entry).
0 leshort 0xea6c BFF volume entry, compressed,{offset-adjust:-2}
>22 lelong <0 invalid
>22 lelong 0 directory,
>22 lelong >0
>>22 lelong x file size: %d,
>>54 lelong <0 invalid
>>54 lelong 0 invalid
>>54 lelong x compressed size: %d,
>58 lelong !0 invalid
>62 byte 0 invalid
>62 byte !0x2e
>>62 byte !0x2f invalid
>62 string x file name: "%s
>92 string x \b%s"
# Same checks as above for magic 0xea6d ("AIXv3" entry).
0 leshort 0xea6d BFF volume entry, AIXv3,{offset-adjust:-2}
>22 lelong <0 invalid
>22 lelong 0 directory,
>22 lelong >0
>>22 lelong x file size: %d,
>>54 lelong <0 invalid
>>54 lelong 0 invalid
>>54 lelong x compressed size: %d,
>58 lelong !0 invalid
>62 byte 0 invalid
>62 byte !0x2e
>>62 byte !0x2f invalid
>62 string x file name: "%s
>92 string x \b%s"
#------------------------------------------------------------------------------
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
# Matches "MSCF" followed by four NUL bytes.
0 string MSCF\0\0\0\0 Microsoft Cabinet archive data
# According to libmagic comments, CAB version number is always 1.3
# Byte +25 is checked as the major version (must be 1), byte +24 as the minor (must be 3).
>25 byte !1 \b,invalid major version
>24 byte !3 \b,invalid minor version
# Little-endian long at +8 is printed as the size in bytes.
>8 lelong x \b, %u bytes
# 16-bit file count at +28; a count of 0 is reported as invalid.
>28 leshort 0 \b, 0 files (invalid)
>28 leshort 1 \b, 1 file
>28 leshort >1 \b, %u files
# InstallShield Cabinet files
# Matches the 4-character magic "ISc(".
0 string ISc( InstallShield Cabinet archive data
# TODO: Version number checks should be made more specific for false positive filtering
# The high nibble of the byte at +5 (byte & 0xf0) selects the version text.
>5 byte&0xf0 =0x60 version 6,
>5 byte&0xf0 <0x60 version 4/5,
>5 byte&0xf0 >0x60 invalid version,
# The little-endian long at +12 is an offset; values outside 0..100000 are tagged invalid.
>12 lelong <0 invalid offset,
>12 lelong >100000 invalid offset,
# Indirect offset: read the long at +12, add 40, and print the long found there as the file count.
>(12.l+40) lelong x %u files
# Windows CE package files
# Matches "MSCE" followed by four NUL bytes.
0 string MSCE\0\0\0\0 Microsoft WinCE install header
# Little-endian long at +20 selects the target architecture text; unlisted values print nothing.
# Note that 103 and 10003 both print "Hitachi SH3", and 104 and 10005 both print "Hitachi SH4".
>20 lelong 0 \b, architecture-independent
>20 lelong 103 \b, Hitachi SH3
>20 lelong 104 \b, Hitachi SH4
>20 lelong 0xA11 \b, StrongARM
>20 lelong 4000 \b, MIPS R4000
>20 lelong 10003 \b, Hitachi SH3
>20 lelong 10004 \b, Hitachi SH3E
>20 lelong 10005 \b, Hitachi SH4
>20 lelong 70001 \b, ARM 7TDMI
# 16-bit counts at +52 (files) and +56 (registry entries); a count of 0 prints nothing.
>52 leshort 1 \b, 1 file
>52 leshort >1 \b, %u files
>56 leshort 1 \b, 1 registry entry
>56 leshort >1 \b, %u registry entries
# LBR archive: matches a NUL byte, a run of (backslash-escaped) spaces, then two NUL bytes.
0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data
# Parity archive reconstruction file, the 'par' file format now used on Usenet.
# Matches "PAR" + NUL. The 16-bit value at +48 is 0 for an index file; a positive
# value is printed as the file number.
0 string PAR\0 PARity archive data
>48 leshort =0 - Index file
>48 leshort >0 - file number %d
# Felix von Leitner <felix-file@fefe.de>
# Matches the literal text "d8:announce" (the "8" equals the length of "announce").
0 string d8:announce BitTorrent file