# ----------------------------Archive Formats--------------------------------------

# POSIX (ustar) tar archives.
# The "ustar" magic lives at byte 257 of a tar header, which is why the match
# carries an offset adjustment of -257. Relative to the magic, the owner user
# name is at +8 and the owner group name at +40; each is printed only when its
# first byte is non-NUL.
0	string	ustar\000	POSIX tar archive{offset-adjust:-257}
>8	byte	!0
>>8	string	x	\b, owner user name: "%.32s"
>40	byte	!0
>>40	string	x	\b, owner group name: "%.32s"

# GNU variant: same layout, but the magic is "ustar" followed by two spaces and a NUL.
0	string	ustar\040\040\000	POSIX tar archive (GNU){offset-adjust:-257}
>8	byte	!0
>>8	string	x	\b, owner user name: "%.32s"
>40	byte	!0
>>40	string	x	\b, owner group name: "%.32s"

# Incremental snapshot gnu-tar format from:
# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
# The file begins with "GNU tar-<tar version>-<format version>". The version
# text starts right after the 8-byte "GNU tar-" magic, so it is read from
# offset 8 instead of offset 0; reading from 0 would repeat the magic inside
# the reported version string.
0	string	GNU\x20tar-	GNU tar incremental snapshot data,
>8	string	x	version: "%s"

# JAR archiver (.j) by ARJ Software: the successor to ARJ.
# Not to be confused with Java's JAR, which is essentially ZIP.
# The first magic is reported with an offset adjustment of -14; the second is
# reported at the offset where it matched.
0	string	\x1aJar\x1b	JAR (ARJ Software, Inc.) archive data{offset-adjust:-14}
0	string	JARCS	JAR (ARJ Software, Inc.) archive data

# PKZIP multi-volume (spanned) archive: the PK\x07\x08 marker immediately
# followed by a regular PK\x03\x04 local file header signature.
0	string	PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract

# ZIP compression (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
# Matches a ZIP local file header. Fields used below, relative to the magic:
#   +4  version needed to extract      +6  general purpose flags
#   +18 compressed size                +22 uncompressed size
#   +26 file name length               +30 file name
0	string	PK\003\004	Zip
>6	leshort	&0x01	encrypted
>0	byte	x	archive data,
# Every version message ends with a comma so that the size fields printed
# afterwards read as one comma-separated list.
>4	byte	0x00	v0.0,
>4	byte	0x09	at least v0.9 to extract,
>4	byte	0x0a	at least v1.0 to extract,
>4	byte	0x0b	at least v1.1 to extract,
>0x161	string	WINZIP	WinZIP self-extracting,
# v2.0 is only reported when the file name does not begin with "mime" (0x6d696d65).
>4	byte	0x14
>>30	ubelong	!0x6d696d65	at least v2.0 to extract,
# Sizes are only examined when non-zero; negative values mark the hit invalid.
>18	lelong	!0
>>18	lelong	<0	invalid
>>18	lelong	x	compressed size: %d,
>>18	lelong	x	{jump-to-offset:%d}
>22	lelong	!0
>>22	lelong	<0	invalid
>>22	lelong	x	uncompressed size: %d,{extract-delay:End of Zip archive}
# The first file name byte must fall in the range 0x2D..0x7A.
>30	byte	<0x2D	invalid file name,
>30	byte	>0x7A	invalid file name,
# The file name is emitted through the raw-string keywords: the length comes
# from +26 and the text is gathered in consecutive 31-byte pieces from +30.
>30	string	x	name: {raw-replace}
>26	leshort	x	{raw-string-length:%d}
>30	string	x	{raw-string:%s
>61	string	x	\b%s
>92	string	x	\b%s
>123	string	x	\b%s
>154	string	x	\b%s}

# ZIP footer (end of central directory record).
# The 16-bit value at +20 is used as the comment length and the comment text
# is read from +22; the comment is reported only when that length is positive.
0	string	PK\x05\x06	End of Zip archive
#>10	leshort		x		number of records: %d,
#>12	leshort		x		size of central directory: %d
#>20	leshort		x		{offset-adjust:22+%d}
>20	leshort	>0
>>20	leshort	x	\b, comment: {raw-replace}
>>20	leshort	x	{raw-string-length:%d}
>>22	string	x	{raw-string:%s}

# ARJ archiver (jason@jarthur.Claremont.EDU)
# The two-byte magic is weak, so most header fields are range-checked and the
# hit is tagged "invalid" when a value falls outside what is accepted below.
0	leshort	0xea60	ARJ archive data,
>2	leshort	x	header size: %d,
# Archiver version and minimum extraction version must both be in 1..16.
>5	byte	<1	invalid
>5	byte	>16	invalid
>5	byte	x	version %d,
>6	byte	<1	invalid
>6	byte	>16	invalid
>6	byte	x	minimum version to extract: %d,
# Flag bits.
>8	byte	<0	invalid flags,
>8	byte	&0x04	multi-volume,
>8	byte	&0x10	slash-switched,
>8	byte	&0x20	backup,
# Compression method must be in 0..4.
>9	byte	<0	invalid compression method,
>9	byte	>4	invalid compression method,
>9	byte	0	compression method: stored,
>9	byte	1	compression method: compressed most,
>9	byte	2	compression method: compressed,
>9	byte	3	compression method: compressed faster,
>9	byte	4	compression method: compressed fastest,
# File type must be in 0..4.
>10	byte	<0	invalid file type
>10	byte	>4	invalid file type
>10	byte	0	file type: binary,
>10	byte	1	file type: 7-bit text,
>10	byte	2	file type: comment header,
>10	byte	3	file type: directory,
>10	byte	4	file type: volume label,
# Original file name, when present (first byte non-NUL).
>34	byte	!0
>>34	string	x	{file-name:%s}
>>34	string	x	original name: "%s",
>0xC	ledate	x	original file date: %s,
# Sizes must not be negative.
>0x10	lelong	<0	invalid
>0x10	lelong	x	compressed file size: %d,
>0x14	lelong	<0	invalid
>0x14	lelong	x	uncompressed file size: %d,
# Host operating system; anything outside 0..9 is rejected.
>7	byte	0	os: MS-DOS 
>7	byte	1	os: PRIMOS
>7	byte	2	os: Unix
>7	byte	3	os: Amiga
>7	byte	4	os: Macintosh
>7	byte	5	os: OS/2
>7	byte	6	os: Apple ][ GS
>7	byte	7	os: Atari ST
>7	byte	8	os: NeXT
>7	byte	9	os: VAX/VMS
>7	byte	>9	invalid os
>7	byte	<0	invalid os

# RAR archiver (Greg Roelofs, newt@uchicago.edu)
# Identified by the literal "Rar!" marker only.
0	string	Rar!	RAR archive data

# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
# Identified by the literal "HPAK" marker only.
0	string	HPAK	HPACK archive data

# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
# Magic is the bytes \351 , \001 followed by "JAM".
0	string	\351,\001JAM	JAM archive

# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
# Each rule matches one "-l??-" compression method tag. Every rule carries an
# offset adjustment of -2, so the tag is taken to sit two bytes into the
# archive header.
0	string	-lzs-	LHa 2.x? archive data [lzs] [NSRL|LHA2]{offset-adjust:-2}
0	string	-lh\40-	LHa 2.x? archive data [lh ] [NSRL|LHA2]{offset-adjust:-2}
0	string	-lhd-	LHa 2.x? archive data [lhd] [NSRL|LHA2]{offset-adjust:-2}
0	string	-lh2-	LHa 2.x? archive data [lh2] [NSRL|LHA2]{offset-adjust:-2}
0	string	-lh3-	LHa 2.x? archive data [lh3] [NSRL|LHA2]{offset-adjust:-2}
0	string	-lh4-	LHa (2.x) archive data [lh4] [NSRL|LHA2]{offset-adjust:-2}
0	string	-lh5-	LHa (2.x) archive data [lh5] [NSRL|LHA2]{offset-adjust:-2}
0	string	-lh6-	LHa (2.x) archive data [lh6] [NSRL|LHA2]{offset-adjust:-2}
0	string	-lh7-	LHa (2.x) archive data [lh7] [NSRL|LHA2]{offset-adjust:-2}


# cpio archives
#
# The SVR4 "cpio(4)" hints that there are additional formats, but they
# are defined as "short"s; I think all the new formats are
# character-header formats and thus are strings, not numbers.
#0       string          070707          ASCII cpio archive (pre-SVR4 or odc)

# WARNING: The jump-to-offset value in the ASCII cpio signatures below is a terrible hack.
#          This keyword is not intended to be passed a string (%s), and doing so can open
#          up the possibility of keyword injection by a malicious file. This works here though, because:
#
#          1) It would result in an invalid CPIO file (invalid size)
#          2) All valid keywords require more than 8 bytes, so a valid one can't be
#             injected in the %.8s field.

# SVR4 ASCII cpio header without CRC (magic "070701").
# The fields read below are 8-character hex strings: file size at +54 and
# file name length at +94. The file name itself starts at +110.
0	string	070701	ASCII cpio archive (SVR4 with no CRC),
# An empty file name marks the hit invalid.
>110	byte	0	invalid
#>110	byte		!0x2F
#>>110	string		!TRAILER!!!	invalid
>110	string	x	file name: "%s",
>94	string	x	file name length: "0x%.8s",
>54	string	x	file size: "0x%.8s"
# The next two lines combine into a single keyword of the form
# {jump-to-offset:0x<file size>+110+0x<name length>}.
>54	string	x	{jump-to-offset:0x%.8s+110+
>94	string	x	\b0x%.8s}

# SVR4 ASCII cpio header with CRC (magic "070702").
# The fields read below are 8-character hex strings: file size at +54 and
# file name length at +94. The file name itself starts at +110.
# The top-level description ends with a comma so the "file name:" field that
# follows is separated the same way as in the no-CRC ("070701") signature.
0	string	070702	ASCII cpio archive (SVR4 with CRC),
# An empty file name marks the hit invalid.
>110	byte	0	invalid
#>110	byte		!0x2F
#>>110	string		!TRAILER!!!	invalid
>110	string	x	file name: "%s",
>94	string	x	file name length: "0x%.8s",
>54	string	x	file size: "0x%.8s"
# The next two lines combine into a single keyword of the form
# {jump-to-offset:0x<file size>+110+0x<name length>}.
>54	string	x	{jump-to-offset:0x%.8s+110+
>94	string	x	\b0x%.8s}


# HP Printer Job Language
# The header found on Win95 HP plot files is the "Silliest Thing possible" (TM):
# every driver puts the language at some random position, with random case
# (LANGUAGE and Language). For example, the LaserJet 5L driver puts the
# "PJL ENTER LANGUAGE" in line 10.
# From: Uwe Bonnes <bon@elektron.ikp.physik.th-darmstadt.de>
#
# After the magic, up to four consecutive non-empty strings are printed, each
# located relative to the previous match (&0).
0	string	\033%-12345X@PJL	HP Printer Job Language data
>&0	string	>\0	"%s"
>>&0	string	>\0	"%s"
>>>&0	string	>\0	"%s"
>>>>&0	string	>\0	"%s"

#------------------------------------------------------------------------------
#
# RPM: file(1) magic for Red Hat Packages   Erik Troan (ewt@redhat.com)
#
# Fields used below: version byte at +4, package type (bin/src) at +6,
# architecture code at +8, and a string printed from +10.
0	belong	0xedabeedb	RPM
>4	byte	x	v%d
# Package type.
>6	beshort	0	bin
>6	beshort	1	src
# Architecture code.
>8	beshort	1	i386
>8	beshort	2	Alpha
>8	beshort	3	Sparc
>8	beshort	4	MIPS
>8	beshort	5	PowerPC
>8	beshort	6	68000
>8	beshort	7	SGI
>8	beshort	8	RS6000
>8	beshort	9	IA64
>8	beshort	10	Sparc64
>8	beshort	11	MIPSel
>8	beshort	12	ARM
>10	string	x	"%s"

# IBM AIX Backup File Format header and entry signatures

# Volume header. A volume number of zero or below marks the hit invalid.
0	lelong	0xea6b0009	BFF volume header,
>4	leshort	x	checksum: 0x%.4X,
>6	leshort	<0	invalid
>6	leshort	0	invalid
>6	leshort	x	volume number: %d,
>8	ledate	x	current date: %s,
>12	ledate	x	starting date: %s,
>20	string	x	disk name: "%s",
>36	string	x	file system name: "%s",
>52	string	x	user name: "%s"

# Volume entries. The three variants below differ only in their 16-bit magic
# and description; the validation rules are the same for each:
#   - the size at +22 must not be negative; zero is reported as a directory
#   - for a positive size, the compressed size at +54 must be positive
#   - the 32-bit value at +58 must be zero
#   - the file name at +62 must start with '.' (0x2e) or '/' (0x2f)
# The file name is printed in two pieces, from +62 and from +92.
0	leshort	0xea6b	BFF volume entry,{offset-adjust:-2}
>22	lelong	<0	invalid
>22	lelong	0	directory,
>22	lelong	>0
>>22	lelong	x	file size: %d,
>>54	lelong	<0	invalid
>>54	lelong	0	invalid
>>54	lelong	x	compressed size: %d,
>58	lelong	!0	invalid
>62	byte	0	invalid
>62	byte	!0x2e
>>62	byte	!0x2f	invalid
>62	string	x	file name: "%s
>92	string	x	\b%s"

0	leshort	0xea6c	BFF volume entry, compressed,{offset-adjust:-2}
>22	lelong	<0	invalid
>22	lelong	0	directory,
>22	lelong	>0
>>22	lelong	x	file size: %d,
>>54	lelong	<0	invalid
>>54	lelong	0	invalid
>>54	lelong	x	compressed size: %d,
>58	lelong	!0	invalid
>62	byte	0	invalid
>62	byte	!0x2e
>>62	byte	!0x2f	invalid
>62	string	x	file name: "%s
>92	string	x	\b%s"

0	leshort	0xea6d	BFF volume entry, AIXv3,{offset-adjust:-2}
>22	lelong	<0	invalid
>22	lelong	0	directory,
>22	lelong	>0
>>22	lelong	x	file size: %d,
>>54	lelong	<0	invalid
>>54	lelong	0	invalid
>>54	lelong	x	compressed size: %d,
>58	lelong	!0	invalid
>62	byte	0	invalid
>62	byte	!0x2e
>>62	byte	!0x2f	invalid
>62	string	x	file name: "%s
>92	string	x	\b%s"

#------------------------------------------------------------------------------
# From Stuart Caie <kyzer@4u.net> (developer of cabextract)
# Microsoft Cabinet files
0	string	MSCF\0\0\0\0	Microsoft Cabinet archive data
# According to libmagic comments, CAB version number is always 1.3
# (major version byte at +25, minor version byte at +24). The version
# messages use the same "\b, " separator as the other fields so the
# description stays uniformly formatted when invalid results are displayed.
>25	byte	!1	\b, invalid major version
>24	byte	!3	\b, invalid minor version
>8	lelong	x	\b, %u bytes
# File count at +28; a cabinet with zero files is flagged invalid.
>28	leshort	0	\b, 0 files (invalid)
>28	leshort	1	\b, 1 file
>28	leshort	>1	\b, %u files

# InstallShield Cabinet files
0	string	ISc(	InstallShield Cabinet archive data
# TODO: Version number checks should be made more specific for false positive filtering
# The version is derived from the high nibble of the byte at +5.
>5	byte&0xf0	=0x60	version 6,
>5	byte&0xf0	<0x60	version 4/5,
>5	byte&0xf0	>0x60	invalid version,
# The 32-bit offset at +12 must lie within 0..100000.
>12	lelong	<0	invalid offset,
>12	lelong	>100000	invalid offset,
# File count is read indirectly: 40 bytes past the offset stored at +12.
>(12.l+40)	lelong	x	%u files

# Windows CE package files
0	string	MSCE\0\0\0\0	Microsoft WinCE install header
# Target architecture code at +20.
>20	lelong	0	\b, architecture-independent
>20	lelong	103	\b, Hitachi SH3
>20	lelong	104	\b, Hitachi SH4
>20	lelong	0xA11	\b, StrongARM
>20	lelong	4000	\b, MIPS R4000
>20	lelong	10003	\b, Hitachi SH3
>20	lelong	10004	\b, Hitachi SH3E
>20	lelong	10005	\b, Hitachi SH4
>20	lelong	70001	\b, ARM 7TDMI
# File count at +52 and registry entry count at +56 (singular/plural wording).
>52	leshort	1	\b, 1 file
>52	leshort	>1	\b, %u files
>56	leshort	1	\b, 1 registry entry
>56	leshort	>1	\b, %u registry entries

# LBR archive: a NUL byte, eleven spaces, then two NUL bytes.
0	string	\0\ \ \ \ \ \ \ \ \ \ \ \0\0	LBR archive data

# Parity archive reconstruction file, the 'par' file format now used on Usenet.
# The 16-bit value at +48 is zero for an index file, otherwise it is printed
# as the file number.
0	string	PAR\0	PARity archive data
>48	leshort	=0	- Index file
>48	leshort	>0	- file number %d

# Felix von Leitner <felix-file@fefe.de>
# Matches the literal text "d8:announce".
0	string	d8:announce	BitTorrent file