Commit 37bb4799 by devttys0

Fixed potential self-overlapping signatures

parent 46ac01e1
......@@ -7,6 +7,7 @@ __all__ = ['Magic']
import re
import struct
import datetime
import binwalk.core.common
import binwalk.core.compat
class ParserException(Exception):
......@@ -15,14 +16,6 @@ class ParserException(Exception):
'''
pass
class SignatureTag(object):
'''
Container class for each signature tag entry.
'''
def __init__(self, **kwargs):
for (k,v) in binwalk.core.compat.iterator(kwargs):
setattr(self, k, v)
class SignatureResult(binwalk.core.module.Result):
'''
Container class for signature results.
......@@ -38,6 +31,7 @@ class SignatureResult(binwalk.core.module.Result):
self.string = False
self.invalid = False
self.once = False
self.overlap = False
# These are set by code internally
self.id = 0
......@@ -63,7 +57,7 @@ class SignatureLine(object):
Returns None.
'''
self.tags = []
self.tags = {}
self.text = line
self.regex = False
......@@ -255,7 +249,7 @@ class SignatureLine(object):
v = True
# Create a new SignatureTag instance and append it to self.tags
self.tags.append(SignatureTag(name=n, value=v))
self.tags[n] = v
# Remove all tags from the printable format string
self.format = retag.sub('', self.format).strip()
......@@ -298,33 +292,34 @@ class Signature(object):
# Strings and single byte signatures are taken at face value;
# multi-byte integer values are turned into regex strings based
# on their data type size and endianness.
#
# Regex types are already compiled expressions.
if line.type == 'regex':
# Regex types are already compiled expressions.
# Note that since re.finditer is used, unless the specified
# regex accounts for it, overlapping signatures will be ignored.
return line.value
if line.type == 'string':
restr = re.escape(line.value)
restr = line.value
elif line.size == 1:
restr = re.escape(chr(line.value))
restr = chr(line.value)
elif line.size == 2:
if line.endianess == '<':
restr = re.escape(chr(line.value & 0xFF) + chr(line.value >> 8))
restr = chr(line.value & 0xFF) + chr(line.value >> 8)
elif line.endianess == '>':
restr = re.escape(chr(line.value >> 8) + chr(line.value & 0xFF))
restr = chr(line.value >> 8) + chr(line.value & 0xFF)
elif line.size == 4:
if line.endianess == '<':
restr = re.escape(chr(line.value & 0xFF) +
restr = (chr(line.value & 0xFF) +
chr((line.value >> 8) & 0xFF) +
chr((line.value >> 16) & 0xFF) +
chr(line.value >> 24))
elif line.endianess == '>':
restr = re.escape(chr(line.value >> 24) +
restr = (chr(line.value >> 24) +
chr((line.value >> 16) & 0xFF) +
chr((line.value >> 8) & 0xFF) +
chr(line.value & 0xFF))
elif line.size == 8:
if line.endianess == '<':
restr = re.escape(chr(line.value & 0xFF) +
restr = (chr(line.value & 0xFF) +
chr((line.value >> 8) & 0xFF) +
chr((line.value >> 16) & 0xFF) +
chr((line.value >> 24) & 0xFF) +
......@@ -333,7 +328,7 @@ class Signature(object):
chr((line.value >> 48) & 0xFF) +
chr(line.value >> 56))
elif line.endianess == '>':
restr = re.escape(chr(line.value >> 56) +
restr = (chr(line.value >> 56) +
chr((line.value >> 48) & 0xFF) +
chr((line.value >> 40) & 0xFF) +
chr((line.value >> 32) & 0xFF) +
......@@ -342,7 +337,21 @@ class Signature(object):
chr((line.value >> 8) & 0xFF) +
chr(line.value & 0xFF))
return re.compile(restr)
# Since re.finditer is used on a per-signature basis, signatures should be crafted carefully
# to ensure that they aren't potentially self-overlapping (e.g., a signature of "ABCDAB" could
# be confused by the byte sequence "ABCDABCDAB"). The longer the signature, the less likely an
# unintentional overlap is, although files could still be maliciously crafted to cause false
# negative results.
#
# Thus, unless a signature has been explicitly marked as knowingly overlapping ('{overlap}'),
# spit out a warning about any self-overlapping signatures.
if not binwalk.core.compat.has_key(line.tags, 'overlap'):
for i in range(1, line.size):
if restr[i:] == restr[0:(line.size-i)]:
binwalk.core.common.warning("Signature '%s' is a self-overlapping signature!" % line.text)
break
return re.compile(re.escape(restr))
def append(self, line):
'''
......@@ -377,6 +386,7 @@ class Magic(object):
self.signatures = []
# A set of signatures with the 'once' keyword that have already been displayed once
self.display_once = set()
self.dirty = True
self.show_invalid = invalid
self.includes = [re.compile(x) for x in include]
......@@ -557,7 +567,7 @@ class Magic(object):
if line.value is None:
# Check to see if this is a string whose size is known and has been specified on a previous
# signature line.
if binwalk.core.compat.has_key(tags, 'strlen') and [x for x in line.tags if x.name == 'string']:
if binwalk.core.compat.has_key(tags, 'strlen') and binwalk.core.compat.has_key(line.tags, 'string'):
dvalue = self.data[start:(start+tags['strlen'])]
# Else, just terminate the string at the first newline, carriage return, or NULL byte
else:
......@@ -633,23 +643,23 @@ class Magic(object):
# Process tag keywords specified in the signature line. These have already been parsed out of the
# original format string so that they can be processed separately from the printed description string.
for tag in line.tags:
for (tag_name, tag_value) in binwalk.core.compat.iterator(line.tags):
# If the tag value is a string, try to format it
if isinstance(tag.value, str):
if isinstance(tag_value, str):
# Generate the tuple for the format string
dvalue_tuple = ()
for x in self.fmtstr.finditer(tag.value):
for x in self.fmtstr.finditer(tag_value):
dvalue_tuple += (dvalue,)
# Format the tag string
tags[tag.name] = tag.value % dvalue_tuple
tags[tag_name] = tag_value % dvalue_tuple
# Else, just use the raw tag value
else:
tags[tag.name] = tag.value
tags[tag_name] = tag_value
# Some tag values are intended to be integer values, so try to convert them as such
try:
tags[tag.name] = int(tags[tag.name], 0)
tags[tag_name] = int(tags[tag_name], 0)
except KeyboardInterrupt as e:
raise e
except Exception as e:
......@@ -722,7 +732,6 @@ class Magic(object):
results = []
matched_offsets = set()
# It's expensive in Python to pass large strings around to various functions.
# Since data can potentially be quite a large string, make it available to other
# methods via a class attribute so that it doesn't need to be passed around to
# different methods over and over again.
......@@ -730,14 +739,14 @@ class Magic(object):
# If dlen wasn't specified, search all of self.data
if dlen is None:
dlen = len(self.data)
dlen = len(data)
# Loop through each loaded signature
for signature in self.signatures:
# Use regex to search the data block for potential signature matches (fast)
for match in signature.regex.finditer(self.data):
for match in signature.regex.finditer(data):
# Take the offset of the start of the signature into account
offset = match.start() - signature.offset
# Signatures are ordered based on the length of their magic bytes (largest first).
# If this offset has already been matched to a previous signature, ignore it unless
# self.show_invalid has been specified. Also ignore obviously invalid offsets (<1)
......
......@@ -125,15 +125,24 @@
0 string \351,\001JAM JAM archive
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
2 string -lzs- LHa 2.x? archive data [lzs] [NSRL|LHA2]
2 string -lh\40- LHa 2.x? archive data [lh ] [NSRL|LHA2]
2 string -lhd- LHa 2.x? archive data [lhd] [NSRL|LHA2]
2 string -lh2- LHa 2.x? archive data [lh2] [NSRL|LHA2]
2 string -lh3- LHa 2.x? archive data [lh3] [NSRL|LHA2]
2 string -lh4- LHa (2.x) archive data [lh4] [NSRL|LHA2]
2 string -lh5- LHa (2.x) archive data [lh5] [NSRL|LHA2]
2 string -lh6- LHa (2.x) archive data [lh6] [NSRL|LHA2]
2 string -lh7- LHa (2.x) archive data [lh7] [NSRL|LHA2]
2 string -lzs LHa 2.x? archive data [lzs] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh\40 LHa 2.x? archive data [lh ] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lhd LHa 2.x? archive data [lhd] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh2 LHa 2.x? archive data [lh2] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh3 LHa 2.x? archive data [lh3] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh4 LHa (2.x) archive data [lh4] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh5 LHa (2.x) archive data [lh5] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh6 LHa (2.x) archive data [lh6] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh7 LHa (2.x) archive data [lh7] [NSRL|LHA2]
>6 string !- {invalid}
# cpio archives
......@@ -290,7 +299,8 @@
>56 leshort 1 \b, 1 registry entry
>56 leshort >1 \b, %u registry entries
0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data
0 string \x00\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20 LBR archive data
>12 string !\x00\x00 {invalid}
# Parity archive reconstruction file, the 'par' file format now used on Usenet.
0 string PAR\0 PARity archive data
......
......@@ -2,7 +2,8 @@
#---------------------------Bootloaders--------------------------------
# CFE bootloader
0 string CFE1CFE1 CFE boot loader
0 string CFE1 CFE boot loader
>4 string !CFE1 {invalid}
>40 string CFE1CFE1 {invalid}
# U-Boot boot loader
......
......@@ -2,7 +2,8 @@
#------------------Compression Formats-----------------------------
# AFX compressed files (Wolfram Kleff)
2 string -afx- AFX compressed file data
2 string -afx AFX compressed file data
>6 string !- {invalid}
# bzip2
0 string BZh91AY&SY bzip2 compressed data, block size = 900k
......
# Type: OpenSSL certificates/key files
# From: Nicolas Collignon <tsointsoin@gmail.com>
0 string -----BEGIN\x20CERTIFICATE----- PEM certificate
0 string -----BEGIN\x20CERTIFICATE PEM certificate
>22 string !----- {invalid}
0 string -----BEGIN\x20CERTIFICATE\x20REQ PEM certificate request
0 string -----BEGIN\x20RSA\x20PRIVATE PEM RSA private key
0 string -----BEGIN\x20DSA\x20PRIVATE PEM DSA private key
......
......@@ -573,7 +573,7 @@
>8 belong x {jump:%d}
# Wind River MemFS file system, found in some VxWorks devices
0 string owowowowowowowowowowowowowowow Wind River management filesystem,
0 string owowowowowowowowowowowowowowow Wind River management filesystem,{overlap}
>30 string !ow {invalid},
>32 belong 1 compressed,
>32 belong 2 plain text,
......
......@@ -163,7 +163,8 @@
>23 byte x header checksum: 0x%X
# PackImg tag, sometimes used as a delimiter between the kernel and rootfs in firmware images.
0 string --PaCkImGs-- PackImg section delimiter tag,
0 string --PaCkImGs PackImg section delimiter tag,
>10 string !-- {invalid}
# If the size in both big and little endian is greater than 512MB, consider this a false positive
>16 ulelong >0x20000000
>>16 ubelong >0x20000000 {invalid}
......@@ -517,7 +518,8 @@
>18 beshort+16 x data offset from start of block: %d
# Obfuscated Arcadyan firmware
0x68 belong 0x00D50800 Obfuscated Arcadyan firmware,
0x68 belong 0x00D508 Obfuscated Arcadyan firmware,
>3 byte !0 {invalid}
>0 ubelong x signature bytes: 0x%X,
>0x70 string !\x00\x00\x00\x00\x00\x00\x00 {invalid},
>0x70 belong 0x00000000 see https://github.com/devttys0/wrt120n/deobfuscator
......
......@@ -41,7 +41,8 @@
# CodeGate 2011 http://nopsrus.blogspot.com/2013/05/codegate-ctf-2011-binary-100-points.html
0 string \x23\x40\x7e\x5e Windows Script Encoded Data (screnc.exe)
0 string /home/ Unix home path string:
0 string /home Unix home path string:
>5 string !/ {invalid}
>0 string x "%s"
0 string neighbor Neighborly text,
......
# Signatures to identify the start of a VxWorks symbol table
8 string \x00\x00\x05\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,
8 string \x00\x00\x05\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,{overlap}
>4 belong 0 {invalid}
>4 ubelong x first entry: [type: function, code address: 0x%X,
>0 belong 0 {invalid}
......@@ -33,7 +33,7 @@
>>152 belong !0x700
>>>152 belong !0x900 \b, {invalid}
8 string \x00\x00\x07\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,
8 string \x00\x00\x07\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,{overlap}
>4 belong 0 {invalid}
>4 ubelong x first entry: [type: initialized data, code address: 0x%X,
>0 belong 0 {invalid}
......@@ -66,7 +66,7 @@
>>152 belong !0x700
>>>152 belong !0x900 \b, {invalid}
8 string \x00\x00\x09\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,
8 string \x00\x00\x09\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,{overlap}
>4 belong 0 {invalid}
>4 ubelong x first entry: [type: uninitialized data, code address: 0x%X,
>0 belong 0 {invalid}
......@@ -99,7 +99,7 @@
>>152 belong !0x700
>>>152 belong !0x900 \b, {invalid}
8 string \x00\x05\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,
8 string \x00\x05\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,{overlap}
>4 lelong 0 {invalid}
>4 ulelong x first entry: [type: function, code address: 0x%X,
>0 lelong 0 {invalid}
......@@ -132,7 +132,7 @@
>>152 lelong !0x700
>>>152 lelong !0x900 \b, {invalid}
8 string \x00\x07\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,
8 string \x00\x07\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,{overlap}
>4 lelong 0 {invalid}
>4 ulelong x first entry: [type: initialized data, code address: 0x%X,
>0 lelong 0 {invalid}
......@@ -165,7 +165,7 @@
>>152 lelong !0x700
>>>152 lelong !0x900 \b, {invalid}
8 string \x00\x09\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,
8 string \x00\x09\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,{overlap}
>4 lelong 0 {invalid}
>4 ulelong x first entry: [type: uninitialized data, code address: 0x%X,
>0 lelong 0 {invalid}
......
......@@ -3,7 +3,6 @@
import os
import math
import zlib
import numpy as np
import binwalk.core.common
from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment