Commit 37bb4799 by devttys0

Fixed potential self-overlapping signatures

parent 46ac01e1
...@@ -7,6 +7,7 @@ __all__ = ['Magic'] ...@@ -7,6 +7,7 @@ __all__ = ['Magic']
import re import re
import struct import struct
import datetime import datetime
import binwalk.core.common
import binwalk.core.compat import binwalk.core.compat
class ParserException(Exception): class ParserException(Exception):
...@@ -15,14 +16,6 @@ class ParserException(Exception): ...@@ -15,14 +16,6 @@ class ParserException(Exception):
''' '''
pass pass
class SignatureTag(object):
'''
Container class for each signature tag entry.
'''
def __init__(self, **kwargs):
for (k,v) in binwalk.core.compat.iterator(kwargs):
setattr(self, k, v)
class SignatureResult(binwalk.core.module.Result): class SignatureResult(binwalk.core.module.Result):
''' '''
Container class for signature results. Container class for signature results.
...@@ -38,6 +31,7 @@ class SignatureResult(binwalk.core.module.Result): ...@@ -38,6 +31,7 @@ class SignatureResult(binwalk.core.module.Result):
self.string = False self.string = False
self.invalid = False self.invalid = False
self.once = False self.once = False
self.overlap = False
# These are set by code internally # These are set by code internally
self.id = 0 self.id = 0
...@@ -63,7 +57,7 @@ class SignatureLine(object): ...@@ -63,7 +57,7 @@ class SignatureLine(object):
Returns None. Returns None.
''' '''
self.tags = [] self.tags = {}
self.text = line self.text = line
self.regex = False self.regex = False
...@@ -255,7 +249,7 @@ class SignatureLine(object): ...@@ -255,7 +249,7 @@ class SignatureLine(object):
v = True v = True
# Create a new SignatureTag instance and append it to self.tags # Store the tag value in the self.tags dictionary, keyed by tag name
self.tags.append(SignatureTag(name=n, value=v)) self.tags[n] = v
# Remove all tags from the printable format string # Remove all tags from the printable format string
self.format = retag.sub('', self.format).strip() self.format = retag.sub('', self.format).strip()
...@@ -298,33 +292,34 @@ class Signature(object): ...@@ -298,33 +292,34 @@ class Signature(object):
# Strings and single byte signatures are taken at face value; # Strings and single byte signatures are taken at face value;
# multi-byte integer values are turned into regex strings based # multi-byte integer values are turned into regex strings based
# on their data type size and endianness. # on their data type size and endianness.
#
# Regex types are already compiled expressions.
if line.type == 'regex': if line.type == 'regex':
# Regex types are already compiled expressions.
# Note that since re.finditer is used, unless the specified
# regex accounts for it, overlapping signatures will be ignored.
return line.value return line.value
if line.type == 'string': if line.type == 'string':
restr = re.escape(line.value) restr = line.value
elif line.size == 1: elif line.size == 1:
restr = re.escape(chr(line.value)) restr = chr(line.value)
elif line.size == 2: elif line.size == 2:
if line.endianess == '<': if line.endianess == '<':
restr = re.escape(chr(line.value & 0xFF) + chr(line.value >> 8)) restr = chr(line.value & 0xFF) + chr(line.value >> 8)
elif line.endianess == '>': elif line.endianess == '>':
restr = re.escape(chr(line.value >> 8) + chr(line.value & 0xFF)) restr = chr(line.value >> 8) + chr(line.value & 0xFF)
elif line.size == 4: elif line.size == 4:
if line.endianess == '<': if line.endianess == '<':
restr = re.escape(chr(line.value & 0xFF) + restr = (chr(line.value & 0xFF) +
chr((line.value >> 8) & 0xFF) + chr((line.value >> 8) & 0xFF) +
chr((line.value >> 16) & 0xFF) + chr((line.value >> 16) & 0xFF) +
chr(line.value >> 24)) chr(line.value >> 24))
elif line.endianess == '>': elif line.endianess == '>':
restr = re.escape(chr(line.value >> 24) + restr = (chr(line.value >> 24) +
chr((line.value >> 16) & 0xFF) + chr((line.value >> 16) & 0xFF) +
chr((line.value >> 8) & 0xFF) + chr((line.value >> 8) & 0xFF) +
chr(line.value & 0xFF)) chr(line.value & 0xFF))
elif line.size == 8: elif line.size == 8:
if line.endianess == '<': if line.endianess == '<':
restr = re.escape(chr(line.value & 0xFF) + restr = (chr(line.value & 0xFF) +
chr((line.value >> 8) & 0xFF) + chr((line.value >> 8) & 0xFF) +
chr((line.value >> 16) & 0xFF) + chr((line.value >> 16) & 0xFF) +
chr((line.value >> 24) & 0xFF) + chr((line.value >> 24) & 0xFF) +
...@@ -333,7 +328,7 @@ class Signature(object): ...@@ -333,7 +328,7 @@ class Signature(object):
chr((line.value >> 48) & 0xFF) + chr((line.value >> 48) & 0xFF) +
chr(line.value >> 56)) chr(line.value >> 56))
elif line.endianess == '>': elif line.endianess == '>':
restr = re.escape(chr(line.value >> 56) + restr = (chr(line.value >> 56) +
chr((line.value >> 48) & 0xFF) + chr((line.value >> 48) & 0xFF) +
chr((line.value >> 40) & 0xFF) + chr((line.value >> 40) & 0xFF) +
chr((line.value >> 32) & 0xFF) + chr((line.value >> 32) & 0xFF) +
...@@ -342,7 +337,21 @@ class Signature(object): ...@@ -342,7 +337,21 @@ class Signature(object):
chr((line.value >> 8) & 0xFF) + chr((line.value >> 8) & 0xFF) +
chr(line.value & 0xFF)) chr(line.value & 0xFF))
return re.compile(restr) # Since re.finditer is used on a per-signature basis, signatures should be crafted carefully
# to ensure that they aren't potentially self-overlapping (e.g., a signature of "ABCDAB" could
# be confused by the byte sequence "ABCDABCDAB"). The longer the signature, the less likely an
# unintentional overlap is, although files could still be maliciously crafted to cause false
# negative results.
#
# Thus, unless a signature has been explicitly marked as knowingly overlapping ('{overlap}'),
# spit out a warning about any self-overlapping signatures.
if not binwalk.core.compat.has_key(line.tags, 'overlap'):
for i in range(1, line.size):
if restr[i:] == restr[0:(line.size-i)]:
binwalk.core.common.warning("Signature '%s' is a self-overlapping signature!" % line.text)
break
return re.compile(re.escape(restr))
def append(self, line): def append(self, line):
''' '''
...@@ -377,6 +386,7 @@ class Magic(object): ...@@ -377,6 +386,7 @@ class Magic(object):
self.signatures = [] self.signatures = []
# A set of signatures with the 'once' keyword that have already been displayed once # A set of signatures with the 'once' keyword that have already been displayed once
self.display_once = set() self.display_once = set()
self.dirty = True
self.show_invalid = invalid self.show_invalid = invalid
self.includes = [re.compile(x) for x in include] self.includes = [re.compile(x) for x in include]
...@@ -557,7 +567,7 @@ class Magic(object): ...@@ -557,7 +567,7 @@ class Magic(object):
if line.value is None: if line.value is None:
# Check to see if this is a string whose size is known and has been specified on a previous # Check to see if this is a string whose size is known and has been specified on a previous
# signature line. # signature line.
if binwalk.core.compat.has_key(tags, 'strlen') and [x for x in line.tags if x.name == 'string']: if binwalk.core.compat.has_key(tags, 'strlen') and binwalk.core.compat.has_key(line.tags, 'string'):
dvalue = self.data[start:(start+tags['strlen'])] dvalue = self.data[start:(start+tags['strlen'])]
# Else, just terminate the string at the first newline, carriage return, or NULL byte # Else, just terminate the string at the first newline, carriage return, or NULL byte
else: else:
...@@ -633,23 +643,23 @@ class Magic(object): ...@@ -633,23 +643,23 @@ class Magic(object):
# Process tag keywords specified in the signature line. These have already been parsed out of the # Process tag keywords specified in the signature line. These have already been parsed out of the
# original format string so that they can be processed separately from the printed description string. # original format string so that they can be processed separately from the printed description string.
for tag in line.tags: for (tag_name, tag_value) in binwalk.core.compat.iterator(line.tags):
# If the tag value is a string, try to format it # If the tag value is a string, try to format it
if isinstance(tag.value, str): if isinstance(tag_value, str):
# Generate the tuple for the format string # Generate the tuple for the format string
dvalue_tuple = () dvalue_tuple = ()
for x in self.fmtstr.finditer(tag.value): for x in self.fmtstr.finditer(tag_value):
dvalue_tuple += (dvalue,) dvalue_tuple += (dvalue,)
# Format the tag string # Format the tag string
tags[tag.name] = tag.value % dvalue_tuple tags[tag_name] = tag_value % dvalue_tuple
# Else, just use the raw tag value # Else, just use the raw tag value
else: else:
tags[tag.name] = tag.value tags[tag_name] = tag_value
# Some tag values are intended to be integer values, so try to convert them as such # Some tag values are intended to be integer values, so try to convert them as such
try: try:
tags[tag.name] = int(tags[tag.name], 0) tags[tag_name] = int(tags[tag_name], 0)
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
raise e raise e
except Exception as e: except Exception as e:
...@@ -722,7 +732,6 @@ class Magic(object): ...@@ -722,7 +732,6 @@ class Magic(object):
results = [] results = []
matched_offsets = set() matched_offsets = set()
# It's expensive in Python to pass large strings around to various functions.
# Since data can potentially be quite a large string, make it available to other # Since data can potentially be quite a large string, make it available to other
# methods via a class attribute so that it doesn't need to be passed around to # methods via a class attribute so that it doesn't need to be passed around to
# different methods over and over again. # different methods over and over again.
...@@ -730,14 +739,14 @@ class Magic(object): ...@@ -730,14 +739,14 @@ class Magic(object):
# If dlen wasn't specified, search all of self.data # If dlen wasn't specified, search all of self.data
if dlen is None: if dlen is None:
dlen = len(self.data) dlen = len(data)
# Loop through each loaded signature
for signature in self.signatures: for signature in self.signatures:
# Use regex to search the data block for potential signature matches (fast) # Use regex to search the data block for potential signature matches (fast)
for match in signature.regex.finditer(self.data): for match in signature.regex.finditer(data):
# Take the offset of the start of the signature into account # Take the offset of the start of the signature into account
offset = match.start() - signature.offset offset = match.start() - signature.offset
# Signatures are ordered based on the length of their magic bytes (largest first). # Signatures are ordered based on the length of their magic bytes (largest first).
# If this offset has already been matched to a previous signature, ignore it unless # If this offset has already been matched to a previous signature, ignore it unless
# self.show_invalid has been specified. Also ignore obviously invalid offsets (<1) # self.show_invalid has been specified. Also ignore obviously invalid offsets (<1)
......
...@@ -125,15 +125,24 @@ ...@@ -125,15 +125,24 @@
0 string \351,\001JAM JAM archive 0 string \351,\001JAM JAM archive
# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu) # LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
2 string -lzs- LHa 2.x? archive data [lzs] [NSRL|LHA2] 2 string -lzs LHa 2.x? archive data [lzs] [NSRL|LHA2]
2 string -lh\40- LHa 2.x? archive data [lh ] [NSRL|LHA2] >6 string !- {invalid}
2 string -lhd- LHa 2.x? archive data [lhd] [NSRL|LHA2] 2 string -lh\40 LHa 2.x? archive data [lh ] [NSRL|LHA2]
2 string -lh2- LHa 2.x? archive data [lh2] [NSRL|LHA2] >6 string !- {invalid}
2 string -lh3- LHa 2.x? archive data [lh3] [NSRL|LHA2] 2 string -lhd LHa 2.x? archive data [lhd] [NSRL|LHA2]
2 string -lh4- LHa (2.x) archive data [lh4] [NSRL|LHA2] >6 string !- {invalid}
2 string -lh5- LHa (2.x) archive data [lh5] [NSRL|LHA2] 2 string -lh2 LHa 2.x? archive data [lh2] [NSRL|LHA2]
2 string -lh6- LHa (2.x) archive data [lh6] [NSRL|LHA2] >6 string !- {invalid}
2 string -lh7- LHa (2.x) archive data [lh7] [NSRL|LHA2] 2 string -lh3 LHa 2.x? archive data [lh3] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh4 LHa (2.x) archive data [lh4] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh5 LHa (2.x) archive data [lh5] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh6 LHa (2.x) archive data [lh6] [NSRL|LHA2]
>6 string !- {invalid}
2 string -lh7 LHa (2.x) archive data [lh7] [NSRL|LHA2]
>6 string !- {invalid}
# cpio archives # cpio archives
...@@ -290,7 +299,8 @@ ...@@ -290,7 +299,8 @@
>56 leshort 1 \b, 1 registry entry >56 leshort 1 \b, 1 registry entry
>56 leshort >1 \b, %u registry entries >56 leshort >1 \b, %u registry entries
0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data 0 string \x00\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20 LBR archive data
>12 string !\x00x00 {invalid}
# Parity archive reconstruction file, the 'par' file format now used on Usenet. # Parity archive reconstruction file, the 'par' file format now used on Usenet.
0 string PAR\0 PARity archive data 0 string PAR\0 PARity archive data
......
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
#---------------------------Bootloaders-------------------------------- #---------------------------Bootloaders--------------------------------
# CFE bootloader # CFE bootloader
0 string CFE1CFE1 CFE boot loader 0 string CFE1 CFE boot loader
>4 string !CFE1 {invalid}
>40 string CFE1CFE1 {invalid} >40 string CFE1CFE1 {invalid}
# U-Boot boot loader # U-Boot boot loader
......
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
#------------------Compression Formats----------------------------- #------------------Compression Formats-----------------------------
# AFX compressed files (Wolfram Kleff) # AFX compressed files (Wolfram Kleff)
2 string -afx- AFX compressed file data 2 string -afx AFX compressed file data
>6 string !- {invalid}
# bzip2 # bzip2
0 string BZh91AY&SY bzip2 compressed data, block size = 900k 0 string BZh91AY&SY bzip2 compressed data, block size = 900k
......
# Type: OpenSSL certificates/key files # Type: OpenSSL certificates/key files
# From: Nicolas Collignon <tsointsoin@gmail.com> # From: Nicolas Collignon <tsointsoin@gmail.com>
0 string -----BEGIN\x20CERTIFICATE----- PEM certificate 0 string -----BEGIN\x20CERTIFICATE PEM certificate
>22 string !----- {invalid}
0 string -----BEGIN\x20CERTIFICATE\x20REQ PEM certificate request 0 string -----BEGIN\x20CERTIFICATE\x20REQ PEM certificate request
0 string -----BEGIN\x20RSA\x20PRIVATE PEM RSA private key 0 string -----BEGIN\x20RSA\x20PRIVATE PEM RSA private key
0 string -----BEGIN\x20DSA\x20PRIVATE PEM DSA private key 0 string -----BEGIN\x20DSA\x20PRIVATE PEM DSA private key
......
...@@ -573,7 +573,7 @@ ...@@ -573,7 +573,7 @@
>8 belong x {jump:%d} >8 belong x {jump:%d}
# Wind River MemFS file system, found in some VxWorks devices # Wind River MemFS file system, found in some VxWorks devices
0 string owowowowowowowowowowowowowowow Wind River management filesystem, 0 string owowowowowowowowowowowowowowow Wind River management filesystem,{overlap}
>30 string !ow {invalid}, >30 string !ow {invalid},
>32 belong 1 compressed, >32 belong 1 compressed,
>32 belong 2 plain text, >32 belong 2 plain text,
......
...@@ -163,7 +163,8 @@ ...@@ -163,7 +163,8 @@
>23 byte x header checksum: 0x%X >23 byte x header checksum: 0x%X
# PackImg tag, sometimes used as a delimiter between the kernel and rootfs in firmware images. # PackImg tag, sometimes used as a delimiter between the kernel and rootfs in firmware images.
0 string --PaCkImGs-- PackImg section delimiter tag, 0 string --PaCkImGs PackImg section delimiter tag,
>10 string !-- {invalid}
# If the size in both big and little endian is greater than 512MB, consider this a false positive # If the size in both big and little endian is greater than 512MB, consider this a false positive
>16 ulelong >0x20000000 >16 ulelong >0x20000000
>>16 ubelong >0x20000000 {invalid} >>16 ubelong >0x20000000 {invalid}
...@@ -517,7 +518,8 @@ ...@@ -517,7 +518,8 @@
>18 beshort+16 x data offset from start of block: %d >18 beshort+16 x data offset from start of block: %d
# Obfuscated Arcadyan firmware # Obfuscated Arcadyan firmware
0x68 belong 0x00D50800 Obfuscated Arcadyan firmware, 0x68 belong 0x00D508 Obfuscated Arcadyan firmware,
>3 byte !0 {invalid}
>0 ubelong x signature bytes: 0x%X, >0 ubelong x signature bytes: 0x%X,
>0x70 string !\x00\x00\x00\x00\x00\x00\x00 {invalid}, >0x70 string !\x00\x00\x00\x00\x00\x00\x00 {invalid},
>0x70 belong 0x00000000 see https://github.com/devttys0/wrt120n/deobfuscator >0x70 belong 0x00000000 see https://github.com/devttys0/wrt120n/deobfuscator
......
...@@ -41,7 +41,8 @@ ...@@ -41,7 +41,8 @@
# CodeGate 2011 http://nopsrus.blogspot.com/2013/05/codegate-ctf-2011-binary-100-points.html # CodeGate 2011 http://nopsrus.blogspot.com/2013/05/codegate-ctf-2011-binary-100-points.html
0 string \x23\x40\x7e\x5e Windows Script Encoded Data (screnc.exe) 0 string \x23\x40\x7e\x5e Windows Script Encoded Data (screnc.exe)
0 string /home/ Unix home path string: 0 string /home Unix home path string:
>5 string !/ {invalid}
>0 string x "%s" >0 string x "%s"
0 string neighbor Neighborly text, 0 string neighbor Neighborly text,
......
# Signatures to identify the start of a VxWorks symbol table # Signatures to identify the start of a VxWorks symbol table
8 string \x00\x00\x05\x00\x00\x00\x00\x00 VxWorks symbol table, big endian, 8 string \x00\x00\x05\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,{overlap}
>4 belong 0 {invalid} >4 belong 0 {invalid}
>4 ubelong x first entry: [type: function, code address: 0x%X, >4 ubelong x first entry: [type: function, code address: 0x%X,
>0 belong 0 {invalid} >0 belong 0 {invalid}
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
>>152 belong !0x700 >>152 belong !0x700
>>>152 belong !0x900 \b, {invalid} >>>152 belong !0x900 \b, {invalid}
8 string \x00\x00\x07\x00\x00\x00\x00\x00 VxWorks symbol table, big endian, 8 string \x00\x00\x07\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,{overlap}
>4 belong 0 {invalid} >4 belong 0 {invalid}
>4 ubelong x first entry: [type: initialized data, code address: 0x%X, >4 ubelong x first entry: [type: initialized data, code address: 0x%X,
>0 belong 0 {invalid} >0 belong 0 {invalid}
...@@ -66,7 +66,7 @@ ...@@ -66,7 +66,7 @@
>>152 belong !0x700 >>152 belong !0x700
>>>152 belong !0x900 \b, {invalid} >>>152 belong !0x900 \b, {invalid}
8 string \x00\x00\x09\x00\x00\x00\x00\x00 VxWorks symbol table, big endian, 8 string \x00\x00\x09\x00\x00\x00\x00\x00 VxWorks symbol table, big endian,{overlap}
>4 belong 0 {invalid} >4 belong 0 {invalid}
>4 ubelong x first entry: [type: uninitialized data, code address: 0x%X, >4 ubelong x first entry: [type: uninitialized data, code address: 0x%X,
>0 belong 0 {invalid} >0 belong 0 {invalid}
...@@ -99,7 +99,7 @@ ...@@ -99,7 +99,7 @@
>>152 belong !0x700 >>152 belong !0x700
>>>152 belong !0x900 \b, {invalid} >>>152 belong !0x900 \b, {invalid}
8 string \x00\x05\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian, 8 string \x00\x05\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,{overlap}
>4 lelong 0 {invalid} >4 lelong 0 {invalid}
>4 ulelong x first entry: [type: function, code address: 0x%X, >4 ulelong x first entry: [type: function, code address: 0x%X,
>0 lelong 0 {invalid} >0 lelong 0 {invalid}
...@@ -132,7 +132,7 @@ ...@@ -132,7 +132,7 @@
>>152 lelong !0x700 >>152 lelong !0x700
>>>152 lelong !0x900 \b, {invalid} >>>152 lelong !0x900 \b, {invalid}
8 string \x00\x07\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian, 8 string \x00\x07\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,{overlap}
>4 lelong 0 {invalid} >4 lelong 0 {invalid}
>4 ulelong x first entry: [type: initialized data, code address: 0x%X, >4 ulelong x first entry: [type: initialized data, code address: 0x%X,
>0 lelong 0 {invalid} >0 lelong 0 {invalid}
...@@ -165,7 +165,7 @@ ...@@ -165,7 +165,7 @@
>>152 lelong !0x700 >>152 lelong !0x700
>>>152 lelong !0x900 \b, {invalid} >>>152 lelong !0x900 \b, {invalid}
8 string \x00\x09\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian, 8 string \x00\x09\x00\x00\x00\x00\x00\x00 VxWorks symbol table, little endian,{overlap}
>4 lelong 0 {invalid} >4 lelong 0 {invalid}
>4 ulelong x first entry: [type: uninitialized data, code address: 0x%X, >4 ulelong x first entry: [type: uninitialized data, code address: 0x%X,
>0 lelong 0 {invalid} >0 lelong 0 {invalid}
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
import os import os
import math import math
import zlib import zlib
import numpy as np
import binwalk.core.common import binwalk.core.common
from binwalk.core.compat import * from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg from binwalk.core.module import Module, Option, Kwarg
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment