Commit 1c03b051 by devttys0

Added code comments to the magic.Magic class.

parent 2b82b156
...@@ -42,13 +42,27 @@ class SignatureResult(object): ...@@ -42,13 +42,27 @@ class SignatureResult(object):
class SignatureLine(object): class SignatureLine(object):
def __init__(self, line): def __init__(self, line):
'''
Class constructor. Responsible for parsing a line from a signature file.
@line - A line from the signature file.
Returns None.
'''
self.tags = [] self.tags = []
self.original_text = line self.text = line
# Split the line on any white space; for this to work, backslash-escaped
# spaces ('\ ') are replaced with their escaped hex value ('\x20').
parts = line.replace('\\ ', '\\x20').split(None, 3) parts = line.replace('\\ ', '\\x20').split(None, 3)
# The indentation level is determined by the number of '>' characters at
# the beginning of the signature line.
self.level = parts[0].count('>') self.level = parts[0].count('>')
# Get rid of the indentation characters and try to convert the remaining
# characters to an integer offset. This will fail if the offset is a complex
# value (e.g., '(4.l+16)').
self.offset = parts[0].replace('>', '') self.offset = parts[0].replace('>', '')
try: try:
self.offset = int(self.offset, 0) self.offset = int(self.offset, 0)
...@@ -205,27 +219,50 @@ class Signature(object): ...@@ -205,27 +219,50 @@ class Signature(object):
self.lines.append(line) self.lines.append(line)
class Magic(object): class Magic(object):
'''
Primary class for loading signature files and scanning
blocks of arbitrary data for matching signatures.
'''
def __init__(self, exclude=[], include=[], invalid=False): def __init__(self, exclude=[], include=[], invalid=False):
''' '''
Class constructor. Class constructor.
@include - A list of regex strings describing which signatures should be included in the scan results.
@exclude - A list of regex strings describing which signatures should not be included in the scan results.
@invalid - If set to True, invalid results will not be ignored. @invalid - If set to True, invalid results will not be ignored.
Returns None. Returns None.
''' '''
# Used to save the block of data passed to self.scan (see additional comments in self.scan)
self.data = "" self.data = ""
# A list of Signature class objects, populated by self.parse (see also: self.load)
self.signatures = [] self.signatures = []
self.show_invalid = invalid self.show_invalid = invalid
self.includes = [re.compile(x) for x in include] self.includes = [re.compile(x) for x in include]
self.excludes = [re.compile(x) for x in exclude] self.excludes = [re.compile(x) for x in exclude]
# Regex rule to replace backspace characters (an the preceeding character)
# in formatted signature strings (see self._analyze).
self.bspace = re.compile(".\\\\b") self.bspace = re.compile(".\\\\b")
# Regex rule to match printable ASCII characters in formatted signature
# strings (see self._analyze).
self.printable = re.compile("[ -~]*") self.printable = re.compile("[ -~]*")
def _filtered(self, text): def _filtered(self, text):
'''
Tests if a string should be filtered out or not.
@text - The string to check against filter rules.
Returns True if the string should be filtered out, i.e., not displayed.
Returns False if the string should be displayed.
'''
filtered = None filtered = None
# Text is converted to lower case first, partially for historical
# purposes, but also because it simplifies writing filter rules
# (e.g., don't have to worry about case sensitivity).
text = text.lower() text = text.lower()
for include in self.includes: for include in self.includes:
...@@ -233,6 +270,8 @@ class Magic(object): ...@@ -233,6 +270,8 @@ class Magic(object):
filtered = False filtered = False
break break
# If exclusive include filters have been specified and did
# not match the text, then the text should be filtered out.
if self.includes and filtered == None: if self.includes and filtered == None:
return True return True
...@@ -241,77 +280,123 @@ class Magic(object): ...@@ -241,77 +280,123 @@ class Magic(object):
filtered = True filtered = True
break break
# If no explicit exclude filters were matched, then the
# text should *not* be filtered.
if filtered == None: if filtered == None:
filtered = False filtered = False
return filtered return filtered
def _do_math(self, offset, expression): def _do_math(self, offset, expression):
# (4.l+12) '''
Parses and evaluates complex expressions, e.g., "(4.l+12)", "(6*32)", etc.
@offset - The offset inside self.data that the current signature starts at.
@expressions - The expression to evaluate.
Returns an integer value that is the result of the evaluated expression.
'''
# Does the expression contain an offset (e.g., "(4.l+12)")?
if '.' in expression: if '.' in expression:
# Split the offset field into the integer offset and type values (o and t respsectively)
(o, t) = expression.split('.', 1) (o, t) = expression.split('.', 1)
o = offset + int(o.split('(', 1)[1], 0) o = offset + int(o.split('(', 1)[1], 0)
t = t[0] t = t[0]
try: try:
# Big and little endian byte format
if t in ['b', 'B']: if t in ['b', 'B']:
v = struct.unpack('b', binwalk.core.compat.str2bytes(self.data[o:o+1]))[0] v = struct.unpack('b', binwalk.core.compat.str2bytes(self.data[o:o+1]))[0]
# Little endian short format
elif t == 's': elif t == 's':
v = struct.unpack('<h', binwalk.core.compat.str2bytes(self.data[o:o+2]))[0] v = struct.unpack('<h', binwalk.core.compat.str2bytes(self.data[o:o+2]))[0]
# Little endian long format
elif t == 'l': elif t == 'l':
v = struct.unpack('<i', binwalk.core.compat.str2bytes(self.data[o:o+4]))[0] v = struct.unpack('<i', binwalk.core.compat.str2bytes(self.data[o:o+4]))[0]
# Big endian short format
elif t == 'S': elif t == 'S':
v = struct.unpack('>h', binwalk.core.compat.str2bytes(self.data[o:o+2]))[0] v = struct.unpack('>h', binwalk.core.compat.str2bytes(self.data[o:o+2]))[0]
# Bit endian long format
elif t == 'L': elif t == 'L':
v = struct.unpack('>i', binwalk.core.compat.str2bytes(self.data[o:o+4]))[0] v = struct.unpack('>i', binwalk.core.compat.str2bytes(self.data[o:o+4]))[0]
# struct.error is thrown if there is not enough bytes in self.data for the specified format type
except struct.error as e: except struct.error as e:
v = 0 v = 0
# Once the value at the specified offset is read from self.data, re-build the expression
# (e.g., "(4.l+12)" might be converted into "(256+12)".
v = "(%d%s" % (v, expression.split(t, 1)[1]) v = "(%d%s" % (v, expression.split(t, 1)[1])
# (32+0x20) # If no offset, then it's just an evaluatable math expression (e.g., "(32+0x20)")
else: else:
v = expression v = expression
#print ("Converted offset '%s' to '%s'" % (expression, v)) # Evaluate the final expression
return binwalk.core.common.MathExpression(v).value return binwalk.core.common.MathExpression(v).value
def _analyze(self, signature, offset): def _analyze(self, signature, offset):
'''
Analyzes self.data for the specified signature data at the specified offset .
@signature - The signature to apply to the data.
@offset - The offset in self.data to apply the signature to.
Returns a dictionary of tags parsed from the data.
'''
description = [] description = []
tag_strlen = None tag_strlen = None
max_line_level = 0 max_line_level = 0
tags = {'id' : signature.id, 'offset' : offset, 'invalid' : False} tags = {'id' : signature.id, 'offset' : offset, 'invalid' : False}
# Apply each line of the signature to self.data, starting at the specified offset
for line in signature.lines: for line in signature.lines:
# Ignore indentation levels above the current max indent level
if line.level <= max_line_level: if line.level <= max_line_level:
# If the relative offset of this signature line is just an integer value, use it
if isinstance(line.offset, int): if isinstance(line.offset, int):
line_offset = line.offset line_offset = line.offset
# Else, evaluate the complex expression
else: else:
line_offset = self._do_math(offset, line.offset) line_offset = self._do_math(offset, line.offset)
# The start of the data needed by this line is at offset + line_offset.
# The end of the data will be line.size bytes later.
start = offset + line_offset start = offset + line_offset
end = start + line.size end = start + line.size
# If the line has a packed format string, unpack it
if line.pkfmt: if line.pkfmt:
try: try:
dvalue = struct.unpack(line.pkfmt, binwalk.core.compat.str2bytes(self.data[start:end]))[0] dvalue = struct.unpack(line.pkfmt, binwalk.core.compat.str2bytes(self.data[start:end]))[0]
# Not enough bytes left in self.data for the specified format size
except struct.error as e: except struct.error as e:
dvalue = 0 dvalue = 0
# Else, this is a string
else: else:
# Wildcard strings have line.value == None # Wildcard strings have line.value == None
if line.value is None: if line.value is None:
# Check to see if this is a string whose size is known and has been specified on a previous
# signature line.
if [x for x in line.tags if x.name == 'string'] and binwalk.core.compat.has_key(tags, 'strlen'): if [x for x in line.tags if x.name == 'string'] and binwalk.core.compat.has_key(tags, 'strlen'):
dvalue = self.data[start:(start+tags['strlen'])] dvalue = self.data[start:(start+tags['strlen'])]
# Else, just terminate the string at the first newline, carriage return, or NULL byte
else: else:
dvalue = self.data[start:end].split('\x00')[0].split('\r')[0].split('\r')[0] dvalue = self.data[start:end].split('\x00')[0].split('\r')[0].split('\r')[0]
# Non-wildcard strings have a known length, specified in the signature line
else: else:
dvalue = self.data[start:end] dvalue = self.data[start:end]
# Some integer values have special operations that need to be performed on them
# before comparison (e.g., "belong&0x0000FFFF"). Complex math expressions are
# supported here as well.
if isinstance(dvalue, int) and line.operator: if isinstance(dvalue, int) and line.operator:
# If the operator value of this signature line is just an integer value, use it
if isinstance(line.opvalue, int): if isinstance(line.opvalue, int):
opval = line.opvalue opval = line.opvalue
# Else, evaluate the complex expression
else: else:
opval = self._do_math(offset, line.opvalue) opval = self._do_math(offset, line.opvalue)
# Perform the specified operation
if line.operator == '&': if line.operator == '&':
dvalue &= opval dvalue &= opval
elif line.operator == '|': elif line.operator == '|':
...@@ -325,6 +410,7 @@ class Magic(object): ...@@ -325,6 +410,7 @@ class Magic(object):
elif line.operator == '/': elif line.operator == '/':
dvalue /= opval dvalue /= opval
# Does the data (dvalue) match the specified comparison?
if ((line.value is None) or if ((line.value is None) or
(line.condition == '=' and dvalue == line.value) or (line.condition == '=' and dvalue == line.value) or
(line.condition == '>' and dvalue > line.value) or (line.condition == '>' and dvalue > line.value) or
...@@ -333,21 +419,32 @@ class Magic(object): ...@@ -333,21 +419,32 @@ class Magic(object):
(line.condition == '&' and (dvalue & line.value)) or (line.condition == '&' and (dvalue & line.value)) or
(line.condition == '|' and (dvalue | line.value))): (line.condition == '|' and (dvalue | line.value))):
# Up until this point, date fields are treated as integer values,
# but we want to display them as nicely formatted strings.
if line.type == 'date': if line.type == 'date':
ts = datetime.datetime.utcfromtimestamp(dvalue) ts = datetime.datetime.utcfromtimestamp(dvalue)
dvalue = ts.strftime("%Y-%m-%d %H:%M:%S") dvalue = ts.strftime("%Y-%m-%d %H:%M:%S")
# Format the description string
# TODO: This is too simplistic of a check. What if '%%' is in the format string?
if '%' in line.format: if '%' in line.format:
desc = line.format % dvalue desc = line.format % dvalue
else: else:
desc = line.format desc = line.format
# If there was any description string, append it to the list of description string parts
if desc: if desc:
description.append(desc) description.append(desc)
# Process tag keywords specified in the signature line. These have already been parsed out of the
# original format string so that they can be processed separately from the printed description string.
for tag in line.tags: for tag in line.tags:
# Format the tag string
# TODO: This is too simplistic of a check. What if '%%' is in the format string?
if isinstance(tag.value, str) and '%' in tag.value: if isinstance(tag.value, str) and '%' in tag.value:
tags[tag.name] = tag.value % dvalue tags[tag.name] = tag.value % dvalue
# Some tag values are intended to be integer values, so try to convert them as such
try: try:
tags[tag.name] = int(tags[tag.name], 0) tags[tag.name] = int(tags[tag.name], 0)
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
...@@ -355,6 +452,7 @@ class Magic(object): ...@@ -355,6 +452,7 @@ class Magic(object):
except Exception as e: except Exception as e:
pass pass
else: else:
# Some tag values are intended to be integer values, so try to convert them as such
try: try:
tags[tag.name] = int(tag.value, 0) tags[tag.name] = int(tag.value, 0)
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
...@@ -362,18 +460,24 @@ class Magic(object): ...@@ -362,18 +460,24 @@ class Magic(object):
except Exception as e: except Exception as e:
tags[tag.name] = tag.value tags[tag.name] = tag.value
# Abort abort abort # Abort processing soon as this signature is marked invalid, unless invalid results
# were explicitly requested. This means that the sooner invalid checks are made in a
# given signature, the faster the scan can filter out false positives.
if not self.show_invalid and tags['invalid']: if not self.show_invalid and tags['invalid']:
break break
# If this line satisfied its comparison, +1 the max indentation level
max_line_level = line.level + 1 max_line_level = line.level + 1
else: else:
# No match on the first line, abort # No match on the first line, abort
if line.level == 0: if line.level == 0:
break break
else: else:
# If this line did not satisfy its comparison, then higher
# indentation levels will not be accepted.
max_line_level = line.level max_line_level = line.level
# Join the formatted description strings and remove backspace characters (plus the preceeding character as well)
tags['description'] = self.bspace.sub('', " ".join(description)) tags['description'] = self.bspace.sub('', " ".join(description))
# This should never happen # This should never happen
...@@ -381,29 +485,58 @@ class Magic(object): ...@@ -381,29 +485,58 @@ class Magic(object):
tags['display'] = False tags['display'] = False
tags['invalid'] = True tags['invalid'] = True
# If the formatted string contains non-printable characters, consider it invalid
if self.printable.match(tags['description']).group() != tags['description']: if self.printable.match(tags['description']).group() != tags['description']:
tags['invalid'] = True tags['invalid'] = True
return tags return tags
def scan(self, data, dlen=None): def scan(self, data, dlen=None):
'''
Scan a data block for matching signatures.
@data - A string of data to scan.
@dlen - If specified, signatures at offsets larger than dlen will be ignored.
Returns a list of SignatureResult objects.
'''
results = [] results = []
matched_offsets = set() matched_offsets = set()
# It's expensive in Python to pass large strings around to various functions.
# Since data can potentially be quite a large string, make it available to other
# methods via a class attribute so that it doesn't need to be passed around to
# different methods over and over again.
self.data = data self.data = data
# If dlen wasn't specified, search all of self.data
if dlen is None: if dlen is None:
dlen = len(self.data) dlen = len(self.data)
# Loop through each loaded signature
for signature in self.signatures: for signature in self.signatures:
# Use regex to search the data block for potential signature matches (fast)
for match in signature.regex.finditer(self.data): for match in signature.regex.finditer(self.data):
# Take the offset of the start of the signature into account
offset = match.start() - signature.offset offset = match.start() - signature.offset
# Signatures are orderd based on the length of their magic bytes (largest first).
# If this offset has already been matched to a previous signature, ignore it unless
# self.show_invalid has been specified. Also ignore obviously invalid offsets (<1)
# as well as those outside the specified self.data range (dlen).
if (offset not in matched_offsets or self.show_invalid) and offset >= 0 and offset <= dlen: if (offset not in matched_offsets or self.show_invalid) and offset >= 0 and offset <= dlen:
# Analyze the data at this offset using the current signature rule
tags = self._analyze(signature, offset) tags = self._analyze(signature, offset)
# Generate a SignatureResult object and append it to the results list if the
# signature is valid, or if invalid results were requested.
if not tags['invalid'] or self.show_invalid: if not tags['invalid'] or self.show_invalid:
results.append(SignatureResult(**tags)) results.append(SignatureResult(**tags))
# Add this offset to the matched_offsets set, so that it can be ignored by
# subsequent loops.
matched_offsets.add(offset) matched_offsets.add(offset)
# Sort results by offset
results.sort(key=lambda x: x.offset, reverse=False) results.sort(key=lambda x: x.offset, reverse=False)
return results return results
def load(self, fname): def load(self, fname):
...@@ -420,25 +553,47 @@ class Magic(object): ...@@ -420,25 +553,47 @@ class Magic(object):
fp.close() fp.close()
def parse(self, lines): def parse(self, lines):
'''
Parse signature file lines.
@lines - A list of lines from a signature file.
Returns None.
'''
signature = None signature = None
for line in lines: for line in lines:
# Split at the first comment delimiter (if any) and strip the result
line = line.split('#')[0].strip() line = line.split('#')[0].strip()
# Ignore blank lines and lines that are nothing but comments
if line: if line:
# Parse this signature line
sigline = SignatureLine(line) sigline = SignatureLine(line)
# Level 0 means the first line of a signature entry
if sigline.level == 0: if sigline.level == 0:
# If there is an existing signature, append it to the signature list,
# unless the text in its title field has been filtered by user-defined
# filter rules.
if signature: if signature:
if not self._filtered(signature.title): if not self._filtered(signature.title):
self.signatures.append(signature) self.signatures.append(signature)
# Create a new signature object; use the size of self.signatures to
# assign each signature a unique ID.
signature = Signature(len(self.signatures), sigline) signature = Signature(len(self.signatures), sigline)
# Else, just append this line to the existing signature
elif signature: elif signature:
signature.append(sigline) signature.append(sigline)
# If this is not the first line of a signature entry and there is no other
# existing signature entry, something is very wrong with the signature file.
else: else:
raise ParserException("Invalid signature line: '%s'" % line) raise ParserException("Invalid signature line: '%s'" % line)
# Add the final signature to the signature list
if signature: if signature:
if not self._filtered(signature.lines[0].format): if not self._filtered(signature.lines[0].format):
self.signatures.append(signature) self.signatures.append(signature)
# Sort signatures by confidence (aka, length of their magic bytes), largest first
self.signatures.sort(key=lambda x: x.confidence, reverse=True) self.signatures.sort(key=lambda x: x.confidence, reverse=True)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment