Commit 5a52a5a8 by devttys0

Cleaned up smart.Signature code, fixed Python3 ctypes bugs.

parent 881f0c18
import re import re
import binwalk.core.common as common import binwalk.core.common as common
from binwalk.core.smart import SmartSignature from binwalk.core.smart import Signature
from binwalk.core.compat import * from binwalk.core.compat import *
class Filter: class Filter:
...@@ -32,7 +32,7 @@ class Filter: ...@@ -32,7 +32,7 @@ class Filter:
self.grep_filters = [] self.grep_filters = []
self.show_invalid_results = show_invalid_results self.show_invalid_results = show_invalid_results
self.exclusive_filter = False self.exclusive_filter = False
self.smart = SmartSignature(self) self.smart = Signature(self)
def include(self, match, exclusive=True): def include(self, match, exclusive=True):
''' '''
...@@ -134,7 +134,7 @@ class Filter: ...@@ -134,7 +134,7 @@ class Filter:
# Don't include quoted strings or keyword arguments in this search, as # Don't include quoted strings or keyword arguments in this search, as
# strings from the target file may legitimately contain the INVALID_RESULT text. # strings from the target file may legitimately contain the INVALID_RESULT text.
if self.INVALID_RESULT in common.strip_quoted_strings(self.smart._strip_tags(data)): if self.INVALID_RESULT in common.strip_quoted_strings(self.smart.strip_tags(data)):
return False return False
# There should be no non-printable characters in any of the data # There should be no non-printable characters in any of the data
......
...@@ -3,7 +3,36 @@ import binwalk.core.module ...@@ -3,7 +3,36 @@ import binwalk.core.module
from binwalk.core.compat import * from binwalk.core.compat import *
from binwalk.core.common import get_quoted_strings, MathExpression from binwalk.core.common import get_quoted_strings, MathExpression
class SmartSignature: class Tag(object):
TAG_DELIM_START = "{"
TAG_DELIM_END = "}"
TAG_ARG_SEPERATOR = ":"
def __init__(self, **kwargs):
self.name = None
self.keyword = None
self.type = None
self.handler = None
self.tag = None
for (k,v) in iterator(kwargs):
setattr(self, k, v)
if self.keyword is not None:
self.tag = self.TAG_DELIM_START + self.keyword
if self.type is None:
self.tag += self.TAG_DELIM_END
else:
self.tag += self.TAG_ARG_SEPERATOR
if self.handler is None:
if self.type == int:
self.handler = 'get_math_arg'
elif self.type == str:
self.handler = 'get_keyword_arg'
class Signature(object):
''' '''
Class for parsing smart signature tags in libmagic result strings. Class for parsing smart signature tags in libmagic result strings.
...@@ -12,29 +41,28 @@ class SmartSignature: ...@@ -12,29 +41,28 @@ class SmartSignature:
from binwalk import SmartSignature from binwalk import SmartSignature
for (i, keyword) in SmartSignature().KEYWORDS.iteritems(): for tag in SmartSignature.TAGS:
print keyword print tag.keyword
''' '''
KEYWORD_DELIM_START = "{" TAGS = [
KEYWORD_DELIM_END = "}" Tag(name='raw-string', keyword='raw-string', handler='parse_raw_string'),
KEYWORDS = { Tag(name='string-len', keyword='string-len', handler='parse_string_len'),
'jump' : '%sjump-to-offset:' % KEYWORD_DELIM_START, Tag(name='math', keyword='math', handler='parse_math'),
'filename' : '%sfile-name:' % KEYWORD_DELIM_START, Tag(name='one-of-many', keyword='one-of-many', handler='one_of_many'),
'filesize' : '%sfile-size:' % KEYWORD_DELIM_START,
'raw-string' : '%sraw-string:' % KEYWORD_DELIM_START, # This one is special and must come last in a signature block Tag(name='jump', keyword='jump-to-offset', type=int),
'string-len' : '%sstring-len:' % KEYWORD_DELIM_START, # This one is special and must come last in a signature block Tag(name='name', keyword='file-name', type=str),
'raw-size' : '%sraw-string-length:' % KEYWORD_DELIM_START, Tag(name='size', keyword='file-size', type=int),
'adjust' : '%soffset-adjust:' % KEYWORD_DELIM_START, Tag(name='adjust', keyword='offset-adjust', type=int),
'delay' : '%sextract-delay:' % KEYWORD_DELIM_START, Tag(name='delay', keyword='extract-delay', type=str),
'year' : '%sfile-year:' % KEYWORD_DELIM_START, Tag(name='year', keyword='file-year', type=str),
'epoch' : '%sfile-epoch:' % KEYWORD_DELIM_START, Tag(name='epoch', keyword='file-epoch', type=int),
'math' : '%smath:' % KEYWORD_DELIM_START,
Tag(name='raw-size', keyword='raw-string-length'),
'raw-replace' : '%sraw-replace%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END), Tag(name='raw-replace', keyword='raw-replace'),
'one-of-many' : '%sone-of-many%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END), Tag(name='string-len-replace', keyword='string-len'),
'string-len-replace' : '%sstring-len%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END), ]
}
def __init__(self, filter, ignore_smart_signatures=False): def __init__(self, filter, ignore_smart_signatures=False):
''' '''
...@@ -58,20 +86,7 @@ class SmartSignature: ...@@ -58,20 +86,7 @@ class SmartSignature:
Returns a dictionary of parsed values. Returns a dictionary of parsed values.
''' '''
results = { results = {}
'offset' : '', # Offset where the match was found, filled in by Binwalk.single_scan.
'description' : '', # The libmagic data string, stripped of all keywords
'name' : '', # The original name of the file, if known
'delay' : '', # Extract delay description
'extract' : '', # Name of the extracted file, filled in by Binwalk.single_scan.
'jump' : 0, # The relative offset to resume the scan from
'size' : 0, # The size of the file, if known
'adjust' : 0, # The relative offset to add to the reported offset
'year' : 0, # The file's creation/modification year, if reported in the signature
'epoch' : 0, # The file's creation/modification epoch time, if reported in the signature
'valid' : True, # Set to False if parsed numerical values appear invalid
}
self.valid = True self.valid = True
# If smart signatures are disabled, or the result data is not valid (i.e., potentially malicious), # If smart signatures are disabled, or the result data is not valid (i.e., potentially malicious),
...@@ -79,57 +94,28 @@ class SmartSignature: ...@@ -79,57 +94,28 @@ class SmartSignature:
if self.ignore_smart_signatures: if self.ignore_smart_signatures:
results['description'] = data results['description'] = data
else: else:
# Calculate and replace special keywords/values for tag in self.TAGS:
data = self._parse_raw_strings(data) if tag.handler is not None:
data = self._parse_string_len(data) (data, arg) = getattr(self, tag.handler)(data, tag)
data = self._replace_maths(data)
# Parse the offset-adjust value. This is used to adjust the reported offset at which
# a signature was located due to the fact that MagicParser.match expects all signatures
# to be located at offset 0, which some wil not be.
results['adjust'] = self._get_math_arg(data, 'adjust')
# Parse the file-size value. This is used to determine how many bytes should be extracted
# when extraction is enabled. If not specified, everything to the end of the file will be
# extracted (see Binwalk.scan).
try:
results['size'] = int(self._get_math_arg(data, 'filesize'), 0)
except KeyboardInterrupt as e:
raise e
except Exception:
pass
try: if isinstance(arg, type(False)) and arg == False:
results['year'] = int(self._get_keyword_arg(data, 'year'), 0) self.valid = False
except KeyboardInterrupt as e: elif tag.type is not None:
raise e results[tag.name] = arg
except Exception:
pass
try:
results['epoch'] = int(self._get_keyword_arg(data, 'epoch'), 0)
except KeyboardInterrupt as e:
raise e
except Exception:
pass
results['delay'] = self._get_keyword_arg(data, 'delay')
# Parse the string for the jump-to-offset keyword.
# This keyword is honored, even if this string result is one of many.
results['jump'] = self._get_math_arg(data, 'jump')
# If this is one of many, don't do anything and leave description as a blank string. results['description'] = self.strip_tags(data)
# Else, strip all keyword tags from the string and process additional keywords as necessary.
if not self._one_of_many(data):
results['name'] = self._get_keyword_arg(data, 'filename').strip('"')
results['description'] = self._strip_tags(data)
results['valid'] = self.valid results['valid'] = self.valid
return binwalk.core.module.Result(**results) return binwalk.core.module.Result(**results)
def _is_valid(self, data): def tag_lookup(self, keyword):
for tag in self.TAGS:
if tag.keyword == keyword:
return tag
return None
def is_valid(self, data):
''' '''
Validates that result data does not contain smart keywords in file-supplied strings. Validates that result data does not contain smart keywords in file-supplied strings.
...@@ -144,15 +130,15 @@ class SmartSignature: ...@@ -144,15 +130,15 @@ class SmartSignature:
quoted_data = get_quoted_strings(data) quoted_data = get_quoted_strings(data)
# Check to see if there was any quoted data, and if so, if it contained the keyword starting delimiter # Check to see if there was any quoted data, and if so, if it contained the keyword starting delimiter
if quoted_data and self.KEYWORD_DELIM_START in quoted_data: if quoted_data and Tag.TAG_DELIM_START in quoted_data:
# If so, check to see if the quoted data contains any of our keywords. # If so, check to see if the quoted data contains any of our keywords.
# If any keywords are found inside of quoted data, consider the keywords invalid. # If any keywords are found inside of quoted data, consider the keywords invalid.
for (name, keyword) in iterator(self.KEYWORDS): for tag in self.TAGS:
if keyword in quoted_data: if tag.tag in quoted_data:
return False return False
return True return True
def _safe_string(self, data): def safe_string(self, data):
''' '''
Strips out quoted data (i.e., data taken directly from a file). Strips out quoted data (i.e., data taken directly from a file).
''' '''
...@@ -161,28 +147,28 @@ class SmartSignature: ...@@ -161,28 +147,28 @@ class SmartSignature:
data = data.replace(quoted_string, "") data = data.replace(quoted_string, "")
return data return data
def _one_of_many(self, data): def one_of_many(self, data, tag):
''' '''
Determines if a given data string is one result of many. Determines if a given data string is one result of many.
@data - String result data. @data - String result data.
Returns True if the string result is one of many. Returns False if the string result is one of many and should not be displayed.
Returns False if the string result is not one of many. Returns True if the string result is not one of many and should be displayed.
''' '''
if self.filter.valid_result(data): if self.filter.valid_result(data):
if self.last_one_of_many is not None and data.startswith(self.last_one_of_many): if self.last_one_of_many is not None and data.startswith(self.last_one_of_many):
return True return (data, False)
if self.KEYWORDS['one-of-many'] in data: if tag.tag in data:
# Only match on the data before the first comma, as that is typically unique and static # Only match on the data before the first comma, as that is typically unique and static
self.last_one_of_many = data.split(',')[0] self.last_one_of_many = data.split(',')[0]
else: else:
self.last_one_of_many = None self.last_one_of_many = None
return False return (data, True)
def _get_keyword_arg(self, data, keyword): def get_keyword_arg(self, data, tag):
''' '''
Retrieves the argument for keywords that specify arguments. Retrieves the argument for keywords that specify arguments.
...@@ -193,14 +179,14 @@ class SmartSignature: ...@@ -193,14 +179,14 @@ class SmartSignature:
Returns a blank string on failure. Returns a blank string on failure.
''' '''
arg = '' arg = ''
data = self._safe_string(data) data = self.safe_string(data)
if has_key(self.KEYWORDS, keyword) and self.KEYWORDS[keyword] in data: if tag.tag in data:
arg = data.split(self.KEYWORDS[keyword])[1].split(self.KEYWORD_DELIM_END)[0] arg = data.split(tag.tag)[1].split(tag.TAG_DELIM_END)[0]
return arg return (data, arg)
def _get_math_arg(self, data, keyword): def get_math_arg(self, data, tag):
''' '''
Retrieves the argument for keywords that specifiy mathematical expressions as arguments. Retrieves the argument for keywords that specifiy mathematical expressions as arguments.
...@@ -211,37 +197,16 @@ class SmartSignature: ...@@ -211,37 +197,16 @@ class SmartSignature:
''' '''
value = 0 value = 0
arg = self._get_keyword_arg(data, keyword) (data, arg) = self.get_keyword_arg(data, tag)
if arg: if arg:
value = MathExpression(arg).value value = MathExpression(arg).value
if value is None: if value is None:
value = 0 value = 0
self.valid = False self.valid = False
return value return (data, value)
def _jump(self, data):
'''
Obtains the jump-to-offset value of a signature, if any.
@data - String result data.
Returns the offset to jump to. def parse_math(self, data, tag):
'''
offset = 0
offset_str = self._get_keyword_arg(data, 'jump')
if offset_str:
try:
offset = int(offset_str, 0)
except KeyboardInterrupt as e:
raise e
except Exception:
pass
return offset
def _replace_maths(self, data):
''' '''
Replace math keywords with the requested values. Replace math keywords with the requested values.
...@@ -249,15 +214,15 @@ class SmartSignature: ...@@ -249,15 +214,15 @@ class SmartSignature:
Returns the modified string result data. Returns the modified string result data.
''' '''
while self.KEYWORDS['math'] in data: while tag.keyword in data:
arg = self._get_keyword_arg(data, 'math') (data, arg) = self.get_keyword_arg(data, tag.name)
v = '%s%s%s' % (self.KEYWORDS['math'], arg, self.KEYWORD_DELIM_END) v = '%s%s%s' % (tag.keyword, arg, self.TAG_DELIM_END)
math_value = "%d" % self._get_math_arg(data, 'math') math_value = "%d" % self.get_math_arg(data, tag.name)
data = data.replace(v, math_value) data = data.replace(v, math_value)
return data return (data, None)
def _parse_raw_strings(self, data): def parse_raw_string(self, data, raw_str_tag):
''' '''
Process strings that aren't NULL byte terminated, but for which we know the string length. Process strings that aren't NULL byte terminated, but for which we know the string length.
This should be called prior to any other smart parsing functions. This should be called prior to any other smart parsing functions.
...@@ -266,25 +231,27 @@ class SmartSignature: ...@@ -266,25 +231,27 @@ class SmartSignature:
Returns a parsed string. Returns a parsed string.
''' '''
if not self.ignore_smart_signatures and self._is_valid(data): if not self.ignore_smart_signatures and self.is_valid(data):
raw_size_tag = self.tag_lookup('raw-size')
raw_replace_tag = self.tag_lookup('raw-replace')
# Get the raw string keyword arg # Get the raw string keyword arg
raw_string = self._get_keyword_arg(data, 'raw-string') (data, raw_string) = self.get_keyword_arg(data, raw_str_tag)
# Was a raw string keyword specified? # Was a raw string keyword specified?
if raw_string: if raw_string:
# Get the raw string length arg # Get the raw string length arg
raw_size = self._get_keyword_arg(data, 'raw-size') (data, raw_size) = self.get_math_arg(data, raw_size_tag)
# Is the raw string length arg is a numeric value?
if re.match('^-?[0-9]+$', raw_size):
# Replace all instances of raw-replace in data with raw_string[:raw_size] # Replace all instances of raw-replace in data with raw_string[:raw_size]
# Also strip out everything after the raw-string keyword, including the keyword itself. # Also strip out everything after the raw-string keyword, including the keyword itself.
# Failure to do so may (will) result in non-printable characters and this string will be # Failure to do so may (will) result in non-printable characters and this string will be
# marked as invalid when it shouldn't be. # marked as invalid when it shouldn't be.
data = data[:data.find(self.KEYWORDS['raw-string'])].replace(self.KEYWORDS['raw-replace'], '"' + raw_string[:int(raw_size, 0)] + '"') data = data[:data.find(raw_str_tag.tag)].replace(raw_replace_tag.tag, '"' + raw_string[:raw_size] + '"')
return data
return (data, True)
def _parse_string_len(self, data): def parse_string_len(self, data, str_len_tag):
''' '''
Process {string-len} macros. Process {string-len} macros.
...@@ -292,14 +259,16 @@ class SmartSignature: ...@@ -292,14 +259,16 @@ class SmartSignature:
Returns parsed data string. Returns parsed data string.
''' '''
if not self.ignore_smart_signatures and self._is_valid(data): if not self.ignore_smart_signatures and self.is_valid(data):
str_len_replace_tag = self.tag_lookup('string-len-replace')
# Get the raw string keyword arg # Get the raw string keyword arg
raw_string = self._get_keyword_arg(data, 'string-len') (data, raw_string) = self.get_keyword_arg(data, str_len_tag)
# Was a string-len keyword specified? # Was a string-len keyword specified?
if raw_string: if raw_string:
# Convert the string to an integer as a sanity check # Get the string length
try: try:
string_length = '%d' % len(raw_string) string_length = '%d' % len(raw_string)
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
...@@ -309,10 +278,11 @@ class SmartSignature: ...@@ -309,10 +278,11 @@ class SmartSignature:
# Strip out *everything* after the string-len keyword, including the keyword itself. # Strip out *everything* after the string-len keyword, including the keyword itself.
# Failure to do so can potentially allow keyword injection from a maliciously created file. # Failure to do so can potentially allow keyword injection from a maliciously created file.
data = data.split(self.KEYWORDS['string-len'])[0].replace(self.KEYWORDS['string-len-replace'], string_length) data = data.split(str_len_tag.tag)[0].replace(str_len_replace_tag.tag, string_length)
return data
return (data, True)
def _strip_tags(self, data): def strip_tags(self, data):
''' '''
Strips the smart tags from a result string. Strips the smart tags from a result string.
...@@ -321,10 +291,10 @@ class SmartSignature: ...@@ -321,10 +291,10 @@ class SmartSignature:
Returns a sanitized string. Returns a sanitized string.
''' '''
if not self.ignore_smart_signatures: if not self.ignore_smart_signatures:
for (name, keyword) in iterator(self.KEYWORDS): for tag in self.TAGS:
start = data.find(keyword) start = data.find(tag.tag)
if start != -1: if start != -1:
end = data[start:].find(self.KEYWORD_DELIM_END) end = data[start:].find(tag.TAG_DELIM_END)
if end != -1: if end != -1:
data = data.replace(data[start:start+end+1], "") data = data.replace(data[start:start+end+1], "")
return data return data
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import os import os
import ctypes import ctypes
import ctypes.util import ctypes.util
from binwalk.core.compat import str2bytes
from binwalk.core.module import Option, Kwarg, Module from binwalk.core.module import Option, Kwarg, Module
class Deflate(object): class Deflate(object):
...@@ -46,7 +47,7 @@ class Deflate(object): ...@@ -46,7 +47,7 @@ class Deflate(object):
def decompress(self, data): def decompress(self, data):
description = None description = None
decomp_size = self.tinfl.is_deflated(data, len(data), 0) decomp_size = self.tinfl.is_deflated(str2bytes(data), len(data), 0)
if decomp_size >= self.MIN_DECOMP_SIZE: if decomp_size >= self.MIN_DECOMP_SIZE:
description = self.DESCRIPTION + ', uncompressed size >= %d' % decomp_size description = self.DESCRIPTION + ', uncompressed size >= %d' % decomp_size
......
...@@ -48,8 +48,8 @@ class Signature(Module): ...@@ -48,8 +48,8 @@ class Signature(Module):
VERBOSE_HEADER_FORMAT = "%s %d" VERBOSE_HEADER_FORMAT = "%s %d"
def init(self): def init(self):
# Create SmartSignature and MagicParser class instances. These are mostly for internal use. # Create Signature and MagicParser class instances. These are mostly for internal use.
self.smart = binwalk.core.smart.SmartSignature(self.config.filter, ignore_smart_signatures=self.dumb_scan) self.smart = binwalk.core.smart.Signature(self.config.filter, ignore_smart_signatures=self.dumb_scan)
self.parser = binwalk.core.parser.MagicParser(self.config.filter, self.smart) self.parser = binwalk.core.parser.MagicParser(self.config.filter, self.smart)
# If a raw byte sequence was specified, build a magic file from that instead of using the default magic files # If a raw byte sequence was specified, build a magic file from that instead of using the default magic files
......
import ctypes import ctypes
import ctypes.util import ctypes.util
from binwalk.core.compat import str2bytes
from binwalk.core.common import BlockFile from binwalk.core.common import BlockFile
class Plugin(object): class Plugin(object):
...@@ -23,9 +24,9 @@ class Plugin(object): ...@@ -23,9 +24,9 @@ class Plugin(object):
# If this result is a zlib signature match, try to decompress the data # If this result is a zlib signature match, try to decompress the data
if self.tinfl and result.file and result.description.lower().startswith('zlib'): if self.tinfl and result.file and result.description.lower().startswith('zlib'):
# Seek to and read the suspected zlib data # Seek to and read the suspected zlib data
fd = self.module.config.open_file(result.file.name, offset=result.offset) fd = self.module.config.open_file(result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE)
#BlockFile(result.file.name, offset=result.offset, swap=self.module.config.swap_size) # Python3 ctypes needs a bytes object, not a str
data = fd.read(self.MAX_DATA_SIZE) data = str2bytes(fd.read(self.MAX_DATA_SIZE))
fd.close() fd.close()
# Check if this is valid zlib data # Check if this is valid zlib data
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment