Commit 7a9b037a by devttys0

Finished signature and fuzzy hash modules. Added --swap option. Fixed argv parsing bugs.

parent 3969222a
...@@ -220,7 +220,7 @@ class BlockFile(io.FileIO): ...@@ -220,7 +220,7 @@ class BlockFile(io.FileIO):
# limit disk I/O, but small enough to limit the size of processed data blocks. # limit disk I/O, but small enough to limit the size of processed data blocks.
READ_BLOCK_SIZE = 1 * 1024 * 1024 READ_BLOCK_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=READ_BLOCK_SIZE): def __init__(self, fname, mode='r', length=0, offset=0, block=READ_BLOCK_SIZE, trail=MAX_TRAILING_SIZE, swap=0):
''' '''
Class constructor. Class constructor.
...@@ -228,10 +228,14 @@ class BlockFile(io.FileIO): ...@@ -228,10 +228,14 @@ class BlockFile(io.FileIO):
@mode - Mode to open the file in (default: 'r'). @mode - Mode to open the file in (default: 'r').
@length - Maximum number of bytes to read from the file via self.block_read(). @length - Maximum number of bytes to read from the file via self.block_read().
@offset - Offset at which to start reading from the file. @offset - Offset at which to start reading from the file.
@block - Size of data block to read (excluding any trailing size).
@trail - Size of trailing data to append to the end of each block.
@swap - Swap every n bytes of data.
Returns None. Returns None.
''' '''
self.total_read = 0 self.total_read = 0
self.swap_size = swap
# Python 2.6 doesn't like modes like 'rb' or 'wb' # Python 2.6 doesn't like modes like 'rb' or 'wb'
mode = mode.replace('b', '') mode = mode.replace('b', '')
...@@ -267,6 +271,9 @@ class BlockFile(io.FileIO): ...@@ -267,6 +271,9 @@ class BlockFile(io.FileIO):
if block > 0: if block > 0:
self.READ_BLOCK_SIZE = block self.READ_BLOCK_SIZE = block
if trail > 0:
self.MAX_TRAILING_SIZE = trail
io.FileIO.__init__(self, fname, mode) io.FileIO.__init__(self, fname, mode)
...@@ -278,6 +285,27 @@ class BlockFile(io.FileIO): ...@@ -278,6 +285,27 @@ class BlockFile(io.FileIO):
self.seek(self.offset) self.seek(self.offset)
def _swap_data_block(self, block):
'''
Reverses every self.swap_size bytes inside the specified data block.
Size of data block must be a multiple of self.swap_size.
@block - The data block to swap.
Returns a swapped string.
'''
i = 0
data = ""
if self.swap_size > 0:
while i < len(block):
data += block[i:i+self.swap_size][::-1]
i += self.swap_size
else:
data = block
return data
def write(self, data): def write(self, data):
''' '''
Writes data to the opened file. Writes data to the opened file.
...@@ -317,7 +345,7 @@ class BlockFile(io.FileIO): ...@@ -317,7 +345,7 @@ class BlockFile(io.FileIO):
break break
self.total_read += len(data) self.total_read += len(data)
return bytes2str(data) return self._swap_data_block(bytes2str(data))
def _internal_read(self, n=-1): def _internal_read(self, n=-1):
''' '''
......
...@@ -13,22 +13,21 @@ class ModuleOption(object): ...@@ -13,22 +13,21 @@ class ModuleOption(object):
A container class that allows modules to declare command line options. A container class that allows modules to declare command line options.
''' '''
def __init__(self, kwargs={}, nargs=0, priority=0, description="", short="", long="", type=str, dtype=""): def __init__(self, kwargs={}, priority=0, description="", short="", long="", type=None, dtype=""):
''' '''
Class constructor. Class constructor.
@kwargs - A dictionary of kwarg key-value pairs affected by this command line option. @kwargs - A dictionary of kwarg key-value pairs affected by this command line option.
@nargs - The number of arguments this option accepts (only 1 or 0 is currently supported).
@priority - A value from 0 to 100. Higher priorities will override kwarg values set by lower priority options. @priority - A value from 0 to 100. Higher priorities will override kwarg values set by lower priority options.
@description - A description to be displayed in the help output. @description - A description to be displayed in the help output.
@short - The short option to use (optional). @short - The short option to use (optional).
@long - The long option to use (if None, this option will not be displayed in help output). @long - The long option to use (if None, this option will not be displayed in help output).
@type - The accepted data type (one of: io.FileIO/argparse.FileType/binwalk.common.BlockFile, list, str, int, float). @type - The accepted data type (one of: io.FileIO/argparse.FileType/binwalk.common.BlockFile, list, str, int, float).
@dtype - The displayed accepted type string, to be shown in help output.
Returns None. Returns None.
''' '''
self.kwargs = kwargs self.kwargs = kwargs
self.nargs = nargs
self.priority = priority self.priority = priority
self.description = description self.description = description
self.short = short self.short = short
...@@ -397,7 +396,7 @@ class Modules(object): ...@@ -397,7 +396,7 @@ class Modules(object):
if module_option.long: if module_option.long:
long_opt = '--' + module_option.long long_opt = '--' + module_option.long
if module_option.nargs > 0: if module_option.type is not None:
optargs = "=<%s>" % module_option.dtype optargs = "=<%s>" % module_option.dtype
else: else:
optargs = "" optargs = ""
...@@ -464,9 +463,6 @@ class Modules(object): ...@@ -464,9 +463,6 @@ class Modules(object):
return kwargs return kwargs
def _is_file(self, fname):
return (not fname.startswith('-')) and (os.path.exists(fname) or fname.startswith('./') or fname.startswith('/'))
def argv(self, module, argv=sys.argv[1:]): def argv(self, module, argv=sys.argv[1:]):
''' '''
Processes argv for any options specific to the specified module. Processes argv for any options specific to the specified module.
...@@ -482,15 +478,17 @@ class Modules(object): ...@@ -482,15 +478,17 @@ class Modules(object):
shorts = "" shorts = ""
parser = argparse.ArgumentParser(add_help=False) parser = argparse.ArgumentParser(add_help=False)
# TODO: Add all arguments for all modules to parser so that the only unknowns will be file names. # Must build arguments from all modules so that:
# Only return arguments for the specified module though. #
if hasattr(module, "CLI"): # 1) Any conflicting arguments will raise an exception
# 2) The only unknown arguments will be the target files, making them easy to identify
for m in self.list(attribute="CLI"):
for module_option in module.CLI: for module_option in m.CLI:
if not module_option.long: if not module_option.long:
continue continue
if module_option.nargs == 0: if module_option.type is None:
action = 'store_true' action = 'store_true'
else: else:
action = None action = None
...@@ -500,47 +498,44 @@ class Modules(object): ...@@ -500,47 +498,44 @@ class Modules(object):
else: else:
parser.add_argument('--' + module_option.long, action=action, dest=module_option.long) parser.add_argument('--' + module_option.long, action=action, dest=module_option.long)
args, unknown = parser.parse_known_args(argv) args, unknown = parser.parse_known_args(argv)
args = args.__dict__ args = args.__dict__
for module_option in module.CLI: # Only add parsed options pertinent to the requested module
for module_option in module.CLI:
if module_option.type == binwalk.common.BlockFile:
if module_option.type == binwalk.common.BlockFile:
for k in get_keys(module_option.kwargs):
kwargs[k] = [] for k in get_keys(module_option.kwargs):
for unk in unknown: kwargs[k] = []
if self._is_file(unk): for unk in unknown:
kwargs[k].append(unk) kwargs[k].append(unk)
elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]: elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]:
i = 0 for (name, value) in iterator(module_option.kwargs):
for (name, value) in iterator(module_option.kwargs): if not has_key(last_priority, name) or last_priority[name] <= module_option.priority:
if not has_key(last_priority, name) or last_priority[name] <= module_option.priority:
if module_option.nargs > i: if module_option.type is not None:
value = args[module_option.long] value = args[module_option.long]
i += 1
last_priority[name] = module_option.priority
last_priority[name] = module_option.priority
# Do this manually as argparse doesn't seem to be able to handle hexadecimal values
# Do this manually as argparse doesn't seem to be able to handle hexadecimal values if module_option.type == int:
if module_option.type == int: kwargs[name] = int(value, 0)
kwargs[name] = int(value, 0) elif module_option.type == float:
elif module_option.type == float: kwargs[name] = float(value)
kwargs[name] = float(value) elif module_option.type == dict:
elif module_option.type == dict: if not has_key(kwargs, name):
if not has_key(kwargs, name): kwargs[name] = {}
kwargs[name] = {} kwargs[name][len(kwargs[name])] = value
kwargs[name][len(kwargs[name])] = value elif module_option.type == list:
elif module_option.type == list: if not has_key(kwargs, name):
if not has_key(kwargs, name): kwargs[name] = []
kwargs[name] = [] kwargs[name].append(value)
kwargs[name].append(value) else:
else: kwargs[name] = value
kwargs[name] = value
else:
raise Exception("binwalk.module.Modules.argv: %s has no attribute 'CLI'" % str(module))
if not has_key(kwargs, 'enabled'): if not has_key(kwargs, 'enabled'):
kwargs['enabled'] = False kwargs['enabled'] = False
......
...@@ -27,7 +27,6 @@ class Plotter(binwalk.module.Module): ...@@ -27,7 +27,6 @@ class Plotter(binwalk.module.Module):
long='max-points', long='max-points',
type=int, type=int,
kwargs={'max_points' : 0}, kwargs={'max_points' : 0},
nargs=1,
description='Set the maximum number of plotted data points'), description='Set the maximum number of plotted data points'),
binwalk.module.ModuleOption(short='V', binwalk.module.ModuleOption(short='V',
long='show-grids', long='show-grids',
......
...@@ -15,25 +15,26 @@ class Configuration(binwalk.module.Module): ...@@ -15,25 +15,26 @@ class Configuration(binwalk.module.Module):
CLI = [ CLI = [
binwalk.module.ModuleOption(long='length', binwalk.module.ModuleOption(long='length',
short='l', short='l',
nargs=1,
type=int, type=int,
kwargs={'length' : 0}, kwargs={'length' : 0},
description='Number of bytes to scan'), description='Number of bytes to scan'),
binwalk.module.ModuleOption(long='offset', binwalk.module.ModuleOption(long='offset',
short='o', short='o',
nargs=1,
type=int, type=int,
kwargs={'offset' : 0}, kwargs={'offset' : 0},
description='Start scan at this file offset'), description='Start scan at this file offset'),
binwalk.module.ModuleOption(long='block', binwalk.module.ModuleOption(long='block',
short='K', short='K',
nargs=1,
type=int, type=int,
kwargs={'block' : 0}, kwargs={'block' : 0},
description='Set file block size'), description='Set file block size'),
binwalk.module.ModuleOption(long='swap',
short='g',
type=int,
kwargs={'swap_size' : 0},
description='Reverse every n bytes before scanning'),
binwalk.module.ModuleOption(long='log', binwalk.module.ModuleOption(long='log',
short='f', short='f',
nargs=1,
type=argparse.FileType, type=argparse.FileType,
kwargs={'log_file' : None}, kwargs={'log_file' : None},
description='Log results to file'), description='Log results to file'),
...@@ -68,6 +69,7 @@ class Configuration(binwalk.module.Module): ...@@ -68,6 +69,7 @@ class Configuration(binwalk.module.Module):
binwalk.module.ModuleKwarg(name='length', default=0), binwalk.module.ModuleKwarg(name='length', default=0),
binwalk.module.ModuleKwarg(name='offset', default=0), binwalk.module.ModuleKwarg(name='offset', default=0),
binwalk.module.ModuleKwarg(name='block', default=0), binwalk.module.ModuleKwarg(name='block', default=0),
binwalk.module.ModuleKwarg(name='swap_size', default=0),
binwalk.module.ModuleKwarg(name='log_file', default=None), binwalk.module.ModuleKwarg(name='log_file', default=None),
binwalk.module.ModuleKwarg(name='csv', default=False), binwalk.module.ModuleKwarg(name='csv', default=False),
binwalk.module.ModuleKwarg(name='format_to_terminal', default=False), binwalk.module.ModuleKwarg(name='format_to_terminal', default=False),
...@@ -130,7 +132,7 @@ class Configuration(binwalk.module.Module): ...@@ -130,7 +132,7 @@ class Configuration(binwalk.module.Module):
if not os.path.isdir(tfile): if not os.path.isdir(tfile):
# Make sure we can open the target files # Make sure we can open the target files
try: try:
fp = binwalk.common.BlockFile(tfile, length=self.length, offset=self.offset) fp = binwalk.common.BlockFile(tfile, length=self.length, offset=self.offset, swap=self.swap_size)
self.target_files.append(fp) self.target_files.append(fp)
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
raise e raise e
......
...@@ -33,9 +33,8 @@ class HashMatch(binwalk.module.Module): ...@@ -33,9 +33,8 @@ class HashMatch(binwalk.module.Module):
long='fuzzy', long='fuzzy',
kwargs={'enabled' : True}, kwargs={'enabled' : True},
description='Perform fuzzy hash matching on files/directories'), description='Perform fuzzy hash matching on files/directories'),
binwalk.module.ModuleOption(short='t', binwalk.module.ModuleOption(short='u',
long='cutoff', long='cutoff',
nargs=1,
priority=100, priority=100,
type=int, type=int,
kwargs={'cutoff' : DEFAULT_CUTOFF}, kwargs={'cutoff' : DEFAULT_CUTOFF},
...@@ -48,10 +47,18 @@ class HashMatch(binwalk.module.Module): ...@@ -48,10 +47,18 @@ class HashMatch(binwalk.module.Module):
long='same', long='same',
kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF}, kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF},
description='Only show files that are the same'), description='Only show files that are the same'),
binwalk.module.ModuleOption(short='', binwalk.module.ModuleOption(short='p',
long='diff', long='diff',
kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF}, kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF},
description='Only show files that are different'), description='Only show files that are different'),
binwalk.module.ModuleOption(short='n',
long='name',
kwargs={'filter_by_name' : True},
description='Only compare files whose base names are the same'),
binwalk.module.ModuleOption(short='L',
long='symlinks',
kwargs={'symlinks' : True},
description="Don't ignore symlinks"),
] ]
KWARGS = [ KWARGS = [
...@@ -64,6 +71,8 @@ class HashMatch(binwalk.module.Module): ...@@ -64,6 +71,8 @@ class HashMatch(binwalk.module.Module):
binwalk.module.ModuleKwarg(name='abspath', default=False), binwalk.module.ModuleKwarg(name='abspath', default=False),
binwalk.module.ModuleKwarg(name='matches', default={}), binwalk.module.ModuleKwarg(name='matches', default={}),
binwalk.module.ModuleKwarg(name='types', default={}), binwalk.module.ModuleKwarg(name='types', default={}),
binwalk.module.ModuleKwarg(name='filter_by_name', default=False),
binwalk.module.ModuleKwarg(name='symlinks', default=False),
] ]
# Requires libfuzzy.so # Requires libfuzzy.so
...@@ -130,7 +139,7 @@ class HashMatch(binwalk.module.Module): ...@@ -130,7 +139,7 @@ class HashMatch(binwalk.module.Module):
file1_dup = False file1_dup = False
file2_dup = False file2_dup = False
if not self.name or os.path.basename(file1) == os.path.basename(file2): if not self.filter_by_name or os.path.basename(file1) == os.path.basename(file2):
if os.path.exists(file1) and os.path.exists(file2): if os.path.exists(file1) and os.path.exists(file2):
hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT) hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
......
...@@ -16,23 +16,46 @@ class Signature(binwalk.module.Module): ...@@ -16,23 +16,46 @@ class Signature(binwalk.module.Module):
description='Scan target file(s) for file signatures'), description='Scan target file(s) for file signatures'),
binwalk.module.ModuleOption(short='m', binwalk.module.ModuleOption(short='m',
long='magic', long='magic',
nargs=1,
kwargs={'magic_files' : []}, kwargs={'magic_files' : []},
type=[], type=list,
dtype='file', dtype='file',
description='Specify a custom magic file to use'), description='Specify a custom magic file to use'),
binwalk.module.ModuleOption(short='R', binwalk.module.ModuleOption(short='R',
long='raw-bytes', long='raw-bytes',
nargs=1,
kwargs={'raw_bytes' : None}, kwargs={'raw_bytes' : None},
type=str, type=str,
description='Specify a sequence of bytes to search for'), description='Specify a sequence of bytes to search for'),
binwalk.module.ModuleOption(short='b',
long='dumb',
kwargs={'dumb_scan' : True},
description='Disable smart signature keywords'),
binwalk.module.ModuleOption(short='I',
long='show-invalid',
kwargs={'show_invalid' : True},
description='Show results marked as invalid'),
binwalk.module.ModuleOption(short='x',
long='exclude',
kwargs={'exclude_filters' : []},
type=list,
dtype=str.__name__,
description='Exclude results that match <str>'),
binwalk.module.ModuleOption(short='y',
long='include',
kwargs={'include_filters' : []},
type=list,
dtype=str.__name__,
description='Only show results that match <str>'),
] ]
KWARGS = [ KWARGS = [
binwalk.module.ModuleKwarg(name='enabled', default=False), binwalk.module.ModuleKwarg(name='enabled', default=False),
binwalk.module.ModuleKwarg(name='magic_files', default=[]), binwalk.module.ModuleKwarg(name='dumb_scan', default=False),
binwalk.module.ModuleKwarg(name='show_invalid', default=False),
binwalk.module.ModuleKwarg(name='raw_bytes', default=None), binwalk.module.ModuleKwarg(name='raw_bytes', default=None),
binwalk.module.ModuleKwarg(name='magic_files', default=[]),
binwalk.module.ModuleKwarg(name='exclude_filters', default=[]),
binwalk.module.ModuleKwarg(name='include_filters', default=[]),
] ]
HEADER = ["DECIMAL", "HEX", "DESCRIPTION"] HEADER = ["DECIMAL", "HEX", "DESCRIPTION"]
...@@ -45,9 +68,15 @@ class Signature(binwalk.module.Module): ...@@ -45,9 +68,15 @@ class Signature(binwalk.module.Module):
def init(self): def init(self):
# Create SmartSignature and MagicParser class instances. These are mostly for internal use. # Create SmartSignature and MagicParser class instances. These are mostly for internal use.
self.filter = binwalk.filter.MagicFilter() self.filter = binwalk.filter.MagicFilter()
self.smart = binwalk.smartsignature.SmartSignature(self.filter, ignore_smart_signatures=False) self.smart = binwalk.smartsignature.SmartSignature(self.filter, ignore_smart_signatures=self.dumb_scan)
self.parser = binwalk.parser.MagicParser(self.filter, self.smart) self.parser = binwalk.parser.MagicParser(self.filter, self.smart)
# Set any specified include/exclude filters
for regex in self.exclude_filters:
self.filter.exclude(regex)
for regex in self.include_filters:
self.filter.include(regex)
# If a raw byte sequence was specified, build a magic file from that instead of using the default magic files # If a raw byte sequence was specified, build a magic file from that instead of using the default magic files
if self.raw_bytes is not None: if self.raw_bytes is not None:
self.magic_files = [self.parser.file_from_string(self.raw_bytes)] self.magic_files = [self.parser.file_from_string(self.raw_bytes)]
...@@ -72,14 +101,15 @@ class Signature(binwalk.module.Module): ...@@ -72,14 +101,15 @@ class Signature(binwalk.module.Module):
''' '''
Called automatically by self.result. Called automatically by self.result.
''' '''
if not r.description: if not self.show_invalid:
r.valid = False if not r.description:
r.valid = False
if r.size and (r.size + r.offset) > r.file.size: if r.size and (r.size + r.offset) > r.file.size:
r.valid = False r.valid = False
if r.jump and (r.jump + r.offset) > r.file.size: if r.jump and (r.jump + r.offset) > r.file.size:
r.valid = False r.valid = False
def scan_file(self, fp): def scan_file(self, fp):
while True: while True:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment