Finished signature and fuzzy hash modules. Added --swap option. Fixed argv parsing bugs.

7a9b037a · devttys0 · 3969222a · 7a9b037a · 7a9b037a · 7a9b037a
Commit 7a9b037a authored Dec 19, 2013 by devttys0
7 changed files
--- a/src/binwalk/__init__.py
+++ b/src/binwalk/__init__.py
-
+from module import Modules
--- a/src/binwalk/common.py
+++ b/src/binwalk/common.py
@@ -220,7 +220,7 @@ class BlockFile(io.FileIO):
 	# limit disk I/O, but small enough to limit the size of processed data blocks.
 	READ_BLOCK_SIZE = 1 * 1024 * 1024

-	def __init__(self, fname, mode='r', length=0, offset=0, block=READ_BLOCK_SIZE):
+	def __init__(self, fname, mode='r', length=0, offset=0, block=READ_BLOCK_SIZE, trail=MAX_TRAILING_SIZE, swap=0):
 		'''
 		Class constructor.

@@ -228,10 +228,14 @@ class BlockFile(io.FileIO):
 		@mode   - Mode to open the file in (default: 'r').
 		@length - Maximum number of bytes to read from the file via self.block_read().
 		@offset - Offset at which to start reading from the file.
+		@block  - Size of data block to read (excluding any trailing size).
+		@trail  - Size of trailing data to append to the end of each block.
+		@swap   - Swap every n bytes of data.

 		Returns None.
 		'''
 		self.total_read = 0
+		self.swap_size = swap

 		# Python 2.6 doesn't like modes like 'rb' or 'wb'
 		mode = mode.replace('b', '')
@@ -267,6 +271,9 @@ class BlockFile(io.FileIO):

 		if block > 0:
 			self.READ_BLOCK_SIZE = block
+	
+		if trail > 0:
+			self.MAX_TRAILING_SIZE = trail

 		io.FileIO.__init__(self, fname, mode)

@@ -278,6 +285,27 @@ class BlockFile(io.FileIO):

 		self.seek(self.offset)

+	def _swap_data_block(self, block):
+		'''
+		Reverses every self.swap_size bytes inside the specified data block.
+		Size of data block must be a multiple of self.swap_size.
+
+		@block - The data block to swap.
+
+		Returns a swapped string.
+		'''
+		i = 0
+		data = ""
+		
+		if self.swap_size > 0:
+			while i < len(block):
+				data += block[i:i+self.swap_size][::-1]
+				i += self.swap_size
+		else:
+			data = block
+
+		return data
+
 	def write(self, data):
 		'''
 		Writes data to the opened file.
@@ -317,7 +345,7 @@ class BlockFile(io.FileIO):
 				break

 		self.total_read += len(data)
-		return bytes2str(data)
+		return self._swap_data_block(bytes2str(data))

 	def _internal_read(self, n=-1):
 		'''

--- a/src/binwalk/module.py
+++ b/src/binwalk/module.py
@@ -13,22 +13,21 @@ class ModuleOption(object):
 	A container class that allows modules to declare command line options.
 	'''

-	def __init__(self, kwargs={}, nargs=0, priority=0, description="", short="", long="", type=str, dtype=""):
+	def __init__(self, kwargs={}, priority=0, description="", short="", long="", type=None, dtype=""):
 		'''
 		Class constructor.

 		@kwargs      - A dictionary of kwarg key-value pairs affected by this command line option.
-		@nargs       - The number of arguments this option accepts (only 1 or 0 is currently supported).
 		@priority    - A value from 0 to 100. Higher priorities will override kwarg values set by lower priority options.
 		@description - A description to be displayed in the help output.
 		@short       - The short option to use (optional).
 		@long        - The long option to use (if None, this option will not be displayed in help output).
 		@type        - The accepted data type (one of: io.FileIO/argparse.FileType/binwalk.common.BlockFile, list, str, int, float).
+		@dtype       - The displayed accepted type string, to be shown in help output.

 		Returns None.
 		'''
 		self.kwargs = kwargs
-		self.nargs = nargs
 		self.priority = priority
 		self.description = description
 		self.short = short
@@ -397,7 +396,7 @@ class Modules(object):
 					if module_option.long:
 						long_opt = '--' + module_option.long
 					
-						if module_option.nargs > 0:
+						if module_option.type is not None:
 							optargs = "=<%s>" % module_option.dtype
 						else:
 							optargs = ""
@@ -464,9 +463,6 @@ class Modules(object):
 	
 		return kwargs

-	def _is_file(self, fname):
-		return (not fname.startswith('-')) and (os.path.exists(fname) or fname.startswith('./') or fname.startswith('/'))
-
 	def argv(self, module, argv=sys.argv[1:]):
 		'''
 		Processes argv for any options specific to the specified module.
@@ -482,15 +478,17 @@ class Modules(object):
 		shorts = ""
 		parser = argparse.ArgumentParser(add_help=False)

-		# TODO: Add all arguments for all modules to parser so that the only unknowns will be file names.
-		#       Only return arguments for the specified module though.
-		if hasattr(module, "CLI"):
+		# Must build arguments from all modules so that:
+		#
+		#	1) Any conflicting arguments will raise an exception
+		#	2) The only unknown arguments will be the target files, making them easy to identify
+		for m in self.list(attribute="CLI"):

-			for module_option in module.CLI:
+			for module_option in m.CLI:
 				if not module_option.long:
 					continue

-				if module_option.nargs == 0:
+				if module_option.type is None:
 					action = 'store_true'
 				else:
 					action = None
@@ -500,47 +498,44 @@ class Modules(object):
 				else:
 					parser.add_argument('--' + module_option.long, action=action, dest=module_option.long)

-			args, unknown = parser.parse_known_args(argv)
-			args = args.__dict__
-
-			for module_option in module.CLI:
-
-				if module_option.type == binwalk.common.BlockFile:
-
-					for k in get_keys(module_option.kwargs):
-						kwargs[k] = []
-						for unk in unknown:
-							if self._is_file(unk):
-								kwargs[k].append(unk)
-
-				elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]:
-
-					i = 0
-					for (name, value) in iterator(module_option.kwargs):
-						if not has_key(last_priority, name) or last_priority[name] <= module_option.priority:
-							if module_option.nargs > i:
-								value = args[module_option.long]
-								i += 1
-
-							last_priority[name] = module_option.priority
-
-							# Do this manually as argparse doesn't seem to be able to handle hexadecimal values
-							if module_option.type == int:
-								kwargs[name] = int(value, 0)
-							elif module_option.type == float:
-								kwargs[name] = float(value)
-							elif module_option.type == dict:
-								if not has_key(kwargs, name):
-									kwargs[name] = {}
-								kwargs[name][len(kwargs[name])] = value
-							elif module_option.type == list:
-								if not has_key(kwargs, name):
-									kwargs[name] = []
-								kwargs[name].append(value)
-							else:
-								kwargs[name] = value
-		else:
-			raise Exception("binwalk.module.Modules.argv: %s has no attribute 'CLI'" % str(module))
+		args, unknown = parser.parse_known_args(argv)
+		args = args.__dict__
+
+		# Only add parsed options pertinent to the requested module
+		for module_option in module.CLI:
+
+			if module_option.type == binwalk.common.BlockFile:
+
+				for k in get_keys(module_option.kwargs):
+					kwargs[k] = []
+					for unk in unknown:
+						kwargs[k].append(unk)
+
+			elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]:
+
+				for (name, value) in iterator(module_option.kwargs):
+					if not has_key(last_priority, name) or last_priority[name] <= module_option.priority:
+
+						if module_option.type is not None:
+							value = args[module_option.long]
+
+						last_priority[name] = module_option.priority
+
+						# Do this manually as argparse doesn't seem to be able to handle hexadecimal values
+						if module_option.type == int:
+							kwargs[name] = int(value, 0)
+						elif module_option.type == float:
+							kwargs[name] = float(value)
+						elif module_option.type == dict:
+							if not has_key(kwargs, name):
+								kwargs[name] = {}
+							kwargs[name][len(kwargs[name])] = value
+						elif module_option.type == list:
+							if not has_key(kwargs, name):
+								kwargs[name] = []
+							kwargs[name].append(value)
+						else:
+							kwargs[name] = value

 		if not has_key(kwargs, 'enabled'):
 			kwargs['enabled'] = False

--- a/src/binwalk/modules/binvis.py
+++ b/src/binwalk/modules/binvis.py
@@ -27,7 +27,6 @@ class Plotter(binwalk.module.Module):
 										long='max-points',
 										type=int,
 										kwargs={'max_points' : 0},
-										nargs=1,
 										description='Set the maximum number of plotted data points'),
 			binwalk.module.ModuleOption(short='V',
 										long='show-grids',

--- a/src/binwalk/modules/configuration.py
+++ b/src/binwalk/modules/configuration.py
@@ -15,25 +15,26 @@ class Configuration(binwalk.module.Module):
 	CLI = [
 		binwalk.module.ModuleOption(long='length',
 									short='l',
-									nargs=1,
 									type=int,
 									kwargs={'length' : 0},
 									description='Number of bytes to scan'),
 		binwalk.module.ModuleOption(long='offset',
 									short='o',
-									nargs=1,
 									type=int,
 									kwargs={'offset' : 0},
 									description='Start scan at this file offset'),
 		binwalk.module.ModuleOption(long='block',
 									short='K',
-									nargs=1,
 									type=int,
 									kwargs={'block' : 0},
 									description='Set file block size'),
+		binwalk.module.ModuleOption(long='swap',
+									short='g',
+									type=int,
+									kwargs={'swap_size' : 0},
+									description='Reverse every n bytes before scanning'),
 		binwalk.module.ModuleOption(long='log',
 									short='f',
-									nargs=1,
 									type=argparse.FileType,
 									kwargs={'log_file' : None},
 									description='Log results to file'),
@@ -68,6 +69,7 @@ class Configuration(binwalk.module.Module):
 		binwalk.module.ModuleKwarg(name='length', default=0),
 		binwalk.module.ModuleKwarg(name='offset', default=0),
 		binwalk.module.ModuleKwarg(name='block', default=0),
+		binwalk.module.ModuleKwarg(name='swap_size', default=0),
 		binwalk.module.ModuleKwarg(name='log_file', default=None),
 		binwalk.module.ModuleKwarg(name='csv', default=False),
 		binwalk.module.ModuleKwarg(name='format_to_terminal', default=False),
@@ -130,7 +132,7 @@ class Configuration(binwalk.module.Module):
 			if not os.path.isdir(tfile):
 				# Make sure we can open the target files
 				try:
-					fp = binwalk.common.BlockFile(tfile, length=self.length, offset=self.offset)
+					fp = binwalk.common.BlockFile(tfile, length=self.length, offset=self.offset, swap=self.swap_size)
 					self.target_files.append(fp)
 				except KeyboardInterrupt as e:
 					raise e

--- a/src/binwalk/modules/hashmatch.py
+++ b/src/binwalk/modules/hashmatch.py
@@ -33,9 +33,8 @@ class HashMatch(binwalk.module.Module):
 									long='fuzzy',
 									kwargs={'enabled' : True},
 									description='Perform fuzzy hash matching on files/directories'),
-		binwalk.module.ModuleOption(short='t',
+		binwalk.module.ModuleOption(short='u',
 									long='cutoff',
-									nargs=1,
 									priority=100,
 									type=int,
 									kwargs={'cutoff' : DEFAULT_CUTOFF},
@@ -48,10 +47,18 @@ class HashMatch(binwalk.module.Module):
 									long='same',
 									kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF},
 									description='Only show files that are the same'),
-		binwalk.module.ModuleOption(short='',
+		binwalk.module.ModuleOption(short='p',
 									long='diff',
 									kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF},
 									description='Only show files that are different'),
+		binwalk.module.ModuleOption(short='n',
+									long='name',
+									kwargs={'filter_by_name' : True},
+									description='Only compare files whose base names are the same'),
+		binwalk.module.ModuleOption(short='L',
+									long='symlinks',
+									kwargs={'symlinks' : True},
+									description="Don't ignore symlinks"),
 	]

 	KWARGS = [
@@ -64,6 +71,8 @@ class HashMatch(binwalk.module.Module):
 		binwalk.module.ModuleKwarg(name='abspath', default=False),
 		binwalk.module.ModuleKwarg(name='matches', default={}),
 		binwalk.module.ModuleKwarg(name='types', default={}),
+		binwalk.module.ModuleKwarg(name='filter_by_name', default=False),
+		binwalk.module.ModuleKwarg(name='symlinks', default=False),
 	]

 	# Requires libfuzzy.so
@@ -130,7 +139,7 @@ class HashMatch(binwalk.module.Module):
 		file1_dup = False
 		file2_dup = False

-		if not self.name or os.path.basename(file1) == os.path.basename(file2):
+		if not self.filter_by_name or os.path.basename(file1) == os.path.basename(file2):
 			if os.path.exists(file1) and os.path.exists(file2):

 				hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)

--- a/src/binwalk/modules/signature.py
+++ b/src/binwalk/modules/signature.py
@@ -16,23 +16,46 @@ class Signature(binwalk.module.Module):
 										description='Scan target file(s) for file signatures'),
 			binwalk.module.ModuleOption(short='m',
 										long='magic',
-										nargs=1,
 										kwargs={'magic_files' : []},
-										type=[],
+										type=list,
 										dtype='file',
 										description='Specify a custom magic file to use'),
 			binwalk.module.ModuleOption(short='R',
 										long='raw-bytes',
-										nargs=1,
 										kwargs={'raw_bytes' : None},
 										type=str,
 										description='Specify a sequence of bytes to search for'),
+			binwalk.module.ModuleOption(short='b',
+										long='dumb',
+										kwargs={'dumb_scan' : True},
+										description='Disable smart signature keywords'),
+			binwalk.module.ModuleOption(short='I',
+										long='show-invalid',
+										kwargs={'show_invalid' : True},
+										description='Show results marked as invalid'),
+			binwalk.module.ModuleOption(short='x',
+										long='exclude',
+										kwargs={'exclude_filters' : []},
+										type=list,
+										dtype=str.__name__,
+										description='Exclude results that match <str>'),
+			binwalk.module.ModuleOption(short='y',
+										long='include',
+										kwargs={'include_filters' : []},
+										type=list,
+										dtype=str.__name__,
+										description='Only show results that match <str>'),
+
 	]

 	KWARGS = [
 			binwalk.module.ModuleKwarg(name='enabled', default=False),
-			binwalk.module.ModuleKwarg(name='magic_files', default=[]),
+			binwalk.module.ModuleKwarg(name='dumb_scan', default=False),
+			binwalk.module.ModuleKwarg(name='show_invalid', default=False),
 			binwalk.module.ModuleKwarg(name='raw_bytes', default=None),
+			binwalk.module.ModuleKwarg(name='magic_files', default=[]),
+			binwalk.module.ModuleKwarg(name='exclude_filters', default=[]),
+			binwalk.module.ModuleKwarg(name='include_filters', default=[]),
 	]

 	HEADER = ["DECIMAL", "HEX", "DESCRIPTION"]
@@ -45,9 +68,15 @@ class Signature(binwalk.module.Module):
 	def init(self):
 		# Create SmartSignature and MagicParser class instances. These are mostly for internal use.
 		self.filter = binwalk.filter.MagicFilter()
-		self.smart = binwalk.smartsignature.SmartSignature(self.filter, ignore_smart_signatures=False)
+		self.smart = binwalk.smartsignature.SmartSignature(self.filter, ignore_smart_signatures=self.dumb_scan)
 		self.parser = binwalk.parser.MagicParser(self.filter, self.smart)

+		# Set any specified include/exclude filters
+		for regex in self.exclude_filters:
+			self.filter.exclude(regex)
+		for regex in self.include_filters:
+			self.filter.include(regex)
+
 		# If a raw byte sequence was specified, build a magic file from that instead of using the default magic files
 		if self.raw_bytes is not None:
 			self.magic_files = [self.parser.file_from_string(self.raw_bytes)]
@@ -72,14 +101,15 @@ class Signature(binwalk.module.Module):
 		'''
 		Called automatically by self.result.
 		'''
-		if not r.description:
-			r.valid = False
+		if not self.show_invalid:
+			if not r.description:
+				r.valid = False

-		if r.size and (r.size + r.offset) > r.file.size:
-			r.valid = False
+			if r.size and (r.size + r.offset) > r.file.size:
+				r.valid = False

-		if r.jump and (r.jump + r.offset) > r.file.size:
-			r.valid = False
+			if r.jump and (r.jump + r.offset) > r.file.size:
+				r.valid = False

 	def scan_file(self, fp):
 		while True: