From a5217d6217bcb2cc3f73e592be639d3a725502e4 Mon Sep 17 00:00:00 2001 From: devttys0 <heffnercj@gmail.com> Date: Sun, 12 Jan 2014 14:11:21 -0500 Subject: [PATCH] s/\t/ /g --- src/binwalk/core/C.py | 222 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------- src/binwalk/core/common.py | 706 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/core/compat.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------- src/binwalk/core/display.py | 334 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/core/filter.py | 386 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/core/module.py | 1484 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/core/parser.py | 688 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/core/plugin.py | 352 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/core/smart.py | 608 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/magic/bincast | 10 ++++++++++ src/binwalk/modules/binvis.py | 592 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/modules/compression.py | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------ src/binwalk/modules/entropy.py | 376 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/modules/extractor.py | 1112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/modules/general.py | 348 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ src/binwalk/modules/hashmatch.py | 610 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/modules/heuristics.py | 330 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/plugins/compressd.py | 52 ++++++++++++++++++++++++++-------------------------- src/binwalk/plugins/cpio.py | 54 +++++++++++++++++++++++++++--------------------------- src/binwalk/plugins/lzmamod.py | 98 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------- src/binwalk/plugins/zlibvalid.py | 70 +++++++++++++++++++++++++++++++++++----------------------------------- 21 files changed, 4331 insertions(+), 4321 deletions(-) create mode 100644 src/binwalk/magic/bincast diff --git a/src/binwalk/core/C.py b/src/binwalk/core/C.py index 6df8779..3265519 100644 --- a/src/binwalk/core/C.py +++ b/src/binwalk/core/C.py @@ -6,126 +6,126 @@ import ctypes.util from binwalk.core.compat import * class Function(object): - ''' - Container class for defining library functions. - ''' - def __init__(self, **kwargs): - self.name = None - self.type = int + ''' + Container class for defining library functions. + ''' + def __init__(self, **kwargs): + self.name = None + self.type = int - for (k, v) in iterator(kwargs): - setattr(self, k, v) + for (k, v) in iterator(kwargs): + setattr(self, k, v) class FunctionHandler(object): - ''' - Class for abstracting function calls via ctypes and handling Python 2/3 compatibility issues. - ''' - PY2CTYPES = { - bytes : ctypes.c_char_p, - str : ctypes.c_char_p, - int : ctypes.c_int, - float : ctypes.c_float, - bool : ctypes.c_int, - None : ctypes.c_int, - } - - RETVAL_CONVERTERS = { - None : int, - int : int, - float : float, - bool : bool, - str : bytes2str, - bytes : str2bytes, - } - - def __init__(self, library, function): - ''' - Class constructor. - - @library - Library handle as returned by ctypes.cdll.LoadLibrary. - @function - An instance of the binwalk.core.C.Function class. - - Returns None. - ''' - self.retype = function.type - self.function = getattr(library, function.name) - - if has_key(self.PY2CTYPES, self.retype): - self.function.restype = self.PY2CTYPES[self.retype] - self.retval_converter = self.RETVAL_CONVERTERS[self.retype] - else: - raise Exception("Unknown return type: '%s'" % self.retype) - - def run(self, *args): - ''' - Executes the library function, handling Python 2/3 compatibility and properly converting the return type. - - @*args - Library function arguments. - - Returns the return value from the libraray function. - ''' - args = list(args) - - # Python3 expects a bytes object for char *'s, not a str. - # This allows us to pass either, regardless of the Python version. - for i in range(0, len(args)): - if isinstance(args[i], str): - args[i] = str2bytes(args[i]) - - return self.retval_converter(self.function(*args)) - + ''' + Class for abstracting function calls via ctypes and handling Python 2/3 compatibility issues. + ''' + PY2CTYPES = { + bytes : ctypes.c_char_p, + str : ctypes.c_char_p, + int : ctypes.c_int, + float : ctypes.c_float, + bool : ctypes.c_int, + None : ctypes.c_int, + } + + RETVAL_CONVERTERS = { + None : int, + int : int, + float : float, + bool : bool, + str : bytes2str, + bytes : str2bytes, + } + + def __init__(self, library, function): + ''' + Class constructor. + + @library - Library handle as returned by ctypes.cdll.LoadLibrary. + @function - An instance of the binwalk.core.C.Function class. + + Returns None. + ''' + self.retype = function.type + self.function = getattr(library, function.name) + + if has_key(self.PY2CTYPES, self.retype): + self.function.restype = self.PY2CTYPES[self.retype] + self.retval_converter = self.RETVAL_CONVERTERS[self.retype] + else: + raise Exception("Unknown return type: '%s'" % self.retype) + + def run(self, *args): + ''' + Executes the library function, handling Python 2/3 compatibility and properly converting the return type. + + @*args - Library function arguments. + + Returns the return value from the libraray function. + ''' + args = list(args) + + # Python3 expects a bytes object for char *'s, not a str. + # This allows us to pass either, regardless of the Python version. + for i in range(0, len(args)): + if isinstance(args[i], str): + args[i] = str2bytes(args[i]) + + return self.retval_converter(self.function(*args)) + class Library(object): - ''' - Class for loading the specified library via ctypes. - ''' + ''' + Class for loading the specified library via ctypes. + ''' - def __init__(self, library, functions): - ''' - Class constructor. + def __init__(self, library, functions): + ''' + Class constructor. - @library - Library name (e.g., 'magic' for libmagic). - @functions - A dictionary of function names and their return types (e.g., {'magic_buffer' : str}) + @library - Library name (e.g., 'magic' for libmagic). + @functions - A dictionary of function names and their return types (e.g., {'magic_buffer' : str}) - Returns None. - ''' - self.library = ctypes.cdll.LoadLibrary(self.find_library(library)) - if not self.library: - raise Exception("Failed to load library '%s'" % library) + Returns None. + ''' + self.library = ctypes.cdll.LoadLibrary(self.find_library(library)) + if not self.library: + raise Exception("Failed to load library '%s'" % library) - for function in functions: - f = FunctionHandler(self.library, function) - setattr(self, function.name, f.run) + for function in functions: + f = FunctionHandler(self.library, function) + setattr(self, function.name, f.run) - def find_library(self, library): - ''' - Locates the specified library. + def find_library(self, library): + ''' + Locates the specified library. - @library - Library name (e.g., 'magic' for libmagic). + @library - Library name (e.g., 'magic' for libmagic). - Returns a string to be passed to ctypes.cdll.LoadLibrary. - ''' - lib_path = None - system_paths = { - 'linux' : ['/usr/local/lib/lib%s.so' % library], - 'linux2' : ['/usr/local/lib/lib%s.so' % library], - 'linux3' : ['/usr/local/lib/lib%s.so' % library], - 'darwin' : ['/opt/local/lib/lib%s.dylib' % library, - '/usr/local/lib/lib%s.dylib' % library, - ] + glob.glob('/usr/local/Cellar/lib%s/*/lib/lib%s.dylib' % (library, library)), - - 'win32' : ['%s.dll' % library] - } - - lib_path = ctypes.util.find_library(library) - - if not lib_path: - for path in system_paths[sys.platform]: - if os.path.exists(path): - lib_path = path - break - - if not lib_path: - raise Exception("Failed to locate library '%s'" % library) - - return lib_path + Returns a string to be passed to ctypes.cdll.LoadLibrary. + ''' + lib_path = None + system_paths = { + 'linux' : ['/usr/local/lib/lib%s.so' % library], + 'linux2' : ['/usr/local/lib/lib%s.so' % library], + 'linux3' : ['/usr/local/lib/lib%s.so' % library], + 'darwin' : ['/opt/local/lib/lib%s.dylib' % library, + '/usr/local/lib/lib%s.dylib' % library, + ] + glob.glob('/usr/local/Cellar/lib%s/*/lib/lib%s.dylib' % (library, library)), + + 'win32' : ['%s.dll' % library] + } + + lib_path = ctypes.util.find_library(library) + + if not lib_path: + for path in system_paths[sys.platform]: + if os.path.exists(path): + lib_path = path + break + + if not lib_path: + raise Exception("Failed to locate library '%s'" % library) + + return lib_path diff --git a/src/binwalk/core/common.py b/src/binwalk/core/common.py index da683ed..dfae8d0 100644 --- a/src/binwalk/core/common.py +++ b/src/binwalk/core/common.py @@ -9,383 +9,383 @@ from binwalk.core.compat import * # This allows other modules/scripts to subclass BlockFile from a custom class. Defaults to io.FileIO. if has_key(__builtins__, 'BLOCK_FILE_PARENT_CLASS'): - BLOCK_FILE_PARENT_CLASS = __builtins__['BLOCK_FILE_PARENT_CLASS'] + BLOCK_FILE_PARENT_CLASS = __builtins__['BLOCK_FILE_PARENT_CLASS'] else: - BLOCK_FILE_PARENT_CLASS = io.FileIO + BLOCK_FILE_PARENT_CLASS = io.FileIO def file_md5(file_name): - ''' - Generate an MD5 hash of the specified file. - - @file_name - The file to hash. + ''' + Generate an MD5 hash of the specified file. + + @file_name - The file to hash. - Returns an MD5 hex digest string. - ''' - md5 = hashlib.md5() + Returns an MD5 hex digest string. + ''' + md5 = hashlib.md5() - with open(file_name, 'rb') as f: - for chunk in iter(lambda: f.read(128*md5.block_size), b''): - md5.update(chunk) + with open(file_name, 'rb') as f: + for chunk in iter(lambda: f.read(128*md5.block_size), b''): + md5.update(chunk) - return md5.hexdigest() + return md5.hexdigest() def file_size(filename): - ''' - Obtains the size of a given file. - - @filename - Path to the file. - - Returns the size of the file. - ''' - # Using open/lseek works on both regular files and block devices - fd = os.open(filename, os.O_RDONLY) - try: - return os.lseek(fd, 0, os.SEEK_END) - except KeyboardInterrupt as e: - raise e - except Exception as e: - raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e))) - finally: - os.close(fd) + ''' + Obtains the size of a given file. + + @filename - Path to the file. + + Returns the size of the file. + ''' + # Using open/lseek works on both regular files and block devices + fd = os.open(filename, os.O_RDONLY) + try: + return os.lseek(fd, 0, os.SEEK_END) + except KeyboardInterrupt as e: + raise e + except Exception as e: + raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e))) + finally: + os.close(fd) def strip_quoted_strings(string): - ''' - Strips out data in between double quotes. - - @string - String to strip. - - Returns a sanitized string. - ''' - # This regex removes all quoted data from string. - # Note that this removes everything in between the first and last double quote. - # This is intentional, as printed (and quoted) strings from a target file may contain - # double quotes, and this function should ignore those. However, it also means that any - # data between two quoted strings (ex: '"quote 1" you won't see me "quote 2"') will also be stripped. - return re.sub(r'\"(.*)\"', "", string) + ''' + Strips out data in between double quotes. + + @string - String to strip. + + Returns a sanitized string. + ''' + # This regex removes all quoted data from string. + # Note that this removes everything in between the first and last double quote. + # This is intentional, as printed (and quoted) strings from a target file may contain + # double quotes, and this function should ignore those. However, it also means that any + # data between two quoted strings (ex: '"quote 1" you won't see me "quote 2"') will also be stripped. + return re.sub(r'\"(.*)\"', "", string) def get_quoted_strings(string): - ''' - Returns a string comprised of all data in between double quotes. - - @string - String to get quoted data from. - - Returns a string of quoted data on success. - Returns a blank string if no quoted data is present. - ''' - try: - # This regex grabs all quoted data from string. - # Note that this gets everything in between the first and last double quote. - # This is intentional, as printed (and quoted) strings from a target file may contain - # double quotes, and this function should ignore those. However, it also means that any - # data between two quoted strings (ex: '"quote 1" non-quoted data "quote 2"') will also be included. - return re.findall(r'\"(.*)\"', string)[0] - except KeyboardInterrupt as e: - raise e - except Exception: - return '' + ''' + Returns a string comprised of all data in between double quotes. + + @string - String to get quoted data from. + + Returns a string of quoted data on success. + Returns a blank string if no quoted data is present. + ''' + try: + # This regex grabs all quoted data from string. + # Note that this gets everything in between the first and last double quote. + # This is intentional, as printed (and quoted) strings from a target file may contain + # double quotes, and this function should ignore those. However, it also means that any + # data between two quoted strings (ex: '"quote 1" non-quoted data "quote 2"') will also be included. + return re.findall(r'\"(.*)\"', string)[0] + except KeyboardInterrupt as e: + raise e + except Exception: + return '' def unique_file_name(base_name, extension=''): - ''' - Creates a unique file name based on the specified base name. + ''' + Creates a unique file name based on the specified base name. - @base_name - The base name to use for the unique file name. - @extension - The file extension to use for the unique file name. + @base_name - The base name to use for the unique file name. + @extension - The file extension to use for the unique file name. - Returns a unique file string. - ''' - idcount = 0 - - if extension and not extension.startswith('.'): - extension = '.%s' % extension + Returns a unique file string. + ''' + idcount = 0 + + if extension and not extension.startswith('.'): + extension = '.%s' % extension - fname = base_name + extension + fname = base_name + extension - while os.path.exists(fname): - fname = "%s-%d%s" % (base_name, idcount, extension) - idcount += 1 + while os.path.exists(fname): + fname = "%s-%d%s" % (base_name, idcount, extension) + idcount += 1 - return fname + return fname def strings(filename, minimum=4): - ''' - A strings generator, similar to the Unix strings utility. - - @filename - The file to search for strings in. - @minimum - The minimum string length to search for. - - Yeilds printable ASCII strings from filename. - ''' - result = "" - - with BlockFile(filename) as f: - while True: - (data, dlen) = f.read_block() - if not data: - break - - for c in data: - if c in string.printable: - result += c - continue - elif len(result) >= minimum: - yield result - result = "" - else: - result = "" + ''' + A strings generator, similar to the Unix strings utility. + + @filename - The file to search for strings in. + @minimum - The minimum string length to search for. + + Yeilds printable ASCII strings from filename. + ''' + result = "" + + with BlockFile(filename) as f: + while True: + (data, dlen) = f.read_block() + if not data: + break + + for c in data: + if c in string.printable: + result += c + continue + elif len(result) >= minimum: + yield result + result = "" + else: + result = "" class MathExpression(object): - ''' - Class for safely evaluating mathematical expressions from a string. - Stolen from: http://stackoverflow.com/questions/2371436/evaluating-a-mathematical-expression-in-a-string - ''' - - OPERATORS = { - ast.Add: op.add, - ast.Sub: op.sub, - ast.Mult: op.mul, - ast.Div: op.truediv, - ast.Pow: op.pow, - ast.BitXor: op.xor - } - - def __init__(self, expression): - self.expression = expression - self.value = None - - if expression: - try: - self.value = self.evaluate(self.expression) - except KeyboardInterrupt as e: - raise e - except Exception: - pass - - def evaluate(self, expr): - return self._eval(ast.parse(expr).body[0].value) - - def _eval(self, node): - if isinstance(node, ast.Num): # <number> - return node.n - elif isinstance(node, ast.operator): # <operator> - return self.OPERATORS[type(node)] - elif isinstance(node, ast.BinOp): # <left> <operator> <right> - return self._eval(node.op)(self._eval(node.left), self._eval(node.right)) - else: - raise TypeError(node) + ''' + Class for safely evaluating mathematical expressions from a string. + Stolen from: http://stackoverflow.com/questions/2371436/evaluating-a-mathematical-expression-in-a-string + ''' + + OPERATORS = { + ast.Add: op.add, + ast.Sub: op.sub, + ast.Mult: op.mul, + ast.Div: op.truediv, + ast.Pow: op.pow, + ast.BitXor: op.xor + } + + def __init__(self, expression): + self.expression = expression + self.value = None + + if expression: + try: + self.value = self.evaluate(self.expression) + except KeyboardInterrupt as e: + raise e + except Exception: + pass + + def evaluate(self, expr): + return self._eval(ast.parse(expr).body[0].value) + + def _eval(self, node): + if isinstance(node, ast.Num): # <number> + return node.n + elif isinstance(node, ast.operator): # <operator> + return self.OPERATORS[type(node)] + elif isinstance(node, ast.BinOp): # <left> <operator> <right> + return self._eval(node.op)(self._eval(node.left), self._eval(node.right)) + else: + raise TypeError(node) class BlockFile(BLOCK_FILE_PARENT_CLASS): - ''' - Abstraction class for accessing binary files. - - This class overrides io.FilIO's read and write methods. This guaruntees two things: - - 1. All requested data will be read/written via the read and write methods. - 2. All reads return a str object and all writes can accept either a str or a - bytes object, regardless of the Python interpreter version. - - However, the downside is that other io.FileIO methods won't work properly in Python 3, - namely things that are wrappers around self.read (e.g., readline, readlines, etc). - - This class also provides a read_block method, which is used by binwalk to read in a - block of data, plus some additional data (MAX_TRAILING_SIZE), but on the next block read - pick up at the end of the previous data block (not the end of the additional data). This - is necessary for scans where a signature may span a block boundary. - - The descision to force read to return a str object instead of a bytes object is questionable - for Python 3, it seemed the best way to abstract differences in Python 2/3 from the rest - of the code (especially for people writing plugins) and to add Python 3 support with - minimal code change. - ''' - - # The MAX_TRAILING_SIZE limits the amount of data available to a signature. - # While most headers/signatures are far less than this value, some may reference - # pointers in the header structure which may point well beyond the header itself. - # Passing the entire remaining buffer to libmagic is resource intensive and will - # significantly slow the scan; this value represents a reasonable buffer size to - # pass to libmagic which will not drastically affect scan time. - DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024 - - # Max number of bytes to process at one time. This needs to be large enough to - # limit disk I/O, but small enough to limit the size of processed data blocks. - DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024 - - def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0): - ''' - Class constructor. - - @fname - Path to the file to be opened. - @mode - Mode to open the file in (default: 'r'). - @length - Maximum number of bytes to read from the file via self.block_read(). - @offset - Offset at which to start reading from the file. - @block - Size of data block to read (excluding any trailing size), - @peek - Size of trailing data to append to the end of each block. - @swap - Swap every n bytes of data. - - Returns None. - ''' - self.total_read = 0 - self.swap_size = swap - self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE - self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE - - # Python 2.6 doesn't like modes like 'rb' or 'wb' - mode = mode.replace('b', '') - - try: - self.size = file_size(fname) - except KeyboardInterrupt as e: - raise e - except Exception: - self.size = 0 - - if offset < 0: - self.offset = self.size + offset - else: - self.offset = offset - - if self.offset < 0: - self.offset = 0 - elif self.offset > self.size: - self.offset = self.size - - if offset < 0: - self.length = offset * -1 - elif length: - self.length = length - else: - self.length = self.size - offset - - if self.length < 0: - self.length = 0 - elif self.length > self.size: - self.length = self.size - - if block is not None: - self.block_read_size = block - self.base_block_size = self.block_read_size - - if peek is not None: - self.block_peek_size = peek - self.base_peek_size = self.block_peek_size - - super(self.__class__, self).__init__(fname, mode) - - # Work around for python 2.6 where FileIO._name is not defined - try: - self.name - except AttributeError: - self._name = fname - - self.seek(self.offset) - - def _swap_data_block(self, block): - ''' - Reverses every self.swap_size bytes inside the specified data block. - Size of data block must be a multiple of self.swap_size. - - @block - The data block to swap. - - Returns a swapped string. - ''' - i = 0 - data = "" - - if self.swap_size > 0: - while i < len(block): - data += block[i:i+self.swap_size][::-1] - i += self.swap_size - else: - data = block - - return data - - def reset(self): - self.set_block_size(block=self.base_block_size, peek=self.base_peek_size) - self.seek(self.offset) - - def set_block_size(self, block=None, peek=None): - if block is not None: - self.block_read_size = block - if peek is not None: - self.block_peek_size = peek - - def write(self, data): - ''' - Writes data to the opened file. - - io.FileIO.write does not guaruntee that all data will be written; - this method overrides io.FileIO.write and does guaruntee that all data will be written. - - Returns the number of bytes written. - ''' - n = 0 - l = len(data) - data = str2bytes(data) - - while n < l: - n += super(self.__class__, self).write(data[n:]) - - return n - - def read(self, n=-1): - '''' - Reads up to n bytes of data (or to EOF if n is not specified). - Will not read more than self.length bytes. - - io.FileIO.read does not guaruntee that all requested data will be read; - this method overrides io.FileIO.read and does guaruntee that all data will be read. - - Returns a str object containing the read data. - ''' - l = 0 - data = b'' - - if self.total_read < self.length: - # Don't read more than self.length bytes from the file - if (self.total_read + n) > self.length: - n = self.length - self.total_read - - while n < 0 or l < n: - tmp = super(self.__class__, self).read(n-l) - if tmp: - data += tmp - l += len(tmp) - else: - break - - self.total_read += len(data) - - return self._swap_data_block(bytes2str(data)) - - def peek(self, n=-1): - ''' - Peeks at data in file. - ''' - pos = self.tell() - data = self.read(n) - self.seek(pos) - return data - - def seek(self, n, whence=os.SEEK_SET): - if whence == os.SEEK_SET: - self.total_read = n - self.offset - elif whence == os.SEEK_CUR: - self.total_read += n - elif whence == os.SEEK_END: - self.total_read = self.size + n - - super(self.__class__, self).seek(n, whence) - - def read_block(self): - ''' - Reads in a block of data from the target file. - - Returns a tuple of (str(file block data), block data length). - ''' - data = self.read(self.block_read_size) - dlen = len(data) - data += self.peek(self.block_peek_size) - - return (data, dlen) + ''' + Abstraction class for accessing binary files. + + This class overrides io.FilIO's read and write methods. This guaruntees two things: + + 1. All requested data will be read/written via the read and write methods. + 2. All reads return a str object and all writes can accept either a str or a + bytes object, regardless of the Python interpreter version. + + However, the downside is that other io.FileIO methods won't work properly in Python 3, + namely things that are wrappers around self.read (e.g., readline, readlines, etc). + + This class also provides a read_block method, which is used by binwalk to read in a + block of data, plus some additional data (MAX_TRAILING_SIZE), but on the next block read + pick up at the end of the previous data block (not the end of the additional data). This + is necessary for scans where a signature may span a block boundary. + + The descision to force read to return a str object instead of a bytes object is questionable + for Python 3, it seemed the best way to abstract differences in Python 2/3 from the rest + of the code (especially for people writing plugins) and to add Python 3 support with + minimal code change. + ''' + + # The MAX_TRAILING_SIZE limits the amount of data available to a signature. + # While most headers/signatures are far less than this value, some may reference + # pointers in the header structure which may point well beyond the header itself. + # Passing the entire remaining buffer to libmagic is resource intensive and will + # significantly slow the scan; this value represents a reasonable buffer size to + # pass to libmagic which will not drastically affect scan time. + DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024 + + # Max number of bytes to process at one time. This needs to be large enough to + # limit disk I/O, but small enough to limit the size of processed data blocks. + DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024 + + def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0): + ''' + Class constructor. + + @fname - Path to the file to be opened. + @mode - Mode to open the file in (default: 'r'). + @length - Maximum number of bytes to read from the file via self.block_read(). + @offset - Offset at which to start reading from the file. + @block - Size of data block to read (excluding any trailing size), + @peek - Size of trailing data to append to the end of each block. + @swap - Swap every n bytes of data. + + Returns None. + ''' + self.total_read = 0 + self.swap_size = swap + self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE + self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE + + # Python 2.6 doesn't like modes like 'rb' or 'wb' + mode = mode.replace('b', '') + + try: + self.size = file_size(fname) + except KeyboardInterrupt as e: + raise e + except Exception: + self.size = 0 + + if offset < 0: + self.offset = self.size + offset + else: + self.offset = offset + + if self.offset < 0: + self.offset = 0 + elif self.offset > self.size: + self.offset = self.size + + if offset < 0: + self.length = offset * -1 + elif length: + self.length = length + else: + self.length = self.size - offset + + if self.length < 0: + self.length = 0 + elif self.length > self.size: + self.length = self.size + + if block is not None: + self.block_read_size = block + self.base_block_size = self.block_read_size + + if peek is not None: + self.block_peek_size = peek + self.base_peek_size = self.block_peek_size + + super(self.__class__, self).__init__(fname, mode) + + # Work around for python 2.6 where FileIO._name is not defined + try: + self.name + except AttributeError: + self._name = fname + + self.seek(self.offset) + + def _swap_data_block(self, block): + ''' + Reverses every self.swap_size bytes inside the specified data block. + Size of data block must be a multiple of self.swap_size. + + @block - The data block to swap. + + Returns a swapped string. + ''' + i = 0 + data = "" + + if self.swap_size > 0: + while i < len(block): + data += block[i:i+self.swap_size][::-1] + i += self.swap_size + else: + data = block + + return data + + def reset(self): + self.set_block_size(block=self.base_block_size, peek=self.base_peek_size) + self.seek(self.offset) + + def set_block_size(self, block=None, peek=None): + if block is not None: + self.block_read_size = block + if peek is not None: + self.block_peek_size = peek + + def write(self, data): + ''' + Writes data to the opened file. + + io.FileIO.write does not guaruntee that all data will be written; + this method overrides io.FileIO.write and does guaruntee that all data will be written. + + Returns the number of bytes written. + ''' + n = 0 + l = len(data) + data = str2bytes(data) + + while n < l: + n += super(self.__class__, self).write(data[n:]) + + return n + + def read(self, n=-1): + '''' + Reads up to n bytes of data (or to EOF if n is not specified). + Will not read more than self.length bytes. + + io.FileIO.read does not guaruntee that all requested data will be read; + this method overrides io.FileIO.read and does guaruntee that all data will be read. + + Returns a str object containing the read data. + ''' + l = 0 + data = b'' + + if self.total_read < self.length: + # Don't read more than self.length bytes from the file + if (self.total_read + n) > self.length: + n = self.length - self.total_read + + while n < 0 or l < n: + tmp = super(self.__class__, self).read(n-l) + if tmp: + data += tmp + l += len(tmp) + else: + break + + self.total_read += len(data) + + return self._swap_data_block(bytes2str(data)) + + def peek(self, n=-1): + ''' + Peeks at data in file. + ''' + pos = self.tell() + data = self.read(n) + self.seek(pos) + return data + + def seek(self, n, whence=os.SEEK_SET): + if whence == os.SEEK_SET: + self.total_read = n - self.offset + elif whence == os.SEEK_CUR: + self.total_read += n + elif whence == os.SEEK_END: + self.total_read = self.size + n + + super(self.__class__, self).seek(n, whence) + + def read_block(self): + ''' + Reads in a block of data from the target file. + + Returns a tuple of (str(file block data), block data length). + ''' + data = self.read(self.block_read_size) + dlen = len(data) + data += self.peek(self.block_peek_size) + + return (data, dlen) diff --git a/src/binwalk/core/compat.py b/src/binwalk/core/compat.py index 5359892..cbd1430 100644 --- a/src/binwalk/core/compat.py +++ b/src/binwalk/core/compat.py @@ -7,68 +7,68 @@ import string PY_MAJOR_VERSION = sys.version_info[0] if PY_MAJOR_VERSION > 2: - string.letters = string.ascii_letters + string.letters = string.ascii_letters def iterator(dictionary): - ''' - For cross compatibility between Python 2 and Python 3 dictionaries. - ''' - if PY_MAJOR_VERSION > 2: - return dictionary.items() - else: - return dictionary.iteritems() + ''' + For cross compatibility between Python 2 and Python 3 dictionaries. + ''' + if PY_MAJOR_VERSION > 2: + return dictionary.items() + else: + return dictionary.iteritems() def has_key(dictionary, key): - ''' - For cross compatibility between Python 2 and Python 3 dictionaries. - ''' - if PY_MAJOR_VERSION > 2: - return key in dictionary - else: - return dictionary.has_key(key) + ''' + For cross compatibility between Python 2 and Python 3 dictionaries. + ''' + if PY_MAJOR_VERSION > 2: + return key in dictionary + else: + return dictionary.has_key(key) def get_keys(dictionary): - ''' - For cross compatibility between Python 2 and Python 3 dictionaries. - ''' - if PY_MAJOR_VERSION > 2: - return list(dictionary.keys()) - else: - return dictionary.keys() + ''' + For cross compatibility between Python 2 and Python 3 dictionaries. + ''' + if PY_MAJOR_VERSION > 2: + return list(dictionary.keys()) + else: + return dictionary.keys() def str2bytes(string): - ''' - For cross compatibility between Python 2 and Python 3 strings. - ''' - if isinstance(string, type('')) and PY_MAJOR_VERSION > 2: - return bytes(string, 'latin1') - else: - return string + ''' + For cross compatibility between Python 2 and Python 3 strings. + ''' + if isinstance(string, type('')) and PY_MAJOR_VERSION > 2: + return bytes(string, 'latin1') + else: + return string def bytes2str(bs): - ''' - For cross compatibility between Python 2 and Python 3 strings. - ''' - if isinstance(bs, type(b'')) and PY_MAJOR_VERSION > 2: - return bs.decode('latin1') - else: - return bs + ''' + For cross compatibility between Python 2 and Python 3 strings. + ''' + if isinstance(bs, type(b'')) and PY_MAJOR_VERSION > 2: + return bs.decode('latin1') + else: + return bs def string_decode(string): - ''' - For cross compatibility between Python 2 and Python 3 strings. - ''' - if PY_MAJOR_VERSION > 2: - return bytes(string, 'utf-8').decode('unicode_escape') - else: - return string.decode('string_escape') + ''' + For cross compatibility between Python 2 and Python 3 strings. + ''' + if PY_MAJOR_VERSION > 2: + return bytes(string, 'utf-8').decode('unicode_escape') + else: + return string.decode('string_escape') def user_input(prompt=''): - ''' - For getting raw user input in Python 2 and 3. - ''' - if PY_MAJOR_VERSION > 2: - return input(prompt) - else: - return raw_input(prompt) + ''' + For getting raw user input in Python 2 and 3. + ''' + if PY_MAJOR_VERSION > 2: + return input(prompt) + else: + return raw_input(prompt) diff --git a/src/binwalk/core/display.py b/src/binwalk/core/display.py index b2313c2..b2154f8 100644 --- a/src/binwalk/core/display.py +++ b/src/binwalk/core/display.py @@ -6,171 +6,171 @@ from binwalk.core.compat import * class Display(object): - SCREEN_WIDTH = 0 - HEADER_WIDTH = 150 - DEFAULT_FORMAT = "%s\n" - - def __init__(self, quiet=False, verbose=False, log=None, csv=False, fit_to_screen=False, filter=None): - self.quiet = quiet - self.filter = filter - self.verbose = verbose - self.fit_to_screen = fit_to_screen - self.fp = None - self.csv = None - self.num_columns = 0 - self.custom_verbose_format = "" - self.custom_verbose_args = [] - - self._configure_formatting() - - if log: - self.fp = open(log, "w") - if csv: - self.csv = pycsv.writer(self.fp) - - def format_strings(self, header, result): - self.result_format = result - self.header_format = header - - if self.num_columns == 0: - self.num_columns = len(header.split()) - - def log(self, fmt, columns): - if self.fp: - if self.csv: - self.csv.writerow(columns) - else: - self.fp.write(fmt % tuple(columns)) - - def add_custom_header(self, fmt, args): - self.custom_verbose_format = fmt - self.custom_verbose_args = args - - def header(self, *args, **kwargs): - file_name = None - self.num_columns = len(args) - - if has_key(kwargs, 'file_name'): - file_name = kwargs['file_name'] - - if self.verbose and file_name: - md5sum = binwalk.core.common.file_md5(file_name) - timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - - if self.csv: - self.log("", ["FILE", "MD5SUM", "TIMESTAMP"]) - self.log("", [file_name, md5sum, timestamp]) - - self._fprint("%s", "\n", csv=False) - self._fprint("Scan Time: %s\n", [timestamp], csv=False, filter=False) - self._fprint("Target File: %s\n", [file_name], csv=False, filter=False) - self._fprint("MD5 Checksum: %s\n", [md5sum], csv=False, filter=False) - if self.custom_verbose_format and self.custom_verbose_args: - self._fprint(self.custom_verbose_format, self.custom_verbose_args, csv=False, filter=False) - - self._fprint("%s", "\n", csv=False, filter=False) - self._fprint(self.header_format, args, filter=False) - self._fprint("%s", ["-" * self.HEADER_WIDTH + "\n"], csv=False, filter=False) - - def result(self, *args): - # Convert to list for item assignment - args = list(args) - - # Replace multiple spaces with single spaces. This is to prevent accidentally putting - # four spaces in the description string, which would break auto-formatting. - for i in range(len(args)): - if isinstance(args[i], str): - while " " in args[i]: - args[i] = args[i].replace(" " , " ") - - self._fprint(self.result_format, tuple(args)) - - def footer(self): - self._fprint("%s", "\n", csv=False, filter=False) - - def _fprint(self, fmt, columns, csv=True, stdout=True, filter=True): - line = fmt % tuple(columns) - - if not filter or self.filter.valid_result(line): - if not self.quiet and stdout: - sys.stdout.write(self._format_line(line.strip()) + "\n") - - if self.fp and not (self.csv and not csv): - self.log(fmt, columns) - - def _append_to_data_parts(self, data, start, end): - ''' - Intelligently appends data to self.string_parts. - For use by self._format. - ''' - try: - while data[start] == ' ': - start += 1 - - if start == end: - end = len(data[start:]) - - self.string_parts.append(data[start:end]) - except KeyboardInterrupt as e: - raise e - except Exception: - try: - self.string_parts.append(data[start:]) - except KeyboardInterrupt as e: - raise e - except Exception: - pass - - return start - - def _format_line(self, line): - ''' - Formats a line of text to fit in the terminal window. - For Tim. - ''' - offset = 0 - space_offset = 0 - self.string_parts = [] - delim = '\n' - - if self.fit_to_screen and len(line) > self.SCREEN_WIDTH: - line_columns = line.split(None, self.num_columns-1) - - if line_columns: - delim = '\n' + ' ' * line.rfind(line_columns[-1]) - - while len(line[offset:]) > self.SCREEN_WIDTH: - space_offset = line[offset:offset+self.HEADER_WIDTH].rfind(' ') - if space_offset == -1 or space_offset == 0: - space_offset = self.SCREEN_WIDTH - - self._append_to_data_parts(line, offset, offset+space_offset) - - offset += space_offset - - self._append_to_data_parts(line, offset, offset+len(line[offset:])) - - return delim.join(self.string_parts) - - def _configure_formatting(self): - ''' - Configures output formatting, and fitting output to the current terminal width. - - Returns None. - ''' - self.format_strings(self.DEFAULT_FORMAT, self.DEFAULT_FORMAT) - - if self.fit_to_screen: - try: - import fcntl - import struct - import termios - - # Get the terminal window width - hw = struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) - self.SCREEN_WIDTH = self.HEADER_WIDTH = hw[1] - except KeyboardInterrupt as e: - raise e - except Exception: - pass + SCREEN_WIDTH = 0 + HEADER_WIDTH = 150 + DEFAULT_FORMAT = "%s\n" + + def __init__(self, quiet=False, verbose=False, log=None, csv=False, fit_to_screen=False, filter=None): + self.quiet = quiet + self.filter = filter + self.verbose = verbose + self.fit_to_screen = fit_to_screen + self.fp = None + self.csv = None + self.num_columns = 0 + self.custom_verbose_format = "" + self.custom_verbose_args = [] + + self._configure_formatting() + + if log: + self.fp = open(log, "w") + if csv: + self.csv = pycsv.writer(self.fp) + + def format_strings(self, header, result): + self.result_format = result + self.header_format = header + + if self.num_columns == 0: + self.num_columns = len(header.split()) + + def log(self, fmt, columns): + if self.fp: + if self.csv: + self.csv.writerow(columns) + else: + self.fp.write(fmt % tuple(columns)) + + def add_custom_header(self, fmt, args): + self.custom_verbose_format = fmt + self.custom_verbose_args = args + + def header(self, *args, **kwargs): + file_name = None + self.num_columns = len(args) + + if has_key(kwargs, 'file_name'): + file_name = kwargs['file_name'] + + if self.verbose and file_name: + md5sum = binwalk.core.common.file_md5(file_name) + timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + if self.csv: + self.log("", ["FILE", "MD5SUM", "TIMESTAMP"]) + self.log("", [file_name, md5sum, timestamp]) + + self._fprint("%s", "\n", csv=False) + self._fprint("Scan Time: %s\n", [timestamp], csv=False, filter=False) + self._fprint("Target File: %s\n", [file_name], csv=False, filter=False) + self._fprint("MD5 Checksum: %s\n", [md5sum], csv=False, filter=False) + if self.custom_verbose_format and self.custom_verbose_args: + self._fprint(self.custom_verbose_format, self.custom_verbose_args, csv=False, filter=False) + + self._fprint("%s", "\n", csv=False, filter=False) + self._fprint(self.header_format, args, filter=False) + self._fprint("%s", ["-" * self.HEADER_WIDTH + "\n"], csv=False, filter=False) + + def result(self, *args): + # Convert to list for item assignment + args = list(args) + + # Replace multiple spaces with single spaces. This is to prevent accidentally putting + # four spaces in the description string, which would break auto-formatting. + for i in range(len(args)): + if isinstance(args[i], str): + while " " in args[i]: + args[i] = args[i].replace(" " , " ") + + self._fprint(self.result_format, tuple(args)) + + def footer(self): + self._fprint("%s", "\n", csv=False, filter=False) + + def _fprint(self, fmt, columns, csv=True, stdout=True, filter=True): + line = fmt % tuple(columns) + + if not filter or self.filter.valid_result(line): + if not self.quiet and stdout: + sys.stdout.write(self._format_line(line.strip()) + "\n") + + if self.fp and not (self.csv and not csv): + self.log(fmt, columns) + + def _append_to_data_parts(self, data, start, end): + ''' + Intelligently appends data to self.string_parts. + For use by self._format. + ''' + try: + while data[start] == ' ': + start += 1 + + if start == end: + end = len(data[start:]) + + self.string_parts.append(data[start:end]) + except KeyboardInterrupt as e: + raise e + except Exception: + try: + self.string_parts.append(data[start:]) + except KeyboardInterrupt as e: + raise e + except Exception: + pass + + return start + + def _format_line(self, line): + ''' + Formats a line of text to fit in the terminal window. + For Tim. + ''' + offset = 0 + space_offset = 0 + self.string_parts = [] + delim = '\n' + + if self.fit_to_screen and len(line) > self.SCREEN_WIDTH: + line_columns = line.split(None, self.num_columns-1) + + if line_columns: + delim = '\n' + ' ' * line.rfind(line_columns[-1]) + + while len(line[offset:]) > self.SCREEN_WIDTH: + space_offset = line[offset:offset+self.HEADER_WIDTH].rfind(' ') + if space_offset == -1 or space_offset == 0: + space_offset = self.SCREEN_WIDTH + + self._append_to_data_parts(line, offset, offset+space_offset) + + offset += space_offset + + self._append_to_data_parts(line, offset, offset+len(line[offset:])) + + return delim.join(self.string_parts) + + def _configure_formatting(self): + ''' + Configures output formatting, and fitting output to the current terminal width. + + Returns None. + ''' + self.format_strings(self.DEFAULT_FORMAT, self.DEFAULT_FORMAT) + + if self.fit_to_screen: + try: + import fcntl + import struct + import termios + + # Get the terminal window width + hw = struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) + self.SCREEN_WIDTH = self.HEADER_WIDTH = hw[1] + except KeyboardInterrupt as e: + raise e + except Exception: + pass diff --git a/src/binwalk/core/filter.py b/src/binwalk/core/filter.py index 729773e..d016eab 100644 --- a/src/binwalk/core/filter.py +++ b/src/binwalk/core/filter.py @@ -5,205 +5,205 @@ from binwalk.core.compat import * class FilterType(object): - FILTER_INCLUDE = 0 - FILTER_EXCLUDE = 1 + FILTER_INCLUDE = 0 + FILTER_EXCLUDE = 1 - def __init__(self, **kwargs): - self.type = None - self.filter = None - self.regex = None + def __init__(self, **kwargs): + self.type = None + self.filter = None + self.regex = None - for (k,v) in iterator(kwargs): - setattr(self, k, v) + for (k,v) in iterator(kwargs): + setattr(self, k, v) - if self.regex is None: - self.regex = re.compile(self.filter) + if self.regex is None: + self.regex = re.compile(self.filter) class FilterInclude(FilterType): - - def __init__(self, **kwargs): - super(FilterInclude, self).__init__(**kwargs) - self.type = self.FILTER_INCLUDE + + def __init__(self, **kwargs): + super(FilterInclude, self).__init__(**kwargs) + self.type = self.FILTER_INCLUDE class FilterExclude(FilterType): - - def __init__(self, **kwargs): - super(FilterExclude, self).__init__(**kwargs) - self.type = self.FILTER_EXCLUDE + + def __init__(self, **kwargs): + super(FilterExclude, self).__init__(**kwargs) + self.type = self.FILTER_EXCLUDE class Filter(object): - ''' - Class to filter results based on include/exclude rules and false positive detection. - An instance of this class is available via the Binwalk.filter object. - Note that all filter strings should be in lower case. - ''' - - # If the result returned by libmagic is "data" or contains the text - # 'invalid' or a backslash are known to be invalid/false positives. - DATA_RESULT = "data" - INVALID_RESULTS = ["invalid", "\\"] - INVALID_RESULT = "invalid" - NON_PRINTABLE_RESULT = "\\" - - def __init__(self, show_invalid_results=False): - ''' - Class constructor. - - @show_invalid_results - Set to True to display results marked as invalid. - - Returns None. - ''' - self.filters = [] - self.grep_filters = [] - self.show_invalid_results = show_invalid_results - self.exclusive_filter = False - self.smart = Signature(self) - - def include(self, match, exclusive=True): - ''' - Adds a new filter which explicitly includes results that contain - the specified matching text. - - @match - Regex, or list of regexs, to match. - @exclusive - If True, then results that do not explicitly contain - a FILTER_INCLUDE match will be excluded. If False, - signatures that contain the FILTER_INCLUDE match will - be included in the scan, but will not cause non-matching - results to be excluded. - - Returns None. - ''' - if not isinstance(match, type([])): - matches = [match] - else: - matches = match - - for m in matches: - if m: - if exclusive and not self.exclusive_filter: - self.exclusive_filter = True - - self.filters.append(FilterInclude(filter=m)) - - def exclude(self, match): - ''' - Adds a new filter which explicitly excludes results that contain - the specified matching text. - - @match - Regex, or list of regexs, to match. - - Returns None. - ''' - if not isinstance(match, type([])): - matches = [match] - else: - matches = match - - for m in matches: - if m: - self.filters.append(FilterExclude(filter=m)) - - def filter(self, data): - ''' - Checks to see if a given string should be excluded from or included in the results. - Called internally by Binwalk.scan(). - - @data - String to check. - - Returns FILTER_INCLUDE if the string should be included. - Returns FILTER_EXCLUDE if the string should be excluded. - ''' - data = data.lower() - - # Loop through the filters to see if any of them are a match. - # If so, return the registered type for the matching filter (FILTER_INCLUDE || FILTER_EXCLUDE). - for f in self.filters: - if f.regex.search(data): - return f.type - - # If there was not explicit match and exclusive filtering is enabled, return FILTER_EXCLUDE. - if self.exclusive_filter: - return FilterType.FILTER_EXCLUDE - - return FilterType.FILTER_INCLUDE - - def valid_result(self, data): - ''' - Checks if the given string contains invalid data. - - @data - String to validate. - - Returns True if data is valid, False if invalid. - ''' - # A result of 'data' is never ever valid (for libmagic results) - if data == self.DATA_RESULT: - return False - - # Make sure this result wasn't filtered - if self.filter(data) == FilterType.FILTER_EXCLUDE: - return False - - # If showing invalid results, just return True without further checking. - if self.show_invalid_results: - return True - - # Don't include quoted strings or keyword arguments in this search, as - # strings from the target file may legitimately contain the INVALID_RESULT text. - if self.INVALID_RESULT in common.strip_quoted_strings(self.smart.strip_tags(data)): - return False - - # There should be no non-printable characters in any of the data - if self.NON_PRINTABLE_RESULT in data: - return False - - return True - - def grep(self, data=None, filters=[]): - ''' - Add or check case-insensitive grep filters against the supplied data string. - - @data - Data string to check grep filters against. Not required if filters is specified. - @filters - Regex, or list of regexs, to add to the grep filters list. Not required if data is specified. - - Returns None if data is not specified. - If data is specified, returns True if the data contains a grep filter, or if no grep filters exist. - If data is specified, returns False if the data does not contain any grep filters. - ''' - # Add any specified filters to self.grep_filters - if filters: - if not isinstance(filters, type([])): - gfilters = [filters] - else: - gfilters = filters - - for gfilter in gfilters: - # Filters are case insensitive - self.grep_filters.append(re.compile(gfilter)) - - # Check the data against all grep filters until one is found - if data is not None: - # If no grep filters have been created, always return True - if not self.grep_filters: - return True - - # Filters are case insensitive - data = data.lower() - - # If a filter exists in data, return True - for gfilter in self.grep_filters: - if gfilter.search(data): - return True - - # Else, return False - return False - - return None - - def clear(self): - ''' - Clears all include, exclude and grep filters. - - Retruns None. - ''' - self.filters = [] - self.grep_filters = [] + ''' + Class to filter results based on include/exclude rules and false positive detection. + An instance of this class is available via the Binwalk.filter object. + Note that all filter strings should be in lower case. + ''' + + # If the result returned by libmagic is "data" or contains the text + # 'invalid' or a backslash are known to be invalid/false positives. + DATA_RESULT = "data" + INVALID_RESULTS = ["invalid", "\\"] + INVALID_RESULT = "invalid" + NON_PRINTABLE_RESULT = "\\" + + def __init__(self, show_invalid_results=False): + ''' + Class constructor. + + @show_invalid_results - Set to True to display results marked as invalid. + + Returns None. + ''' + self.filters = [] + self.grep_filters = [] + self.show_invalid_results = show_invalid_results + self.exclusive_filter = False + self.smart = Signature(self) + + def include(self, match, exclusive=True): + ''' + Adds a new filter which explicitly includes results that contain + the specified matching text. + + @match - Regex, or list of regexs, to match. + @exclusive - If True, then results that do not explicitly contain + a FILTER_INCLUDE match will be excluded. If False, + signatures that contain the FILTER_INCLUDE match will + be included in the scan, but will not cause non-matching + results to be excluded. + + Returns None. + ''' + if not isinstance(match, type([])): + matches = [match] + else: + matches = match + + for m in matches: + if m: + if exclusive and not self.exclusive_filter: + self.exclusive_filter = True + + self.filters.append(FilterInclude(filter=m)) + + def exclude(self, match): + ''' + Adds a new filter which explicitly excludes results that contain + the specified matching text. + + @match - Regex, or list of regexs, to match. + + Returns None. + ''' + if not isinstance(match, type([])): + matches = [match] + else: + matches = match + + for m in matches: + if m: + self.filters.append(FilterExclude(filter=m)) + + def filter(self, data): + ''' + Checks to see if a given string should be excluded from or included in the results. + Called internally by Binwalk.scan(). + + @data - String to check. + + Returns FILTER_INCLUDE if the string should be included. + Returns FILTER_EXCLUDE if the string should be excluded. + ''' + data = data.lower() + + # Loop through the filters to see if any of them are a match. + # If so, return the registered type for the matching filter (FILTER_INCLUDE || FILTER_EXCLUDE). + for f in self.filters: + if f.regex.search(data): + return f.type + + # If there was not explicit match and exclusive filtering is enabled, return FILTER_EXCLUDE. + if self.exclusive_filter: + return FilterType.FILTER_EXCLUDE + + return FilterType.FILTER_INCLUDE + + def valid_result(self, data): + ''' + Checks if the given string contains invalid data. + + @data - String to validate. + + Returns True if data is valid, False if invalid. + ''' + # A result of 'data' is never ever valid (for libmagic results) + if data == self.DATA_RESULT: + return False + + # Make sure this result wasn't filtered + if self.filter(data) == FilterType.FILTER_EXCLUDE: + return False + + # If showing invalid results, just return True without further checking. + if self.show_invalid_results: + return True + + # Don't include quoted strings or keyword arguments in this search, as + # strings from the target file may legitimately contain the INVALID_RESULT text. + if self.INVALID_RESULT in common.strip_quoted_strings(self.smart.strip_tags(data)): + return False + + # There should be no non-printable characters in any of the data + if self.NON_PRINTABLE_RESULT in data: + return False + + return True + + def grep(self, data=None, filters=[]): + ''' + Add or check case-insensitive grep filters against the supplied data string. + + @data - Data string to check grep filters against. Not required if filters is specified. + @filters - Regex, or list of regexs, to add to the grep filters list. Not required if data is specified. + + Returns None if data is not specified. + If data is specified, returns True if the data contains a grep filter, or if no grep filters exist. + If data is specified, returns False if the data does not contain any grep filters. + ''' + # Add any specified filters to self.grep_filters + if filters: + if not isinstance(filters, type([])): + gfilters = [filters] + else: + gfilters = filters + + for gfilter in gfilters: + # Filters are case insensitive + self.grep_filters.append(re.compile(gfilter)) + + # Check the data against all grep filters until one is found + if data is not None: + # If no grep filters have been created, always return True + if not self.grep_filters: + return True + + # Filters are case insensitive + data = data.lower() + + # If a filter exists in data, return True + for gfilter in self.grep_filters: + if gfilter.search(data): + return True + + # Else, return False + return False + + return None + + def clear(self): + ''' + Clears all include, exclude and grep filters. + + Retruns None. + ''' + self.filters = [] + self.grep_filters = [] diff --git a/src/binwalk/core/module.py b/src/binwalk/core/module.py index 7c50677..54880ce 100644 --- a/src/binwalk/core/module.py +++ b/src/binwalk/core/module.py @@ -10,777 +10,777 @@ import binwalk.core.plugin from binwalk.core.compat import * class Option(object): - ''' - A container class that allows modules to declare command line options. - ''' - - def __init__(self, kwargs={}, priority=0, description="", short="", long="", type=None, dtype=None): - ''' - Class constructor. - - @kwargs - A dictionary of kwarg key-value pairs affected by this command line option. - @priority - A value from 0 to 100. Higher priorities will override kwarg values set by lower priority options. - @description - A description to be displayed in the help output. - @short - The short option to use (optional). - @long - The long option to use (if None, this option will not be displayed in help output). - @type - The accepted data type (one of: io.FileIO/argparse.FileType/binwalk.core.common.BlockFile, list, str, int, float). - @dtype - The displayed accepted type string, to be shown in help output. - - Returns None. - ''' - self.kwargs = kwargs - self.priority = priority - self.description = description - self.short = short - self.long = long - self.type = type - self.dtype = dtype - - if not self.dtype and self.type: - if self.type in [io.FileIO, argparse.FileType, binwalk.core.common.BlockFile]: - self.dtype = 'file' - elif self.type in [int, float, str]: - self.dtype = self.type.__name__ - else: - self.dtype = str.__name__ + ''' + A container class that allows modules to declare command line options. + ''' + + def __init__(self, kwargs={}, priority=0, description="", short="", long="", type=None, dtype=None): + ''' + Class constructor. + + @kwargs - A dictionary of kwarg key-value pairs affected by this command line option. + @priority - A value from 0 to 100. Higher priorities will override kwarg values set by lower priority options. + @description - A description to be displayed in the help output. + @short - The short option to use (optional). + @long - The long option to use (if None, this option will not be displayed in help output). + @type - The accepted data type (one of: io.FileIO/argparse.FileType/binwalk.core.common.BlockFile, list, str, int, float). + @dtype - The displayed accepted type string, to be shown in help output. + + Returns None. + ''' + self.kwargs = kwargs + self.priority = priority + self.description = description + self.short = short + self.long = long + self.type = type + self.dtype = dtype + + if not self.dtype and self.type: + if self.type in [io.FileIO, argparse.FileType, binwalk.core.common.BlockFile]: + self.dtype = 'file' + elif self.type in [int, float, str]: + self.dtype = self.type.__name__ + else: + self.dtype = str.__name__ class Kwarg(object): - ''' - A container class allowing modules to specify their expected __init__ kwarg(s). - ''' - - def __init__(self, name="", default=None, description=""): - ''' - Class constructor. - - @name - Kwarg name. - @default - Default kwarg value. - @description - Description string. - - Return None. - ''' - self.name = name - self.default = default - self.description = description + ''' + A container class allowing modules to specify their expected __init__ kwarg(s). + ''' + + def __init__(self, name="", default=None, description=""): + ''' + Class constructor. + + @name - Kwarg name. + @default - Default kwarg value. + @description - Description string. + + Return None. + ''' + self.name = name + self.default = default + self.description = description class Dependency(object): - ''' - A container class for declaring module dependencies. - ''' + ''' + A container class for declaring module dependencies. + ''' - def __init__(self, attribute="", name="", kwargs={}): - self.attribute = attribute - self.name = name - self.kwargs = kwargs - self.module = None + def __init__(self, attribute="", name="", kwargs={}): + self.attribute = attribute + self.name = name + self.kwargs = kwargs + self.module = None class Result(object): - ''' - Generic class for storing and accessing scan results. - ''' - - def __init__(self, **kwargs): - ''' - Class constructor. - - @offset - The file offset of the result. - @size - Size of the result, if known. - @description - The result description, as displayed to the user. - @module - Name of the module that generated the result. - @file - The file object of the scanned file. - @valid - Set to True if the result if value, False if invalid. - @display - Set to True to display the result to the user, False to hide it. - @extract - Set to True to flag this result for extraction. - @plot - Set to Flase to exclude this result from entropy plots. - @name - Name of the result found (None if not applicable or unknown). - - Provide additional kwargs as necessary. - Returns None. - ''' - self.offset = 0 - self.size = 0 - self.description = '' - self.module = '' - self.file = None - self.valid = True - self.display = True - self.extract = True - self.plot = True - self.name = None - - for (k, v) in iterator(kwargs): - setattr(self, k, v) + ''' + Generic class for storing and accessing scan results. + ''' + + def __init__(self, **kwargs): + ''' + Class constructor. + + @offset - The file offset of the result. + @size - Size of the result, if known. + @description - The result description, as displayed to the user. + @module - Name of the module that generated the result. + @file - The file object of the scanned file. + @valid - Set to True if the result if value, False if invalid. + @display - Set to True to display the result to the user, False to hide it. + @extract - Set to True to flag this result for extraction. + @plot - Set to Flase to exclude this result from entropy plots. + @name - Name of the result found (None if not applicable or unknown). + + Provide additional kwargs as necessary. + Returns None. + ''' + self.offset = 0 + self.size = 0 + self.description = '' + self.module = '' + self.file = None + self.valid = True + self.display = True + self.extract = True + self.plot = True + self.name = None + + for (k, v) in iterator(kwargs): + setattr(self, k, v) class Error(Result): - ''' - A subclass of binwalk.core.module.Result. - ''' - - def __init__(self, **kwargs): - ''' - Accepts all the same kwargs as binwalk.core.module.Result, but the following are also added: + ''' + A subclass of binwalk.core.module.Result. + ''' + + def __init__(self, **kwargs): + ''' + Accepts all the same kwargs as binwalk.core.module.Result, but the following are also added: - @exception - In case of an exception, this is the exception object. + @exception - In case of an exception, this is the exception object. - Returns None. - ''' - self.exception = None - Result.__init__(self, **kwargs) + Returns None. + ''' + self.exception = None + Result.__init__(self, **kwargs) class Module(object): - ''' - All module classes must be subclassed from this. - ''' - # The module title, as displayed in help output - TITLE = "" - - # A list of binwalk.core.module.Option command line options - CLI = [] - - # A list of binwalk.core.module.Kwargs accepted by __init__ - KWARGS = [] - - # A list of default dependencies for all modules; do not override this unless you - # understand the consequences of doing so. - DEFAULT_DEPENDS = [ - Dependency(name='General', - attribute='config'), - Dependency(name='Extractor', - attribute='extractor'), - ] - - # A list of dependencies that can be filled in as needed by each individual module. - DEPENDS = [] - - # Format string for printing the header during a scan. - # Must be set prior to calling self.header. - HEADER_FORMAT = "%-12s %-12s %s\n" - - # Format string for printing each result during a scan. - # Must be set prior to calling self.result. - RESULT_FORMAT = "%-12d 0x%-12X %s\n" - - # Format string for printing custom information in the verbose header output. - # Must be set prior to calling self.header. - VERBOSE_FORMAT = "" - - # The header to print during a scan. - # Set to None to not print a header. - # Note that this will be formatted per the HEADER_FORMAT format string. - # Must be set prior to calling self.header. - HEADER = ["DECIMAL", "HEXADECIMAL", "DESCRIPTION"] - - # The Result attribute names to print during a scan, as provided to the self.results method. - # Set to None to not print any results. - # Note that these will be formatted per the RESULT_FORMAT format string. - # Must be set prior to calling self.result. - RESULT = ["offset", "offset", "description"] - - # The custom data to print in the verbose header output. - # Note that these will be formatted per the VERBOSE_FORMAT format string. - # Must be set prior to calling self.header. - VERBOSE = [] - - # If set to True, the progress status will be automatically updated for each result - # containing valid file and offset attributes. - AUTO_UPDATE_STATUS = True - - # Modules with higher priorities are executed first - PRIORITY = 5 - - # Modules with a higher order are displayed first in help output - ORDER = 5 - - # Set to False if this is not a primary module (e.g., General, Extractor modules) - PRIMARY = True - - def __init__(self, **kwargs): - self.errors = [] - self.results = [] - - self.target_file_list = [] - self.status = None - self.enabled = False - self.current_target_file_name = None - self.name = self.__class__.__name__ - self.plugins = binwalk.core.plugin.Plugins(self) - self.dependencies = self.DEFAULT_DEPENDS + self.DEPENDS - - process_kwargs(self, kwargs) - - self.plugins.load_plugins() - - try: - self.load() - except KeyboardInterrupt as e: - raise e - except Exception as e: - self.error(exception=e) - - try: - self.target_file_list = list(self.config.target_files) - except AttributeError as e: - pass - - def __del__(self): - return None - - def __enter__(self): - return self - - def __exit__(self, x, z, y): - return None - - def load(self): - ''' - Invoked at module load time. - May be overridden by the module sub-class. - ''' - return None - - def reset(self): - ''' - Invoked only for dependency modules immediately prior to starting a new primary module. - ''' - return None - - def init(self): - ''' - Invoked prior to self.run. - May be overridden by the module sub-class. - - Returns None. - ''' - return None - - def run(self): - ''' - Executes the main module routine. - Must be overridden by the module sub-class. - - Returns True on success, False on failure. - ''' - return False - - def callback(self, r): - ''' - Processes the result from all modules. Called for all dependency modules when a valid result is found. - - @r - The result, an instance of binwalk.core.module.Result. - - Returns None. - ''' - return None - - def validate(self, r): - ''' - Validates the result. - May be overridden by the module sub-class. - - @r - The result, an instance of binwalk.core.module.Result. - - Returns None. - ''' - r.valid = True - return None - - def _plugins_pre_scan(self): - self.plugins.pre_scan_callbacks(self) - - def _plugins_post_scan(self): - self.plugins.post_scan_callbacks(self) - - def _plugins_result(self, r): - self.plugins.scan_callbacks(r) - - def _build_display_args(self, r): - args = [] - - if self.RESULT: - if type(self.RESULT) != type([]): - result = [self.RESULT] - else: - result = self.RESULT - - for name in result: - args.append(getattr(r, name)) - - return args - - def next_file(self): - ''' - Gets the next file to be scanned (including pending extracted files, if applicable). - Also re/initializes self.status. - All modules should access the target file list through this method. - ''' - fp = None - - # Add any pending extracted files to the target_files list and reset the extractor's pending file list - self.target_file_list += [self.config.open_file(f) for f in self.extractor.pending] - self.extractor.pending = [] - - if self.target_file_list: - fp = self.target_file_list.pop(0) - self.status.clear() - self.status.total = fp.length - - if fp is not None: - self.current_target_file_name = fp.name - else: - self.current_target_file_name = None - - return fp - - def clear(self, results=True, errors=True): - ''' - Clears results and errors lists. - ''' - if results: - self.results = [] - if errors: - self.errors = [] - - def result(self, r=None, **kwargs): - ''' - Validates a result, stores it in self.results and prints it. - Accepts the same kwargs as the binwalk.core.module.Result class. - - @r - An existing instance of binwalk.core.module.Result. - - Returns an instance of binwalk.core.module.Result. - ''' - if r is None: - r = Result(**kwargs) - - r.module = self.__class__.__name__ - - # Any module that is reporting results, valid or not, should be marked as enabled - if not self.enabled: - self.enabled = True - - self.validate(r) - self._plugins_result(r) - - for dependency in self.dependencies: - try: - getattr(self, dependency.attribute).callback(r) - except AttributeError: - continue - - if r.valid: - self.results.append(r) - - # Update the progress status automatically if it is not being done manually by the module - if r.offset and r.file and self.AUTO_UPDATE_STATUS: - self.status.total = r.file.length - self.status.completed = r.offset - - if r.display: - display_args = self._build_display_args(r) - if display_args: - self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT) - self.config.display.result(*display_args) - - return r - - def error(self, **kwargs): - ''' - Stores the specified error in self.errors. - - Accepts the same kwargs as the binwalk.core.module.Error class. - - Returns None. - ''' - exception_header_width = 100 - - e = Error(**kwargs) - e.module = self.__class__.__name__ - - self.errors.append(e) - - if e.exception: - sys.stderr.write("\n" + e.module + " Exception: " + str(e.exception) + "\n") - sys.stderr.write("-" * exception_header_width + "\n") - traceback.print_exc(file=sys.stderr) - sys.stderr.write("-" * exception_header_width + "\n\n") - elif e.description: - sys.stderr.write("\n" + e.module + " Error: " + e.description + "\n\n") - - def header(self): - self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT) - self.config.display.add_custom_header(self.VERBOSE_FORMAT, self.VERBOSE) - - if type(self.HEADER) == type([]): - self.config.display.header(*self.HEADER, file_name=self.current_target_file_name) - elif self.HEADER: - self.config.display.header(self.HEADER, file_name=self.current_target_file_name) - - def footer(self): - self.config.display.footer() - - def main(self, parent): - ''' - Responsible for calling self.init, initializing self.config.display, and calling self.run. - - Returns the value returned from self.run. - ''' - self.status = parent.status - self.modules = parent.loaded_modules - - # Reset all dependency modules - for dependency in self.dependencies: - if hasattr(self, dependency.attribute): - getattr(self, dependency.attribute).reset() - - try: - self.init() - except KeyboardInterrupt as e: - raise e - except Exception as e: - self.error(exception=e) - return False - - try: - self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT) - except KeyboardInterrupt as e: - raise e - except Exception as e: - self.error(exception=e) - return False - - self._plugins_pre_scan() - - try: - retval = self.run() - except KeyboardInterrupt as e: - raise e - except Exception as e: - self.error(exception=e) - return False - - self._plugins_post_scan() - - return retval + ''' + All module classes must be subclassed from this. + ''' + # The module title, as displayed in help output + TITLE = "" + + # A list of binwalk.core.module.Option command line options + CLI = [] + + # A list of binwalk.core.module.Kwargs accepted by __init__ + KWARGS = [] + + # A list of default dependencies for all modules; do not override this unless you + # understand the consequences of doing so. + DEFAULT_DEPENDS = [ + Dependency(name='General', + attribute='config'), + Dependency(name='Extractor', + attribute='extractor'), + ] + + # A list of dependencies that can be filled in as needed by each individual module. + DEPENDS = [] + + # Format string for printing the header during a scan. + # Must be set prior to calling self.header. + HEADER_FORMAT = "%-12s %-12s %s\n" + + # Format string for printing each result during a scan. + # Must be set prior to calling self.result. + RESULT_FORMAT = "%-12d 0x%-12X %s\n" + + # Format string for printing custom information in the verbose header output. + # Must be set prior to calling self.header. + VERBOSE_FORMAT = "" + + # The header to print during a scan. + # Set to None to not print a header. + # Note that this will be formatted per the HEADER_FORMAT format string. + # Must be set prior to calling self.header. + HEADER = ["DECIMAL", "HEXADECIMAL", "DESCRIPTION"] + + # The Result attribute names to print during a scan, as provided to the self.results method. + # Set to None to not print any results. + # Note that these will be formatted per the RESULT_FORMAT format string. + # Must be set prior to calling self.result. + RESULT = ["offset", "offset", "description"] + + # The custom data to print in the verbose header output. + # Note that these will be formatted per the VERBOSE_FORMAT format string. + # Must be set prior to calling self.header. + VERBOSE = [] + + # If set to True, the progress status will be automatically updated for each result + # containing valid file and offset attributes. + AUTO_UPDATE_STATUS = True + + # Modules with higher priorities are executed first + PRIORITY = 5 + + # Modules with a higher order are displayed first in help output + ORDER = 5 + + # Set to False if this is not a primary module (e.g., General, Extractor modules) + PRIMARY = True + + def __init__(self, **kwargs): + self.errors = [] + self.results = [] + + self.target_file_list = [] + self.status = None + self.enabled = False + self.current_target_file_name = None + self.name = self.__class__.__name__ + self.plugins = binwalk.core.plugin.Plugins(self) + self.dependencies = self.DEFAULT_DEPENDS + self.DEPENDS + + process_kwargs(self, kwargs) + + self.plugins.load_plugins() + + try: + self.load() + except KeyboardInterrupt as e: + raise e + except Exception as e: + self.error(exception=e) + + try: + self.target_file_list = list(self.config.target_files) + except AttributeError as e: + pass + + def __del__(self): + return None + + def __enter__(self): + return self + + def __exit__(self, x, z, y): + return None + + def load(self): + ''' + Invoked at module load time. + May be overridden by the module sub-class. + ''' + return None + + def reset(self): + ''' + Invoked only for dependency modules immediately prior to starting a new primary module. + ''' + return None + + def init(self): + ''' + Invoked prior to self.run. + May be overridden by the module sub-class. + + Returns None. + ''' + return None + + def run(self): + ''' + Executes the main module routine. + Must be overridden by the module sub-class. + + Returns True on success, False on failure. + ''' + return False + + def callback(self, r): + ''' + Processes the result from all modules. Called for all dependency modules when a valid result is found. + + @r - The result, an instance of binwalk.core.module.Result. + + Returns None. + ''' + return None + + def validate(self, r): + ''' + Validates the result. + May be overridden by the module sub-class. + + @r - The result, an instance of binwalk.core.module.Result. + + Returns None. + ''' + r.valid = True + return None + + def _plugins_pre_scan(self): + self.plugins.pre_scan_callbacks(self) + + def _plugins_post_scan(self): + self.plugins.post_scan_callbacks(self) + + def _plugins_result(self, r): + self.plugins.scan_callbacks(r) + + def _build_display_args(self, r): + args = [] + + if self.RESULT: + if type(self.RESULT) != type([]): + result = [self.RESULT] + else: + result = self.RESULT + + for name in result: + args.append(getattr(r, name)) + + return args + + def next_file(self): + ''' + Gets the next file to be scanned (including pending extracted files, if applicable). + Also re/initializes self.status. + All modules should access the target file list through this method. + ''' + fp = None + + # Add any pending extracted files to the target_files list and reset the extractor's pending file list + self.target_file_list += [self.config.open_file(f) for f in self.extractor.pending] + self.extractor.pending = [] + + if self.target_file_list: + fp = self.target_file_list.pop(0) + self.status.clear() + self.status.total = fp.length + + if fp is not None: + self.current_target_file_name = fp.name + else: + self.current_target_file_name = None + + return fp + + def clear(self, results=True, errors=True): + ''' + Clears results and errors lists. + ''' + if results: + self.results = [] + if errors: + self.errors = [] + + def result(self, r=None, **kwargs): + ''' + Validates a result, stores it in self.results and prints it. + Accepts the same kwargs as the binwalk.core.module.Result class. + + @r - An existing instance of binwalk.core.module.Result. + + Returns an instance of binwalk.core.module.Result. + ''' + if r is None: + r = Result(**kwargs) + + r.module = self.__class__.__name__ + + # Any module that is reporting results, valid or not, should be marked as enabled + if not self.enabled: + self.enabled = True + + self.validate(r) + self._plugins_result(r) + + for dependency in self.dependencies: + try: + getattr(self, dependency.attribute).callback(r) + except AttributeError: + continue + + if r.valid: + self.results.append(r) + + # Update the progress status automatically if it is not being done manually by the module + if r.offset and r.file and self.AUTO_UPDATE_STATUS: + self.status.total = r.file.length + self.status.completed = r.offset + + if r.display: + display_args = self._build_display_args(r) + if display_args: + self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT) + self.config.display.result(*display_args) + + return r + + def error(self, **kwargs): + ''' + Stores the specified error in self.errors. + + Accepts the same kwargs as the binwalk.core.module.Error class. + + Returns None. + ''' + exception_header_width = 100 + + e = Error(**kwargs) + e.module = self.__class__.__name__ + + self.errors.append(e) + + if e.exception: + sys.stderr.write("\n" + e.module + " Exception: " + str(e.exception) + "\n") + sys.stderr.write("-" * exception_header_width + "\n") + traceback.print_exc(file=sys.stderr) + sys.stderr.write("-" * exception_header_width + "\n\n") + elif e.description: + sys.stderr.write("\n" + e.module + " Error: " + e.description + "\n\n") + + def header(self): + self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT) + self.config.display.add_custom_header(self.VERBOSE_FORMAT, self.VERBOSE) + + if type(self.HEADER) == type([]): + self.config.display.header(*self.HEADER, file_name=self.current_target_file_name) + elif self.HEADER: + self.config.display.header(self.HEADER, file_name=self.current_target_file_name) + + def footer(self): + self.config.display.footer() + + def main(self, parent): + ''' + Responsible for calling self.init, initializing self.config.display, and calling self.run. + + Returns the value returned from self.run. + ''' + self.status = parent.status + self.modules = parent.loaded_modules + + # Reset all dependency modules + for dependency in self.dependencies: + if hasattr(self, dependency.attribute): + getattr(self, dependency.attribute).reset() + + try: + self.init() + except KeyboardInterrupt as e: + raise e + except Exception as e: + self.error(exception=e) + return False + + try: + self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT) + except KeyboardInterrupt as e: + raise e + except Exception as e: + self.error(exception=e) + return False + + self._plugins_pre_scan() + + try: + retval = self.run() + except KeyboardInterrupt as e: + raise e + except Exception as e: + self.error(exception=e) + return False + + self._plugins_post_scan() + + return retval class Status(object): - ''' - Class used for tracking module status (e.g., % complete). - ''' + ''' + Class used for tracking module status (e.g., % complete). + ''' - def __init__(self, **kwargs): - self.kwargs = kwargs - self.clear() + def __init__(self, **kwargs): + self.kwargs = kwargs + self.clear() - def clear(self): - for (k,v) in iterator(self.kwargs): - setattr(self, k, v) + def clear(self): + for (k,v) in iterator(self.kwargs): + setattr(self, k, v) class ModuleException(Exception): - ''' - Module exception class. - Nothing special here except the name. - ''' - pass + ''' + Module exception class. + Nothing special here except the name. + ''' + pass class Modules(object): - ''' - Main class used for running and managing modules. - ''' - - def __init__(self, *argv, **kargv): - ''' - Class constructor. - - @argv - List of command line options. Must not include the program name (e.g., sys.argv[1:]). - @kargv - Keyword dictionary of command line options. - - Returns None. - ''' - self.arguments = [] - self.loaded_modules = {} - self.default_dependency_modules = {} - self.status = Status(completed=0, total=0) - - self._set_arguments(list(argv), kargv) - - def _set_arguments(self, argv=[], kargv={}): - for (k,v) in iterator(kargv): - k = self._parse_api_opt(k) - if v not in [True, False, None]: - argv.append("%s %s" % (k, v)) - else: - argv.append(k) - - if not argv and not self.arguments: - self.arguments = sys.argv[1:] - elif argv: - self.arguments = argv - - def _parse_api_opt(self, opt): - # If the argument already starts with a hyphen, don't add hyphens in front of it - if opt.startswith('-'): - return opt - # Short options are only 1 character - elif len(opt) == 1: - return '-' + opt - else: - return '--' + opt - - def list(self, attribute="run"): - ''' - Finds all modules with the specified attribute. - - @attribute - The desired module attribute. - - Returns a list of modules that contain the specified attribute, in the order they should be executed. - ''' - import binwalk.modules - modules = {} - - for (name, module) in inspect.getmembers(binwalk.modules): - if inspect.isclass(module) and hasattr(module, attribute): - modules[module] = module.PRIORITY - - return sorted(modules, key=modules.get, reverse=True) - - def help(self): - ''' - Generates formatted help output. - - Returns the help string. - ''' - modules = {} - help_string = "\nBinwalk v%s\nCraig Heffner, http://www.binwalk.org\n" % binwalk.core.settings.Settings.VERSION - - # Build a dictionary of modules and their ORDER attributes. - # This makes it easy to sort modules by their ORDER attribute for display. - for module in self.list(attribute="CLI"): - if module.CLI: - modules[module] = module.ORDER - - for module in sorted(modules, key=modules.get, reverse=True): - help_string += "\n%s Options:\n" % module.TITLE - - for module_option in module.CLI: - if module_option.long: - long_opt = '--' + module_option.long - - if module_option.dtype: - optargs = "=<%s>" % module_option.dtype - else: - optargs = "" - - if module_option.short: - short_opt = "-" + module_option.short + "," - else: - short_opt = " " - - fmt = " %%s %%s%%-%ds%%s\n" % (32-len(long_opt)) - help_string += fmt % (short_opt, long_opt, optargs, module_option.description) - - return help_string + "\n" - - def execute(self, *args, **kwargs): - ''' - Executes all appropriate modules according to the options specified in args/kwargs. - - Returns a list of executed module objects. - ''' - run_modules = [] - orig_arguments = self.arguments - - if args or kwargs: - self._set_arguments(list(args), kwargs) - - # Run all modules - for module in self.list(): - obj = self.run(module) - - # Add all loaded modules that marked themselves as enabled to the run_modules list - for (module, obj) in iterator(self.loaded_modules): - # Report the results if the module is enabled and if it is a primary module or if it reported any results/errors - if obj.enabled and (obj.PRIMARY or obj.results or obj.errors): - run_modules.append(obj) - - self.arguments = orig_arguments - - return run_modules - - def run(self, module, dependency=False, kwargs={}): - ''' - Runs a specific module. - ''' - obj = self.load(module, kwargs) - - if isinstance(obj, binwalk.core.module.Module) and obj.enabled: - obj.main(parent=self) - self.status.clear() - - # If the module is not being loaded as a dependency, add it to the loaded modules dictionary - if not dependency: - self.loaded_modules[module] = obj - - return obj - - def load(self, module, kwargs={}): - argv = self.argv(module, argv=self.arguments) - argv.update(kwargs) - argv.update(self.dependencies(module, argv['enabled'])) - return module(**argv) - - def dependencies(self, module, module_enabled): - import binwalk.modules - attributes = {} - - for dependency in module.DEFAULT_DEPENDS+module.DEPENDS: - - # The dependency module must be imported by binwalk.modules.__init__.py - if hasattr(binwalk.modules, dependency.name): - dependency.module = getattr(binwalk.modules, dependency.name) - else: - raise ModuleException("%s depends on %s which was not found in binwalk.modules.__init__.py\n" % (str(module), dependency.name)) - - # No recursive dependencies, thanks - if dependency.module == module: - continue - - # Only load dependencies with custom kwargs from modules that are enabled, else madness ensues. - # Example: Heursitic module depends on entropy module, and sets entropy kwargs to contain 'enabled' : True. - # Without this check, an entropy scan would always be run, even if -H or -E weren't specified! - # - # Modules that are not enabled (e.g., extraction module) can load any dependency as long as they don't - # set any custom kwargs for those dependencies. - if module_enabled or not dependency.kwargs: - depobj = self.run(dependency.module, dependency=True, kwargs=dependency.kwargs) - - # If a dependency failed, consider this a non-recoverable error and raise an exception - if depobj.errors: - raise ModuleException("Failed to load " + dependency.name) - else: - attributes[dependency.attribute] = depobj - - return attributes - - def argv(self, module, argv=sys.argv[1:]): - ''' - Processes argv for any options specific to the specified module. - - @module - The module to process argv for. - @argv - A list of command line arguments (excluding argv[0]). - - Returns a dictionary of kwargs for the specified module. - ''' - kwargs = {'enabled' : False} - last_priority = {} - longs = [] - shorts = "" - parser = argparse.ArgumentParser(add_help=False) - - # Must build arguments from all modules so that: - # - # 1) Any conflicting arguments will raise an exception - # 2) The only unknown arguments will be the target files, making them easy to identify - for m in self.list(attribute="CLI"): - - for module_option in m.CLI: - if not module_option.long: - continue - - if module_option.type is None: - action = 'store_true' - else: - action = None - - if module_option.short: - parser.add_argument('-' + module_option.short, '--' + module_option.long, action=action, dest=module_option.long) - else: - parser.add_argument('--' + module_option.long, action=action, dest=module_option.long) - - args, unknown = parser.parse_known_args(argv) - args = args.__dict__ - - # Only add parsed options pertinent to the requested module - for module_option in module.CLI: - - if module_option.type == binwalk.core.common.BlockFile: - - for k in get_keys(module_option.kwargs): - kwargs[k] = [] - for unk in unknown: - kwargs[k].append(unk) - - elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]: - - for (name, value) in iterator(module_option.kwargs): - if not has_key(last_priority, name) or last_priority[name] <= module_option.priority: - - if module_option.type is not None: - value = args[module_option.long] - - last_priority[name] = module_option.priority - - # Do this manually as argparse doesn't seem to be able to handle hexadecimal values - if module_option.type == int: - kwargs[name] = int(value, 0) - elif module_option.type == float: - kwargs[name] = float(value) - elif module_option.type == dict: - if not has_key(kwargs, name): - kwargs[name] = {} - kwargs[name][len(kwargs[name])] = value - elif module_option.type == list: - if not has_key(kwargs, name): - kwargs[name] = [] - kwargs[name].append(value) - else: - kwargs[name] = value - - return kwargs - - def kwargs(self, obj, kwargs): - ''' - Processes a module's kwargs. All modules should use this for kwarg processing. - - @obj - An instance of the module (e.g., self) - @kwargs - The kwargs passed to the module - - Returns None. - ''' - if hasattr(obj, "KWARGS"): - for module_argument in obj.KWARGS: - if has_key(kwargs, module_argument.name): - arg_value = kwargs[module_argument.name] - else: - arg_value = module_argument.default - - setattr(obj, module_argument.name, arg_value) - - for (k, v) in iterator(kwargs): - if not hasattr(obj, k): - setattr(obj, k, v) - else: - raise Exception("binwalk.core.module.Modules.process_kwargs: %s has no attribute 'KWARGS'" % str(obj)) + ''' + Main class used for running and managing modules. + ''' + + def __init__(self, *argv, **kargv): + ''' + Class constructor. + + @argv - List of command line options. Must not include the program name (e.g., sys.argv[1:]). + @kargv - Keyword dictionary of command line options. + + Returns None. + ''' + self.arguments = [] + self.loaded_modules = {} + self.default_dependency_modules = {} + self.status = Status(completed=0, total=0) + + self._set_arguments(list(argv), kargv) + + def _set_arguments(self, argv=[], kargv={}): + for (k,v) in iterator(kargv): + k = self._parse_api_opt(k) + if v not in [True, False, None]: + argv.append("%s %s" % (k, v)) + else: + argv.append(k) + + if not argv and not self.arguments: + self.arguments = sys.argv[1:] + elif argv: + self.arguments = argv + + def _parse_api_opt(self, opt): + # If the argument already starts with a hyphen, don't add hyphens in front of it + if opt.startswith('-'): + return opt + # Short options are only 1 character + elif len(opt) == 1: + return '-' + opt + else: + return '--' + opt + + def list(self, attribute="run"): + ''' + Finds all modules with the specified attribute. + + @attribute - The desired module attribute. + + Returns a list of modules that contain the specified attribute, in the order they should be executed. + ''' + import binwalk.modules + modules = {} + + for (name, module) in inspect.getmembers(binwalk.modules): + if inspect.isclass(module) and hasattr(module, attribute): + modules[module] = module.PRIORITY + + return sorted(modules, key=modules.get, reverse=True) + + def help(self): + ''' + Generates formatted help output. + + Returns the help string. + ''' + modules = {} + help_string = "\nBinwalk v%s\nCraig Heffner, http://www.binwalk.org\n" % binwalk.core.settings.Settings.VERSION + + # Build a dictionary of modules and their ORDER attributes. + # This makes it easy to sort modules by their ORDER attribute for display. + for module in self.list(attribute="CLI"): + if module.CLI: + modules[module] = module.ORDER + + for module in sorted(modules, key=modules.get, reverse=True): + help_string += "\n%s Options:\n" % module.TITLE + + for module_option in module.CLI: + if module_option.long: + long_opt = '--' + module_option.long + + if module_option.dtype: + optargs = "=<%s>" % module_option.dtype + else: + optargs = "" + + if module_option.short: + short_opt = "-" + module_option.short + "," + else: + short_opt = " " + + fmt = " %%s %%s%%-%ds%%s\n" % (32-len(long_opt)) + help_string += fmt % (short_opt, long_opt, optargs, module_option.description) + + return help_string + "\n" + + def execute(self, *args, **kwargs): + ''' + Executes all appropriate modules according to the options specified in args/kwargs. + + Returns a list of executed module objects. + ''' + run_modules = [] + orig_arguments = self.arguments + + if args or kwargs: + self._set_arguments(list(args), kwargs) + + # Run all modules + for module in self.list(): + obj = self.run(module) + + # Add all loaded modules that marked themselves as enabled to the run_modules list + for (module, obj) in iterator(self.loaded_modules): + # Report the results if the module is enabled and if it is a primary module or if it reported any results/errors + if obj.enabled and (obj.PRIMARY or obj.results or obj.errors): + run_modules.append(obj) + + self.arguments = orig_arguments + + return run_modules + + def run(self, module, dependency=False, kwargs={}): + ''' + Runs a specific module. + ''' + obj = self.load(module, kwargs) + + if isinstance(obj, binwalk.core.module.Module) and obj.enabled: + obj.main(parent=self) + self.status.clear() + + # If the module is not being loaded as a dependency, add it to the loaded modules dictionary + if not dependency: + self.loaded_modules[module] = obj + + return obj + + def load(self, module, kwargs={}): + argv = self.argv(module, argv=self.arguments) + argv.update(kwargs) + argv.update(self.dependencies(module, argv['enabled'])) + return module(**argv) + + def dependencies(self, module, module_enabled): + import binwalk.modules + attributes = {} + + for dependency in module.DEFAULT_DEPENDS+module.DEPENDS: + + # The dependency module must be imported by binwalk.modules.__init__.py + if hasattr(binwalk.modules, dependency.name): + dependency.module = getattr(binwalk.modules, dependency.name) + else: + raise ModuleException("%s depends on %s which was not found in binwalk.modules.__init__.py\n" % (str(module), dependency.name)) + + # No recursive dependencies, thanks + if dependency.module == module: + continue + + # Only load dependencies with custom kwargs from modules that are enabled, else madness ensues. + # Example: Heursitic module depends on entropy module, and sets entropy kwargs to contain 'enabled' : True. + # Without this check, an entropy scan would always be run, even if -H or -E weren't specified! + # + # Modules that are not enabled (e.g., extraction module) can load any dependency as long as they don't + # set any custom kwargs for those dependencies. + if module_enabled or not dependency.kwargs: + depobj = self.run(dependency.module, dependency=True, kwargs=dependency.kwargs) + + # If a dependency failed, consider this a non-recoverable error and raise an exception + if depobj.errors: + raise ModuleException("Failed to load " + dependency.name) + else: + attributes[dependency.attribute] = depobj + + return attributes + + def argv(self, module, argv=sys.argv[1:]): + ''' + Processes argv for any options specific to the specified module. + + @module - The module to process argv for. + @argv - A list of command line arguments (excluding argv[0]). + + Returns a dictionary of kwargs for the specified module. + ''' + kwargs = {'enabled' : False} + last_priority = {} + longs = [] + shorts = "" + parser = argparse.ArgumentParser(add_help=False) + + # Must build arguments from all modules so that: + # + # 1) Any conflicting arguments will raise an exception + # 2) The only unknown arguments will be the target files, making them easy to identify + for m in self.list(attribute="CLI"): + + for module_option in m.CLI: + if not module_option.long: + continue + + if module_option.type is None: + action = 'store_true' + else: + action = None + + if module_option.short: + parser.add_argument('-' + module_option.short, '--' + module_option.long, action=action, dest=module_option.long) + else: + parser.add_argument('--' + module_option.long, action=action, dest=module_option.long) + + args, unknown = parser.parse_known_args(argv) + args = args.__dict__ + + # Only add parsed options pertinent to the requested module + for module_option in module.CLI: + + if module_option.type == binwalk.core.common.BlockFile: + + for k in get_keys(module_option.kwargs): + kwargs[k] = [] + for unk in unknown: + kwargs[k].append(unk) + + elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]: + + for (name, value) in iterator(module_option.kwargs): + if not has_key(last_priority, name) or last_priority[name] <= module_option.priority: + + if module_option.type is not None: + value = args[module_option.long] + + last_priority[name] = module_option.priority + + # Do this manually as argparse doesn't seem to be able to handle hexadecimal values + if module_option.type == int: + kwargs[name] = int(value, 0) + elif module_option.type == float: + kwargs[name] = float(value) + elif module_option.type == dict: + if not has_key(kwargs, name): + kwargs[name] = {} + kwargs[name][len(kwargs[name])] = value + elif module_option.type == list: + if not has_key(kwargs, name): + kwargs[name] = [] + kwargs[name].append(value) + else: + kwargs[name] = value + + return kwargs + + def kwargs(self, obj, kwargs): + ''' + Processes a module's kwargs. All modules should use this for kwarg processing. + + @obj - An instance of the module (e.g., self) + @kwargs - The kwargs passed to the module + + Returns None. + ''' + if hasattr(obj, "KWARGS"): + for module_argument in obj.KWARGS: + if has_key(kwargs, module_argument.name): + arg_value = kwargs[module_argument.name] + else: + arg_value = module_argument.default + + setattr(obj, module_argument.name, arg_value) + + for (k, v) in iterator(kwargs): + if not hasattr(obj, k): + setattr(obj, k, v) + else: + raise Exception("binwalk.core.module.Modules.process_kwargs: %s has no attribute 'KWARGS'" % str(obj)) def process_kwargs(obj, kwargs): - ''' - Convenience wrapper around binwalk.core.module.Modules.kwargs. + ''' + Convenience wrapper around binwalk.core.module.Modules.kwargs. - @obj - The class object (an instance of a sub-class of binwalk.core.module.Module). - @kwargs - The kwargs provided to the object's __init__ method. + @obj - The class object (an instance of a sub-class of binwalk.core.module.Module). + @kwargs - The kwargs provided to the object's __init__ method. - Returns None. - ''' - return Modules().kwargs(obj, kwargs) + Returns None. + ''' + return Modules().kwargs(obj, kwargs) def show_help(fd=sys.stdout): - ''' - Convenience wrapper around binwalk.core.module.Modules.help. + ''' + Convenience wrapper around binwalk.core.module.Modules.help. - @fd - An object with a write method (e.g., sys.stdout, sys.stderr, etc). + @fd - An object with a write method (e.g., sys.stdout, sys.stderr, etc). - Returns None. - ''' - fd.write(Modules().help()) + Returns None. + ''' + fd.write(Modules().help()) diff --git a/src/binwalk/core/parser.py b/src/binwalk/core/parser.py index 3b9b2a6..5c957cf 100644 --- a/src/binwalk/core/parser.py +++ b/src/binwalk/core/parser.py @@ -7,350 +7,350 @@ from binwalk.core.filter import FilterType class MagicSignature(object): - def __init__(self, **kwargs): - self.offset = 0 - self.type = '' - self.condition = '' - self.description = '' - self.length = 0 - - for (k,v) in iterator(kwargs): - try: - v = int(v, 0) - except KeyboardInterrupt as e: - raise e - except Exception: - pass - - setattr(self, k, v) + def __init__(self, **kwargs): + self.offset = 0 + self.type = '' + self.condition = '' + self.description = '' + self.length = 0 + + for (k,v) in iterator(kwargs): + try: + v = int(v, 0) + except KeyboardInterrupt as e: + raise e + except Exception: + pass + + setattr(self, k, v) class MagicParser(object): - ''' - Class for loading, parsing and creating libmagic-compatible magic files. - - This class is primarily used internally by the Binwalk class, and a class instance of it is available via the Binwalk.parser object. - - One useful method however, is file_from_string(), which will generate a temporary magic file from a given signature string: - - import binwalk - - bw = binwalk.Binwalk() - - # Create a temporary magic file that contains a single entry with a signature of '\\x00FOOBAR\\xFF', and append the resulting - # temporary file name to the list of magic files in the Binwalk class instance. - bw.magic_files.append(bw.parser.file_from_string('\\x00FOOBAR\\xFF', display_name='My custom signature')) - - bw.scan('firmware.bin') - - All magic files generated by this class will be deleted when the class deconstructor is called. - ''' - - BIG_ENDIAN = 'big' - LITTLE_ENDIAN = 'little' - - MAGIC_STRING_FORMAT = "%d\tstring\t%s\t%s\n" - DEFAULT_DISPLAY_NAME = "Raw string signature" - - WILDCARD = 'x' - - # If libmagic returns multiple results, they are delimited with this string. - RESULT_SEPERATOR = "\\012- " - - def __init__(self, filter=None, smart=None): - ''' - Class constructor. - - @filter - Instance of the MagicFilter class. May be None if the parse/parse_file methods are not used. - @smart - Instance of the SmartSignature class. May be None if the parse/parse_file methods are not used. - Returns None. - ''' - self.matches = set([]) - self.signatures = {} - self.filter = filter - self.smart = smart - self.raw_fd = None - self.signature_count = 0 - - def __del__(self): - try: - self.cleanup() - except KeyboardInterrupt as e: - raise e - except Exception: - pass - - def rm_magic_files(self): - ''' - Cleans up the temporary magic file(s). - - Returns None. - ''' - try: - self.fd.close() - except KeyboardInterrupt as e: - raise e - except Exception: - pass - - try: - self.raw_fd.close() - except KeyboardInterrupt as e: - raise e - except Exception: - pass - - def cleanup(self): - ''' - Cleans up any tempfiles created by the class instance. - - Returns None. - ''' - self.rm_magic_files() - - def file_from_string(self, signature_string, offset=0, display_name=DEFAULT_DISPLAY_NAME): - ''' - Generates a magic file from a signature string. - This method is intended to be used once per instance. - If invoked multiple times, any previously created magic files will be closed and deleted. - - @signature_string - The string signature to search for. - @offset - The offset at which the signature should occur. - @display_name - The text to display when the signature is found. - - Returns the name of the generated temporary magic file. - ''' - self.raw_fd = tempfile.NamedTemporaryFile() - self.raw_fd.write(self.MAGIC_STRING_FORMAT % (offset, signature_string, display_name)) - self.raw_fd.seek(0) - return self.raw_fd.name - - def parse(self, file_name): - ''' - Parses magic file(s) and contatenates them into a single temporary magic file - while simultaneously removing filtered signatures. - - @file_name - Magic file, or list of magic files, to parse. - - Returns the name of the generated temporary magic file, which will be automatically - deleted when the class deconstructor is called. - ''' - self.matches = set([]) - self.signatures = {} - self.signature_count = 0 - self.fd = tempfile.NamedTemporaryFile() - - if isinstance(file_name, type([])): - files = file_name - else: - files = [file_name] - - for fname in files: - if os.path.exists(fname): - self.parse_file(fname) - else: - sys.stdout.write("WARNING: Magic file '%s' does not exist!\n" % fname) - - self.fd.seek(0) - return self.fd.name - - def parse_file(self, file_name): - ''' - Parses a magic file and appends valid signatures to the temporary magic file, as allowed - by the existing filter rules. - - @file_name - Magic file to parse. - - Returns None. - ''' - # Default to not including signature entries until we've - # found what looks like a valid entry. - include = False - line_count = 0 - - try: - for line in open(file_name, 'r').readlines(): - line_count += 1 - - # Check if this is the first line of a signature entry - entry = self._parse_line(line) - - if entry is not None: - # If this signature is marked for inclusion, include it. - if self.filter.filter(entry.description) == FilterType.FILTER_INCLUDE: - - include = True - self.signature_count += 1 - - if not has_key(self.signatures, entry.offset): - self.signatures[entry.offset] = [] - - if entry.condition not in self.signatures[entry.offset]: - self.signatures[entry.offset].append(entry.condition) - else: - include = False - - # Keep writing lines of the signature to the temporary magic file until - # we detect a signature that should not be included. - if include: - self.fd.write(str2bytes(line)) - - self.build_signature_set() - except KeyboardInterrupt as e: - raise e - except Exception as e: - raise Exception("Error parsing magic file '%s' on line %d: %s" % (file_name, line_count, str(e))) - - def _parse_line(self, line): - ''' - Parses a signature line into its four parts (offset, type, condition and description), - looking for the first line of a given signature. - - @line - The signature line to parse. - - Returns a dictionary with the respective line parts populated if the line is the first of a signature. - Returns a dictionary with all parts set to None if the line is not the first of a signature. - ''' - entry = MagicSignature() - - # Quick and dirty pre-filter. We are only concerned with the first line of a - # signature, which will always start with a number. Make sure the first byte of - # the line is a number; if not, don't process. - if line[:1] < '0' or line[:1] > '9': - return None - - try: - # Split the line into white-space separated parts. - # For this to work properly, replace escaped spaces ('\ ') with '\x20'. - # This means the same thing, but doesn't confuse split(). - line_parts = line.replace('\\ ', '\\x20').split() - entry.offset = line_parts[0] - entry.type = line_parts[1] - # The condition line may contain escaped sequences, so be sure to decode it properly. - entry.condition = string_decode(line_parts[2]) - entry.description = ' '.join(line_parts[3:]) - except KeyboardInterrupt as e: - raise e - except Exception as e: - raise Exception("%s :: %s", (str(e), line)) - - # We've already verified that the first character in this line is a number, so this *shouldn't* - # throw an exception, but let's catch it just in case... - try: - entry.offset = int(entry.offset, 0) - except KeyboardInterrupt as e: - raise e - except Exception as e: - raise Exception("%s :: %s", (str(e), line)) - - # If this is a string, get the length of the string - if 'string' in entry.type or entry.condition == self.WILDCARD: - entry.length = len(entry.condition) - # Else, we need to jump through a few more hoops... - else: - # Default to little endian, unless the type field starts with 'be'. - # This assumes that we're running on a little endian system... - if entry.type.startswith('be'): - endianess = self.BIG_ENDIAN - else: - endianess = self.LITTLE_ENDIAN - - # Try to convert the condition to an integer. This does not allow - # for more advanced conditions for the first line of a signature, - # but needing that is rare. - try: - intval = int(entry.condition.strip('L'), 0) - except KeyboardInterrupt as e: - raise e - except Exception as e: - raise Exception("Failed to evaluate condition for '%s' type: '%s', condition: '%s', error: %s" % (entry['description'], entry['type'], entry['condition'], str(e))) - - # How long is the field type? - if entry.type == 'byte': - entry.length = 1 - elif 'short' in entry.type: - entry.length = 2 - elif 'long' in entry.type: - entry.length = 4 - elif 'quad' in entry.type: - entry.length = 8 - - # Convert the integer value to a string of the appropriate endianess - entry.condition = self._to_string(intval, entry.length, endianess) - - return entry - - def build_signature_set(self): - ''' - Builds a set of signature tuples. - - Returns a set of tuples in the format: [(<signature offset>, [signature regex])]. - ''' - self.signature_set = set() - - for (offset, sigs) in iterator(self.signatures): - - for sig in sigs: - if sig == self.WILDCARD: - sig = re.compile('.') - else: - sig = re.compile(re.escape(sig)) - - self.signature_set.add((offset, sig)) - - return self.signature_set - - def find_signature_candidates(self, data, end): - ''' - Finds candidate signatures inside of the data buffer. - Called internally by Binwalk.single_scan. - - @data - Data to scan for candidate signatures. - @end - Don't look for signatures beyond this offset. - - Returns an ordered list of offsets inside of data at which candidate offsets were found. - ''' - candidate_offsets = [] - - for (offset, regex) in self.signature_set: - candidate_offsets += [(match.start() - offset) for match in regex.finditer(data) if (match.start() - offset) < end and (match.start() - offset) >= 0] - - candidate_offsets = list(set(candidate_offsets)) - candidate_offsets.sort() - - return candidate_offsets - - def _to_string(self, value, size, endianess): - ''' - Converts an integer value into a raw string. - - @value - The integer value to convert. - @size - Size, in bytes, of the integer value. - @endianess - One of self.LITTLE_ENDIAN | self.BIG_ENDIAN. - - Returns a raw string containing value. - ''' - data = "" - - for i in range(0, size): - data += chr((value >> (8*i)) & 0xFF) - - if endianess != self.LITTLE_ENDIAN: - data = data[::-1] - - return data - - def split(self, data): - ''' - Splits multiple libmagic results in the data string into a list of separate results. - - @data - Data string returned from libmagic. - - Returns a list of result strings. - ''' - try: - return data.split(self.RESULT_SEPERATOR) - except KeyboardInterrupt as e: - raise e - except Exception: - return [] + ''' + Class for loading, parsing and creating libmagic-compatible magic files. + + This class is primarily used internally by the Binwalk class, and a class instance of it is available via the Binwalk.parser object. + + One useful method however, is file_from_string(), which will generate a temporary magic file from a given signature string: + + import binwalk + + bw = binwalk.Binwalk() + + # Create a temporary magic file that contains a single entry with a signature of '\\x00FOOBAR\\xFF', and append the resulting + # temporary file name to the list of magic files in the Binwalk class instance. + bw.magic_files.append(bw.parser.file_from_string('\\x00FOOBAR\\xFF', display_name='My custom signature')) + + bw.scan('firmware.bin') + + All magic files generated by this class will be deleted when the class deconstructor is called. + ''' + + BIG_ENDIAN = 'big' + LITTLE_ENDIAN = 'little' + + MAGIC_STRING_FORMAT = "%d\tstring\t%s\t%s\n" + DEFAULT_DISPLAY_NAME = "Raw string signature" + + WILDCARD = 'x' + + # If libmagic returns multiple results, they are delimited with this string. + RESULT_SEPERATOR = "\\012- " + + def __init__(self, filter=None, smart=None): + ''' + Class constructor. + + @filter - Instance of the MagicFilter class. May be None if the parse/parse_file methods are not used. + @smart - Instance of the SmartSignature class. May be None if the parse/parse_file methods are not used. + Returns None. + ''' + self.matches = set([]) + self.signatures = {} + self.filter = filter + self.smart = smart + self.raw_fd = None + self.signature_count = 0 + + def __del__(self): + try: + self.cleanup() + except KeyboardInterrupt as e: + raise e + except Exception: + pass + + def rm_magic_files(self): + ''' + Cleans up the temporary magic file(s). + + Returns None. + ''' + try: + self.fd.close() + except KeyboardInterrupt as e: + raise e + except Exception: + pass + + try: + self.raw_fd.close() + except KeyboardInterrupt as e: + raise e + except Exception: + pass + + def cleanup(self): + ''' + Cleans up any tempfiles created by the class instance. + + Returns None. + ''' + self.rm_magic_files() + + def file_from_string(self, signature_string, offset=0, display_name=DEFAULT_DISPLAY_NAME): + ''' + Generates a magic file from a signature string. + This method is intended to be used once per instance. + If invoked multiple times, any previously created magic files will be closed and deleted. + + @signature_string - The string signature to search for. + @offset - The offset at which the signature should occur. + @display_name - The text to display when the signature is found. + + Returns the name of the generated temporary magic file. + ''' + self.raw_fd = tempfile.NamedTemporaryFile() + self.raw_fd.write(self.MAGIC_STRING_FORMAT % (offset, signature_string, display_name)) + self.raw_fd.seek(0) + return self.raw_fd.name + + def parse(self, file_name): + ''' + Parses magic file(s) and contatenates them into a single temporary magic file + while simultaneously removing filtered signatures. + + @file_name - Magic file, or list of magic files, to parse. + + Returns the name of the generated temporary magic file, which will be automatically + deleted when the class deconstructor is called. + ''' + self.matches = set([]) + self.signatures = {} + self.signature_count = 0 + self.fd = tempfile.NamedTemporaryFile() + + if isinstance(file_name, type([])): + files = file_name + else: + files = [file_name] + + for fname in files: + if os.path.exists(fname): + self.parse_file(fname) + else: + sys.stdout.write("WARNING: Magic file '%s' does not exist!\n" % fname) + + self.fd.seek(0) + return self.fd.name + + def parse_file(self, file_name): + ''' + Parses a magic file and appends valid signatures to the temporary magic file, as allowed + by the existing filter rules. + + @file_name - Magic file to parse. + + Returns None. + ''' + # Default to not including signature entries until we've + # found what looks like a valid entry. + include = False + line_count = 0 + + try: + for line in open(file_name, 'r').readlines(): + line_count += 1 + + # Check if this is the first line of a signature entry + entry = self._parse_line(line) + + if entry is not None: + # If this signature is marked for inclusion, include it. + if self.filter.filter(entry.description) == FilterType.FILTER_INCLUDE: + + include = True + self.signature_count += 1 + + if not has_key(self.signatures, entry.offset): + self.signatures[entry.offset] = [] + + if entry.condition not in self.signatures[entry.offset]: + self.signatures[entry.offset].append(entry.condition) + else: + include = False + + # Keep writing lines of the signature to the temporary magic file until + # we detect a signature that should not be included. + if include: + self.fd.write(str2bytes(line)) + + self.build_signature_set() + except KeyboardInterrupt as e: + raise e + except Exception as e: + raise Exception("Error parsing magic file '%s' on line %d: %s" % (file_name, line_count, str(e))) + + def _parse_line(self, line): + ''' + Parses a signature line into its four parts (offset, type, condition and description), + looking for the first line of a given signature. + + @line - The signature line to parse. + + Returns a dictionary with the respective line parts populated if the line is the first of a signature. + Returns a dictionary with all parts set to None if the line is not the first of a signature. + ''' + entry = MagicSignature() + + # Quick and dirty pre-filter. We are only concerned with the first line of a + # signature, which will always start with a number. Make sure the first byte of + # the line is a number; if not, don't process. + if line[:1] < '0' or line[:1] > '9': + return None + + try: + # Split the line into white-space separated parts. + # For this to work properly, replace escaped spaces ('\ ') with '\x20'. + # This means the same thing, but doesn't confuse split(). + line_parts = line.replace('\\ ', '\\x20').split() + entry.offset = line_parts[0] + entry.type = line_parts[1] + # The condition line may contain escaped sequences, so be sure to decode it properly. + entry.condition = string_decode(line_parts[2]) + entry.description = ' '.join(line_parts[3:]) + except KeyboardInterrupt as e: + raise e + except Exception as e: + raise Exception("%s :: %s", (str(e), line)) + + # We've already verified that the first character in this line is a number, so this *shouldn't* + # throw an exception, but let's catch it just in case... + try: + entry.offset = int(entry.offset, 0) + except KeyboardInterrupt as e: + raise e + except Exception as e: + raise Exception("%s :: %s", (str(e), line)) + + # If this is a string, get the length of the string + if 'string' in entry.type or entry.condition == self.WILDCARD: + entry.length = len(entry.condition) + # Else, we need to jump through a few more hoops... + else: + # Default to little endian, unless the type field starts with 'be'. + # This assumes that we're running on a little endian system... + if entry.type.startswith('be'): + endianess = self.BIG_ENDIAN + else: + endianess = self.LITTLE_ENDIAN + + # Try to convert the condition to an integer. This does not allow + # for more advanced conditions for the first line of a signature, + # but needing that is rare. + try: + intval = int(entry.condition.strip('L'), 0) + except KeyboardInterrupt as e: + raise e + except Exception as e: + raise Exception("Failed to evaluate condition for '%s' type: '%s', condition: '%s', error: %s" % (entry['description'], entry['type'], entry['condition'], str(e))) + + # How long is the field type? + if entry.type == 'byte': + entry.length = 1 + elif 'short' in entry.type: + entry.length = 2 + elif 'long' in entry.type: + entry.length = 4 + elif 'quad' in entry.type: + entry.length = 8 + + # Convert the integer value to a string of the appropriate endianess + entry.condition = self._to_string(intval, entry.length, endianess) + + return entry + + def build_signature_set(self): + ''' + Builds a set of signature tuples. + + Returns a set of tuples in the format: [(<signature offset>, [signature regex])]. + ''' + self.signature_set = set() + + for (offset, sigs) in iterator(self.signatures): + + for sig in sigs: + if sig == self.WILDCARD: + sig = re.compile('.') + else: + sig = re.compile(re.escape(sig)) + + self.signature_set.add((offset, sig)) + + return self.signature_set + + def find_signature_candidates(self, data, end): + ''' + Finds candidate signatures inside of the data buffer. + Called internally by Binwalk.single_scan. + + @data - Data to scan for candidate signatures. + @end - Don't look for signatures beyond this offset. + + Returns an ordered list of offsets inside of data at which candidate offsets were found. + ''' + candidate_offsets = [] + + for (offset, regex) in self.signature_set: + candidate_offsets += [(match.start() - offset) for match in regex.finditer(data) if (match.start() - offset) < end and (match.start() - offset) >= 0] + + candidate_offsets = list(set(candidate_offsets)) + candidate_offsets.sort() + + return candidate_offsets + + def _to_string(self, value, size, endianess): + ''' + Converts an integer value into a raw string. + + @value - The integer value to convert. + @size - Size, in bytes, of the integer value. + @endianess - One of self.LITTLE_ENDIAN | self.BIG_ENDIAN. + + Returns a raw string containing value. + ''' + data = "" + + for i in range(0, size): + data += chr((value >> (8*i)) & 0xFF) + + if endianess != self.LITTLE_ENDIAN: + data = data[::-1] + + return data + + def split(self, data): + ''' + Splits multiple libmagic results in the data string into a list of separate results. + + @data - Data string returned from libmagic. + + Returns a list of result strings. + ''' + try: + return data.split(self.RESULT_SEPERATOR) + except KeyboardInterrupt as e: + raise e + except Exception: + return [] diff --git a/src/binwalk/core/plugin.py b/src/binwalk/core/plugin.py index 733cd7e..d29bc05 100644 --- a/src/binwalk/core/plugin.py +++ b/src/binwalk/core/plugin.py @@ -5,180 +5,180 @@ import binwalk.core.settings from binwalk.core.compat import * class Plugins: - ''' - Class to load and call plugin callback functions, handled automatically by Binwalk.scan / Binwalk.single_scan. - An instance of this class is available during a scan via the Binwalk.plugins object. - - Each plugin must be placed in the user or system plugins directories, and must define a class named 'Plugin'. - The Plugin class constructor (__init__) is passed one argument, which is the current instance of the Binwalk class. - The Plugin class constructor is called once prior to scanning a file or set of files. - The Plugin class destructor (__del__) is called once after scanning all files. - - The Plugin class can define one or all of the following callback methods: - - o pre_scan(self, fd) - This method is called prior to running a scan against a file. It is passed the file object of - the file about to be scanned. - - o pre_parser(self, result) - This method is called every time any result - valid or invalid - is found in the file being scanned. - It is passed a dictionary with one key ('description'), which contains the raw string returned by libmagic. - The contents of this dictionary key may be modified as necessary by the plugin. - - o callback(self, results) - This method is called every time a valid result is found in the file being scanned. It is passed a - dictionary of results. This dictionary is identical to that passed to Binwalk.single_scan's callback - function, and its contents may be modified as necessary by the plugin. - - o post_scan(self, fd) - This method is called after running a scan against a file, but before the file has been closed. - It is passed the file object of the scanned file. - - Values returned by pre_scan affect all results during the scan of that particular file. - Values returned by callback affect only that specific scan result. - Values returned by post_scan are ignored since the scan of that file has already been completed. - - By default, all plugins are loaded during binwalk signature scans. Plugins that wish to be disabled by - default may create a class variable named 'ENABLED' and set it to False. If ENABLED is set to False, the - plugin will only be loaded if it is explicitly named in the plugins whitelist. - ''' - - SCAN = 'scan' - PRESCAN = 'pre_scan' - POSTSCAN = 'post_scan' - PLUGIN = 'Plugin' - MODULE_EXTENSION = '.py' - - def __init__(self, parent=None): - self.scan = [] - self.pre_scan = [] - self.post_scan = [] - self.parent = parent - self.settings = binwalk.core.settings.Settings() - - def __del__(self): - pass - - def __enter__(self): - return self - - def __exit__(self, t, v, traceback): - pass - - def _call_plugins(self, callback_list, arg): - for callback in callback_list: - try: - callback(arg) - except KeyboardInterrupt as e: - raise e - except Exception as e: - sys.stderr.write("WARNING: %s.%s failed: %s\n" % (callback.__module__, callback.__name__, e)) - - def list_plugins(self): - ''' - Obtain a list of all user and system plugin modules. - - Returns a dictionary of: - - { - 'user' : { - 'modules' : [list, of, module, names], - 'descriptions' : {'module_name' : 'module pydoc string'}, - 'enabled' : {'module_name' : True}, - 'path' : "path/to/module/plugin/directory" - }, - 'system' : { - 'modules' : [list, of, module, names], - 'descriptions' : {'module_name' : 'module pydoc string'}, - 'enabled' : {'module_name' : True}, - 'path' : "path/to/module/plugin/directory" - } - } - ''' - - plugins = { - 'user' : { - 'modules' : [], - 'descriptions' : {}, - 'enabled' : {}, - 'path' : None, - }, - 'system' : { - 'modules' : [], - 'descriptions' : {}, - 'enabled' : {}, - 'path' : None, - } - } - - for key in plugins.keys(): - plugins[key]['path'] = self.settings.paths[key][self.settings.PLUGINS] - - for file_name in os.listdir(plugins[key]['path']): - if file_name.endswith(self.MODULE_EXTENSION): - module = file_name[:-len(self.MODULE_EXTENSION)] - - plugin = imp.load_source(module, os.path.join(plugins[key]['path'], file_name)) - plugin_class = getattr(plugin, self.PLUGIN) - - plugins[key]['enabled'][module] = True - plugins[key]['modules'].append(module) - - try: - plugins[key]['descriptions'][module] = plugin_class.__doc__.strip().split('\n')[0] - except KeyboardInterrupt as e: - raise e - except Exception as e: - plugins[key]['descriptions'][module] = 'No description' - return plugins - - def load_plugins(self): - plugins = self.list_plugins() - self._load_plugin_modules(plugins['user']) - self._load_plugin_modules(plugins['system']) - - def _load_plugin_modules(self, plugins): - for module in plugins['modules']: - file_path = os.path.join(plugins['path'], module + self.MODULE_EXTENSION) - - try: - plugin = imp.load_source(module, file_path) - plugin_class = getattr(plugin, self.PLUGIN) - - class_instance = plugin_class(self.parent) - - try: - self.scan.append(getattr(class_instance, self.SCAN)) - except KeyboardInterrupt as e: - raise e - except Exception as e: - pass - - try: - self.pre_scan.append(getattr(class_instance, self.PRESCAN)) - except KeyboardInterrupt as e: - raise e - except Exception as e: - pass - - try: - self.post_scan.append(getattr(class_instance, self.POSTSCAN)) - except KeyboardInterrupt as e: - raise e - except Exception as e: - pass - - except KeyboardInterrupt as e: - raise e - except Exception as e: - sys.stderr.write("WARNING: Failed to load plugin module '%s': %s\n" % (module, str(e))) - - def pre_scan_callbacks(self, obj): - return self._call_plugins(self.pre_scan, obj) - - def post_scan_callbacks(self, obj): - return self._call_plugins(self.post_scan, obj) - - def scan_callbacks(self, obj): - return self._call_plugins(self.scan, obj) + ''' + Class to load and call plugin callback functions, handled automatically by Binwalk.scan / Binwalk.single_scan. + An instance of this class is available during a scan via the Binwalk.plugins object. + + Each plugin must be placed in the user or system plugins directories, and must define a class named 'Plugin'. + The Plugin class constructor (__init__) is passed one argument, which is the current instance of the Binwalk class. + The Plugin class constructor is called once prior to scanning a file or set of files. + The Plugin class destructor (__del__) is called once after scanning all files. + + The Plugin class can define one or all of the following callback methods: + + o pre_scan(self, fd) + This method is called prior to running a scan against a file. It is passed the file object of + the file about to be scanned. + + o pre_parser(self, result) + This method is called every time any result - valid or invalid - is found in the file being scanned. + It is passed a dictionary with one key ('description'), which contains the raw string returned by libmagic. + The contents of this dictionary key may be modified as necessary by the plugin. + + o callback(self, results) + This method is called every time a valid result is found in the file being scanned. It is passed a + dictionary of results. This dictionary is identical to that passed to Binwalk.single_scan's callback + function, and its contents may be modified as necessary by the plugin. + + o post_scan(self, fd) + This method is called after running a scan against a file, but before the file has been closed. + It is passed the file object of the scanned file. + + Values returned by pre_scan affect all results during the scan of that particular file. + Values returned by callback affect only that specific scan result. + Values returned by post_scan are ignored since the scan of that file has already been completed. + + By default, all plugins are loaded during binwalk signature scans. Plugins that wish to be disabled by + default may create a class variable named 'ENABLED' and set it to False. If ENABLED is set to False, the + plugin will only be loaded if it is explicitly named in the plugins whitelist. + ''' + + SCAN = 'scan' + PRESCAN = 'pre_scan' + POSTSCAN = 'post_scan' + PLUGIN = 'Plugin' + MODULE_EXTENSION = '.py' + + def __init__(self, parent=None): + self.scan = [] + self.pre_scan = [] + self.post_scan = [] + self.parent = parent + self.settings = binwalk.core.settings.Settings() + + def __del__(self): + pass + + def __enter__(self): + return self + + def __exit__(self, t, v, traceback): + pass + + def _call_plugins(self, callback_list, arg): + for callback in callback_list: + try: + callback(arg) + except KeyboardInterrupt as e: + raise e + except Exception as e: + sys.stderr.write("WARNING: %s.%s failed: %s\n" % (callback.__module__, callback.__name__, e)) + + def list_plugins(self): + ''' + Obtain a list of all user and system plugin modules. + + Returns a dictionary of: + + { + 'user' : { + 'modules' : [list, of, module, names], + 'descriptions' : {'module_name' : 'module pydoc string'}, + 'enabled' : {'module_name' : True}, + 'path' : "path/to/module/plugin/directory" + }, + 'system' : { + 'modules' : [list, of, module, names], + 'descriptions' : {'module_name' : 'module pydoc string'}, + 'enabled' : {'module_name' : True}, + 'path' : "path/to/module/plugin/directory" + } + } + ''' + + plugins = { + 'user' : { + 'modules' : [], + 'descriptions' : {}, + 'enabled' : {}, + 'path' : None, + }, + 'system' : { + 'modules' : [], + 'descriptions' : {}, + 'enabled' : {}, + 'path' : None, + } + } + + for key in plugins.keys(): + plugins[key]['path'] = self.settings.paths[key][self.settings.PLUGINS] + + for file_name in os.listdir(plugins[key]['path']): + if file_name.endswith(self.MODULE_EXTENSION): + module = file_name[:-len(self.MODULE_EXTENSION)] + + plugin = imp.load_source(module, os.path.join(plugins[key]['path'], file_name)) + plugin_class = getattr(plugin, self.PLUGIN) + + plugins[key]['enabled'][module] = True + plugins[key]['modules'].append(module) + + try: + plugins[key]['descriptions'][module] = plugin_class.__doc__.strip().split('\n')[0] + except KeyboardInterrupt as e: + raise e + except Exception as e: + plugins[key]['descriptions'][module] = 'No description' + return plugins + + def load_plugins(self): + plugins = self.list_plugins() + self._load_plugin_modules(plugins['user']) + self._load_plugin_modules(plugins['system']) + + def _load_plugin_modules(self, plugins): + for module in plugins['modules']: + file_path = os.path.join(plugins['path'], module + self.MODULE_EXTENSION) + + try: + plugin = imp.load_source(module, file_path) + plugin_class = getattr(plugin, self.PLUGIN) + + class_instance = plugin_class(self.parent) + + try: + self.scan.append(getattr(class_instance, self.SCAN)) + except KeyboardInterrupt as e: + raise e + except Exception as e: + pass + + try: + self.pre_scan.append(getattr(class_instance, self.PRESCAN)) + except KeyboardInterrupt as e: + raise e + except Exception as e: + pass + + try: + self.post_scan.append(getattr(class_instance, self.POSTSCAN)) + except KeyboardInterrupt as e: + raise e + except Exception as e: + pass + + except KeyboardInterrupt as e: + raise e + except Exception as e: + sys.stderr.write("WARNING: Failed to load plugin module '%s': %s\n" % (module, str(e))) + + def pre_scan_callbacks(self, obj): + return self._call_plugins(self.pre_scan, obj) + + def post_scan_callbacks(self, obj): + return self._call_plugins(self.post_scan, obj) + + def scan_callbacks(self, obj): + return self._call_plugins(self.scan, obj) diff --git a/src/binwalk/core/smart.py b/src/binwalk/core/smart.py index 5a35742..d4d6e31 100644 --- a/src/binwalk/core/smart.py +++ b/src/binwalk/core/smart.py @@ -4,310 +4,310 @@ from binwalk.core.compat import * from binwalk.core.common import get_quoted_strings, MathExpression class Tag(object): - - TAG_DELIM_START = "{" - TAG_DELIM_END = "}" - TAG_ARG_SEPERATOR = ":" - - def __init__(self, **kwargs): - self.name = None - self.keyword = None - self.type = None - self.handler = None - self.tag = None - self.default = None - - for (k,v) in iterator(kwargs): - setattr(self, k, v) - - if self.type == int: - self.default = 0 - elif self.type == str: - self.default = '' - - if self.keyword is not None: - self.tag = self.TAG_DELIM_START + self.keyword - if self.type is None: - self.tag += self.TAG_DELIM_END - else: - self.tag += self.TAG_ARG_SEPERATOR - - if self.handler is None: - if self.type == int: - self.handler = 'get_math_arg' - elif self.type == str: - self.handler = 'get_keyword_arg' + + TAG_DELIM_START = "{" + TAG_DELIM_END = "}" + TAG_ARG_SEPERATOR = ":" + + def __init__(self, **kwargs): + self.name = None + self.keyword = None + self.type = None + self.handler = None + self.tag = None + self.default = None + + for (k,v) in iterator(kwargs): + setattr(self, k, v) + + if self.type == int: + self.default = 0 + elif self.type == str: + self.default = '' + + if self.keyword is not None: + self.tag = self.TAG_DELIM_START + self.keyword + if self.type is None: + self.tag += self.TAG_DELIM_END + else: + self.tag += self.TAG_ARG_SEPERATOR + + if self.handler is None: + if self.type == int: + self.handler = 'get_math_arg' + elif self.type == str: + self.handler = 'get_keyword_arg' class Signature(object): - ''' - Class for parsing smart signature tags in libmagic result strings. - - This class is intended for internal use only, but a list of supported 'smart keywords' that may be used - in magic files is available via the SmartSignature.KEYWORDS dictionary: - - from binwalk import SmartSignature - - for tag in SmartSignature.TAGS: - print tag.keyword - ''' - - TAGS = [ - Tag(name='raw-string', keyword='raw-string', type=str, handler='parse_raw_string'), - Tag(name='string-len', keyword='string-len', type=str, handler='parse_string_len'), - Tag(name='math', keyword='math', type=int, handler='parse_math'), - Tag(name='one-of-many', keyword='one-of-many', handler='one_of_many'), - - Tag(name='jump', keyword='jump-to-offset', type=int), - Tag(name='name', keyword='file-name', type=str), - Tag(name='size', keyword='file-size', type=int), - Tag(name='adjust', keyword='offset-adjust', type=int), - Tag(name='delay', keyword='extract-delay', type=str), - Tag(name='year', keyword='file-year', type=str), - Tag(name='epoch', keyword='file-epoch', type=int), - - Tag(name='raw-size', keyword='raw-string-length', type=int), - Tag(name='raw-replace', keyword='raw-replace'), - Tag(name='string-len-replace', keyword='string-len'), - ] - - def __init__(self, filter, ignore_smart_signatures=False): - ''' - Class constructor. - - @filter - Instance of the MagicFilter class. - @ignore_smart_signatures - Set to True to ignore smart signature keywords. - - Returns None. - ''' - self.filter = filter - self.valid = True - self.last_one_of_many = None - self.ignore_smart_signatures = ignore_smart_signatures - - def parse(self, data): - ''' - Parse a given data string for smart signature keywords. If any are found, interpret them and strip them. - - @data - String to parse, as returned by libmagic. - - Returns a dictionary of parsed values. - ''' - results = {} - self.valid = True - - if data: - for tag in self.TAGS: - if tag.handler is not None: - (d, arg) = getattr(self, tag.handler)(data, tag) - if not self.ignore_smart_signatures: - data = d - - if isinstance(arg, type(False)) and arg == False and not self.ignore_smart_signatures: - self.valid = False - elif tag.type is not None: - if self.ignore_smart_signatures: - results[tag.name] = tag.default - else: - results[tag.name] = arg - - if self.ignore_smart_signatures: - results['description'] = data - else: - results['description'] = self.strip_tags(data) - else: - self.valid = False - - results['valid'] = self.valid - - return binwalk.core.module.Result(**results) - - def tag_lookup(self, keyword): - for tag in self.TAGS: - if tag.keyword == keyword: - return tag - return None - - def is_valid(self, data): - ''' - Validates that result data does not contain smart keywords in file-supplied strings. - - @data - Data string to validate. - - Returns True if data is OK. - Returns False if data is not OK. - ''' - # All strings printed from the target file should be placed in strings, else there is - # no way to distinguish between intended keywords and unintended keywords. Get all the - # quoted strings. - quoted_data = get_quoted_strings(data) - - # Check to see if there was any quoted data, and if so, if it contained the keyword starting delimiter - if quoted_data and Tag.TAG_DELIM_START in quoted_data: - # If so, check to see if the quoted data contains any of our keywords. - # If any keywords are found inside of quoted data, consider the keywords invalid. - for tag in self.TAGS: - if tag.tag in quoted_data: - return False - return True - - def safe_string(self, data): - ''' - Strips out quoted data (i.e., data taken directly from a file). - ''' - quoted_string = get_quoted_strings(data) - if quoted_string: - data = data.replace(quoted_string, "") - return data - - def one_of_many(self, data, tag): - ''' - Determines if a given data string is one result of many. - - @data - String result data. - - Returns False if the string result is one of many and should not be displayed. - Returns True if the string result is not one of many and should be displayed. - ''' - if self.filter.valid_result(data): - if self.last_one_of_many is not None and data.startswith(self.last_one_of_many): - return (data, False) - - if tag.tag in data: - # Only match on the data before the first comma, as that is typically unique and static - self.last_one_of_many = data.split(',')[0] - else: - self.last_one_of_many = None - - return (data, True) - - def get_keyword_arg(self, data, tag): - ''' - Retrieves the argument for keywords that specify arguments. - - @data - String result data, as returned by libmagic. - @keyword - Keyword index in KEYWORDS. - - Returns the argument string value on success. - Returns a blank string on failure. - ''' - arg = '' - safe_data = self.safe_string(data) - - if tag.tag in safe_data: - arg = safe_data.split(tag.tag)[1].split(tag.TAG_DELIM_END)[0] - - return (data, arg) - - def get_math_arg(self, data, tag): - ''' - Retrieves the argument for keywords that specifiy mathematical expressions as arguments. - - @data - String result data, as returned by libmagic. - @keyword - Keyword index in KEYWORDS. - - Returns the resulting calculated value. - ''' - value = 0 - - (data, arg) = self.get_keyword_arg(data, tag) - if arg: - value = MathExpression(arg).value - if value is None: - value = 0 - self.valid = False - - return (data, value) - - def parse_math(self, data, tag): - ''' - Replace math keywords with the requested values. - - @data - String result data. - - Returns the modified string result data. - ''' - while tag.keyword in data: - (data, arg) = self.get_keyword_arg(data, tag.name) - v = '%s%s%s' % (tag.keyword, arg, self.TAG_DELIM_END) - math_value = "%d" % self.get_math_arg(data, tag.name) - data = data.replace(v, math_value) - - return (data, None) - - def parse_raw_string(self, data, raw_str_tag): - ''' - Process strings that aren't NULL byte terminated, but for which we know the string length. - This should be called prior to any other smart parsing functions. - - @data - String to parse. - - Returns a parsed string. - ''' - if self.is_valid(data): - raw_str_length_tag = self.tag_lookup('raw-string-length') - raw_replace_tag = self.tag_lookup('raw-replace') - - # Get the raw string keyword arg - (data, raw_string) = self.get_keyword_arg(data, raw_str_tag) - - # Was a raw string keyword specified? - if raw_string: - # Get the raw string length arg - (data, raw_size) = self.get_math_arg(data, raw_str_length_tag) - - # Replace all instances of raw-replace in data with raw_string[:raw_size] - # Also strip out everything after the raw-string keyword, including the keyword itself. - # Failure to do so may (will) result in non-printable characters and this string will be - # marked as invalid when it shouldn't be. - data = data[:data.find(raw_str_tag.tag)].replace(raw_replace_tag.tag, '"' + raw_string[:raw_size] + '"') - - return (data, True) - - def parse_string_len(self, data, str_len_tag): - ''' - Process {string-len} macros. - - @data - String to parse. - - Returns parsed data string. - ''' - if not self.ignore_smart_signatures and self.is_valid(data): - - str_len_replace_tag = self.tag_lookup('string-len-replace') - - # Get the raw string keyword arg - (data, raw_string) = self.get_keyword_arg(data, str_len_tag) - - # Was a string-len keyword specified? - if raw_string: - # Get the string length - try: - string_length = '%d' % len(raw_string) - except KeyboardInterrupt as e: - raise e - except Exception: - string_length = '0' - - # Strip out *everything* after the string-len keyword, including the keyword itself. - # Failure to do so can potentially allow keyword injection from a maliciously created file. - data = data.split(str_len_tag.tag)[0].replace(str_len_replace_tag.tag, string_length) - - return (data, True) - - def strip_tags(self, data): - ''' - Strips the smart tags from a result string. - - @data - String result data. - - Returns a sanitized string. - ''' - if not self.ignore_smart_signatures: - for tag in self.TAGS: - start = data.find(tag.tag) - if start != -1: - end = data[start:].find(tag.TAG_DELIM_END) - if end != -1: - data = data.replace(data[start:start+end+1], "") - return data + ''' + Class for parsing smart signature tags in libmagic result strings. + + This class is intended for internal use only, but a list of supported 'smart keywords' that may be used + in magic files is available via the SmartSignature.KEYWORDS dictionary: + + from binwalk import SmartSignature + + for tag in SmartSignature.TAGS: + print tag.keyword + ''' + + TAGS = [ + Tag(name='raw-string', keyword='raw-string', type=str, handler='parse_raw_string'), + Tag(name='string-len', keyword='string-len', type=str, handler='parse_string_len'), + Tag(name='math', keyword='math', type=int, handler='parse_math'), + Tag(name='one-of-many', keyword='one-of-many', handler='one_of_many'), + + Tag(name='jump', keyword='jump-to-offset', type=int), + Tag(name='name', keyword='file-name', type=str), + Tag(name='size', keyword='file-size', type=int), + Tag(name='adjust', keyword='offset-adjust', type=int), + Tag(name='delay', keyword='extract-delay', type=str), + Tag(name='year', keyword='file-year', type=str), + Tag(name='epoch', keyword='file-epoch', type=int), + + Tag(name='raw-size', keyword='raw-string-length', type=int), + Tag(name='raw-replace', keyword='raw-replace'), + Tag(name='string-len-replace', keyword='string-len'), + ] + + def __init__(self, filter, ignore_smart_signatures=False): + ''' + Class constructor. + + @filter - Instance of the MagicFilter class. + @ignore_smart_signatures - Set to True to ignore smart signature keywords. + + Returns None. + ''' + self.filter = filter + self.valid = True + self.last_one_of_many = None + self.ignore_smart_signatures = ignore_smart_signatures + + def parse(self, data): + ''' + Parse a given data string for smart signature keywords. If any are found, interpret them and strip them. + + @data - String to parse, as returned by libmagic. + + Returns a dictionary of parsed values. + ''' + results = {} + self.valid = True + + if data: + for tag in self.TAGS: + if tag.handler is not None: + (d, arg) = getattr(self, tag.handler)(data, tag) + if not self.ignore_smart_signatures: + data = d + + if isinstance(arg, type(False)) and arg == False and not self.ignore_smart_signatures: + self.valid = False + elif tag.type is not None: + if self.ignore_smart_signatures: + results[tag.name] = tag.default + else: + results[tag.name] = arg + + if self.ignore_smart_signatures: + results['description'] = data + else: + results['description'] = self.strip_tags(data) + else: + self.valid = False + + results['valid'] = self.valid + + return binwalk.core.module.Result(**results) + + def tag_lookup(self, keyword): + for tag in self.TAGS: + if tag.keyword == keyword: + return tag + return None + + def is_valid(self, data): + ''' + Validates that result data does not contain smart keywords in file-supplied strings. + + @data - Data string to validate. + + Returns True if data is OK. + Returns False if data is not OK. + ''' + # All strings printed from the target file should be placed in strings, else there is + # no way to distinguish between intended keywords and unintended keywords. Get all the + # quoted strings. + quoted_data = get_quoted_strings(data) + + # Check to see if there was any quoted data, and if so, if it contained the keyword starting delimiter + if quoted_data and Tag.TAG_DELIM_START in quoted_data: + # If so, check to see if the quoted data contains any of our keywords. + # If any keywords are found inside of quoted data, consider the keywords invalid. + for tag in self.TAGS: + if tag.tag in quoted_data: + return False + return True + + def safe_string(self, data): + ''' + Strips out quoted data (i.e., data taken directly from a file). + ''' + quoted_string = get_quoted_strings(data) + if quoted_string: + data = data.replace(quoted_string, "") + return data + + def one_of_many(self, data, tag): + ''' + Determines if a given data string is one result of many. + + @data - String result data. + + Returns False if the string result is one of many and should not be displayed. + Returns True if the string result is not one of many and should be displayed. + ''' + if self.filter.valid_result(data): + if self.last_one_of_many is not None and data.startswith(self.last_one_of_many): + return (data, False) + + if tag.tag in data: + # Only match on the data before the first comma, as that is typically unique and static + self.last_one_of_many = data.split(',')[0] + else: + self.last_one_of_many = None + + return (data, True) + + def get_keyword_arg(self, data, tag): + ''' + Retrieves the argument for keywords that specify arguments. + + @data - String result data, as returned by libmagic. + @keyword - Keyword index in KEYWORDS. + + Returns the argument string value on success. + Returns a blank string on failure. + ''' + arg = '' + safe_data = self.safe_string(data) + + if tag.tag in safe_data: + arg = safe_data.split(tag.tag)[1].split(tag.TAG_DELIM_END)[0] + + return (data, arg) + + def get_math_arg(self, data, tag): + ''' + Retrieves the argument for keywords that specifiy mathematical expressions as arguments. + + @data - String result data, as returned by libmagic. + @keyword - Keyword index in KEYWORDS. + + Returns the resulting calculated value. + ''' + value = 0 + + (data, arg) = self.get_keyword_arg(data, tag) + if arg: + value = MathExpression(arg).value + if value is None: + value = 0 + self.valid = False + + return (data, value) + + def parse_math(self, data, tag): + ''' + Replace math keywords with the requested values. + + @data - String result data. + + Returns the modified string result data. + ''' + while tag.keyword in data: + (data, arg) = self.get_keyword_arg(data, tag.name) + v = '%s%s%s' % (tag.keyword, arg, self.TAG_DELIM_END) + math_value = "%d" % self.get_math_arg(data, tag.name) + data = data.replace(v, math_value) + + return (data, None) + + def parse_raw_string(self, data, raw_str_tag): + ''' + Process strings that aren't NULL byte terminated, but for which we know the string length. + This should be called prior to any other smart parsing functions. + + @data - String to parse. + + Returns a parsed string. + ''' + if self.is_valid(data): + raw_str_length_tag = self.tag_lookup('raw-string-length') + raw_replace_tag = self.tag_lookup('raw-replace') + + # Get the raw string keyword arg + (data, raw_string) = self.get_keyword_arg(data, raw_str_tag) + + # Was a raw string keyword specified? + if raw_string: + # Get the raw string length arg + (data, raw_size) = self.get_math_arg(data, raw_str_length_tag) + + # Replace all instances of raw-replace in data with raw_string[:raw_size] + # Also strip out everything after the raw-string keyword, including the keyword itself. + # Failure to do so may (will) result in non-printable characters and this string will be + # marked as invalid when it shouldn't be. + data = data[:data.find(raw_str_tag.tag)].replace(raw_replace_tag.tag, '"' + raw_string[:raw_size] + '"') + + return (data, True) + + def parse_string_len(self, data, str_len_tag): + ''' + Process {string-len} macros. + + @data - String to parse. + + Returns parsed data string. + ''' + if not self.ignore_smart_signatures and self.is_valid(data): + + str_len_replace_tag = self.tag_lookup('string-len-replace') + + # Get the raw string keyword arg + (data, raw_string) = self.get_keyword_arg(data, str_len_tag) + + # Was a string-len keyword specified? + if raw_string: + # Get the string length + try: + string_length = '%d' % len(raw_string) + except KeyboardInterrupt as e: + raise e + except Exception: + string_length = '0' + + # Strip out *everything* after the string-len keyword, including the keyword itself. + # Failure to do so can potentially allow keyword injection from a maliciously created file. + data = data.split(str_len_tag.tag)[0].replace(str_len_replace_tag.tag, string_length) + + return (data, True) + + def strip_tags(self, data): + ''' + Strips the smart tags from a result string. + + @data - String result data. + + Returns a sanitized string. + ''' + if not self.ignore_smart_signatures: + for tag in self.TAGS: + start = data.find(tag.tag) + if start != -1: + end = data[start:].find(tag.TAG_DELIM_END) + if end != -1: + data = data.replace(data[start:start+end+1], "") + return data diff --git a/src/binwalk/magic/bincast b/src/binwalk/magic/bincast new file mode 100644 index 0000000..af0a75a --- /dev/null +++ b/src/binwalk/magic/bincast @@ -0,0 +1,10 @@ +0 belong x Hex: 0x%.8X +#0 string x String: %s +#0 lequad x Little Endian Quad: %lld +#0 bequad x Big Endian Quad: %lld +0 lelong x Little Endian Long: %d +0 belong x Big Endian Long: %d +0 leshort x Little Endian Short: %d +0 beshort x Big Endian Short: %d +0 ledate x Little Endian Date: %s +0 bedate x Big Endian Date: %s diff --git a/src/binwalk/modules/binvis.py b/src/binwalk/modules/binvis.py index 594befe..63254e6 100644 --- a/src/binwalk/modules/binvis.py +++ b/src/binwalk/modules/binvis.py @@ -4,300 +4,300 @@ from binwalk.core.common import BlockFile from binwalk.core.module import Module, Option, Kwarg class Plotter(Module): - ''' - Base class for visualizing binaries in Qt. - Other plotter classes are derived from this. - ''' - VIEW_DISTANCE = 1024 - MAX_2D_PLOT_POINTS = 12500 - MAX_3D_PLOT_POINTS = 25000 - - TITLE = "Binary Visualization" - - CLI = [ - Option(short='3', - long='3D', - kwargs={'axis' : 3, 'enabled' : True}, - description='Generate a 3D binary visualization'), - Option(short='2', - long='2D', - kwargs={'axis' : 2, 'enabled' : True}, - description='Project data points onto 3D cube walls only'), - Option(short='Z', - long='max-points', - type=int, - kwargs={'max_points' : 0}, - description='Set the maximum number of plotted data points'), - Option(short='V', - long='show-grids', - kwargs={'show_grids' : True}, - description='Display the x-y-z grids in the resulting plot'), - ] - - KWARGS = [ - Kwarg(name='axis', default=3), - Kwarg(name='max_points', default=0), - Kwarg(name='show_grids', default=False), - Kwarg(name='enabled', default=False), - ] - - # There isn't really any useful data to print to console. Disable header and result output. - HEADER = None - RESULT = None - - def init(self): - import pyqtgraph.opengl as gl - from pyqtgraph.Qt import QtGui - - self.verbose = self.config.verbose - self.offset = self.config.offset - self.length = self.config.length - self.plane_count = -1 - self.plot_points = None - - if self.axis == 2: - self.MAX_PLOT_POINTS = self.MAX_2D_PLOT_POINTS - self._generate_data_point = self._generate_2d_data_point - elif self.axis == 3: - self.MAX_PLOT_POINTS = self.MAX_3D_PLOT_POINTS - self._generate_data_point = self._generate_3d_data_point - else: - raise Exception("Invalid Plotter axis specified: %d. Must be one of: [2,3]" % self.axis) - - if not self.max_points: - self.max_points = self.MAX_PLOT_POINTS - - self.app = QtGui.QApplication([]) - self.window = gl.GLViewWidget() - self.window.opts['distance'] = self.VIEW_DISTANCE - - if len(self.config.target_files) == 1: - self.window.setWindowTitle(self.config.target_files[0].name) - - def _print(self, message): - ''' - Print console messages. For internal use only. - ''' - if self.verbose: - print(message) - - def _generate_plot_points(self, data_points): - ''' - Generates plot points from a list of data points. - - @data_points - A dictionary containing each unique point and its frequency of occurance. - - Returns a set of plot points. - ''' - total = 0 - min_weight = 0 - weightings = {} - plot_points = {} - - # If the number of data points exceeds the maximum number of allowed data points, use a - # weighting system to eliminate data points that occur less freqently. - if sum(data_points.itervalues()) > self.max_points: - - # First, generate a set of weight values 1 - 10 - for i in range(1, 11): - weightings[i] = 0 - - # Go through every data point and how many times that point occurs - for (point, count) in iterator(data_points): - # For each data point, compare it to each remaining weight value - for w in get_keys(weightings): - - # If the number of times this data point occurred is >= the weight value, - # then increment the weight value. Since weight values are ordered lowest - # to highest, this means that more frequent data points also increment lower - # weight values. Thus, the more high-frequency data points there are, the - # more lower-frequency data points are eliminated. - if count >= w: - weightings[w] += 1 - else: - break - - # Throw out weight values that exceed the maximum number of data points - if weightings[w] > self.max_points: - del weightings[w] - - # If there's only one weight value left, no sense in continuing the loop... - if len(weightings) == 1: - break - - # The least weighted value is our minimum weight - min_weight = min(weightings) - - # Get rid of all data points that occur less frequently than our minimum weight - for point in get_keys(data_points): - if data_points[point] < min_weight: - del data_points[point] - - for point in sorted(data_points, key=data_points.get, reverse=True): - plot_points[point] = data_points[point] - self.result(point=point) - total += 1 - if total >= self.max_points: - break - - return plot_points - - def _generate_data_point(self, data): - ''' - Subclasses must override this to return the appropriate data point. - - @data - A string of data self.axis in length. - - Returns a data point tuple. - ''' - return (0,0,0) - - def _generate_data_points(self, fp): - ''' - Generates a dictionary of data points and their frequency of occurrance. - - @fp - The BlockFile object to generate data points from. - - Returns a dictionary. - ''' - i = 0 - data_points = {} - - self._print("Generating data points for %s" % fp.name) - - # We don't need any extra data from BlockFile - fp.set_block_size(peek=0) - - while True: - (data, dlen) = fp.read_block() - if not data or not dlen: - break - - i = 0 - while (i+(self.axis-1)) < dlen: - point = self._generate_data_point(data[i:i+self.axis]) - if has_key(data_points, point): - data_points[point] += 1 - else: - data_points[point] = 1 - i += 3 - - return data_points - - def _generate_plot(self, plot_points): - import numpy as np - import pyqtgraph.opengl as gl - - nitems = float(len(plot_points)) - - pos = np.empty((nitems, 3)) - size = np.empty((nitems)) - color = np.empty((nitems, 4)) - - i = 0 - for (point, weight) in iterator(plot_points): - r = 0.0 - g = 0.0 - b = 0.0 - - pos[i] = point - frequency_percentage = (weight / nitems) - - # Give points that occur more frequently a brighter color and larger point size. - # Frequency is determined as a percentage of total unique data points. - if frequency_percentage > .005: - size[i] = .20 - r = 1.0 - elif frequency_percentage > .002: - size[i] = .10 - g = 1.0 - r = 1.0 - else: - size[i] = .05 - g = 1.0 - - color[i] = (r, g, b, 1.0) - - i += 1 - - scatter_plot = gl.GLScatterPlotItem(pos=pos, size=size, color=color, pxMode=False) - scatter_plot.translate(-127.5, -127.5, -127.5) - - return scatter_plot - - def plot(self, wait=True): - import pyqtgraph.opengl as gl - - self.window.show() - - if self.show_grids: - xgrid = gl.GLGridItem() - ygrid = gl.GLGridItem() - zgrid = gl.GLGridItem() - - self.window.addItem(xgrid) - self.window.addItem(ygrid) - self.window.addItem(zgrid) - - # Rotate x and y grids to face the correct direction - xgrid.rotate(90, 0, 1, 0) - ygrid.rotate(90, 1, 0, 0) - - # Scale grids to the appropriate dimensions - xgrid.scale(12.8, 12.8, 12.8) - ygrid.scale(12.8, 12.8, 12.8) - zgrid.scale(12.8, 12.8, 12.8) - - for fd in iter(self.next_file, None): - data_points = self._generate_data_points(fd) - - self._print("Generating plot points from %d data points" % len(data_points)) - - self.plot_points = self._generate_plot_points(data_points) - del data_points - - self._print("Generating graph from %d plot points" % len(self.plot_points)) - - self.window.addItem(self._generate_plot(self.plot_points)) - - if wait: - self.wait() - - def wait(self): - from pyqtgraph.Qt import QtCore, QtGui - - t = QtCore.QTimer() - t.start(50) - QtGui.QApplication.instance().exec_() - - def _generate_3d_data_point(self, data): - ''' - Plot data points within a 3D cube. - ''' - return (ord(data[0]), ord(data[1]), ord(data[2])) - - def _generate_2d_data_point(self, data): - ''' - Plot data points projected on each cube face. - ''' - self.plane_count += 1 - if self.plane_count > 5: - self.plane_count = 0 - - if self.plane_count == 0: - return (0, ord(data[0]), ord(data[1])) - elif self.plane_count == 1: - return (ord(data[0]), 0, ord(data[1])) - elif self.plane_count == 2: - return (ord(data[0]), ord(data[1]), 0) - elif self.plane_count == 3: - return (255, ord(data[0]), ord(data[1])) - elif self.plane_count == 4: - return (ord(data[0]), 255, ord(data[1])) - elif self.plane_count == 5: - return (ord(data[0]), ord(data[1]), 255) - - def run(self): - self.plot() - return True + ''' + Base class for visualizing binaries in Qt. + Other plotter classes are derived from this. + ''' + VIEW_DISTANCE = 1024 + MAX_2D_PLOT_POINTS = 12500 + MAX_3D_PLOT_POINTS = 25000 + + TITLE = "Binary Visualization" + + CLI = [ + Option(short='3', + long='3D', + kwargs={'axis' : 3, 'enabled' : True}, + description='Generate a 3D binary visualization'), + Option(short='2', + long='2D', + kwargs={'axis' : 2, 'enabled' : True}, + description='Project data points onto 3D cube walls only'), + Option(short='Z', + long='max-points', + type=int, + kwargs={'max_points' : 0}, + description='Set the maximum number of plotted data points'), + Option(short='V', + long='show-grids', + kwargs={'show_grids' : True}, + description='Display the x-y-z grids in the resulting plot'), + ] + + KWARGS = [ + Kwarg(name='axis', default=3), + Kwarg(name='max_points', default=0), + Kwarg(name='show_grids', default=False), + Kwarg(name='enabled', default=False), + ] + + # There isn't really any useful data to print to console. Disable header and result output. + HEADER = None + RESULT = None + + def init(self): + import pyqtgraph.opengl as gl + from pyqtgraph.Qt import QtGui + + self.verbose = self.config.verbose + self.offset = self.config.offset + self.length = self.config.length + self.plane_count = -1 + self.plot_points = None + + if self.axis == 2: + self.MAX_PLOT_POINTS = self.MAX_2D_PLOT_POINTS + self._generate_data_point = self._generate_2d_data_point + elif self.axis == 3: + self.MAX_PLOT_POINTS = self.MAX_3D_PLOT_POINTS + self._generate_data_point = self._generate_3d_data_point + else: + raise Exception("Invalid Plotter axis specified: %d. Must be one of: [2,3]" % self.axis) + + if not self.max_points: + self.max_points = self.MAX_PLOT_POINTS + + self.app = QtGui.QApplication([]) + self.window = gl.GLViewWidget() + self.window.opts['distance'] = self.VIEW_DISTANCE + + if len(self.config.target_files) == 1: + self.window.setWindowTitle(self.config.target_files[0].name) + + def _print(self, message): + ''' + Print console messages. For internal use only. + ''' + if self.verbose: + print(message) + + def _generate_plot_points(self, data_points): + ''' + Generates plot points from a list of data points. + + @data_points - A dictionary containing each unique point and its frequency of occurance. + + Returns a set of plot points. + ''' + total = 0 + min_weight = 0 + weightings = {} + plot_points = {} + + # If the number of data points exceeds the maximum number of allowed data points, use a + # weighting system to eliminate data points that occur less freqently. + if sum(data_points.itervalues()) > self.max_points: + + # First, generate a set of weight values 1 - 10 + for i in range(1, 11): + weightings[i] = 0 + + # Go through every data point and how many times that point occurs + for (point, count) in iterator(data_points): + # For each data point, compare it to each remaining weight value + for w in get_keys(weightings): + + # If the number of times this data point occurred is >= the weight value, + # then increment the weight value. Since weight values are ordered lowest + # to highest, this means that more frequent data points also increment lower + # weight values. Thus, the more high-frequency data points there are, the + # more lower-frequency data points are eliminated. + if count >= w: + weightings[w] += 1 + else: + break + + # Throw out weight values that exceed the maximum number of data points + if weightings[w] > self.max_points: + del weightings[w] + + # If there's only one weight value left, no sense in continuing the loop... + if len(weightings) == 1: + break + + # The least weighted value is our minimum weight + min_weight = min(weightings) + + # Get rid of all data points that occur less frequently than our minimum weight + for point in get_keys(data_points): + if data_points[point] < min_weight: + del data_points[point] + + for point in sorted(data_points, key=data_points.get, reverse=True): + plot_points[point] = data_points[point] + self.result(point=point) + total += 1 + if total >= self.max_points: + break + + return plot_points + + def _generate_data_point(self, data): + ''' + Subclasses must override this to return the appropriate data point. + + @data - A string of data self.axis in length. + + Returns a data point tuple. + ''' + return (0,0,0) + + def _generate_data_points(self, fp): + ''' + Generates a dictionary of data points and their frequency of occurrance. + + @fp - The BlockFile object to generate data points from. + + Returns a dictionary. + ''' + i = 0 + data_points = {} + + self._print("Generating data points for %s" % fp.name) + + # We don't need any extra data from BlockFile + fp.set_block_size(peek=0) + + while True: + (data, dlen) = fp.read_block() + if not data or not dlen: + break + + i = 0 + while (i+(self.axis-1)) < dlen: + point = self._generate_data_point(data[i:i+self.axis]) + if has_key(data_points, point): + data_points[point] += 1 + else: + data_points[point] = 1 + i += 3 + + return data_points + + def _generate_plot(self, plot_points): + import numpy as np + import pyqtgraph.opengl as gl + + nitems = float(len(plot_points)) + + pos = np.empty((nitems, 3)) + size = np.empty((nitems)) + color = np.empty((nitems, 4)) + + i = 0 + for (point, weight) in iterator(plot_points): + r = 0.0 + g = 0.0 + b = 0.0 + + pos[i] = point + frequency_percentage = (weight / nitems) + + # Give points that occur more frequently a brighter color and larger point size. + # Frequency is determined as a percentage of total unique data points. + if frequency_percentage > .005: + size[i] = .20 + r = 1.0 + elif frequency_percentage > .002: + size[i] = .10 + g = 1.0 + r = 1.0 + else: + size[i] = .05 + g = 1.0 + + color[i] = (r, g, b, 1.0) + + i += 1 + + scatter_plot = gl.GLScatterPlotItem(pos=pos, size=size, color=color, pxMode=False) + scatter_plot.translate(-127.5, -127.5, -127.5) + + return scatter_plot + + def plot(self, wait=True): + import pyqtgraph.opengl as gl + + self.window.show() + + if self.show_grids: + xgrid = gl.GLGridItem() + ygrid = gl.GLGridItem() + zgrid = gl.GLGridItem() + + self.window.addItem(xgrid) + self.window.addItem(ygrid) + self.window.addItem(zgrid) + + # Rotate x and y grids to face the correct direction + xgrid.rotate(90, 0, 1, 0) + ygrid.rotate(90, 1, 0, 0) + + # Scale grids to the appropriate dimensions + xgrid.scale(12.8, 12.8, 12.8) + ygrid.scale(12.8, 12.8, 12.8) + zgrid.scale(12.8, 12.8, 12.8) + + for fd in iter(self.next_file, None): + data_points = self._generate_data_points(fd) + + self._print("Generating plot points from %d data points" % len(data_points)) + + self.plot_points = self._generate_plot_points(data_points) + del data_points + + self._print("Generating graph from %d plot points" % len(self.plot_points)) + + self.window.addItem(self._generate_plot(self.plot_points)) + + if wait: + self.wait() + + def wait(self): + from pyqtgraph.Qt import QtCore, QtGui + + t = QtCore.QTimer() + t.start(50) + QtGui.QApplication.instance().exec_() + + def _generate_3d_data_point(self, data): + ''' + Plot data points within a 3D cube. + ''' + return (ord(data[0]), ord(data[1]), ord(data[2])) + + def _generate_2d_data_point(self, data): + ''' + Plot data points projected on each cube face. + ''' + self.plane_count += 1 + if self.plane_count > 5: + self.plane_count = 0 + + if self.plane_count == 0: + return (0, ord(data[0]), ord(data[1])) + elif self.plane_count == 1: + return (ord(data[0]), 0, ord(data[1])) + elif self.plane_count == 2: + return (ord(data[0]), ord(data[1]), 0) + elif self.plane_count == 3: + return (255, ord(data[0]), ord(data[1])) + elif self.plane_count == 4: + return (ord(data[0]), 255, ord(data[1])) + elif self.plane_count == 5: + return (ord(data[0]), ord(data[1]), 255) + + def run(self): + self.plot() + return True diff --git a/src/binwalk/modules/compression.py b/src/binwalk/modules/compression.py index 5913002..83810e0 100644 --- a/src/binwalk/modules/compression.py +++ b/src/binwalk/modules/compression.py @@ -5,87 +5,87 @@ import binwalk.core.C from binwalk.core.module import Option, Kwarg, Module class Deflate(object): - ''' - Finds and extracts raw deflate compression streams. - ''' + ''' + Finds and extracts raw deflate compression streams. + ''' - ENABLED = False - BLOCK_SIZE = 33*1024 - # To prevent many false positives, only show data that decompressed to a reasonable size and didn't just result in a bunch of NULL bytes - MIN_DECOMP_SIZE = 32*1024 - DESCRIPTION = "Raw deflate compression stream" + ENABLED = False + BLOCK_SIZE = 33*1024 + # To prevent many false positives, only show data that decompressed to a reasonable size and didn't just result in a bunch of NULL bytes + MIN_DECOMP_SIZE = 32*1024 + DESCRIPTION = "Raw deflate compression stream" - TINFL_NAME = "tinfl" + TINFL_NAME = "tinfl" - TINFL_FUNCTIONS = [ - binwalk.core.C.Function(name="is_deflated", type=int), - binwalk.core.C.Function(name="inflate_raw_file", type=None), - ] + TINFL_FUNCTIONS = [ + binwalk.core.C.Function(name="is_deflated", type=int), + binwalk.core.C.Function(name="inflate_raw_file", type=None), + ] - def __init__(self, module): - self.module = module + def __init__(self, module): + self.module = module - # The tinfl library is built and installed with binwalk - self.tinfl = binwalk.core.C.Library(self.TINFL_NAME, self.TINFL_FUNCTIONS) - - # Add an extraction rule - if self.module.extractor.enabled: - self.module.extractor.add_rule(regex='^%s' % self.DESCRIPTION.lower(), extension="deflate", cmd=self._extractor) + # The tinfl library is built and installed with binwalk + self.tinfl = binwalk.core.C.Library(self.TINFL_NAME, self.TINFL_FUNCTIONS) + + # Add an extraction rule + if self.module.extractor.enabled: + self.module.extractor.add_rule(regex='^%s' % self.DESCRIPTION.lower(), extension="deflate", cmd=self._extractor) - def _extractor(self, file_name): - out_file = os.path.splitext(file_name)[0] - self.tinfl.inflate_raw_file(file_name, out_file) + def _extractor(self, file_name): + out_file = os.path.splitext(file_name)[0] + self.tinfl.inflate_raw_file(file_name, out_file) - def decompress(self, data): - description = None + def decompress(self, data): + description = None - decomp_size = self.tinfl.is_deflated(data, len(data), 0) - if decomp_size >= self.MIN_DECOMP_SIZE: - description = self.DESCRIPTION + ', uncompressed size >= %d' % decomp_size + decomp_size = self.tinfl.is_deflated(data, len(data), 0) + if decomp_size >= self.MIN_DECOMP_SIZE: + description = self.DESCRIPTION + ', uncompressed size >= %d' % decomp_size - return description + return description class RawCompression(Module): - DECOMPRESSORS = { - 'deflate' : Deflate, - } + DECOMPRESSORS = { + 'deflate' : Deflate, + } - TITLE = 'Raw Compression' + TITLE = 'Raw Compression' - CLI = [ - Option(short='X', - long='deflate', - kwargs={'enabled' : True, 'decompressor_class' : 'deflate'}, - description='Scan for raw deflate compression streams'), - ] + CLI = [ + Option(short='X', + long='deflate', + kwargs={'enabled' : True, 'decompressor_class' : 'deflate'}, + description='Scan for raw deflate compression streams'), + ] - KWARGS = [ - Kwarg(name='enabled', default=False), - Kwarg(name='decompressor_class', default=None), - ] + KWARGS = [ + Kwarg(name='enabled', default=False), + Kwarg(name='decompressor_class', default=None), + ] - def init(self): - self.decompressor = self.DECOMPRESSORS[self.decompressor_class](self) + def init(self): + self.decompressor = self.DECOMPRESSORS[self.decompressor_class](self) - def run(self): - for fp in iter(self.next_file, None): + def run(self): + for fp in iter(self.next_file, None): - fp.set_block_size(peek=self.decompressor.BLOCK_SIZE) + fp.set_block_size(peek=self.decompressor.BLOCK_SIZE) - self.header() + self.header() - while True: - (data, dlen) = fp.read_block() - if not data: - break + while True: + (data, dlen) = fp.read_block() + if not data: + break - for i in range(0, dlen): - description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE]) - if description: - self.result(description=description, file=fp, offset=fp.tell()-dlen+i) + for i in range(0, dlen): + description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE]) + if description: + self.result(description=description, file=fp, offset=fp.tell()-dlen+i) - self.status.completed = fp.tell() - fp.offset + self.status.completed = fp.tell() - fp.offset - self.footer() + self.footer() diff --git a/src/binwalk/modules/entropy.py b/src/binwalk/modules/entropy.py index e3720e3..f519725 100644 --- a/src/binwalk/modules/entropy.py +++ b/src/binwalk/modules/entropy.py @@ -6,192 +6,192 @@ from binwalk.core.module import Module, Option, Kwarg class Entropy(Module): - XLABEL = 'Offset' - YLABEL = 'Entropy' - - XUNITS = 'B' - YUNITS = 'E' - - FILE_WIDTH = 1024 - FILE_FORMAT = 'png' - - COLORS = ['r', 'g', 'c', 'b', 'm'] - - DEFAULT_BLOCK_SIZE = 1024 - - TITLE = "Entropy" - ORDER = 8 - - CLI = [ - Option(short='E', - long='entropy', - kwargs={'enabled' : True}, - description='Calculate file entropy'), - Option(short='J', - long='save-plot', - kwargs={'save_plot' : True}, - description='Save plot as a PNG'), - Option(short='N', - long='no-plot', - kwargs={'do_plot' : False}, - description='Do not generate an entropy plot graph'), - Option(short='Q', - long='no-legend', - kwargs={'show_legend' : False}, - description='Omit the legend from the entropy plot graph'), - ] - - KWARGS = [ - Kwarg(name='enabled', default=False), - Kwarg(name='save_plot', default=False), - Kwarg(name='display_results', default=True), - Kwarg(name='do_plot', default=True), - Kwarg(name='show_legend', default=True), - Kwarg(name='block_size', default=0), - ] - - # Run this module last so that it can process all other module's results and overlay them on the entropy graph - PRIORITY = 0 - - def init(self): - self.HEADER[-1] = "ENTROPY" - self.algorithm = self.shannon - self.max_description_length = 0 - self.file_markers = {} - - # Get a list of all other module's results to mark on the entropy graph - for (module, obj) in iterator(self.modules): - for result in obj.results: - if result.file and result.description: - description = result.description.split(',')[0] - - if not has_key(self.file_markers, result.file.name): - self.file_markers[result.file.name] = [] - - if len(description) > self.max_description_length: - self.max_description_length = len(description) - - self.file_markers[result.file.name].append((result.offset, description)) - - # If other modules have been run and they produced results, don't spam the terminal with entropy results - if self.file_markers: - self.display_results = False - - if not self.block_size: - if self.config.block: - self.block_size = self.config.block - else: - self.block_size = self.DEFAULT_BLOCK_SIZE - - def run(self): - from pyqtgraph.Qt import QtGui - - for fp in iter(self.next_file, None): - - if self.display_results: - self.header() - - self.calculate_file_entropy(fp) - - if self.display_results: - self.footer() - - if self.do_plot and not self.save_plot: - QtGui.QApplication.instance().exec_() - - def calculate_file_entropy(self, fp): - # Clear results from any previously analyzed files - self.clear(results=True) - - while True: - file_offset = fp.tell() - - (data, dlen) = fp.read_block() - if not data: - break - - i = 0 - while i < dlen: - entropy = self.algorithm(data[i:i+self.block_size]) - r = self.result(offset=(file_offset + i), file=fp, entropy=entropy, description=("%f" % entropy), display=self.display_results) - i += self.block_size - - if self.do_plot: - self.plot_entropy(fp.name) - - def shannon(self, data): - ''' - Performs a Shannon entropy analysis on a given block of data. - ''' - entropy = 0 - - if data: - for x in range(0, 256): - p_x = float(data.count(chr(x))) / len(data) - if p_x > 0: - entropy += - p_x*math.log(p_x, 2) - - return (entropy / 8) - - def gzip(self, data, truncate=True): - ''' - Performs an entropy analysis based on zlib compression ratio. - This is faster than the shannon entropy analysis, but not as accurate. - ''' - # Entropy is a simple ratio of: <zlib compressed size> / <original size> - e = float(float(len(zlib.compress(data, 9))) / float(len(data))) - - if truncate and e > 1.0: - e = 1.0 - - return e - - def plot_entropy(self, fname): - import numpy as np - import pyqtgraph as pg - - i = 0 - x = [] - y = [] - plotted_colors = {} - - for r in self.results: - x.append(r.offset) - y.append(r.entropy) - - plt = pg.plot(title=fname, clear=True) - - if self.show_legend and has_key(self.file_markers, fname): - plt.addLegend(size=(self.max_description_length*10, 0)) - - for (offset, description) in self.file_markers[fname]: - # If this description has already been plotted at a different offset, we need to - # use the same color for the marker, but set the description to None to prevent - # duplicate entries in the graph legend. - # - # Else, get the next color and use it to mark descriptions of this type. - if has_key(plotted_colors, description): - color = plotted_colors[description] - description = None - else: - color = self.COLORS[i] - plotted_colors[description] = color - - i += 1 - if i >= len(self.COLORS): - i = 0 - - plt.plot(x=[offset,offset], y=[0,1.1], name=description, pen=pg.mkPen(color, width=2.5)) - - # Plot data points - plt.plot(x, y, pen='y') - - # TODO: legend is not displayed properly when saving plots to disk - if self.save_plot: - exporter = pg.exporters.ImageExporter.ImageExporter(plt.plotItem) - exporter.parameters()['width'] = self.FILE_WIDTH - exporter.export(binwalk.core.common.unique_file_name(os.path.basename(fname), self.FILE_FORMAT)) - else: - plt.setLabel('left', self.YLABEL, units=self.YUNITS) - plt.setLabel('bottom', self.XLABEL, units=self.XUNITS) + XLABEL = 'Offset' + YLABEL = 'Entropy' + + XUNITS = 'B' + YUNITS = 'E' + + FILE_WIDTH = 1024 + FILE_FORMAT = 'png' + + COLORS = ['r', 'g', 'c', 'b', 'm'] + + DEFAULT_BLOCK_SIZE = 1024 + + TITLE = "Entropy" + ORDER = 8 + + CLI = [ + Option(short='E', + long='entropy', + kwargs={'enabled' : True}, + description='Calculate file entropy'), + Option(short='J', + long='save-plot', + kwargs={'save_plot' : True}, + description='Save plot as a PNG'), + Option(short='N', + long='no-plot', + kwargs={'do_plot' : False}, + description='Do not generate an entropy plot graph'), + Option(short='Q', + long='no-legend', + kwargs={'show_legend' : False}, + description='Omit the legend from the entropy plot graph'), + ] + + KWARGS = [ + Kwarg(name='enabled', default=False), + Kwarg(name='save_plot', default=False), + Kwarg(name='display_results', default=True), + Kwarg(name='do_plot', default=True), + Kwarg(name='show_legend', default=True), + Kwarg(name='block_size', default=0), + ] + + # Run this module last so that it can process all other module's results and overlay them on the entropy graph + PRIORITY = 0 + + def init(self): + self.HEADER[-1] = "ENTROPY" + self.algorithm = self.shannon + self.max_description_length = 0 + self.file_markers = {} + + # Get a list of all other module's results to mark on the entropy graph + for (module, obj) in iterator(self.modules): + for result in obj.results: + if result.file and result.description: + description = result.description.split(',')[0] + + if not has_key(self.file_markers, result.file.name): + self.file_markers[result.file.name] = [] + + if len(description) > self.max_description_length: + self.max_description_length = len(description) + + self.file_markers[result.file.name].append((result.offset, description)) + + # If other modules have been run and they produced results, don't spam the terminal with entropy results + if self.file_markers: + self.display_results = False + + if not self.block_size: + if self.config.block: + self.block_size = self.config.block + else: + self.block_size = self.DEFAULT_BLOCK_SIZE + + def run(self): + from pyqtgraph.Qt import QtGui + + for fp in iter(self.next_file, None): + + if self.display_results: + self.header() + + self.calculate_file_entropy(fp) + + if self.display_results: + self.footer() + + if self.do_plot and not self.save_plot: + QtGui.QApplication.instance().exec_() + + def calculate_file_entropy(self, fp): + # Clear results from any previously analyzed files + self.clear(results=True) + + while True: + file_offset = fp.tell() + + (data, dlen) = fp.read_block() + if not data: + break + + i = 0 + while i < dlen: + entropy = self.algorithm(data[i:i+self.block_size]) + r = self.result(offset=(file_offset + i), file=fp, entropy=entropy, description=("%f" % entropy), display=self.display_results) + i += self.block_size + + if self.do_plot: + self.plot_entropy(fp.name) + + def shannon(self, data): + ''' + Performs a Shannon entropy analysis on a given block of data. + ''' + entropy = 0 + + if data: + for x in range(0, 256): + p_x = float(data.count(chr(x))) / len(data) + if p_x > 0: + entropy += - p_x*math.log(p_x, 2) + + return (entropy / 8) + + def gzip(self, data, truncate=True): + ''' + Performs an entropy analysis based on zlib compression ratio. + This is faster than the shannon entropy analysis, but not as accurate. + ''' + # Entropy is a simple ratio of: <zlib compressed size> / <original size> + e = float(float(len(zlib.compress(data, 9))) / float(len(data))) + + if truncate and e > 1.0: + e = 1.0 + + return e + + def plot_entropy(self, fname): + import numpy as np + import pyqtgraph as pg + + i = 0 + x = [] + y = [] + plotted_colors = {} + + for r in self.results: + x.append(r.offset) + y.append(r.entropy) + + plt = pg.plot(title=fname, clear=True) + + if self.show_legend and has_key(self.file_markers, fname): + plt.addLegend(size=(self.max_description_length*10, 0)) + + for (offset, description) in self.file_markers[fname]: + # If this description has already been plotted at a different offset, we need to + # use the same color for the marker, but set the description to None to prevent + # duplicate entries in the graph legend. + # + # Else, get the next color and use it to mark descriptions of this type. + if has_key(plotted_colors, description): + color = plotted_colors[description] + description = None + else: + color = self.COLORS[i] + plotted_colors[description] = color + + i += 1 + if i >= len(self.COLORS): + i = 0 + + plt.plot(x=[offset,offset], y=[0,1.1], name=description, pen=pg.mkPen(color, width=2.5)) + + # Plot data points + plt.plot(x, y, pen='y') + + # TODO: legend is not displayed properly when saving plots to disk + if self.save_plot: + exporter = pg.exporters.ImageExporter.ImageExporter(plt.plotItem) + exporter.parameters()['width'] = self.FILE_WIDTH + exporter.export(binwalk.core.common.unique_file_name(os.path.basename(fname), self.FILE_FORMAT)) + else: + plt.setLabel('left', self.YLABEL, units=self.YUNITS) + plt.setLabel('bottom', self.XLABEL, units=self.XUNITS) diff --git a/src/binwalk/modules/extractor.py b/src/binwalk/modules/extractor.py index 46b7831..4ff442f 100644 --- a/src/binwalk/modules/extractor.py +++ b/src/binwalk/modules/extractor.py @@ -9,560 +9,560 @@ from binwalk.core.module import Module, Option, Kwarg from binwalk.core.common import file_size, unique_file_name, BlockFile class Extractor(Module): - ''' - Extractor class, responsible for extracting files from the target file and executing external applications, if requested. - ''' - # Extract rules are delimited with a colon. - # <case insensitive matching string>:<file extension>[:<command to run>] - RULE_DELIM = ':' - - # Comments in the extract.conf files start with a pound - COMMENT_DELIM ='#' - - # Place holder for the extracted file name in the command - FILE_NAME_PLACEHOLDER = '%e' - - TITLE = 'Extraction' - ORDER = 9 - PRIMARY = False - - CLI = [ - Option(short='e', - long='extract', - kwargs={'load_default_rules' : True, 'enabled' : True}, - description='Automatically extract known file types'), - Option(short='D', - long='dd', - type=[], - dtype='type:ext:cmd', - kwargs={'manual_rules' : [], 'enabled' : True}, - description='Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>'), - Option(short='M', - long='matryoshka', - kwargs={'matryoshka' : 8}, - description='Recursively scan extracted files'), - Option(short='d', - long='depth', - type=int, - kwargs={'matryoshka' : 0}, - description='Limit matryoshka recursion depth (default: 8 levels deep)'), - Option(short='j', - long='max-size', - type=int, - kwargs={'max_size' : 0}, - description='Limit the size of each extracted file'), - Option(short='r', - long='rm', - kwargs={'remove_after_execute' : True}, - description='Cleanup extracted / zero-size files after extraction'), - Option(short='z', - long='carve', - kwargs={'run_extractors' : False}, - description="Carve data from files, but don't execute extraction utilities"), - ] - - KWARGS = [ - Kwarg(name='max_size', default=None), - Kwarg(name='remove_after_execute', default=False), - Kwarg(name='load_default_rules', default=False), - Kwarg(name='run_extractors', default=True), - Kwarg(name='manual_rules', default=[]), - Kwarg(name='matryoshka', default=0), - Kwarg(name='enabled', default=False), - ] - - def load(self): - # Holds a list of extraction rules loaded either from a file or when manually specified. - self.extract_rules = [] - - if self.load_default_rules: - self.load_defaults() - - for manual_rule in self.manual_rules: - self.add_rule(manual_rule) - - def reset(self): - # Holds a list of pending files that should be scanned; only populated if self.matryoshka == True - self.pending = [] - # Holds a dictionary of extraction directories created for each scanned file. - self.extraction_directories = {} - # Holds a dictionary of the last directory listing for a given directory; used for identifying - # newly created/extracted files that need to be appended to self.pending. - self.last_directory_listing = {} - # Set to the directory path of the first extracted directory; this allows us to track recursion depth. - self.base_recursion_dir = "" - - def callback(self, r): - # Make sure the file attribute is set to a compatible instance of binwalk.core.common.BlockFile - try: - r.file.size - except KeyboardInterrupt as e: - pass - except Exception as e: - return - - if not r.size: - size = r.file.size - r.offset - else: - size = r.size - - # Only extract valid results - if r.valid: - # Do the extraction - (extraction_directory, dd_file) = self.extract(r.offset, r.description, r.file.name, size, r.name) - - # If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file - if extraction_directory and dd_file: - # Get the full path to the dd'd file - dd_file_path = os.path.join(extraction_directory, dd_file) - - # Do a directory listing of the output directory - directory_listing = set(os.listdir(extraction_directory)) - - # If this is a newly created output directory, self.last_directory_listing won't have a record of it. - # If we've extracted other files to this directory before, it will. - if not has_key(self.last_directory_listing, extraction_directory): - self.last_directory_listing[extraction_directory] = set() - - # Loop through a list of newly created files (i.e., files that weren't listed in the last directory listing) - for f in directory_listing.difference(self.last_directory_listing[extraction_directory]): - # Build the full file path and add it to the extractor results - file_path = os.path.join(extraction_directory, f) - real_file_path = os.path.realpath(file_path) - self.result(description=file_path, display=False) - - # If recursion was specified, and the file is not the same one we just dd'd, and if it is not a directory - if self.matryoshka and file_path != dd_file_path and not os.path.isdir(file_path): - # If the recursion level of this file is less than or equal to our desired recursion level - if len(real_file_path.split(self.base_recursion_dir)[1].split(os.path.sep)) <= self.matryoshka: - # Add the file to our list of pending files - self.pending.append(file_path) - - # Update the last directory listing for the next time we extract a file to this same output directory - self.last_directory_listing[extraction_directory] = directory_listing - - def append_rule(self, r): - self.extract_rules.append(r.copy()) - - def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None): - ''' - Adds a set of rules to the extraction rule list. - - @txtrule - Rule string, or list of rule strings, in the format <regular expression>:<file extension>[:<command to run>] - @regex - If rule string is not specified, this is the regular expression string to use. - @extension - If rule string is not specified, this is the file extension to use. - @cmd - If rule string is not specified, this is the command to run. - Alternatively a callable object may be specified, which will be passed one argument: the path to the file to extract. - - Returns None. - ''' - rules = [] - match = False - r = { - 'extension' : '', - 'cmd' : '', - 'regex' : None - } - - # Process single explicitly specified rule - if not txtrule and regex and extension: - r['extension'] = extension - r['regex'] = re.compile(regex) - if cmd: - r['cmd'] = cmd - - self.append_rule(r) - return - - # Process rule string, or list of rule strings - if not isinstance(txtrule, type([])): - rules = [txtrule] - else: - rules = txtrule - - for rule in rules: - r['cmd'] = '' - r['extension'] = '' - - try: - values = self._parse_rule(rule) - match = values[0] - r['regex'] = re.compile(values[0]) - r['extension'] = values[1] - r['cmd'] = values[2] - except KeyboardInterrupt as e: - raise e - except Exception: - pass - - # Verify that the match string was retrieved. - if match: - self.append_rule(r) - - def remove_rule(self, text): - ''' - Remove all rules that match a specified text. - - @text - The text to match against. - - Returns the number of rules removed. - ''' - rm = [] - - for i in range(0, len(self.extract_rules)): - if self.extract_rules[i]['regex'].match(text): - rm.append(i) - - for i in rm: - self.extract_rules.pop(i) - - return len(rm) - - def clear_rules(self): - ''' - Deletes all extraction rules. - - Returns None. - ''' - self.extract_rules = [] - - def get_rules(self): - ''' - Returns a list of all extraction rules. - ''' - return self.extract_rules - - def load_from_file(self, fname): - ''' - Loads extraction rules from the specified file. - - @fname - Path to the extraction rule file. - - Returns None. - ''' - try: - # Process each line from the extract file, ignoring comments - with open(fname, 'r') as f: - for rule in f.readlines(): - self.add_rule(rule.split(self.COMMENT_DELIM, 1)[0]) - except KeyboardInterrupt as e: - raise e - except Exception as e: - raise Exception("Extractor.load_from_file failed to load file '%s': %s" % (fname, str(e))) - - def load_defaults(self): - ''' - Loads default extraction rules from the user and system extract.conf files. - - Returns None. - ''' - # Load the user extract file first to ensure its rules take precedence. - extract_files = [ - self.config.settings.paths['user'][self.config.settings.EXTRACT_FILE], - self.config.settings.paths['system'][self.config.settings.EXTRACT_FILE], - ] - - for extract_file in extract_files: - try: - self.load_from_file(extract_file) - except KeyboardInterrupt as e: - raise e - except Exception as e: - if self.config.verbose: - raise Exception("Extractor.load_defaults failed to load file '%s': %s" % (extract_file, str(e))) - - def build_output_directory(self, path): - ''' - Set the output directory for extracted files. - - @path - The path to the file that data will be extracted from. - - Returns None. - ''' - # If we have not already created an output directory for this target file, create one now - if not has_key(self.extraction_directories, path): - output_directory = os.path.join(os.path.dirname(path), unique_file_name('_' + os.path.basename(path), extension='extracted')) - - if not os.path.exists(output_directory): - os.mkdir(output_directory) - - self.extraction_directories[path] = output_directory - # Else, just use the already created directory - else: - output_directory = self.extraction_directories[path] - - # Set the initial base extraction directory for later determining the level of recusion - if not self.base_recursion_dir: - self.base_recursion_dir = os.path.realpath(output_directory) + os.path.sep - - return output_directory - - - def cleanup_extracted_files(self, tf=None): - ''' - Set the action to take after a file is extracted. - - @tf - If set to True, extracted files will be cleaned up after running a command against them. - If set to False, extracted files will not be cleaned up after running a command against them. - If set to None or not specified, the current setting will not be changed. - - Returns the current cleanup status (True/False). - ''' - if tf is not None: - self.remove_after_execute = tf - - return self.remove_after_execute - - def extract(self, offset, description, file_name, size, name=None): - ''' - Extract an embedded file from the target file, if it matches an extract rule. - Called automatically by Binwalk.scan(). - - @offset - Offset inside the target file to begin the extraction. - @description - Description of the embedded file to extract, as returned by libmagic. - @file_name - Path to the target file. - @size - Number of bytes to extract. - @name - Name to save the file as. - - Returns the name of the extracted file (blank string if nothing was extracted). - ''' - fname = '' - cleanup_extracted_fname = True - original_dir = os.getcwd() - rules = self._match(description) - file_path = os.path.realpath(file_name) - - # No extraction rules for this file - if not rules: - return (None, None) - - output_directory = self.build_output_directory(file_name) - - # Extract to end of file if no size was specified - if not size: - size = file_size(file_path) - offset - - if os.path.isfile(file_path): - os.chdir(output_directory) - - # Loop through each extraction rule until one succeeds - for i in range(0, len(rules)): - rule = rules[i] - - # Copy out the data to disk, if we haven't already - fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name) - - # If there was a command specified for this rule, try to execute it. - # If execution fails, the next rule will be attempted. - if rule['cmd']: - - # Many extraction utilities will extract the file to a new file, just without - # the file extension (i.e., myfile.7z -> myfile). If the presumed resulting - # file name already exists before executing the extract command, do not attempt - # to clean it up even if its resulting file size is 0. - if self.remove_after_execute: - extracted_fname = os.path.splitext(fname)[0] - if os.path.exists(extracted_fname): - cleanup_extracted_fname = False - - # Execute the specified command against the extracted file - if self.run_extractors: - extract_ok = self.execute(rule['cmd'], fname) - else: - extract_ok = True - - # Only clean up files if remove_after_execute was specified - if extract_ok and self.remove_after_execute: - - # Remove the original file that we extracted - try: - os.unlink(fname) - except KeyboardInterrupt as e: - raise e - except Exception as e: - pass - - # If the command worked, assume it removed the file extension from the extracted file - # If the extracted file name file exists and is empty, remove it - if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0: - try: - os.unlink(extracted_fname) - except KeyboardInterrupt as e: - raise e - except Exception as e: - pass - - # If the command executed OK, don't try any more rules - if extract_ok: - break - # Else, remove the extracted file if this isn't the last rule in the list. - # If it is the last rule, leave the file on disk for the user to examine. - elif i != (len(rules)-1): - try: - os.unlink(fname) - except KeyboardInterrupt as e: - raise e - except Exception as e: - pass - - # If there was no command to execute, just use the first rule - else: - break - - os.chdir(original_dir) - - return (output_directory, fname) - - def _entry_offset(self, index, entries, description): - ''' - Gets the offset of the first entry that matches the description. - - @index - Index into the entries list to begin searching. - @entries - Dictionary of result entries. - @description - Case insensitive description. - - Returns the offset, if a matching description is found. - Returns -1 if a matching description is not found. - ''' - description = description.lower() - - for (offset, infos) in entries[index:]: - for info in infos: - if info['description'].lower().startswith(description): - return offset - return -1 - - def _match(self, description): - ''' - Check to see if the provided description string matches an extract rule. - Called internally by self.extract(). - - @description - Description string to check. - - Returns the associated rule dictionary if a match is found. - Returns None if no match is found. - ''' - rules = [] - description = description.lower() - - for rule in self.extract_rules: - if rule['regex'].search(description): - rules.append(rule) - return rules - - def _parse_rule(self, rule): - ''' - Parses an extraction rule. - - @rule - Rule string. - - Returns an array of ['<case insensitive matching string>', '<file extension>', '<command to run>']. - ''' - return rule.strip().split(self.RULE_DELIM, 2) - - def _dd(self, file_name, offset, size, extension, output_file_name=None): - ''' - Extracts a file embedded inside the target file. - - @file_name - Path to the target file. - @offset - Offset inside the target file where the embedded file begins. - @size - Number of bytes to extract. - @extension - The file exension to assign to the extracted file on disk. - @output_file_name - The requested name of the output file. - - Returns the extracted file name. - ''' - total_size = 0 - # Default extracted file name is <hex offset>.<extension> - default_bname = "%X" % offset - - if self.max_size and size > self.max_size: - size = self.max_size - - if not output_file_name or output_file_name is None: - bname = default_bname - else: - # Strip the output file name of invalid/dangerous characters (like file paths) - bname = os.path.basename(output_file_name) - - fname = unique_file_name(bname, extension) - - try: - # Open the target file and seek to the offset - fdin = self.config.open_file(file_name, length=size, offset=offset) - - # Open the output file - try: - fdout = BlockFile(fname, 'w') - except KeyboardInterrupt as e: - raise e - except Exception as e: - # Fall back to the default name if the requested name fails - fname = unique_file_name(default_bname, extension) - fdout = BlockFile(fname, 'w') - - while total_size < size: - (data, dlen) = fdin.read_block() - if not data: - break - else: - fdout.write(str2bytes(data[:dlen])) - total_size += dlen - - # Cleanup - fdout.close() - fdin.close() - except KeyboardInterrupt as e: - raise e - except Exception as e: - raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e))) - - return fname - - def execute(self, cmd, fname): - ''' - Execute a command against the specified file. - - @cmd - Command to execute. - @fname - File to run command against. - - Returns True on success, False on failure. - ''' - tmp = None - retval = True - - try: - if callable(cmd): - try: - cmd(fname) - except KeyboardInterrupt as e: - raise e - except Exception as e: - sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e))) - else: - # If not in verbose mode, create a temporary file to redirect stdout and stderr to - if not self.config.verbose: - tmp = tempfile.TemporaryFile() - - # Replace all instances of FILE_NAME_PLACEHOLDER in the command with fname - cmd = cmd.replace(self.FILE_NAME_PLACEHOLDER, fname) - - # Execute. - if subprocess.call(shlex.split(cmd), stdout=tmp, stderr=tmp) != 0: - retval = False - except KeyboardInterrupt as e: - raise e - except Exception as e: - # Silently ignore no such file or directory errors. Why? Because these will inevitably be raised when - # making the switch to the new firmware mod kit directory structure. We handle this elsewhere, but it's - # annoying to see this spammed out to the console every time. - if e.errno != 2: - sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e))) - retval = False - - if tmp is not None: - tmp.close() - - return retval - + ''' + Extractor class, responsible for extracting files from the target file and executing external applications, if requested. + ''' + # Extract rules are delimited with a colon. + # <case insensitive matching string>:<file extension>[:<command to run>] + RULE_DELIM = ':' + + # Comments in the extract.conf files start with a pound + COMMENT_DELIM ='#' + + # Place holder for the extracted file name in the command + FILE_NAME_PLACEHOLDER = '%e' + + TITLE = 'Extraction' + ORDER = 9 + PRIMARY = False + + CLI = [ + Option(short='e', + long='extract', + kwargs={'load_default_rules' : True, 'enabled' : True}, + description='Automatically extract known file types'), + Option(short='D', + long='dd', + type=[], + dtype='type:ext:cmd', + kwargs={'manual_rules' : [], 'enabled' : True}, + description='Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>'), + Option(short='M', + long='matryoshka', + kwargs={'matryoshka' : 8}, + description='Recursively scan extracted files'), + Option(short='d', + long='depth', + type=int, + kwargs={'matryoshka' : 0}, + description='Limit matryoshka recursion depth (default: 8 levels deep)'), + Option(short='j', + long='max-size', + type=int, + kwargs={'max_size' : 0}, + description='Limit the size of each extracted file'), + Option(short='r', + long='rm', + kwargs={'remove_after_execute' : True}, + description='Cleanup extracted / zero-size files after extraction'), + Option(short='z', + long='carve', + kwargs={'run_extractors' : False}, + description="Carve data from files, but don't execute extraction utilities"), + ] + + KWARGS = [ + Kwarg(name='max_size', default=None), + Kwarg(name='remove_after_execute', default=False), + Kwarg(name='load_default_rules', default=False), + Kwarg(name='run_extractors', default=True), + Kwarg(name='manual_rules', default=[]), + Kwarg(name='matryoshka', default=0), + Kwarg(name='enabled', default=False), + ] + + def load(self): + # Holds a list of extraction rules loaded either from a file or when manually specified. + self.extract_rules = [] + + if self.load_default_rules: + self.load_defaults() + + for manual_rule in self.manual_rules: + self.add_rule(manual_rule) + + def reset(self): + # Holds a list of pending files that should be scanned; only populated if self.matryoshka == True + self.pending = [] + # Holds a dictionary of extraction directories created for each scanned file. + self.extraction_directories = {} + # Holds a dictionary of the last directory listing for a given directory; used for identifying + # newly created/extracted files that need to be appended to self.pending. + self.last_directory_listing = {} + # Set to the directory path of the first extracted directory; this allows us to track recursion depth. + self.base_recursion_dir = "" + + def callback(self, r): + # Make sure the file attribute is set to a compatible instance of binwalk.core.common.BlockFile + try: + r.file.size + except KeyboardInterrupt as e: + pass + except Exception as e: + return + + if not r.size: + size = r.file.size - r.offset + else: + size = r.size + + # Only extract valid results + if r.valid: + # Do the extraction + (extraction_directory, dd_file) = self.extract(r.offset, r.description, r.file.name, size, r.name) + + # If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file + if extraction_directory and dd_file: + # Get the full path to the dd'd file + dd_file_path = os.path.join(extraction_directory, dd_file) + + # Do a directory listing of the output directory + directory_listing = set(os.listdir(extraction_directory)) + + # If this is a newly created output directory, self.last_directory_listing won't have a record of it. + # If we've extracted other files to this directory before, it will. + if not has_key(self.last_directory_listing, extraction_directory): + self.last_directory_listing[extraction_directory] = set() + + # Loop through a list of newly created files (i.e., files that weren't listed in the last directory listing) + for f in directory_listing.difference(self.last_directory_listing[extraction_directory]): + # Build the full file path and add it to the extractor results + file_path = os.path.join(extraction_directory, f) + real_file_path = os.path.realpath(file_path) + self.result(description=file_path, display=False) + + # If recursion was specified, and the file is not the same one we just dd'd, and if it is not a directory + if self.matryoshka and file_path != dd_file_path and not os.path.isdir(file_path): + # If the recursion level of this file is less than or equal to our desired recursion level + if len(real_file_path.split(self.base_recursion_dir)[1].split(os.path.sep)) <= self.matryoshka: + # Add the file to our list of pending files + self.pending.append(file_path) + + # Update the last directory listing for the next time we extract a file to this same output directory + self.last_directory_listing[extraction_directory] = directory_listing + + def append_rule(self, r): + self.extract_rules.append(r.copy()) + + def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None): + ''' + Adds a set of rules to the extraction rule list. + + @txtrule - Rule string, or list of rule strings, in the format <regular expression>:<file extension>[:<command to run>] + @regex - If rule string is not specified, this is the regular expression string to use. + @extension - If rule string is not specified, this is the file extension to use. + @cmd - If rule string is not specified, this is the command to run. + Alternatively a callable object may be specified, which will be passed one argument: the path to the file to extract. + + Returns None. + ''' + rules = [] + match = False + r = { + 'extension' : '', + 'cmd' : '', + 'regex' : None + } + + # Process single explicitly specified rule + if not txtrule and regex and extension: + r['extension'] = extension + r['regex'] = re.compile(regex) + if cmd: + r['cmd'] = cmd + + self.append_rule(r) + return + + # Process rule string, or list of rule strings + if not isinstance(txtrule, type([])): + rules = [txtrule] + else: + rules = txtrule + + for rule in rules: + r['cmd'] = '' + r['extension'] = '' + + try: + values = self._parse_rule(rule) + match = values[0] + r['regex'] = re.compile(values[0]) + r['extension'] = values[1] + r['cmd'] = values[2] + except KeyboardInterrupt as e: + raise e + except Exception: + pass + + # Verify that the match string was retrieved. + if match: + self.append_rule(r) + + def remove_rule(self, text): + ''' + Remove all rules that match a specified text. + + @text - The text to match against. + + Returns the number of rules removed. + ''' + rm = [] + + for i in range(0, len(self.extract_rules)): + if self.extract_rules[i]['regex'].match(text): + rm.append(i) + + for i in rm: + self.extract_rules.pop(i) + + return len(rm) + + def clear_rules(self): + ''' + Deletes all extraction rules. + + Returns None. + ''' + self.extract_rules = [] + + def get_rules(self): + ''' + Returns a list of all extraction rules. + ''' + return self.extract_rules + + def load_from_file(self, fname): + ''' + Loads extraction rules from the specified file. + + @fname - Path to the extraction rule file. + + Returns None. + ''' + try: + # Process each line from the extract file, ignoring comments + with open(fname, 'r') as f: + for rule in f.readlines(): + self.add_rule(rule.split(self.COMMENT_DELIM, 1)[0]) + except KeyboardInterrupt as e: + raise e + except Exception as e: + raise Exception("Extractor.load_from_file failed to load file '%s': %s" % (fname, str(e))) + + def load_defaults(self): + ''' + Loads default extraction rules from the user and system extract.conf files. + + Returns None. + ''' + # Load the user extract file first to ensure its rules take precedence. + extract_files = [ + self.config.settings.paths['user'][self.config.settings.EXTRACT_FILE], + self.config.settings.paths['system'][self.config.settings.EXTRACT_FILE], + ] + + for extract_file in extract_files: + try: + self.load_from_file(extract_file) + except KeyboardInterrupt as e: + raise e + except Exception as e: + if self.config.verbose: + raise Exception("Extractor.load_defaults failed to load file '%s': %s" % (extract_file, str(e))) + + def build_output_directory(self, path): + ''' + Set the output directory for extracted files. + + @path - The path to the file that data will be extracted from. + + Returns None. + ''' + # If we have not already created an output directory for this target file, create one now + if not has_key(self.extraction_directories, path): + output_directory = os.path.join(os.path.dirname(path), unique_file_name('_' + os.path.basename(path), extension='extracted')) + + if not os.path.exists(output_directory): + os.mkdir(output_directory) + + self.extraction_directories[path] = output_directory + # Else, just use the already created directory + else: + output_directory = self.extraction_directories[path] + + # Set the initial base extraction directory for later determining the level of recusion + if not self.base_recursion_dir: + self.base_recursion_dir = os.path.realpath(output_directory) + os.path.sep + + return output_directory + + + def cleanup_extracted_files(self, tf=None): + ''' + Set the action to take after a file is extracted. + + @tf - If set to True, extracted files will be cleaned up after running a command against them. + If set to False, extracted files will not be cleaned up after running a command against them. + If set to None or not specified, the current setting will not be changed. + + Returns the current cleanup status (True/False). + ''' + if tf is not None: + self.remove_after_execute = tf + + return self.remove_after_execute + + def extract(self, offset, description, file_name, size, name=None): + ''' + Extract an embedded file from the target file, if it matches an extract rule. + Called automatically by Binwalk.scan(). + + @offset - Offset inside the target file to begin the extraction. + @description - Description of the embedded file to extract, as returned by libmagic. + @file_name - Path to the target file. + @size - Number of bytes to extract. + @name - Name to save the file as. + + Returns the name of the extracted file (blank string if nothing was extracted). + ''' + fname = '' + cleanup_extracted_fname = True + original_dir = os.getcwd() + rules = self._match(description) + file_path = os.path.realpath(file_name) + + # No extraction rules for this file + if not rules: + return (None, None) + + output_directory = self.build_output_directory(file_name) + + # Extract to end of file if no size was specified + if not size: + size = file_size(file_path) - offset + + if os.path.isfile(file_path): + os.chdir(output_directory) + + # Loop through each extraction rule until one succeeds + for i in range(0, len(rules)): + rule = rules[i] + + # Copy out the data to disk, if we haven't already + fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name) + + # If there was a command specified for this rule, try to execute it. + # If execution fails, the next rule will be attempted. + if rule['cmd']: + + # Many extraction utilities will extract the file to a new file, just without + # the file extension (i.e., myfile.7z -> myfile). If the presumed resulting + # file name already exists before executing the extract command, do not attempt + # to clean it up even if its resulting file size is 0. + if self.remove_after_execute: + extracted_fname = os.path.splitext(fname)[0] + if os.path.exists(extracted_fname): + cleanup_extracted_fname = False + + # Execute the specified command against the extracted file + if self.run_extractors: + extract_ok = self.execute(rule['cmd'], fname) + else: + extract_ok = True + + # Only clean up files if remove_after_execute was specified + if extract_ok and self.remove_after_execute: + + # Remove the original file that we extracted + try: + os.unlink(fname) + except KeyboardInterrupt as e: + raise e + except Exception as e: + pass + + # If the command worked, assume it removed the file extension from the extracted file + # If the extracted file name file exists and is empty, remove it + if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0: + try: + os.unlink(extracted_fname) + except KeyboardInterrupt as e: + raise e + except Exception as e: + pass + + # If the command executed OK, don't try any more rules + if extract_ok: + break + # Else, remove the extracted file if this isn't the last rule in the list. + # If it is the last rule, leave the file on disk for the user to examine. + elif i != (len(rules)-1): + try: + os.unlink(fname) + except KeyboardInterrupt as e: + raise e + except Exception as e: + pass + + # If there was no command to execute, just use the first rule + else: + break + + os.chdir(original_dir) + + return (output_directory, fname) + + def _entry_offset(self, index, entries, description): + ''' + Gets the offset of the first entry that matches the description. + + @index - Index into the entries list to begin searching. + @entries - Dictionary of result entries. + @description - Case insensitive description. + + Returns the offset, if a matching description is found. + Returns -1 if a matching description is not found. + ''' + description = description.lower() + + for (offset, infos) in entries[index:]: + for info in infos: + if info['description'].lower().startswith(description): + return offset + return -1 + + def _match(self, description): + ''' + Check to see if the provided description string matches an extract rule. + Called internally by self.extract(). + + @description - Description string to check. + + Returns the associated rule dictionary if a match is found. + Returns None if no match is found. + ''' + rules = [] + description = description.lower() + + for rule in self.extract_rules: + if rule['regex'].search(description): + rules.append(rule) + return rules + + def _parse_rule(self, rule): + ''' + Parses an extraction rule. + + @rule - Rule string. + + Returns an array of ['<case insensitive matching string>', '<file extension>', '<command to run>']. + ''' + return rule.strip().split(self.RULE_DELIM, 2) + + def _dd(self, file_name, offset, size, extension, output_file_name=None): + ''' + Extracts a file embedded inside the target file. + + @file_name - Path to the target file. + @offset - Offset inside the target file where the embedded file begins. + @size - Number of bytes to extract. + @extension - The file exension to assign to the extracted file on disk. + @output_file_name - The requested name of the output file. + + Returns the extracted file name. + ''' + total_size = 0 + # Default extracted file name is <hex offset>.<extension> + default_bname = "%X" % offset + + if self.max_size and size > self.max_size: + size = self.max_size + + if not output_file_name or output_file_name is None: + bname = default_bname + else: + # Strip the output file name of invalid/dangerous characters (like file paths) + bname = os.path.basename(output_file_name) + + fname = unique_file_name(bname, extension) + + try: + # Open the target file and seek to the offset + fdin = self.config.open_file(file_name, length=size, offset=offset) + + # Open the output file + try: + fdout = BlockFile(fname, 'w') + except KeyboardInterrupt as e: + raise e + except Exception as e: + # Fall back to the default name if the requested name fails + fname = unique_file_name(default_bname, extension) + fdout = BlockFile(fname, 'w') + + while total_size < size: + (data, dlen) = fdin.read_block() + if not data: + break + else: + fdout.write(str2bytes(data[:dlen])) + total_size += dlen + + # Cleanup + fdout.close() + fdin.close() + except KeyboardInterrupt as e: + raise e + except Exception as e: + raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e))) + + return fname + + def execute(self, cmd, fname): + ''' + Execute a command against the specified file. + + @cmd - Command to execute. + @fname - File to run command against. + + Returns True on success, False on failure. + ''' + tmp = None + retval = True + + try: + if callable(cmd): + try: + cmd(fname) + except KeyboardInterrupt as e: + raise e + except Exception as e: + sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e))) + else: + # If not in verbose mode, create a temporary file to redirect stdout and stderr to + if not self.config.verbose: + tmp = tempfile.TemporaryFile() + + # Replace all instances of FILE_NAME_PLACEHOLDER in the command with fname + cmd = cmd.replace(self.FILE_NAME_PLACEHOLDER, fname) + + # Execute. + if subprocess.call(shlex.split(cmd), stdout=tmp, stderr=tmp) != 0: + retval = False + except KeyboardInterrupt as e: + raise e + except Exception as e: + # Silently ignore no such file or directory errors. Why? Because these will inevitably be raised when + # making the switch to the new firmware mod kit directory structure. We handle this elsewhere, but it's + # annoying to see this spammed out to the console every time. + if e.errno != 2: + sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e))) + retval = False + + if tmp is not None: + tmp.close() + + return retval + diff --git a/src/binwalk/modules/general.py b/src/binwalk/modules/general.py index 0e645ad..5ac8276 100644 --- a/src/binwalk/modules/general.py +++ b/src/binwalk/modules/general.py @@ -10,177 +10,177 @@ from binwalk.core.module import Module, Option, Kwarg, show_help class General(Module): - TITLE = "General" - ORDER = 0 - - DEFAULT_DEPENDS = [] - - CLI = [ - Option(long='length', - short='l', - type=int, - kwargs={'length' : 0}, - description='Number of bytes to scan'), - Option(long='offset', - short='o', - type=int, - kwargs={'offset' : 0}, - description='Start scan at this file offset'), - Option(long='block', - short='K', - type=int, - kwargs={'block' : 0}, - description='Set file block size'), - Option(long='swap', - short='g', - type=int, - kwargs={'swap_size' : 0}, - description='Reverse every n bytes before scanning'), - Option(short='I', - long='show-invalid', - kwargs={'show_invalid' : True}, - description='Show results marked as invalid'), - Option(short='x', - long='exclude', - kwargs={'exclude_filters' : []}, - type=list, - dtype=str.__name__, - description='Exclude results that match <str>'), - Option(short='y', - long='include', - kwargs={'include_filters' : []}, - type=list, - dtype=str.__name__, - description='Only show results that match <str>'), - Option(long='log', - short='f', - type=argparse.FileType, - kwargs={'log_file' : None}, - description='Log results to file'), - Option(long='csv', - short='c', - kwargs={'csv' : True}, - description='Log results to file in CSV format'), - Option(long='term', - short='t', - kwargs={'format_to_terminal' : True}, - description='Format output to fit the terminal window'), - Option(long='quiet', - short='q', - kwargs={'quiet' : True}, - description='Supress output to stdout'), - Option(long='verbose', - short='v', - kwargs={'verbose' : True}, - description='Enable verbose output'), - Option(short='h', - long='help', - kwargs={'show_help' : True}, - description='Show help output'), - Option(long=None, - short=None, - type=binwalk.core.common.BlockFile, - kwargs={'files' : []}), - ] - - KWARGS = [ - Kwarg(name='length', default=0), - Kwarg(name='offset', default=0), - Kwarg(name='block', default=0), - Kwarg(name='swap_size', default=0), - Kwarg(name='show_invalid', default=False), - Kwarg(name='include_filters', default=[]), - Kwarg(name='exclude_filters', default=[]), - Kwarg(name='log_file', default=None), - Kwarg(name='csv', default=False), - Kwarg(name='format_to_terminal', default=False), - Kwarg(name='quiet', default=False), - Kwarg(name='verbose', default=False), - Kwarg(name='files', default=[]), - Kwarg(name='show_help', default=False), - ] - - PRIMARY = False - - def load(self): - self.target_files = [] - - # Order is important with these two methods - self._open_target_files() - self._set_verbosity() - - self.filter = binwalk.core.filter.Filter(self.show_invalid) - - # Set any specified include/exclude filters - for regex in self.exclude_filters: - self.filter.exclude(regex) - for regex in self.include_filters: - self.filter.include(regex) - - self.settings = binwalk.core.settings.Settings() - self.display = binwalk.core.display.Display(log=self.log_file, - csv=self.csv, - quiet=self.quiet, - verbose=self.verbose, - filter=self.filter, - fit_to_screen=self.format_to_terminal) - - if self.show_help: - show_help() - sys.exit(0) - - def reset(self): - for fp in self.target_files: - fp.reset() - - def __del__(self): - self._cleanup() - - def __exit__(self, a, b, c): - self._cleanup() - - def _cleanup(self): - if hasattr(self, 'target_files'): - for fp in self.target_files: - fp.close() - - def _set_verbosity(self): - ''' - Sets the appropriate verbosity. - Must be called after self._test_target_files so that self.target_files is properly set. - ''' - # If more than one target file was specified, enable verbose mode; else, there is - # nothing in some outputs to indicate which scan corresponds to which file. - if len(self.target_files) > 1 and not self.verbose: - self.verbose = True - - def open_file(self, fname, length=None, offset=None, swap=None, block=None, peek=None): - ''' - Opens the specified file with all pertinent configuration settings. - ''' - if length is None: - length = self.length - if offset is None: - offset = self.offset - if swap is None: - swap = self.swap_size - - return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek) - - def _open_target_files(self): - ''' - Checks if the target files can be opened. - Any files that cannot be opened are removed from the self.target_files list. - ''' - # Validate the target files listed in target_files - for tfile in self.files: - # Ignore directories. - if not os.path.isdir(tfile): - # Make sure we can open the target files - try: - self.target_files.append(self.open_file(tfile)) - except KeyboardInterrupt as e: - raise e - except Exception as e: - self.error(description="Cannot open file : %s" % str(e)) - + TITLE = "General" + ORDER = 0 + + DEFAULT_DEPENDS = [] + + CLI = [ + Option(long='length', + short='l', + type=int, + kwargs={'length' : 0}, + description='Number of bytes to scan'), + Option(long='offset', + short='o', + type=int, + kwargs={'offset' : 0}, + description='Start scan at this file offset'), + Option(long='block', + short='K', + type=int, + kwargs={'block' : 0}, + description='Set file block size'), + Option(long='swap', + short='g', + type=int, + kwargs={'swap_size' : 0}, + description='Reverse every n bytes before scanning'), + Option(short='I', + long='show-invalid', + kwargs={'show_invalid' : True}, + description='Show results marked as invalid'), + Option(short='x', + long='exclude', + kwargs={'exclude_filters' : []}, + type=list, + dtype=str.__name__, + description='Exclude results that match <str>'), + Option(short='y', + long='include', + kwargs={'include_filters' : []}, + type=list, + dtype=str.__name__, + description='Only show results that match <str>'), + Option(long='log', + short='f', + type=argparse.FileType, + kwargs={'log_file' : None}, + description='Log results to file'), + Option(long='csv', + short='c', + kwargs={'csv' : True}, + description='Log results to file in CSV format'), + Option(long='term', + short='t', + kwargs={'format_to_terminal' : True}, + description='Format output to fit the terminal window'), + Option(long='quiet', + short='q', + kwargs={'quiet' : True}, + description='Supress output to stdout'), + Option(long='verbose', + short='v', + kwargs={'verbose' : True}, + description='Enable verbose output'), + Option(short='h', + long='help', + kwargs={'show_help' : True}, + description='Show help output'), + Option(long=None, + short=None, + type=binwalk.core.common.BlockFile, + kwargs={'files' : []}), + ] + + KWARGS = [ + Kwarg(name='length', default=0), + Kwarg(name='offset', default=0), + Kwarg(name='block', default=0), + Kwarg(name='swap_size', default=0), + Kwarg(name='show_invalid', default=False), + Kwarg(name='include_filters', default=[]), + Kwarg(name='exclude_filters', default=[]), + Kwarg(name='log_file', default=None), + Kwarg(name='csv', default=False), + Kwarg(name='format_to_terminal', default=False), + Kwarg(name='quiet', default=False), + Kwarg(name='verbose', default=False), + Kwarg(name='files', default=[]), + Kwarg(name='show_help', default=False), + ] + + PRIMARY = False + + def load(self): + self.target_files = [] + + # Order is important with these two methods + self._open_target_files() + self._set_verbosity() + + self.filter = binwalk.core.filter.Filter(self.show_invalid) + + # Set any specified include/exclude filters + for regex in self.exclude_filters: + self.filter.exclude(regex) + for regex in self.include_filters: + self.filter.include(regex) + + self.settings = binwalk.core.settings.Settings() + self.display = binwalk.core.display.Display(log=self.log_file, + csv=self.csv, + quiet=self.quiet, + verbose=self.verbose, + filter=self.filter, + fit_to_screen=self.format_to_terminal) + + if self.show_help: + show_help() + sys.exit(0) + + def reset(self): + for fp in self.target_files: + fp.reset() + + def __del__(self): + self._cleanup() + + def __exit__(self, a, b, c): + self._cleanup() + + def _cleanup(self): + if hasattr(self, 'target_files'): + for fp in self.target_files: + fp.close() + + def _set_verbosity(self): + ''' + Sets the appropriate verbosity. + Must be called after self._test_target_files so that self.target_files is properly set. + ''' + # If more than one target file was specified, enable verbose mode; else, there is + # nothing in some outputs to indicate which scan corresponds to which file. + if len(self.target_files) > 1 and not self.verbose: + self.verbose = True + + def open_file(self, fname, length=None, offset=None, swap=None, block=None, peek=None): + ''' + Opens the specified file with all pertinent configuration settings. + ''' + if length is None: + length = self.length + if offset is None: + offset = self.offset + if swap is None: + swap = self.swap_size + + return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek) + + def _open_target_files(self): + ''' + Checks if the target files can be opened. + Any files that cannot be opened are removed from the self.target_files list. + ''' + # Validate the target files listed in target_files + for tfile in self.files: + # Ignore directories. + if not os.path.isdir(tfile): + # Make sure we can open the target files + try: + self.target_files.append(self.open_file(tfile)) + except KeyboardInterrupt as e: + raise e + except Exception as e: + self.error(description="Cannot open file : %s" % str(e)) + diff --git a/src/binwalk/modules/hashmatch.py b/src/binwalk/modules/hashmatch.py index d926803..4ebe1b0 100644 --- a/src/binwalk/modules/hashmatch.py +++ b/src/binwalk/modules/hashmatch.py @@ -8,311 +8,311 @@ from binwalk.core.compat import * from binwalk.core.module import Module, Option, Kwarg class HashResult(object): - ''' - Class for storing libfuzzy hash results. - For internal use only. - ''' + ''' + Class for storing libfuzzy hash results. + For internal use only. + ''' - def __init__(self, name, hash=None, strings=None): - self.name = name - self.hash = hash - self.strings = strings + def __init__(self, name, hash=None, strings=None): + self.name = name + self.hash = hash + self.strings = strings class HashMatch(Module): - ''' - Class for fuzzy hash matching of files and directories. - ''' - DEFAULT_CUTOFF = 0 - CONSERVATIVE_CUTOFF = 90 - - TITLE = "Fuzzy Hash" - - CLI = [ - Option(short='F', - long='fuzzy', - kwargs={'enabled' : True}, - description='Perform fuzzy hash matching on files/directories'), - Option(short='u', - long='cutoff', - priority=100, - type=int, - kwargs={'cutoff' : DEFAULT_CUTOFF}, - description='Set the cutoff percentage'), - Option(short='S', - long='strings', - kwargs={'strings' : True}, - description='Diff strings inside files instead of the entire file'), - Option(short='s', - long='same', - kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF}, - description='Only show files that are the same'), - Option(short='p', - long='diff', - kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF}, - description='Only show files that are different'), - Option(short='n', - long='name', - kwargs={'filter_by_name' : True}, - description='Only compare files whose base names are the same'), - Option(short='L', - long='symlinks', - kwargs={'symlinks' : True}, - description="Don't ignore symlinks"), - ] - - KWARGS = [ - Kwarg(name='cutoff', default=DEFAULT_CUTOFF), - Kwarg(name='strings', default=False), - Kwarg(name='same', default=True), - Kwarg(name='symlinks', default=False), - Kwarg(name='name', default=False), - Kwarg(name='max_results', default=None), - Kwarg(name='abspath', default=False), - Kwarg(name='filter_by_name', default=False), - Kwarg(name='symlinks', default=False), - Kwarg(name='enabled', default=False), - ] - - # Requires libfuzzy.so - LIBRARY_NAME = "fuzzy" - LIBRARY_FUNCTIONS = [ - binwalk.core.C.Function(name="fuzzy_hash_buf", type=int), - binwalk.core.C.Function(name="fuzzy_hash_filename", type=int), - binwalk.core.C.Function(name="fuzzy_compare", type=int), - ] - - # Max result is 148 (http://ssdeep.sourceforge.net/api/html/fuzzy_8h.html) - FUZZY_MAX_RESULT = 150 - # Files smaller than this won't produce meaningful fuzzy results (from ssdeep.h) - FUZZY_MIN_FILE_SIZE = 4096 - - HEADER_FORMAT = "\n%s" + " " * 11 + "%s\n" - RESULT_FORMAT = "%4d%%" + " " * 16 + "%s\n" - HEADER = ["SIMILARITY", "FILE NAME"] - RESULT = ["percentage", "description"] - - def init(self): - self.total = 0 - self.last_file1 = HashResult(None) - self.last_file2 = HashResult(None) - - self.lib = binwalk.core.C.Library(self.LIBRARY_NAME, self.LIBRARY_FUNCTIONS) - - def _get_strings(self, fname): - return ''.join(list(binwalk.core.common.strings(fname, minimum=10))) - - def _show_result(self, match, fname): - if self.abspath: - fname = os.path.abspath(fname) - self.result(percentage=match, description=fname) - - def _compare_files(self, file1, file2): - ''' - Fuzzy diff two files. - - @file1 - The first file to diff. - @file2 - The second file to diff. - - Returns the match percentage. - Returns None on error. - ''' - status = 0 - file1_dup = False - file2_dup = False - - if not self.filter_by_name or os.path.basename(file1) == os.path.basename(file2): - if os.path.exists(file1) and os.path.exists(file2): - - hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT) - hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT) - - # Check if the last file1 or file2 matches this file1 or file2; no need to re-hash if they match. - if file1 == self.last_file1.name and self.last_file1.hash: - file1_dup = True - else: - self.last_file1.name = file1 - - if file2 == self.last_file2.name and self.last_file2.hash: - file2_dup = True - else: - self.last_file2.name = file2 - - try: - if self.strings: - if file1_dup: - file1_strings = self.last_file1.strings - else: - self.last_file1.strings = file1_strings = self._get_strings(file1) - - if file2_dup: - file2_strings = self.last_file2.strings - else: - self.last_file2.strings = file2_strings = self._get_strings(file2) - - if file1_strings == file2_strings: - return 100 - else: - if file1_dup: - hash1 = self.last_file1.hash - else: - status |= self.lib.fuzzy_hash_buf(file1_strings, len(file1_strings), hash1) - - if file2_dup: - hash2 = self.last_file2.hash - else: - status |= self.lib.fuzzy_hash_buf(file2_strings, len(file2_strings), hash2) - - else: - if file1_dup: - hash1 = self.last_file1.hash - else: - status |= self.lib.fuzzy_hash_filename(file1, hash1) - - if file2_dup: - hash2 = self.last_file2.hash - else: - status |= self.lib.fuzzy_hash_filename(file2, hash2) - - if status == 0: - if not file1_dup: - self.last_file1.hash = hash1 - if not file2_dup: - self.last_file2.hash = hash2 - - if hash1.raw == hash2.raw: - return 100 - else: - return self.lib.fuzzy_compare(hash1, hash2) - except Exception as e: - print ("WARNING: Exception while doing fuzzy hash: %s" % e) - - return None - - def is_match(self, match): - ''' - Returns True if this is a good match. - Returns False if his is not a good match. - ''' - return (match is not None and ((match >= self.cutoff and self.same) or (match < self.cutoff and not self.same))) - - def _get_file_list(self, directory): - ''' - Generates a directory tree. - - @directory - The root directory to start from. - - Returns a set of file paths, excluding the root directory. - ''' - file_list = [] - - # Normalize directory path so that we can exclude it from each individual file path - directory = os.path.abspath(directory) + os.path.sep - - for (root, dirs, files) in os.walk(directory): - # Don't include the root directory in the file paths - root = ''.join(root.split(directory, 1)[1:]) - - # Get a list of files, with or without symlinks as specified during __init__ - files = [os.path.join(root, f) for f in files if self.symlinks or not os.path.islink(f)] - - file_list += files - - return set(file_list) - - def hash_files(self, needle, haystack): - ''' - Compare one file against a list of other files. - - Returns a list of tuple results. - ''' - self.total = 0 - - for f in haystack: - m = self._compare_files(needle, f) - if m is not None and self.is_match(m): - self._show_result(m, f) - - self.total += 1 - if self.max_results and self.total >= self.max_results: - break - - def hash_file(self, needle, haystack): - ''' - Search for one file inside one or more directories. - - Returns a list of tuple results. - ''' - matching_files = [] - self.total = 0 - done = False - - for directory in haystack: - for f in self._get_file_list(directory): - f = os.path.join(directory, f) - m = self._compare_files(needle, f) - if m is not None and self.is_match(m): - self._show_result(m, f) - matching_files.append((m, f)) - - self.total += 1 - if self.max_results and self.total >= self.max_results: - done = True - break - if done: - break - - return matching_files - - def hash_directories(self, needle, haystack): - ''' - Compare the contents of one directory with the contents of other directories. - - Returns a list of tuple results. - ''' - done = False - self.total = 0 - - source_files = self._get_file_list(needle) - - for directory in haystack: - dir_files = self._get_file_list(directory) - - for f in source_files: - if f in dir_files: - file1 = os.path.join(needle, f) - file2 = os.path.join(directory, f) - - m = self._compare_files(file1, file2) - if m is not None and self.is_match(m): - self._show_result(m, file2) - - self.total += 1 - if self.max_results and self.total >= self.max_results: - done = True - break - if done: - break - - def run(self): - ''' - Main module method. - ''' - needle = self.next_file().name - haystack = [] - - self.header() - - for fp in iter(self.next_file, None): - haystack.append(fp.name) - - if os.path.isfile(needle): - if os.path.isfile(haystack[0]): - self.hash_files(needle, haystack) - else: - self.hash_file(needle, haystack) - else: - self.hash_directories(needle, haystack) - - self.footer() - - return True + ''' + Class for fuzzy hash matching of files and directories. + ''' + DEFAULT_CUTOFF = 0 + CONSERVATIVE_CUTOFF = 90 + + TITLE = "Fuzzy Hash" + + CLI = [ + Option(short='F', + long='fuzzy', + kwargs={'enabled' : True}, + description='Perform fuzzy hash matching on files/directories'), + Option(short='u', + long='cutoff', + priority=100, + type=int, + kwargs={'cutoff' : DEFAULT_CUTOFF}, + description='Set the cutoff percentage'), + Option(short='S', + long='strings', + kwargs={'strings' : True}, + description='Diff strings inside files instead of the entire file'), + Option(short='s', + long='same', + kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF}, + description='Only show files that are the same'), + Option(short='p', + long='diff', + kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF}, + description='Only show files that are different'), + Option(short='n', + long='name', + kwargs={'filter_by_name' : True}, + description='Only compare files whose base names are the same'), + Option(short='L', + long='symlinks', + kwargs={'symlinks' : True}, + description="Don't ignore symlinks"), + ] + + KWARGS = [ + Kwarg(name='cutoff', default=DEFAULT_CUTOFF), + Kwarg(name='strings', default=False), + Kwarg(name='same', default=True), + Kwarg(name='symlinks', default=False), + Kwarg(name='name', default=False), + Kwarg(name='max_results', default=None), + Kwarg(name='abspath', default=False), + Kwarg(name='filter_by_name', default=False), + Kwarg(name='symlinks', default=False), + Kwarg(name='enabled', default=False), + ] + + # Requires libfuzzy.so + LIBRARY_NAME = "fuzzy" + LIBRARY_FUNCTIONS = [ + binwalk.core.C.Function(name="fuzzy_hash_buf", type=int), + binwalk.core.C.Function(name="fuzzy_hash_filename", type=int), + binwalk.core.C.Function(name="fuzzy_compare", type=int), + ] + + # Max result is 148 (http://ssdeep.sourceforge.net/api/html/fuzzy_8h.html) + FUZZY_MAX_RESULT = 150 + # Files smaller than this won't produce meaningful fuzzy results (from ssdeep.h) + FUZZY_MIN_FILE_SIZE = 4096 + + HEADER_FORMAT = "\n%s" + " " * 11 + "%s\n" + RESULT_FORMAT = "%4d%%" + " " * 16 + "%s\n" + HEADER = ["SIMILARITY", "FILE NAME"] + RESULT = ["percentage", "description"] + + def init(self): + self.total = 0 + self.last_file1 = HashResult(None) + self.last_file2 = HashResult(None) + + self.lib = binwalk.core.C.Library(self.LIBRARY_NAME, self.LIBRARY_FUNCTIONS) + + def _get_strings(self, fname): + return ''.join(list(binwalk.core.common.strings(fname, minimum=10))) + + def _show_result(self, match, fname): + if self.abspath: + fname = os.path.abspath(fname) + self.result(percentage=match, description=fname) + + def _compare_files(self, file1, file2): + ''' + Fuzzy diff two files. + + @file1 - The first file to diff. + @file2 - The second file to diff. + + Returns the match percentage. + Returns None on error. + ''' + status = 0 + file1_dup = False + file2_dup = False + + if not self.filter_by_name or os.path.basename(file1) == os.path.basename(file2): + if os.path.exists(file1) and os.path.exists(file2): + + hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT) + hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT) + + # Check if the last file1 or file2 matches this file1 or file2; no need to re-hash if they match. + if file1 == self.last_file1.name and self.last_file1.hash: + file1_dup = True + else: + self.last_file1.name = file1 + + if file2 == self.last_file2.name and self.last_file2.hash: + file2_dup = True + else: + self.last_file2.name = file2 + + try: + if self.strings: + if file1_dup: + file1_strings = self.last_file1.strings + else: + self.last_file1.strings = file1_strings = self._get_strings(file1) + + if file2_dup: + file2_strings = self.last_file2.strings + else: + self.last_file2.strings = file2_strings = self._get_strings(file2) + + if file1_strings == file2_strings: + return 100 + else: + if file1_dup: + hash1 = self.last_file1.hash + else: + status |= self.lib.fuzzy_hash_buf(file1_strings, len(file1_strings), hash1) + + if file2_dup: + hash2 = self.last_file2.hash + else: + status |= self.lib.fuzzy_hash_buf(file2_strings, len(file2_strings), hash2) + + else: + if file1_dup: + hash1 = self.last_file1.hash + else: + status |= self.lib.fuzzy_hash_filename(file1, hash1) + + if file2_dup: + hash2 = self.last_file2.hash + else: + status |= self.lib.fuzzy_hash_filename(file2, hash2) + + if status == 0: + if not file1_dup: + self.last_file1.hash = hash1 + if not file2_dup: + self.last_file2.hash = hash2 + + if hash1.raw == hash2.raw: + return 100 + else: + return self.lib.fuzzy_compare(hash1, hash2) + except Exception as e: + print ("WARNING: Exception while doing fuzzy hash: %s" % e) + + return None + + def is_match(self, match): + ''' + Returns True if this is a good match. + Returns False if his is not a good match. + ''' + return (match is not None and ((match >= self.cutoff and self.same) or (match < self.cutoff and not self.same))) + + def _get_file_list(self, directory): + ''' + Generates a directory tree. + + @directory - The root directory to start from. + + Returns a set of file paths, excluding the root directory. + ''' + file_list = [] + + # Normalize directory path so that we can exclude it from each individual file path + directory = os.path.abspath(directory) + os.path.sep + + for (root, dirs, files) in os.walk(directory): + # Don't include the root directory in the file paths + root = ''.join(root.split(directory, 1)[1:]) + + # Get a list of files, with or without symlinks as specified during __init__ + files = [os.path.join(root, f) for f in files if self.symlinks or not os.path.islink(f)] + + file_list += files + + return set(file_list) + + def hash_files(self, needle, haystack): + ''' + Compare one file against a list of other files. + + Returns a list of tuple results. + ''' + self.total = 0 + + for f in haystack: + m = self._compare_files(needle, f) + if m is not None and self.is_match(m): + self._show_result(m, f) + + self.total += 1 + if self.max_results and self.total >= self.max_results: + break + + def hash_file(self, needle, haystack): + ''' + Search for one file inside one or more directories. + + Returns a list of tuple results. + ''' + matching_files = [] + self.total = 0 + done = False + + for directory in haystack: + for f in self._get_file_list(directory): + f = os.path.join(directory, f) + m = self._compare_files(needle, f) + if m is not None and self.is_match(m): + self._show_result(m, f) + matching_files.append((m, f)) + + self.total += 1 + if self.max_results and self.total >= self.max_results: + done = True + break + if done: + break + + return matching_files + + def hash_directories(self, needle, haystack): + ''' + Compare the contents of one directory with the contents of other directories. + + Returns a list of tuple results. + ''' + done = False + self.total = 0 + + source_files = self._get_file_list(needle) + + for directory in haystack: + dir_files = self._get_file_list(directory) + + for f in source_files: + if f in dir_files: + file1 = os.path.join(needle, f) + file2 = os.path.join(directory, f) + + m = self._compare_files(file1, file2) + if m is not None and self.is_match(m): + self._show_result(m, file2) + + self.total += 1 + if self.max_results and self.total >= self.max_results: + done = True + break + if done: + break + + def run(self): + ''' + Main module method. + ''' + needle = self.next_file().name + haystack = [] + + self.header() + + for fp in iter(self.next_file, None): + haystack.append(fp.name) + + if os.path.isfile(needle): + if os.path.isfile(haystack[0]): + self.hash_files(needle, haystack) + else: + self.hash_file(needle, haystack) + else: + self.hash_directories(needle, haystack) + + self.footer() + + return True diff --git a/src/binwalk/modules/heuristics.py b/src/binwalk/modules/heuristics.py index a5250de..c031b03 100644 --- a/src/binwalk/modules/heuristics.py +++ b/src/binwalk/modules/heuristics.py @@ -8,186 +8,186 @@ from binwalk.core.compat import * from binwalk.core.module import Module, Kwarg, Option, Dependency class ChiSquare(object): - ''' - Performs a Chi Squared test against the provided data. - ''' + ''' + Performs a Chi Squared test against the provided data. + ''' - IDEAL = 256.0 + IDEAL = 256.0 - def __init__(self): - ''' - Class constructor. + def __init__(self): + ''' + Class constructor. - Returns None. - ''' - self.bytes = {} - self.freedom = self.IDEAL - 1 - - # Initialize the self.bytes dictionary with keys for all possible byte values (0 - 255) - for i in range(0, int(self.IDEAL)): - self.bytes[chr(i)] = 0 - - self.reset() + Returns None. + ''' + self.bytes = {} + self.freedom = self.IDEAL - 1 + + # Initialize the self.bytes dictionary with keys for all possible byte values (0 - 255) + for i in range(0, int(self.IDEAL)): + self.bytes[chr(i)] = 0 + + self.reset() - def reset(self): - self.xc2 = 0.0 - self.byte_count = 0 + def reset(self): + self.xc2 = 0.0 + self.byte_count = 0 - for key in self.bytes.keys(): - self.bytes[key] = 0 + for key in self.bytes.keys(): + self.bytes[key] = 0 - def update(self, data): - ''' - Updates the current byte counts with new data. + def update(self, data): + ''' + Updates the current byte counts with new data. - @data - String of bytes to update. + @data - String of bytes to update. - Returns None. - ''' - # Count the number of occurances of each byte value - for i in data: - self.bytes[i] += 1 + Returns None. + ''' + # Count the number of occurances of each byte value + for i in data: + self.bytes[i] += 1 - self.byte_count += len(data) + self.byte_count += len(data) - def chisq(self): - ''' - Calculate the Chi Square critical value. + def chisq(self): + ''' + Calculate the Chi Square critical value. - Returns the critical value. - ''' - expected = self.byte_count / self.IDEAL + Returns the critical value. + ''' + expected = self.byte_count / self.IDEAL - if expected: - for byte in self.bytes.values(): - self.xc2 += ((byte - expected) ** 2 ) / expected + if expected: + for byte in self.bytes.values(): + self.xc2 += ((byte - expected) ** 2 ) / expected - return self.xc2 + return self.xc2 class EntropyBlock(object): - def __init__(self, **kwargs): - self.start = None - self.end = None - self.length = None - for (k,v) in iterator(kwargs): - setattr(self, k, v) + def __init__(self, **kwargs): + self.start = None + self.end = None + self.length = None + for (k,v) in iterator(kwargs): + setattr(self, k, v) class HeuristicCompressionAnalyzer(Module): - ''' - Performs analysis and attempts to interpret the results. - ''' - - BLOCK_SIZE = 32 - CHI_CUTOFF = 512 - ENTROPY_TRIGGER = .90 - MIN_BLOCK_SIZE = 4096 - BLOCK_OFFSET = 1024 - ENTROPY_BLOCK_SIZE = 1024 - - TITLE = "Heuristic Compression" - - DEPENDS = [ - Dependency(name='Entropy', - attribute='entropy', - kwargs={'enabled' : True, 'do_plot' : False, 'display_results' : False, 'block_size' : ENTROPY_BLOCK_SIZE}), - ] - - CLI = [ - Option(short='H', - long='heuristic', - kwargs={'enabled' : True}, - description='Heuristically classify high entropy data'), - Option(short='a', - long='trigger', - kwargs={'trigger_level' : 0}, - type=float, - description='Set the entropy trigger level (0.0 - 1.0)'), - ] - - KWARGS = [ - Kwarg(name='enabled', default=False), - Kwarg(name='trigger_level', default=ENTROPY_TRIGGER), - ] - - def init(self): - self.blocks = {} - - self.HEADER[-1] = "HEURISTIC ENTROPY ANALYSIS" - - if self.config.block: - self.block_size = self.config.block - else: - self.block_size = self.BLOCK_SIZE - - for result in self.entropy.results: - if not has_key(self.blocks, result.file.name): - self.blocks[result.file.name] = [] - - if result.entropy >= self.trigger_level and (not self.blocks[result.file.name] or self.blocks[result.file.name][-1].end is not None): - self.blocks[result.file.name].append(EntropyBlock(start=result.offset + self.BLOCK_OFFSET)) - elif result.entropy < self.trigger_level and self.blocks[result.file.name] and self.blocks[result.file.name][-1].end is None: - self.blocks[result.file.name][-1].end = result.offset - self.BLOCK_OFFSET - - def run(self): - for fp in iter(self.next_file, None): - - if has_key(self.blocks, fp.name): - - self.header() - - for block in self.blocks[fp.name]: - - if block.end is None: - block.length = fp.offset + fp.length - block.start - else: - block.length = block.end - block.start - - if block.length >= self.MIN_BLOCK_SIZE: - self.analyze(fp, block) - - self.footer() - - def analyze(self, fp, block): - ''' - Perform analysis and interpretation. - ''' - i = 0 - num_error = 0 - analyzer_results = [] - - chi = ChiSquare() - fp.seek(block.start) - - while i < block.length: - j = 0 - (d, dlen) = fp.read_block() - if not d: - break - - while j < dlen: - chi.reset() - - data = d[j:j+self.block_size] - if len(data) < self.block_size: - break - - chi.update(data) - - if chi.chisq() >= self.CHI_CUTOFF: - num_error += 1 - - j += self.block_size - - if (j + i) > block.length: - break - - i += dlen - - if num_error > 0: - verdict = 'Moderate entropy data, best guess: compressed' - else: - verdict = 'High entropy data, best guess: encrypted' - - desc = '%s, size: %d, %d low entropy blocks' % (verdict, block.length, num_error) - self.result(offset=block.start, description=desc, file=fp) + ''' + Performs analysis and attempts to interpret the results. + ''' + + BLOCK_SIZE = 32 + CHI_CUTOFF = 512 + ENTROPY_TRIGGER = .90 + MIN_BLOCK_SIZE = 4096 + BLOCK_OFFSET = 1024 + ENTROPY_BLOCK_SIZE = 1024 + + TITLE = "Heuristic Compression" + + DEPENDS = [ + Dependency(name='Entropy', + attribute='entropy', + kwargs={'enabled' : True, 'do_plot' : False, 'display_results' : False, 'block_size' : ENTROPY_BLOCK_SIZE}), + ] + + CLI = [ + Option(short='H', + long='heuristic', + kwargs={'enabled' : True}, + description='Heuristically classify high entropy data'), + Option(short='a', + long='trigger', + kwargs={'trigger_level' : 0}, + type=float, + description='Set the entropy trigger level (0.0 - 1.0)'), + ] + + KWARGS = [ + Kwarg(name='enabled', default=False), + Kwarg(name='trigger_level', default=ENTROPY_TRIGGER), + ] + + def init(self): + self.blocks = {} + + self.HEADER[-1] = "HEURISTIC ENTROPY ANALYSIS" + + if self.config.block: + self.block_size = self.config.block + else: + self.block_size = self.BLOCK_SIZE + + for result in self.entropy.results: + if not has_key(self.blocks, result.file.name): + self.blocks[result.file.name] = [] + + if result.entropy >= self.trigger_level and (not self.blocks[result.file.name] or self.blocks[result.file.name][-1].end is not None): + self.blocks[result.file.name].append(EntropyBlock(start=result.offset + self.BLOCK_OFFSET)) + elif result.entropy < self.trigger_level and self.blocks[result.file.name] and self.blocks[result.file.name][-1].end is None: + self.blocks[result.file.name][-1].end = result.offset - self.BLOCK_OFFSET + + def run(self): + for fp in iter(self.next_file, None): + + if has_key(self.blocks, fp.name): + + self.header() + + for block in self.blocks[fp.name]: + + if block.end is None: + block.length = fp.offset + fp.length - block.start + else: + block.length = block.end - block.start + + if block.length >= self.MIN_BLOCK_SIZE: + self.analyze(fp, block) + + self.footer() + + def analyze(self, fp, block): + ''' + Perform analysis and interpretation. + ''' + i = 0 + num_error = 0 + analyzer_results = [] + + chi = ChiSquare() + fp.seek(block.start) + + while i < block.length: + j = 0 + (d, dlen) = fp.read_block() + if not d: + break + + while j < dlen: + chi.reset() + + data = d[j:j+self.block_size] + if len(data) < self.block_size: + break + + chi.update(data) + + if chi.chisq() >= self.CHI_CUTOFF: + num_error += 1 + + j += self.block_size + + if (j + i) > block.length: + break + + i += dlen + + if num_error > 0: + verdict = 'Moderate entropy data, best guess: compressed' + else: + verdict = 'High entropy data, best guess: encrypted' + + desc = '%s, size: %d, %d low entropy blocks' % (verdict, block.length, num_error) + self.result(offset=block.start, description=desc, file=fp) diff --git a/src/binwalk/plugins/compressd.py b/src/binwalk/plugins/compressd.py index cecff0f..86ed592 100644 --- a/src/binwalk/plugins/compressd.py +++ b/src/binwalk/plugins/compressd.py @@ -2,32 +2,32 @@ import binwalk.core.C from binwalk.core.common import * class Plugin(object): - ''' - Searches for and validates compress'd data. - ''' - - READ_SIZE = 64 - - COMPRESS42 = "compress42" - COMPRESS42_FUNCTIONS = [ - binwalk.core.C.Function(name="is_compressed", type=bool), - ] - - def __init__(self, module): - self.fd = None - self.comp = None - - if module.name == 'Signature': - self.comp = binwalk.core.C.Library(self.COMPRESS42, self.COMPRESS42_FUNCTIONS) - - def scan(self, result): - if self.comp: - if result.file and result.description.lower().startswith("compress'd data"): - fd = BlockFile(result.file.name, "r", offset=result.offset, length=self.READ_SIZE) - compressed_data = fd.read(self.READ_SIZE) - fd.close() + ''' + Searches for and validates compress'd data. + ''' + + READ_SIZE = 64 + + COMPRESS42 = "compress42" + COMPRESS42_FUNCTIONS = [ + binwalk.core.C.Function(name="is_compressed", type=bool), + ] + + def __init__(self, module): + self.fd = None + self.comp = None + + if module.name == 'Signature': + self.comp = binwalk.core.C.Library(self.COMPRESS42, self.COMPRESS42_FUNCTIONS) + + def scan(self, result): + if self.comp: + if result.file and result.description.lower().startswith("compress'd data"): + fd = BlockFile(result.file.name, "r", offset=result.offset, length=self.READ_SIZE) + compressed_data = fd.read(self.READ_SIZE) + fd.close() - if not self.comp.is_compressed(compressed_data, len(compressed_data)): - result.valid = False + if not self.comp.is_compressed(compressed_data, len(compressed_data)): + result.valid = False diff --git a/src/binwalk/plugins/cpio.py b/src/binwalk/plugins/cpio.py index 78c65c2..ac404f7 100644 --- a/src/binwalk/plugins/cpio.py +++ b/src/binwalk/plugins/cpio.py @@ -1,30 +1,30 @@ class Plugin(object): - ''' - Ensures that ASCII CPIO archive entries only get extracted once. - ''' + ''' + Ensures that ASCII CPIO archive entries only get extracted once. + ''' - def __init__(self, module): - self.found_archive = False - self.enabled = (module.name == 'Signature') - - def pre_scan(self, module): - # Be sure to re-set this at the beginning of every scan - self.found_archive = False + def __init__(self, module): + self.found_archive = False + self.enabled = (module.name == 'Signature') + + def pre_scan(self, module): + # Be sure to re-set this at the beginning of every scan + self.found_archive = False - def scan(self, result): - if self.enabled and result.valid: - # ASCII CPIO archives consist of multiple entries, ending with an entry named 'TRAILER!!!'. - # Displaying each entry is useful, as it shows what files are contained in the archive, - # but we only want to extract the archive when the first entry is found. - if result.description.startswith('ASCII cpio archive'): - if not self.found_archive: - # This is the first entry. Set found_archive and allow the scan to continue normally. - self.found_archive = True - result.extract = True - elif 'TRAILER!!!' in results['description']: - # This is the last entry, un-set found_archive. - self.found_archive = False - - # The first entry has already been found and this is the last entry, or the last entry - # has not yet been found. Don't extract. - result.extract = False + def scan(self, result): + if self.enabled and result.valid: + # ASCII CPIO archives consist of multiple entries, ending with an entry named 'TRAILER!!!'. + # Displaying each entry is useful, as it shows what files are contained in the archive, + # but we only want to extract the archive when the first entry is found. + if result.description.startswith('ASCII cpio archive'): + if not self.found_archive: + # This is the first entry. Set found_archive and allow the scan to continue normally. + self.found_archive = True + result.extract = True + elif 'TRAILER!!!' in results['description']: + # This is the last entry, un-set found_archive. + self.found_archive = False + + # The first entry has already been found and this is the last entry, or the last entry + # has not yet been found. Don't extract. + result.extract = False diff --git a/src/binwalk/plugins/lzmamod.py b/src/binwalk/plugins/lzmamod.py index 304117c..479fd8d 100644 --- a/src/binwalk/plugins/lzmamod.py +++ b/src/binwalk/plugins/lzmamod.py @@ -4,61 +4,61 @@ from binwalk.core.compat import * from binwalk.core.common import BlockFile class Plugin(object): - ''' - Finds and extracts modified LZMA files commonly found in cable modems. - Based on Bernardo Rodrigues' work: http://w00tsec.blogspot.com/2013/11/unpacking-firmware-images-from-cable.html - ''' + ''' + Finds and extracts modified LZMA files commonly found in cable modems. + Based on Bernardo Rodrigues' work: http://w00tsec.blogspot.com/2013/11/unpacking-firmware-images-from-cable.html + ''' - FAKE_LZMA_SIZE = "\x00\x00\x00\x10\x00\x00\x00\x00" - SIGNATURE = "lzma compressed data" + FAKE_LZMA_SIZE = "\x00\x00\x00\x10\x00\x00\x00\x00" + SIGNATURE = "lzma compressed data" - def __init__(self, module): - self.original_cmd = '' - self.enabled = (module.name == 'Signature') - self.module = module + def __init__(self, module): + self.original_cmd = '' + self.enabled = (module.name == 'Signature') + self.module = module - if self.enabled: - # Replace the existing LZMA extraction command with our own - rules = self.module.extractor.get_rules() - for i in range(0, len(rules)): - if rules[i]['regex'].match(self.SIGNATURE): - self.original_cmd = rules[i]['cmd'] - rules[i]['cmd'] = self.lzma_cable_extractor - break + if self.enabled: + # Replace the existing LZMA extraction command with our own + rules = self.module.extractor.get_rules() + for i in range(0, len(rules)): + if rules[i]['regex'].match(self.SIGNATURE): + self.original_cmd = rules[i]['cmd'] + rules[i]['cmd'] = self.lzma_cable_extractor + break - def lzma_cable_extractor(self, fname): - # Try extracting the LZMA file without modification first - if not self.module.extractor.execute(self.original_cmd, fname): - out_name = os.path.splitext(fname)[0] + '-patched' + os.path.splitext(fname)[1] - fp_out = BlockFile(out_name, 'w') - # Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored - fp_in = self.module.config.open_file(fname, offset=0, length=0) - fp_in.set_block_size(peek=0) - i = 0 + def lzma_cable_extractor(self, fname): + # Try extracting the LZMA file without modification first + if not self.module.extractor.execute(self.original_cmd, fname): + out_name = os.path.splitext(fname)[0] + '-patched' + os.path.splitext(fname)[1] + fp_out = BlockFile(out_name, 'w') + # Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored + fp_in = self.module.config.open_file(fname, offset=0, length=0) + fp_in.set_block_size(peek=0) + i = 0 - while i < fp_in.length: - (data, dlen) = fp_in.read_block() - - if i == 0: - out_data = data[0:5] + self.FAKE_LZMA_SIZE + data[5:] - else: - out_data = data - - fp_out.write(out_data) - - i += dlen + while i < fp_in.length: + (data, dlen) = fp_in.read_block() + + if i == 0: + out_data = data[0:5] + self.FAKE_LZMA_SIZE + data[5:] + else: + out_data = data + + fp_out.write(out_data) + + i += dlen - fp_in.close() - fp_out.close() + fp_in.close() + fp_out.close() - # Overwrite the original file so that it can be cleaned up if -r was specified - shutil.move(out_name, fname) - self.module.extractor.execute(self.original_cmd, fname) + # Overwrite the original file so that it can be cleaned up if -r was specified + shutil.move(out_name, fname) + self.module.extractor.execute(self.original_cmd, fname) - def scan(self, result): - # The modified cable modem LZMA headers all have valid dictionary sizes and a properties byte of 0x5D. - if self.enabled and result.description.lower().startswith(self.SIGNATURE) and "invalid uncompressed size" in result.description: - if "properties: 0x5D" in result.description and "invalid dictionary size" not in result.description: - result.valid = True - result.description = result.description.split("invalid uncompressed size")[0] + "missing uncompressed size" + def scan(self, result): + # The modified cable modem LZMA headers all have valid dictionary sizes and a properties byte of 0x5D. + if self.enabled and result.description.lower().startswith(self.SIGNATURE) and "invalid uncompressed size" in result.description: + if "properties: 0x5D" in result.description and "invalid dictionary size" not in result.description: + result.valid = True + result.description = result.description.split("invalid uncompressed size")[0] + "missing uncompressed size" diff --git a/src/binwalk/plugins/zlibvalid.py b/src/binwalk/plugins/zlibvalid.py index ae73267..64ae862 100644 --- a/src/binwalk/plugins/zlibvalid.py +++ b/src/binwalk/plugins/zlibvalid.py @@ -2,39 +2,39 @@ import binwalk.core.C from binwalk.core.common import BlockFile class Plugin(object): - ''' - Searches for and validates zlib compressed data. - ''' - - MIN_DECOMP_SIZE = 16 * 1024 - MAX_DATA_SIZE = 33 * 1024 - - TINFL = "tinfl" - TINFL_FUNCTIONS = [ - binwalk.core.C.Function(name="is_deflated", type=int), - ] - - def __init__(self, module): - self.tinfl = None - self.module = module - - # Only initialize this plugin if this is a signature scan - if module.name == 'Signature': - # Load libtinfl.so - self.tinfl = binwalk.core.C.Library(self.TINFL, self.TINFL_FUNCTIONS) - - def scan(self, result): - # If this result is a zlib signature match, try to decompress the data - if self.tinfl and result.file and result.description.lower().startswith('zlib'): - # Seek to and read the suspected zlib data - fd = self.module.config.open_file(result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE) - data = fd.read(self.MAX_DATA_SIZE) - fd.close() - - # Check if this is valid zlib data - decomp_size = self.tinfl.is_deflated(data, len(data), 1) - if decomp_size > 0: - result.description += ", uncompressed size >= %d" % decomp_size - else: - result.valid = False + ''' + Searches for and validates zlib compressed data. + ''' + + MIN_DECOMP_SIZE = 16 * 1024 + MAX_DATA_SIZE = 33 * 1024 + + TINFL = "tinfl" + TINFL_FUNCTIONS = [ + binwalk.core.C.Function(name="is_deflated", type=int), + ] + + def __init__(self, module): + self.tinfl = None + self.module = module + + # Only initialize this plugin if this is a signature scan + if module.name == 'Signature': + # Load libtinfl.so + self.tinfl = binwalk.core.C.Library(self.TINFL, self.TINFL_FUNCTIONS) + + def scan(self, result): + # If this result is a zlib signature match, try to decompress the data + if self.tinfl and result.file and result.description.lower().startswith('zlib'): + # Seek to and read the suspected zlib data + fd = self.module.config.open_file(result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE) + data = fd.read(self.MAX_DATA_SIZE) + fd.close() + + # Check if this is valid zlib data + decomp_size = self.tinfl.is_deflated(data, len(data), 1) + if decomp_size > 0: + result.description += ", uncompressed size >= %d" % decomp_size + else: + result.valid = False -- libgit2 0.26.0