Commit a5217d62 by devttys0

s/\t/ /g

parent 84e83d0f
...@@ -6,126 +6,126 @@ import ctypes.util ...@@ -6,126 +6,126 @@ import ctypes.util
from binwalk.core.compat import * from binwalk.core.compat import *
class Function(object):
    '''
    Container class for defining library functions.

    Attributes are populated from keyword arguments, e.g.:

        Function(name='magic_buffer', type=str)

    # name - Name of the library function (default: None).
    # type - Python return type of the library function (default: int).
    '''

    def __init__(self, **kwargs):
        self.name = None
        self.type = int

        # kwargs is always a plain dict, and dict.items() works on both
        # Python 2 and Python 3, so the iterator() compat shim is unnecessary.
        for (k, v) in kwargs.items():
            setattr(self, k, v)
class FunctionHandler(object):
    '''
    Class for abstracting function calls via ctypes and handling Python 2/3
    compatibility issues.
    '''

    # Maps Python types to the ctypes restype used for the library call.
    PY2CTYPES = {
        bytes: ctypes.c_char_p,
        str: ctypes.c_char_p,
        int: ctypes.c_int,
        float: ctypes.c_float,
        bool: ctypes.c_int,
        None: ctypes.c_int,
    }

    # Maps Python types to the converter applied to the raw return value.
    RETVAL_CONVERTERS = {
        None: int,
        int: int,
        float: float,
        bool: bool,
        str: bytes2str,
        bytes: str2bytes,
    }

    def __init__(self, library, function):
        '''
        Class constructor.

        @library  - Library handle as returned by ctypes.cdll.LoadLibrary.
        @function - An instance of the binwalk.core.C.Function class.

        Returns None.
        '''
        self.retype = function.type
        self.function = getattr(library, function.name)

        # Guard clause: only the types listed above are supported.
        if not has_key(self.PY2CTYPES, self.retype):
            raise Exception("Unknown return type: '%s'" % self.retype)

        self.function.restype = self.PY2CTYPES[self.retype]
        self.retval_converter = self.RETVAL_CONVERTERS[self.retype]

    def run(self, *args):
        '''
        Executes the library function, handling Python 2/3 compatibility and
        properly converting the return type.

        @*args - Library function arguments.

        Returns the return value from the library function.
        '''
        # Python 3 expects a bytes object for char *'s, not a str.
        # Converting here allows callers to pass either, on either version.
        call_args = [str2bytes(a) if isinstance(a, str) else a for a in args]

        return self.retval_converter(self.function(*call_args))
class Library(object):
    '''
    Class for loading the specified library via ctypes.
    '''

    def __init__(self, library, functions):
        '''
        Class constructor.

        @library   - Library name (e.g., 'magic' for libmagic).
        @functions - A dictionary of function names and their return types (e.g., {'magic_buffer' : str})

        Returns None.
        '''
        self.library = ctypes.cdll.LoadLibrary(self.find_library(library))
        if not self.library:
            raise Exception("Failed to load library '%s'" % library)

        # Expose each library function as an attribute on this object so
        # callers can invoke, e.g., lib.magic_buffer(...).
        for function in functions:
            f = FunctionHandler(self.library, function)
            setattr(self, function.name, f.run)

    def find_library(self, library):
        '''
        Locates the specified library.

        @library - Library name (e.g., 'magic' for libmagic).

        Returns a string to be passed to ctypes.cdll.LoadLibrary.
        '''
        lib_path = None

        system_paths = {
            'linux': ['/usr/local/lib/lib%s.so' % library],
            'linux2': ['/usr/local/lib/lib%s.so' % library],
            'linux3': ['/usr/local/lib/lib%s.so' % library],
            'darwin': ['/opt/local/lib/lib%s.dylib' % library,
                       '/usr/local/lib/lib%s.dylib' % library,
                       ] + glob.glob('/usr/local/Cellar/lib%s/*/lib/lib%s.dylib' % (library, library)),
            'win32': ['%s.dll' % library]
        }

        lib_path = ctypes.util.find_library(library)

        # Fall back to searching well-known install locations.
        # Bug fix: use .get() so an unrecognized sys.platform (e.g. 'freebsd9',
        # 'cygwin') falls through to the "Failed to locate" error below instead
        # of raising an unexplained KeyError.
        if not lib_path:
            for path in system_paths.get(sys.platform, []):
                if os.path.exists(path):
                    lib_path = path
                    break

        if not lib_path:
            raise Exception("Failed to locate library '%s'" % library)

        return lib_path
...@@ -9,383 +9,383 @@ from binwalk.core.compat import * ...@@ -9,383 +9,383 @@ from binwalk.core.compat import *
# This allows other modules/scripts to subclass BlockFile from a custom class.
# Defaults to io.FileIO.
if not has_key(__builtins__, 'BLOCK_FILE_PARENT_CLASS'):
    BLOCK_FILE_PARENT_CLASS = io.FileIO
else:
    BLOCK_FILE_PARENT_CLASS = __builtins__['BLOCK_FILE_PARENT_CLASS']
def file_md5(file_name):
    '''
    Generate an MD5 hash of the specified file.

    @file_name - The file to hash.

    Returns an MD5 hex digest string.
    '''
    digest = hashlib.md5()
    chunk_size = 128 * digest.block_size

    # Hash in fixed-size chunks so arbitrarily large files never
    # need to be held in memory all at once.
    with open(file_name, 'rb') as fp:
        chunk = fp.read(chunk_size)
        while chunk:
            digest.update(chunk)
            chunk = fp.read(chunk_size)

    return digest.hexdigest()
def file_size(filename):
    '''
    Obtains the size of a given file.

    @filename - Path to the file.

    Returns the size of the file.
    '''
    # open/lseek (rather than stat) reports a usable size for both
    # regular files and block devices.
    fd = os.open(filename, os.O_RDONLY)
    try:
        try:
            return os.lseek(fd, 0, os.SEEK_END)
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e)))
    finally:
        os.close(fd)
def strip_quoted_strings(string):
    '''
    Strips out data in between double quotes.

    @string - String to strip.

    Returns a sanitized string.
    '''
    # Greedy match: removes everything between the first and the last double
    # quote. This is intentional, since printed (and quoted) strings from a
    # target file may themselves contain double quotes, which should be
    # ignored. It also means any data between two separate quoted strings
    # (ex: '"quote 1" you won't see me "quote 2"') gets stripped as well.
    quoted = re.compile(r'\"(.*)\"')
    return quoted.sub("", string)
def get_quoted_strings(string):
    '''
    Returns a string comprised of all data in between double quotes.

    @string - String to get quoted data from.

    Returns a string of quoted data on success.
    Returns a blank string if no quoted data is present.
    '''
    try:
        # Greedy match: grabs everything between the first and the last double
        # quote. This is intentional, since printed (and quoted) strings from
        # a target file may themselves contain double quotes. It also means
        # any data between two separate quoted strings
        # (ex: '"quote 1" non-quoted data "quote 2"') gets included as well.
        match_list = re.findall(r'\"(.*)\"', string)
        return match_list[0]
    except KeyboardInterrupt as e:
        raise e
    except Exception:
        return ''
def unique_file_name(base_name, extension=''):
    '''
    Creates a unique file name based on the specified base name.

    @base_name - The base name to use for the unique file name.
    @extension - The file extension to use for the unique file name.

    Returns a unique file string.
    '''
    if extension and not extension.startswith('.'):
        extension = '.%s' % extension

    candidate = base_name + extension

    # Append "-<n>" to the base name until an unused path is found.
    suffix = 0
    while os.path.exists(candidate):
        candidate = "%s-%d%s" % (base_name, suffix, extension)
        suffix += 1

    return candidate
def strings(filename, minimum=4):
    '''
    A strings generator, similar to the Unix strings utility.

    @filename - The file to search for strings in.
    @minimum  - The minimum string length to search for.

    Yields printable ASCII strings from filename.
    '''
    result = ""

    with BlockFile(filename) as f:
        while True:
            (data, dlen) = f.read_block()
            if not data:
                break

            # NOTE(review): read_block returns dlen block bytes plus trailing
            # peek bytes; the peek region is re-read on the next iteration, so
            # strings falling in that overlap may be reported twice. Confirm
            # whether iterating only data[:dlen] is the intended behavior
            # before changing it here.
            for c in data:
                if c in string.printable:
                    result += c
                    continue
                elif len(result) >= minimum:
                    yield result
                    result = ""
                else:
                    result = ""

    # Bug fix: a qualifying string terminated by EOF (rather than by a
    # non-printable byte) was previously dropped; yield it here.
    if len(result) >= minimum:
        yield result
class MathExpression(object):
    '''
    Class for safely evaluating mathematical expressions from a string.
    Stolen from: http://stackoverflow.com/questions/2371436/evaluating-a-mathematical-expression-in-a-string
    '''

    # Supported AST operator nodes mapped to their implementations.
    OPERATORS = {
        ast.Add: op.add,
        ast.Sub: op.sub,
        ast.Mult: op.mul,
        ast.Div: op.truediv,
        ast.Pow: op.pow,
        ast.BitXor: op.xor,
    }

    def __init__(self, expression):
        '''
        @expression - The expression string to evaluate.

        On success self.value holds the result; on any failure it stays None.
        '''
        self.expression = expression
        self.value = None

        if not expression:
            return

        # Any parse or evaluation error simply leaves self.value as None.
        try:
            self.value = self.evaluate(self.expression)
        except KeyboardInterrupt as e:
            raise e
        except Exception:
            pass

    def evaluate(self, expr):
        '''
        Parses the expression string and evaluates its AST.
        '''
        return self._eval(ast.parse(expr).body[0].value)

    def _eval(self, node):
        '''
        Recursively evaluates one node of the parsed expression tree.
        '''
        if isinstance(node, ast.BinOp):
            # <left> <operator> <right>
            return self._eval(node.op)(self._eval(node.left), self._eval(node.right))
        if isinstance(node, ast.operator):
            # <operator>
            return self.OPERATORS[type(node)]
        if isinstance(node, ast.Num):
            # <number>
            return node.n
        raise TypeError(node)
class BlockFile(BLOCK_FILE_PARENT_CLASS):
    '''
    Abstraction class for accessing binary files.

    This class overrides io.FileIO's read and write methods. This guarantees two things:

        1. All requested data will be read/written via the read and write methods.
        2. All reads return a str object and all writes can accept either a str or a
           bytes object, regardless of the Python interpreter version.

    However, the downside is that other io.FileIO methods won't work properly in Python 3,
    namely things that are wrappers around self.read (e.g., readline, readlines, etc).

    This class also provides a read_block method, which is used by binwalk to read in a
    block of data, plus some additional data (DEFAULT_BLOCK_PEEK_SIZE), but on the next
    block read pick up at the end of the previous data block (not the end of the additional
    data). This is necessary for scans where a signature may span a block boundary.

    The decision to force read to return a str object instead of a bytes object is
    questionable for Python 3, but it seemed the best way to abstract differences in
    Python 2/3 from the rest of the code (especially for people writing plugins) and to
    add Python 3 support with minimal code change.
    '''

    # The DEFAULT_BLOCK_PEEK_SIZE limits the amount of data available to a signature.
    # While most headers/signatures are far less than this value, some may reference
    # pointers in the header structure which may point well beyond the header itself.
    # Passing the entire remaining buffer to libmagic is resource intensive and will
    # significantly slow the scan; this value represents a reasonable buffer size to
    # pass to libmagic which will not drastically affect scan time.
    DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024

    # Max number of bytes to process at one time. This needs to be large enough to
    # limit disk I/O, but small enough to limit the size of processed data blocks.
    DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024

    def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
        '''
        Class constructor.

        @fname  - Path to the file to be opened.
        @mode   - Mode to open the file in (default: 'r').
        @length - Maximum number of bytes to read from the file via self.block_read().
        @offset - Offset at which to start reading from the file.
        @block  - Size of data block to read (excluding any trailing size),
        @peek   - Size of trailing data to append to the end of each block.
        @swap   - Swap every n bytes of data.

        Returns None.
        '''
        self.total_read = 0
        self.swap_size = swap
        self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
        self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE

        # Python 2.6 doesn't like modes like 'rb' or 'wb'
        mode = mode.replace('b', '')

        try:
            self.size = file_size(fname)
        except KeyboardInterrupt as e:
            raise e
        except Exception:
            self.size = 0

        # A negative offset is interpreted as relative to the end of the file.
        if offset < 0:
            self.offset = self.size + offset
        else:
            self.offset = offset

        # Clamp the starting offset to [0, file size].
        if self.offset < 0:
            self.offset = 0
        elif self.offset > self.size:
            self.offset = self.size

        if offset < 0:
            self.length = offset * -1
        elif length:
            self.length = length
        else:
            self.length = self.size - offset

        # Clamp the readable length to [0, file size].
        if self.length < 0:
            self.length = 0
        elif self.length > self.size:
            self.length = self.size

        if block is not None:
            self.block_read_size = block
        self.base_block_size = self.block_read_size

        if peek is not None:
            self.block_peek_size = peek
        self.base_peek_size = self.block_peek_size

        # Bug fix: use super(BlockFile, ...) rather than
        # super(self.__class__, ...), which recurses infinitely whenever this
        # class is subclassed (and the module explicitly supports subclassing
        # via BLOCK_FILE_PARENT_CLASS).
        super(BlockFile, self).__init__(fname, mode)

        # Work around for python 2.6 where FileIO._name is not defined
        try:
            self.name
        except AttributeError:
            self._name = fname

        self.seek(self.offset)

    def _swap_data_block(self, block):
        '''
        Reverses every self.swap_size bytes inside the specified data block.
        Size of data block must be a multiple of self.swap_size.

        @block - The data block to swap.

        Returns a swapped string.
        '''
        i = 0
        data = ""

        if self.swap_size > 0:
            while i < len(block):
                data += block[i:i + self.swap_size][::-1]
                i += self.swap_size
        else:
            data = block

        return data

    def reset(self):
        # Restore the original block/peek sizes and rewind to the start offset.
        self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
        self.seek(self.offset)

    def set_block_size(self, block=None, peek=None):
        # Adjust block read/peek sizes; a value of None leaves the current
        # setting unchanged.
        if block is not None:
            self.block_read_size = block
        if peek is not None:
            self.block_peek_size = peek

    def write(self, data):
        '''
        Writes data to the opened file.

        io.FileIO.write does not guarantee that all data will be written;
        this method overrides io.FileIO.write and does guarantee that all data will be written.

        Returns the number of bytes written.
        '''
        n = 0
        l = len(data)
        data = str2bytes(data)

        while n < l:
            n += super(BlockFile, self).write(data[n:])

        return n

    def read(self, n=-1):
        '''
        Reads up to n bytes of data (or to EOF if n is not specified).
        Will not read more than self.length bytes.

        io.FileIO.read does not guarantee that all requested data will be read;
        this method overrides io.FileIO.read and does guarantee that all data will be read.

        Returns a str object containing the read data.
        '''
        l = 0
        data = b''

        if self.total_read < self.length:
            # Don't read more than self.length bytes from the file
            if (self.total_read + n) > self.length:
                n = self.length - self.total_read

            # Loop until n bytes are collected (or EOF when n < 0);
            # io.FileIO.read may return short.
            while n < 0 or l < n:
                tmp = super(BlockFile, self).read(n - l)
                if tmp:
                    data += tmp
                    l += len(tmp)
                else:
                    break

            self.total_read += len(data)

        return self._swap_data_block(bytes2str(data))

    def peek(self, n=-1):
        '''
        Peeks at data in file: reads n bytes, then restores the file position.
        '''
        pos = self.tell()
        data = self.read(n)
        self.seek(pos)
        return data

    def seek(self, n, whence=os.SEEK_SET):
        # Keep total_read in sync with the new position so that read()
        # can continue to enforce the self.length limit.
        if whence == os.SEEK_SET:
            self.total_read = n - self.offset
        elif whence == os.SEEK_CUR:
            self.total_read += n
        elif whence == os.SEEK_END:
            self.total_read = self.size + n

        super(BlockFile, self).seek(n, whence)

    def read_block(self):
        '''
        Reads in a block of data from the target file.

        Returns a tuple of (str(file block data), block data length).
        '''
        data = self.read(self.block_read_size)
        dlen = len(data)
        # Append peek data without advancing the file position, so the next
        # read_block picks up at the end of this block, not the peek.
        data += self.peek(self.block_peek_size)

        return (data, dlen)
...@@ -7,68 +7,68 @@ import string ...@@ -7,68 +7,68 @@ import string
# Major version of the running interpreter (2 or 3).
PY_MAJOR_VERSION = sys.version_info[0]

# Python 3 dropped string.letters; alias it so legacy code keeps working.
if PY_MAJOR_VERSION > 2:
    string.letters = string.ascii_letters
def iterator(dictionary):
    '''
    For cross compatibility between Python 2 and Python 3 dictionaries.

    Returns an iterable of (key, value) pairs.
    '''
    return dictionary.items() if PY_MAJOR_VERSION > 2 else dictionary.iteritems()
def has_key(dictionary, key):
    '''
    For cross compatibility between Python 2 and Python 3 dictionaries.

    Returns True if key is present in dictionary, else False.
    '''
    if PY_MAJOR_VERSION <= 2:
        return dictionary.has_key(key)
    return key in dictionary
def get_keys(dictionary):
    '''
    For cross compatibility between Python 2 and Python 3 dictionaries.

    Returns the dictionary's keys as a list.
    '''
    keys = dictionary.keys()
    if PY_MAJOR_VERSION > 2:
        keys = list(keys)
    return keys
def str2bytes(string):
    '''
    For cross compatibility between Python 2 and Python 3 strings.

    On Python 3, converts a str to latin1-encoded bytes; otherwise returns
    the argument unchanged.
    '''
    if PY_MAJOR_VERSION > 2 and isinstance(string, str):
        return bytes(string, 'latin1')
    return string
def bytes2str(bs):
    '''
    For cross compatibility between Python 2 and Python 3 strings.

    On Python 3, decodes a bytes object as latin1; otherwise returns the
    argument unchanged.
    '''
    if PY_MAJOR_VERSION > 2 and isinstance(bs, bytes):
        return bs.decode('latin1')
    return bs
def string_decode(string):
    '''
    For cross compatibility between Python 2 and Python 3 strings.

    Decodes backslash escape sequences in the given string.
    '''
    if PY_MAJOR_VERSION <= 2:
        return string.decode('string_escape')
    return bytes(string, 'utf-8').decode('unicode_escape')
def user_input(prompt=''):
    '''
    For getting raw user input in Python 2 and 3.
    '''
    if PY_MAJOR_VERSION <= 2:
        return raw_input(prompt)
    return input(prompt)
...@@ -6,171 +6,171 @@ from binwalk.core.compat import * ...@@ -6,171 +6,171 @@ from binwalk.core.compat import *
class Display(object): class Display(object):
SCREEN_WIDTH = 0 SCREEN_WIDTH = 0
HEADER_WIDTH = 150 HEADER_WIDTH = 150
DEFAULT_FORMAT = "%s\n" DEFAULT_FORMAT = "%s\n"
    def __init__(self, quiet=False, verbose=False, log=None, csv=False, fit_to_screen=False, filter=None):
        '''
        Class constructor.

        @quiet         - If True, suppress display output (logging still occurs).
        @verbose       - Enable verbose output.
        @log           - Path to a log file to write output to, or None for no log file.
        @csv           - If True, log output is written in CSV format.
        @fit_to_screen - If True, fit output to the terminal width.
        @filter        - Result filter object.  # presumably a binwalk filter instance — TODO confirm against callers

        Returns None.
        '''
        self.quiet = quiet
        self.filter = filter
        self.verbose = verbose
        self.fit_to_screen = fit_to_screen
        # Log file handle; None when no log file was requested.
        self.fp = None
        # CSV writer wrapping self.fp; None when not logging in CSV mode.
        self.csv = None
        self.num_columns = 0
        self.custom_verbose_format = ""
        self.custom_verbose_args = []

        self._configure_formatting()

        if log:
            self.fp = open(log, "w")
            if csv:
                self.csv = pycsv.writer(self.fp)
def format_strings(self, header, result): def format_strings(self, header, result):
self.result_format = result self.result_format = result
self.header_format = header self.header_format = header
if self.num_columns == 0: if self.num_columns == 0:
self.num_columns = len(header.split()) self.num_columns = len(header.split())
def log(self, fmt, columns): def log(self, fmt, columns):
if self.fp: if self.fp:
if self.csv: if self.csv:
self.csv.writerow(columns) self.csv.writerow(columns)
else: else:
self.fp.write(fmt % tuple(columns)) self.fp.write(fmt % tuple(columns))
def add_custom_header(self, fmt, args): def add_custom_header(self, fmt, args):
self.custom_verbose_format = fmt self.custom_verbose_format = fmt
self.custom_verbose_args = args self.custom_verbose_args = args
def header(self, *args, **kwargs): def header(self, *args, **kwargs):
file_name = None file_name = None
self.num_columns = len(args) self.num_columns = len(args)
if has_key(kwargs, 'file_name'): if has_key(kwargs, 'file_name'):
file_name = kwargs['file_name'] file_name = kwargs['file_name']
if self.verbose and file_name: if self.verbose and file_name:
md5sum = binwalk.core.common.file_md5(file_name) md5sum = binwalk.core.common.file_md5(file_name)
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
if self.csv: if self.csv:
self.log("", ["FILE", "MD5SUM", "TIMESTAMP"]) self.log("", ["FILE", "MD5SUM", "TIMESTAMP"])
self.log("", [file_name, md5sum, timestamp]) self.log("", [file_name, md5sum, timestamp])
self._fprint("%s", "\n", csv=False) self._fprint("%s", "\n", csv=False)
self._fprint("Scan Time: %s\n", [timestamp], csv=False, filter=False) self._fprint("Scan Time: %s\n", [timestamp], csv=False, filter=False)
self._fprint("Target File: %s\n", [file_name], csv=False, filter=False) self._fprint("Target File: %s\n", [file_name], csv=False, filter=False)
self._fprint("MD5 Checksum: %s\n", [md5sum], csv=False, filter=False) self._fprint("MD5 Checksum: %s\n", [md5sum], csv=False, filter=False)
if self.custom_verbose_format and self.custom_verbose_args: if self.custom_verbose_format and self.custom_verbose_args:
self._fprint(self.custom_verbose_format, self.custom_verbose_args, csv=False, filter=False) self._fprint(self.custom_verbose_format, self.custom_verbose_args, csv=False, filter=False)
self._fprint("%s", "\n", csv=False, filter=False) self._fprint("%s", "\n", csv=False, filter=False)
self._fprint(self.header_format, args, filter=False) self._fprint(self.header_format, args, filter=False)
self._fprint("%s", ["-" * self.HEADER_WIDTH + "\n"], csv=False, filter=False) self._fprint("%s", ["-" * self.HEADER_WIDTH + "\n"], csv=False, filter=False)
def result(self, *args): def result(self, *args):
# Convert to list for item assignment # Convert to list for item assignment
args = list(args) args = list(args)
# Replace multiple spaces with single spaces. This is to prevent accidentally putting # Replace multiple spaces with single spaces. This is to prevent accidentally putting
# four spaces in the description string, which would break auto-formatting. # four spaces in the description string, which would break auto-formatting.
for i in range(len(args)): for i in range(len(args)):
if isinstance(args[i], str): if isinstance(args[i], str):
while " " in args[i]: while " " in args[i]:
args[i] = args[i].replace(" " , " ") args[i] = args[i].replace(" " , " ")
self._fprint(self.result_format, tuple(args)) self._fprint(self.result_format, tuple(args))
def footer(self): def footer(self):
self._fprint("%s", "\n", csv=False, filter=False) self._fprint("%s", "\n", csv=False, filter=False)
def _fprint(self, fmt, columns, csv=True, stdout=True, filter=True): def _fprint(self, fmt, columns, csv=True, stdout=True, filter=True):
line = fmt % tuple(columns) line = fmt % tuple(columns)
if not filter or self.filter.valid_result(line): if not filter or self.filter.valid_result(line):
if not self.quiet and stdout: if not self.quiet and stdout:
sys.stdout.write(self._format_line(line.strip()) + "\n") sys.stdout.write(self._format_line(line.strip()) + "\n")
if self.fp and not (self.csv and not csv): if self.fp and not (self.csv and not csv):
self.log(fmt, columns) self.log(fmt, columns)
def _append_to_data_parts(self, data, start, end): def _append_to_data_parts(self, data, start, end):
''' '''
Intelligently appends data to self.string_parts. Intelligently appends data to self.string_parts.
For use by self._format. For use by self._format.
''' '''
try: try:
while data[start] == ' ': while data[start] == ' ':
start += 1 start += 1
if start == end: if start == end:
end = len(data[start:]) end = len(data[start:])
self.string_parts.append(data[start:end]) self.string_parts.append(data[start:end])
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
raise e raise e
except Exception: except Exception:
try: try:
self.string_parts.append(data[start:]) self.string_parts.append(data[start:])
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
raise e raise e
except Exception: except Exception:
pass pass
return start return start
def _format_line(self, line): def _format_line(self, line):
''' '''
Formats a line of text to fit in the terminal window. Formats a line of text to fit in the terminal window.
For Tim. For Tim.
''' '''
offset = 0 offset = 0
space_offset = 0 space_offset = 0
self.string_parts = [] self.string_parts = []
delim = '\n' delim = '\n'
if self.fit_to_screen and len(line) > self.SCREEN_WIDTH: if self.fit_to_screen and len(line) > self.SCREEN_WIDTH:
line_columns = line.split(None, self.num_columns-1) line_columns = line.split(None, self.num_columns-1)
if line_columns: if line_columns:
delim = '\n' + ' ' * line.rfind(line_columns[-1]) delim = '\n' + ' ' * line.rfind(line_columns[-1])
while len(line[offset:]) > self.SCREEN_WIDTH: while len(line[offset:]) > self.SCREEN_WIDTH:
space_offset = line[offset:offset+self.HEADER_WIDTH].rfind(' ') space_offset = line[offset:offset+self.HEADER_WIDTH].rfind(' ')
if space_offset == -1 or space_offset == 0: if space_offset == -1 or space_offset == 0:
space_offset = self.SCREEN_WIDTH space_offset = self.SCREEN_WIDTH
self._append_to_data_parts(line, offset, offset+space_offset) self._append_to_data_parts(line, offset, offset+space_offset)
offset += space_offset offset += space_offset
self._append_to_data_parts(line, offset, offset+len(line[offset:])) self._append_to_data_parts(line, offset, offset+len(line[offset:]))
return delim.join(self.string_parts) return delim.join(self.string_parts)
def _configure_formatting(self): def _configure_formatting(self):
''' '''
Configures output formatting, and fitting output to the current terminal width. Configures output formatting, and fitting output to the current terminal width.
Returns None. Returns None.
''' '''
self.format_strings(self.DEFAULT_FORMAT, self.DEFAULT_FORMAT) self.format_strings(self.DEFAULT_FORMAT, self.DEFAULT_FORMAT)
if self.fit_to_screen: if self.fit_to_screen:
try: try:
import fcntl import fcntl
import struct import struct
import termios import termios
# Get the terminal window width # Get the terminal window width
hw = struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234')) hw = struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234'))
self.SCREEN_WIDTH = self.HEADER_WIDTH = hw[1] self.SCREEN_WIDTH = self.HEADER_WIDTH = hw[1]
except KeyboardInterrupt as e: except KeyboardInterrupt as e:
raise e raise e
except Exception: except Exception:
pass pass
...@@ -5,205 +5,205 @@ from binwalk.core.compat import * ...@@ -5,205 +5,205 @@ from binwalk.core.compat import *
class FilterType(object):
    '''
    Base container describing a single include/exclude filter rule.
    '''

    FILTER_INCLUDE = 0
    FILTER_EXCLUDE = 1

    def __init__(self, **kwargs):
        '''
        Accepts arbitrary keyword attributes (typically 'filter' and/or
        'regex'). If no compiled regex is supplied, self.filter is
        compiled into self.regex.
        '''
        self.type = None
        self.filter = None
        self.regex = None

        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)

        if self.regex is None:
            self.regex = re.compile(self.filter)
class FilterInclude(FilterType):
    '''
    A filter rule that marks matching results for inclusion.
    '''

    def __init__(self, **kwargs):
        FilterType.__init__(self, **kwargs)
        self.type = self.FILTER_INCLUDE
class FilterExclude(FilterType):
    '''
    A filter rule that marks matching results for exclusion.
    '''

    def __init__(self, **kwargs):
        FilterType.__init__(self, **kwargs)
        self.type = self.FILTER_EXCLUDE
class Filter(object):
    '''
    Class to filter results based on include/exclude rules and false positive detection.
    An instance of this class is available via the Binwalk.filter object.
    Note that all filter strings should be in lower case.
    '''

    # If the result returned by libmagic is "data" or contains the text
    # 'invalid' or a backslash, it is known to be invalid/a false positive.
    DATA_RESULT = "data"
    INVALID_RESULTS = ["invalid", "\\"]
    INVALID_RESULT = "invalid"
    NON_PRINTABLE_RESULT = "\\"

    def __init__(self, show_invalid_results=False):
        '''
        Class constructor.

        @show_invalid_results - Set to True to display results marked as invalid.

        Returns None.
        '''
        self.filters = []
        self.grep_filters = []
        self.show_invalid_results = show_invalid_results
        self.exclusive_filter = False
        self.smart = Signature(self)

    def include(self, match, exclusive=True):
        '''
        Adds a new filter which explicitly includes results that contain
        the specified matching text.

        @match     - Regex, or list of regexs, to match.
        @exclusive - If True, then results that do not explicitly contain
                     a FILTER_INCLUDE match will be excluded. If False,
                     signatures that contain the FILTER_INCLUDE match will
                     be included in the scan, but will not cause non-matching
                     results to be excluded.

        Returns None.
        '''
        # isinstance instead of comparing against type([]), so list
        # subclasses are treated as lists too.
        if not isinstance(match, list):
            matches = [match]
        else:
            matches = match

        for m in matches:
            if m:
                if exclusive and not self.exclusive_filter:
                    self.exclusive_filter = True

                self.filters.append(FilterInclude(filter=m))

    def exclude(self, match):
        '''
        Adds a new filter which explicitly excludes results that contain
        the specified matching text.

        @match - Regex, or list of regexs, to match.

        Returns None.
        '''
        if not isinstance(match, list):
            matches = [match]
        else:
            matches = match

        for m in matches:
            if m:
                self.filters.append(FilterExclude(filter=m))

    def filter(self, data):
        '''
        Checks to see if a given string should be excluded from or included in the results.
        Called internally by Binwalk.scan().

        @data - String to check.

        Returns FILTER_INCLUDE if the string should be included.
        Returns FILTER_EXCLUDE if the string should be excluded.
        '''
        data = data.lower()

        # Loop through the filters to see if any of them are a match.
        # If so, return the registered type for the matching filter (FILTER_INCLUDE || FILTER_EXCLUDE).
        for f in self.filters:
            if f.regex.search(data):
                return f.type

        # If there was no explicit match and exclusive filtering is enabled, return FILTER_EXCLUDE.
        if self.exclusive_filter:
            return FilterType.FILTER_EXCLUDE

        return FilterType.FILTER_INCLUDE

    def valid_result(self, data):
        '''
        Checks if the given string contains invalid data.

        @data - String to validate.

        Returns True if data is valid, False if invalid.
        '''
        # A result of 'data' is never ever valid (for libmagic results)
        if data == self.DATA_RESULT:
            return False

        # Make sure this result wasn't filtered
        if self.filter(data) == FilterType.FILTER_EXCLUDE:
            return False

        # If showing invalid results, just return True without further checking.
        if self.show_invalid_results:
            return True

        # Don't include quoted strings or keyword arguments in this search, as
        # strings from the target file may legitimately contain the INVALID_RESULT text.
        if self.INVALID_RESULT in common.strip_quoted_strings(self.smart.strip_tags(data)):
            return False

        # There should be no non-printable characters in any of the data
        if self.NON_PRINTABLE_RESULT in data:
            return False

        return True

    def grep(self, data=None, filters=None):
        '''
        Add or check case-insensitive grep filters against the supplied data string.

        @data    - Data string to check grep filters against. Not required if filters is specified.
        @filters - Regex, or list of regexs, to add to the grep filters list. Not required if data is specified.

        Returns None if data is not specified.
        If data is specified, returns True if the data contains a grep filter, or if no grep filters exist.
        If data is specified, returns False if the data does not contain any grep filters.
        '''
        # @filters previously defaulted to a shared mutable list ([]);
        # None avoids the mutable-default-argument pitfall and behaves
        # identically for all callers.
        if filters:
            if not isinstance(filters, list):
                gfilters = [filters]
            else:
                gfilters = filters

            for gfilter in gfilters:
                # Filters are case insensitive
                self.grep_filters.append(re.compile(gfilter))

        # Check the data against all grep filters until one is found
        if data is not None:
            # If no grep filters have been created, always return True
            if not self.grep_filters:
                return True

            # Filters are case insensitive
            data = data.lower()

            # If a filter exists in data, return True
            for gfilter in self.grep_filters:
                if gfilter.search(data):
                    return True

            # Else, return False
            return False

        return None

    def clear(self):
        '''
        Clears all include, exclude and grep filters.

        Returns None.
        '''
        self.filters = []
        self.grep_filters = []
...@@ -10,777 +10,777 @@ import binwalk.core.plugin ...@@ -10,777 +10,777 @@ import binwalk.core.plugin
from binwalk.core.compat import * from binwalk.core.compat import *
class Option(object):
    '''
    A container class that allows modules to declare command line options.
    '''

    def __init__(self, kwargs=None, priority=0, description="", short="", long="", type=None, dtype=None):
        '''
        Class constructor.

        @kwargs      - A dictionary of kwarg key-value pairs affected by this command line option.
        @priority    - A value from 0 to 100. Higher priorities will override kwarg values set by lower priority options.
        @description - A description to be displayed in the help output.
        @short       - The short option to use (optional).
        @long        - The long option to use (if None, this option will not be displayed in help output).
        @type        - The accepted data type (one of: io.FileIO/argparse.FileType/binwalk.core.common.BlockFile, list, str, int, float).
        @dtype       - The displayed accepted type string, to be shown in help output.

        Returns None.
        '''
        # Default to a fresh dict per instance; a literal {} default would
        # be shared across all Option instances (mutable default pitfall).
        self.kwargs = {} if kwargs is None else kwargs
        self.priority = priority
        self.description = description
        self.short = short
        self.long = long
        self.type = type
        self.dtype = dtype

        # Derive a displayable type name when one wasn't provided.
        if not self.dtype and self.type:
            if self.type in [io.FileIO, argparse.FileType, binwalk.core.common.BlockFile]:
                self.dtype = 'file'
            elif self.type in [int, float, str]:
                self.dtype = self.type.__name__
            else:
                self.dtype = str.__name__
class Kwarg(object):
    '''
    A container class allowing modules to specify their expected __init__ kwarg(s).
    '''

    def __init__(self, name="", default=None, description=""):
        '''
        Class constructor.

        @name        - Kwarg name.
        @default     - Default kwarg value.
        @description - Description string.

        Return None.
        '''
        self.description = description
        self.default = default
        self.name = name
class Dependency(object):
    '''
    A container class for declaring module dependencies.
    '''

    def __init__(self, attribute="", name="", kwargs=None):
        '''
        @attribute - Attribute name on the dependent module where the resolved
                     dependency will be stored (e.g., 'config', 'extractor').
        @name      - Name of the module this dependency refers to.
        @kwargs    - Keyword arguments to pass when instantiating the dependency.

        Returns None.
        '''
        self.attribute = attribute
        self.name = name
        # Fresh dict per instance; a literal {} default would be shared
        # across every Dependency instance (mutable default pitfall).
        self.kwargs = {} if kwargs is None else kwargs
        # Populated later with the resolved module instance.
        self.module = None
class Result(object):
    '''
    Generic class for storing and accessing scan results.
    '''

    def __init__(self, **kwargs):
        '''
        Class constructor.

        @offset      - The file offset of the result.
        @size        - Size of the result, if known.
        @description - The result description, as displayed to the user.
        @module      - Name of the module that generated the result.
        @file        - The file object of the scanned file.
        @valid       - Set to True if the result is valid, False if invalid.
        @display     - Set to True to display the result to the user, False to hide it.
        @extract     - Set to True to flag this result for extraction.
        @plot        - Set to False to exclude this result from entropy plots.
        @name        - Name of the result found (None if not applicable or unknown).

        Provide additional kwargs as necessary.

        Returns None.
        '''
        # Establish the defaults first; caller-supplied kwargs then
        # overwrite them (and may add arbitrary new attributes).
        self.offset = 0
        self.size = 0
        self.description = ''
        self.module = ''
        self.file = None
        self.valid = True
        self.display = True
        self.extract = True
        self.plot = True
        self.name = None

        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)
class Error(Result):
    '''
    A subclass of binwalk.core.module.Result used for reporting errors.
    '''

    def __init__(self, **kwargs):
        '''
        Accepts all the same kwargs as binwalk.core.module.Result, but the following are also added:

        @exception - In case of an exception, this is the exception object.

        Returns None.
        '''
        # Default set before delegating so a caller-supplied 'exception'
        # kwarg can overwrite it.
        self.exception = None
        Result.__init__(self, **kwargs)
class Module(object):
    '''
    All module classes must be subclassed from this.
    '''
    # The module title, as displayed in help output
    TITLE = ""

    # A list of binwalk.core.module.Option command line options
    CLI = []

    # A list of binwalk.core.module.Kwargs accepted by __init__
    KWARGS = []

    # A list of default dependencies for all modules; do not override this unless you
    # understand the consequences of doing so.
    DEFAULT_DEPENDS = [
            Dependency(name='General',
                       attribute='config'),
            Dependency(name='Extractor',
                       attribute='extractor'),
    ]

    # A list of dependencies that can be filled in as needed by each individual module.
    DEPENDS = []

    # Format string for printing the header during a scan.
    # Must be set prior to calling self.header.
    HEADER_FORMAT = "%-12s %-12s %s\n"

    # Format string for printing each result during a scan.
    # Must be set prior to calling self.result.
    RESULT_FORMAT = "%-12d 0x%-12X %s\n"

    # Format string for printing custom information in the verbose header output.
    # Must be set prior to calling self.header.
    VERBOSE_FORMAT = ""

    # The header to print during a scan.
    # Set to None to not print a header.
    # Note that this will be formatted per the HEADER_FORMAT format string.
    # Must be set prior to calling self.header.
    HEADER = ["DECIMAL", "HEXADECIMAL", "DESCRIPTION"]

    # The Result attribute names to print during a scan, as provided to the self.results method.
    # Set to None to not print any results.
    # Note that these will be formatted per the RESULT_FORMAT format string.
    # Must be set prior to calling self.result.
    RESULT = ["offset", "offset", "description"]

    # The custom data to print in the verbose header output.
    # Note that these will be formatted per the VERBOSE_FORMAT format string.
    # Must be set prior to calling self.header.
    VERBOSE = []

    # If set to True, the progress status will be automatically updated for each result
    # containing valid file and offset attributes.
    AUTO_UPDATE_STATUS = True

    # Modules with higher priorities are executed first
    PRIORITY = 5

    # Modules with a higher order are displayed first in help output
    ORDER = 5

    # Set to False if this is not a primary module (e.g., General, Extractor modules)
    PRIMARY = True
    def __init__(self, **kwargs):
        '''
        Base module constructor: initializes scan state, loads plugins,
        applies kwargs declared in KWARGS, and invokes self.load().
        '''
        # Scan state: accumulated errors/results and the file queue.
        self.errors = []
        self.results = []
        self.target_file_list = []
        self.status = None
        self.enabled = False
        self.current_target_file_name = None
        self.name = self.__class__.__name__
        self.plugins = binwalk.core.plugin.Plugins(self)
        self.dependencies = self.DEFAULT_DEPENDS + self.DEPENDS

        # Validate and apply kwargs per this module's KWARGS declarations.
        process_kwargs(self, kwargs)

        self.plugins.load_plugins()

        try:
            self.load()
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            self.error(exception=e)

        # self.config is provided by the 'General' dependency; it may not
        # exist yet for some instantiations, hence the AttributeError guard.
        try:
            self.target_file_list = list(self.config.target_files)
        except AttributeError as e:
            pass
def __del__(self): def __del__(self):
return None return None
def __enter__(self): def __enter__(self):
return self return self
def __exit__(self, x, z, y): def __exit__(self, x, z, y):
return None return None
def load(self): def load(self):
''' '''
Invoked at module load time. Invoked at module load time.
May be overridden by the module sub-class. May be overridden by the module sub-class.
''' '''
return None return None
def reset(self): def reset(self):
''' '''
Invoked only for dependency modules immediately prior to starting a new primary module. Invoked only for dependency modules immediately prior to starting a new primary module.
''' '''
return None return None
def init(self): def init(self):
''' '''
Invoked prior to self.run. Invoked prior to self.run.
May be overridden by the module sub-class. May be overridden by the module sub-class.
Returns None. Returns None.
''' '''
return None return None
def run(self): def run(self):
''' '''
Executes the main module routine. Executes the main module routine.
Must be overridden by the module sub-class. Must be overridden by the module sub-class.
Returns True on success, False on failure. Returns True on success, False on failure.
''' '''
return False return False
def callback(self, r): def callback(self, r):
''' '''
Processes the result from all modules. Called for all dependency modules when a valid result is found. Processes the result from all modules. Called for all dependency modules when a valid result is found.
@r - The result, an instance of binwalk.core.module.Result. @r - The result, an instance of binwalk.core.module.Result.
Returns None. Returns None.
''' '''
return None return None
def validate(self, r): def validate(self, r):
''' '''
Validates the result. Validates the result.
May be overridden by the module sub-class. May be overridden by the module sub-class.
@r - The result, an instance of binwalk.core.module.Result. @r - The result, an instance of binwalk.core.module.Result.
Returns None. Returns None.
''' '''
r.valid = True r.valid = True
return None return None
def _plugins_pre_scan(self): def _plugins_pre_scan(self):
self.plugins.pre_scan_callbacks(self) self.plugins.pre_scan_callbacks(self)
def _plugins_post_scan(self): def _plugins_post_scan(self):
self.plugins.post_scan_callbacks(self) self.plugins.post_scan_callbacks(self)
def _plugins_result(self, r): def _plugins_result(self, r):
self.plugins.scan_callbacks(r) self.plugins.scan_callbacks(r)
def _build_display_args(self, r): def _build_display_args(self, r):
args = [] args = []
if self.RESULT: if self.RESULT:
if type(self.RESULT) != type([]): if type(self.RESULT) != type([]):
result = [self.RESULT] result = [self.RESULT]
else: else:
result = self.RESULT result = self.RESULT
for name in result: for name in result:
args.append(getattr(r, name)) args.append(getattr(r, name))
return args return args
    def next_file(self):
        '''
        Gets the next file to be scanned (including pending extracted files, if applicable).
        Also re/initializes self.status.
        All modules should access the target file list through this method.

        Returns the next file object, or None if there are no more files.
        '''
        fp = None

        # Add any pending extracted files to the target_files list and reset the extractor's pending file list
        self.target_file_list += [self.config.open_file(f) for f in self.extractor.pending]
        self.extractor.pending = []

        if self.target_file_list:
            # FIFO: scan files in the order they were queued.
            fp = self.target_file_list.pop(0)
            # Restart progress tracking for the new target file.
            self.status.clear()
            self.status.total = fp.length

        if fp is not None:
            self.current_target_file_name = fp.name
        else:
            self.current_target_file_name = None

        return fp
def clear(self, results=True, errors=True): def clear(self, results=True, errors=True):
''' '''
Clears results and errors lists. Clears results and errors lists.
''' '''
if results: if results:
self.results = [] self.results = []
if errors: if errors:
self.errors = [] self.errors = []
    def result(self, r=None, **kwargs):
        '''
        Validates a result, stores it in self.results and prints it.
        Accepts the same kwargs as the binwalk.core.module.Result class.

        @r - An existing instance of binwalk.core.module.Result.

        Returns an instance of binwalk.core.module.Result.
        '''
        if r is None:
            r = Result(**kwargs)

        # Tag the result with the name of the module that produced it.
        r.module = self.__class__.__name__

        # Any module that is reporting results, valid or not, should be marked as enabled
        if not self.enabled:
            self.enabled = True

        self.validate(r)
        self._plugins_result(r)

        # Notify every dependency module of this result; dependencies without
        # a callback attribute are silently skipped.
        for dependency in self.dependencies:
            try:
                getattr(self, dependency.attribute).callback(r)
            except AttributeError:
                continue

        if r.valid:
            self.results.append(r)

            # Update the progress status automatically if it is not being done manually by the module
            if r.offset and r.file and self.AUTO_UPDATE_STATUS:
                self.status.total = r.file.length
                self.status.completed = r.offset

            if r.display:
                display_args = self._build_display_args(r)
                if display_args:
                    self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
                    self.config.display.result(*display_args)

        return r
    def error(self, **kwargs):
        '''
        Stores the specified error in self.errors and prints it to stderr.

        Accepts the same kwargs as the binwalk.core.module.Error class.

        Returns None.
        '''
        # Width of the separator line printed around exception tracebacks.
        exception_header_width = 100

        e = Error(**kwargs)
        e.module = self.__class__.__name__

        self.errors.append(e)

        if e.exception:
            # Exceptions get the full traceback, framed by separator lines.
            sys.stderr.write("\n" + e.module + " Exception: " + str(e.exception) + "\n")
            sys.stderr.write("-" * exception_header_width + "\n")
            traceback.print_exc(file=sys.stderr)
            sys.stderr.write("-" * exception_header_width + "\n\n")
        elif e.description:
            sys.stderr.write("\n" + e.module + " Error: " + e.description + "\n\n")
    def header(self):
        # Print the module's display header (format strings, verbose info,
        # and column titles for the current target file).
        self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
        self.config.display.add_custom_header(self.VERBOSE_FORMAT, self.VERBOSE)

        # self.HEADER may be a list of header values or a single value.
        if type(self.HEADER) == type([]):
            self.config.display.header(*self.HEADER, file_name=self.current_target_file_name)
        elif self.HEADER:
            self.config.display.header(self.HEADER, file_name=self.current_target_file_name)
    def footer(self):
        # Print the module's display footer.
        self.config.display.footer()
    def main(self, parent):
        '''
        Responsible for calling self.init, initializing self.config.display, and calling self.run.

        @parent - The managing object; must provide .status and .loaded_modules
                  (presumably a binwalk.core.module.Modules instance — confirm at call site).

        Returns the value returned from self.run, or False if any setup step failed.
        '''
        self.status = parent.status
        self.modules = parent.loaded_modules

        # Reset all dependency modules
        for dependency in self.dependencies:
            if hasattr(self, dependency.attribute):
                getattr(self, dependency.attribute).reset()

        # Module initialization; a failure here aborts the run. KeyboardInterrupt
        # is always re-raised so the user can abort.
        try:
            self.init()
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            self.error(exception=e)
            return False

        # Set up display formatting before any results are printed.
        try:
            self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            self.error(exception=e)
            return False

        self._plugins_pre_scan()

        # Run the module's main routine, recording (not propagating) failures.
        try:
            retval = self.run()
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            self.error(exception=e)
            return False

        self._plugins_post_scan()

        return retval
class Status(object):
    '''
    Tracks a module's progress (e.g., % complete).

    Keyword arguments passed to the constructor define the status
    attributes and their initial values; clear() restores every
    attribute to that initial value.
    '''

    def __init__(self, **kwargs):
        # Remember the initial values so clear() can restore them later.
        self.kwargs = kwargs
        self.clear()

    def clear(self):
        '''Reset all status attributes to their initial values.'''
        for name in self.kwargs:
            setattr(self, name, self.kwargs[name])
class ModuleException(Exception):
    '''
    Exception type raised for fatal module errors.

    Behaves exactly like the built-in Exception; only the name differs,
    so callers can catch module failures specifically.
    '''
    pass
class Modules(object):
    '''
    Main class used for running and managing modules.
    '''

    def __init__(self, *argv, **kargv):
        '''
        Class constructor.

        @argv  - List of command line options. Must not include the program name (e.g., sys.argv[1:]).
        @kargv - Keyword dictionary of command line options.

        Returns None.
        '''
        self.arguments = []
        self.loaded_modules = {}
        self.default_dependency_modules = {}
        self.status = Status(completed=0, total=0)

        self._set_arguments(list(argv), kargv)

    def _set_arguments(self, argv=[], kargv={}):
        # Build self.arguments from a mix of positional and keyword API options.
        # NOTE(review): the mutable default arguments are a classic Python
        # pitfall (argv is appended to below); both visible call sites pass
        # fresh objects, but confirm no caller relies on the defaults.
        for (k,v) in iterator(kargv):
            k = self._parse_api_opt(k)
            # Options with a real value become "--opt value" strings; pure
            # boolean/None options are passed as bare flags.
            if v not in [True, False, None]:
                argv.append("%s %s" % (k, v))
            else:
                argv.append(k)

        # Fall back to the process command line if no options were supplied.
        if not argv and not self.arguments:
            self.arguments = sys.argv[1:]
        elif argv:
            self.arguments = argv

    def _parse_api_opt(self, opt):
        # Translate a Python keyword-argument name into its command line form.
        # If the argument already starts with a hyphen, don't add hyphens in front of it
        if opt.startswith('-'):
            return opt
        # Short options are only 1 character
        elif len(opt) == 1:
            return '-' + opt
        else:
            return '--' + opt

    def list(self, attribute="run"):
        '''
        Finds all modules with the specified attribute.

        @attribute - The desired module attribute.

        Returns a list of modules that contain the specified attribute, in the order they should be executed.
        '''
        import binwalk.modules
        modules = {}

        for (name, module) in inspect.getmembers(binwalk.modules):
            if inspect.isclass(module) and hasattr(module, attribute):
                modules[module] = module.PRIORITY

        # Highest PRIORITY runs first.
        return sorted(modules, key=modules.get, reverse=True)

    def help(self):
        '''
        Generates formatted help output.

        Returns the help string.
        '''
        modules = {}
        help_string = "\nBinwalk v%s\nCraig Heffner, http://www.binwalk.org\n" % binwalk.core.settings.Settings.VERSION

        # Build a dictionary of modules and their ORDER attributes.
        # This makes it easy to sort modules by their ORDER attribute for display.
        for module in self.list(attribute="CLI"):
            if module.CLI:
                modules[module] = module.ORDER

        for module in sorted(modules, key=modules.get, reverse=True):
            help_string += "\n%s Options:\n" % module.TITLE

            for module_option in module.CLI:
                if module_option.long:
                    long_opt = '--' + module_option.long

                    if module_option.dtype:
                        optargs = "=<%s>" % module_option.dtype
                    else:
                        optargs = ""

                    if module_option.short:
                        short_opt = "-" + module_option.short + ","
                    else:
                        short_opt = " "

                    # Pad the option column so the descriptions line up.
                    fmt = " %%s %%s%%-%ds%%s\n" % (32-len(long_opt))
                    help_string += fmt % (short_opt, long_opt, optargs, module_option.description)

        return help_string + "\n"

    def execute(self, *args, **kwargs):
        '''
        Executes all appropriate modules according to the options specified in args/kwargs.

        Returns a list of executed module objects.
        '''
        run_modules = []
        orig_arguments = self.arguments

        if args or kwargs:
            self._set_arguments(list(args), kwargs)

        # Run all modules
        for module in self.list():
            obj = self.run(module)

        # Add all loaded modules that marked themselves as enabled to the run_modules list
        for (module, obj) in iterator(self.loaded_modules):
            # Report the results if the module is enabled and if it is a primary module or if it reported any results/errors
            if obj.enabled and (obj.PRIMARY or obj.results or obj.errors):
                run_modules.append(obj)

        # Restore the original argument list so execute() can be called again.
        self.arguments = orig_arguments

        return run_modules

    def run(self, module, dependency=False, kwargs={}):
        '''
        Runs a specific module.

        @module     - The module class to load and run.
        @dependency - Set to True when the module is being run as a dependency
                      of another module.
        @kwargs     - Additional keyword arguments for the module. NOTE(review):
                      mutable default, but it is never mutated here.

        Returns the instantiated module object.
        '''
        obj = self.load(module, kwargs)

        if isinstance(obj, binwalk.core.module.Module) and obj.enabled:
            obj.main(parent=self)
            self.status.clear()

        # If the module is not being loaded as a dependency, add it to the loaded modules dictionary
        if not dependency:
            self.loaded_modules[module] = obj

        return obj

    def load(self, module, kwargs={}):
        # Assemble the module's kwargs from the command line arguments, any
        # explicit kwargs, and its dependency module objects, then instantiate.
        argv = self.argv(module, argv=self.arguments)
        argv.update(kwargs)
        argv.update(self.dependencies(module, argv['enabled']))
        return module(**argv)

    def dependencies(self, module, module_enabled):
        '''
        Loads and runs all dependency modules required by the given module.

        @module         - The module class whose dependencies should be loaded.
        @module_enabled - Whether the requesting module itself is enabled.

        Returns a dictionary mapping dependency attribute names to the loaded
        dependency module objects. Raises ModuleException on failure.
        '''
        import binwalk.modules
        attributes = {}

        for dependency in module.DEFAULT_DEPENDS+module.DEPENDS:

            # The dependency module must be imported by binwalk.modules.__init__.py
            if hasattr(binwalk.modules, dependency.name):
                dependency.module = getattr(binwalk.modules, dependency.name)
            else:
                raise ModuleException("%s depends on %s which was not found in binwalk.modules.__init__.py\n" % (str(module), dependency.name))

            # No recursive dependencies, thanks
            if dependency.module == module:
                continue

            # Only load dependencies with custom kwargs from modules that are enabled, else madness ensues.
            # Example: Heursitic module depends on entropy module, and sets entropy kwargs to contain 'enabled' : True.
            # Without this check, an entropy scan would always be run, even if -H or -E weren't specified!
            #
            # Modules that are not enabled (e.g., extraction module) can load any dependency as long as they don't
            # set any custom kwargs for those dependencies.
            if module_enabled or not dependency.kwargs:
                depobj = self.run(dependency.module, dependency=True, kwargs=dependency.kwargs)

                # If a dependency failed, consider this a non-recoverable error and raise an exception
                if depobj.errors:
                    raise ModuleException("Failed to load " + dependency.name)
                else:
                    attributes[dependency.attribute] = depobj

        return attributes

    def argv(self, module, argv=sys.argv[1:]):
        '''
        Processes argv for any options specific to the specified module.

        @module - The module to process argv for.
        @argv   - A list of command line arguments (excluding argv[0]).
                  NOTE(review): the default is captured once at definition time.

        Returns a dictionary of kwargs for the specified module.
        '''
        kwargs = {'enabled' : False}
        last_priority = {}
        longs = []
        shorts = ""
        parser = argparse.ArgumentParser(add_help=False)

        # Must build arguments from all modules so that:
        #
        #    1) Any conflicting arguments will raise an exception
        #    2) The only unknown arguments will be the target files, making them easy to identify
        for m in self.list(attribute="CLI"):

            for module_option in m.CLI:
                if not module_option.long:
                    continue

                # Options without a type are simple on/off flags.
                if module_option.type is None:
                    action = 'store_true'
                else:
                    action = None

                if module_option.short:
                    parser.add_argument('-' + module_option.short, '--' + module_option.long, action=action, dest=module_option.long)
                else:
                    parser.add_argument('--' + module_option.long, action=action, dest=module_option.long)

        args, unknown = parser.parse_known_args(argv)
        args = args.__dict__

        # Only add parsed options pertinent to the requested module
        for module_option in module.CLI:

            if module_option.type == binwalk.core.common.BlockFile:

                # All unrecognized arguments are treated as target files.
                for k in get_keys(module_option.kwargs):
                    kwargs[k] = []
                    for unk in unknown:
                        kwargs[k].append(unk)

            elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]:

                for (name, value) in iterator(module_option.kwargs):
                    # Higher-priority options override lower-priority ones.
                    if not has_key(last_priority, name) or last_priority[name] <= module_option.priority:
                        if module_option.type is not None:
                            value = args[module_option.long]

                        last_priority[name] = module_option.priority

                        # Do this manually as argparse doesn't seem to be able to handle hexadecimal values
                        if module_option.type == int:
                            kwargs[name] = int(value, 0)
                        elif module_option.type == float:
                            kwargs[name] = float(value)
                        elif module_option.type == dict:
                            # Dict-typed options accumulate values keyed by insertion index.
                            if not has_key(kwargs, name):
                                kwargs[name] = {}
                            kwargs[name][len(kwargs[name])] = value
                        elif module_option.type == list:
                            # List-typed options accumulate repeated values.
                            if not has_key(kwargs, name):
                                kwargs[name] = []
                            kwargs[name].append(value)
                        else:
                            kwargs[name] = value

        return kwargs

    def kwargs(self, obj, kwargs):
        '''
        Processes a module's kwargs. All modules should use this for kwarg processing.

        @obj    - An instance of the module (e.g., self)
        @kwargs - The kwargs passed to the module

        Returns None.
        '''
        if hasattr(obj, "KWARGS"):
            # Apply each declared module argument, falling back to its default.
            for module_argument in obj.KWARGS:
                if has_key(kwargs, module_argument.name):
                    arg_value = kwargs[module_argument.name]
                else:
                    arg_value = module_argument.default

                setattr(obj, module_argument.name, arg_value)

            # Any remaining kwargs (e.g., dependency objects) are attached as-is,
            # without overwriting existing attributes.
            for (k, v) in iterator(kwargs):
                if not hasattr(obj, k):
                    setattr(obj, k, v)
        else:
            raise Exception("binwalk.core.module.Modules.process_kwargs: %s has no attribute 'KWARGS'" % str(obj))
def process_kwargs(obj, kwargs):
    '''
    Convenience wrapper around binwalk.core.module.Modules.kwargs.

    @obj    - The class object (an instance of a sub-class of binwalk.core.module.Module).
    @kwargs - The kwargs provided to the object's __init__ method.

    Returns None.
    '''
    helper = Modules()
    return helper.kwargs(obj, kwargs)
def show_help(fd=sys.stdout):
    '''
    Convenience wrapper around binwalk.core.module.Modules.help.

    @fd - An object with a write method (e.g., sys.stdout, sys.stderr, etc).

    Returns None.
    '''
    help_text = Modules().help()
    fd.write(help_text)
...@@ -7,350 +7,350 @@ from binwalk.core.filter import FilterType ...@@ -7,350 +7,350 @@ from binwalk.core.filter import FilterType
class MagicSignature(object):
    '''
    Container describing a single parsed magic signature entry.

    Default attribute values may be overridden via keyword arguments;
    any value that parses as an integer (base auto-detected) is stored
    as an int, otherwise it is stored unchanged.
    '''

    def __init__(self, **kwargs):
        # Sensible defaults for a signature entry.
        self.offset = 0
        self.type = ''
        self.condition = ''
        self.description = ''
        self.length = 0

        for (key, value) in iterator(kwargs):
            # Prefer integer values where possible (e.g., '0x10' -> 16);
            # leave the value untouched if conversion fails.
            try:
                value = int(value, 0)
            except KeyboardInterrupt as e:
                raise e
            except Exception:
                pass
            setattr(self, key, value)
class MagicParser(object):
    '''
    Class for loading, parsing and creating libmagic-compatible magic files.

    This class is primarily used internally by the Binwalk class, and a class instance of it is available via the Binwalk.parser object.

    One useful method however, is file_from_string(), which will generate a temporary magic file from a given signature string:

        import binwalk

        bw = binwalk.Binwalk()

        # Create a temporary magic file that contains a single entry with a signature of '\\x00FOOBAR\\xFF', and append the resulting
        # temporary file name to the list of magic files in the Binwalk class instance.
        bw.magic_files.append(bw.parser.file_from_string('\\x00FOOBAR\\xFF', display_name='My custom signature'))

        bw.scan('firmware.bin')

    All magic files generated by this class will be deleted when the class deconstructor is called.
    '''

    # Endianness identifiers used when building signatures.
    BIG_ENDIAN = 'big'
    LITTLE_ENDIAN = 'little'

    # Template for a single string-type magic entry: offset, type, signature, description.
    MAGIC_STRING_FORMAT = "%d\tstring\t%s\t%s\n"
    DEFAULT_DISPLAY_NAME = "Raw string signature"

    # Wildcard character used in magic signature conditions.
    WILDCARD = 'x'

    # If libmagic returns multiple results, they are delimited with this string.
    RESULT_SEPERATOR = "\\012- "
def __init__(self, filter=None, smart=None): def __init__(self, filter=None, smart=None):
''' '''
Class constructor. Class constructor.
@filter - Instance of the MagicFilter class. May be None if the parse/parse_file methods are not used. @filter - Instance of the MagicFilter class. May be None if the parse/parse_file methods are not used.
@smart - Instance of the SmartSignature class. May be None if the parse/parse_file methods are not used. @smart - Instance of the SmartSignature class. May be None if the parse/parse_file methods are not used.
Returns None. Returns None.
''' '''
self.matches = set([]) self.matches = set([])
self.signatures = {} self.signatures = {}
self.filter = filter self.filter = filter
self.smart = smart self.smart = smart
self.raw_fd = None self.raw_fd = None
self.signature_count = 0 self.signature_count = 0
    def __del__(self):
        # Best-effort cleanup of temporary files on garbage collection;
        # destructor errors are deliberately swallowed (user interrupts
        # are still re-raised).
        try:
            self.cleanup()
        except KeyboardInterrupt as e:
            raise e
        except Exception:
            pass
    def rm_magic_files(self):
        '''
        Cleans up the temporary magic file(s).

        Returns None.
        '''
        # Both handles are tempfile.NamedTemporaryFile objects, which remove
        # their backing file on close. Either handle may not exist yet or may
        # already be closed, so failures are deliberately ignored.
        try:
            self.fd.close()
        except KeyboardInterrupt as e:
            raise e
        except Exception:
            pass

        try:
            self.raw_fd.close()
        except KeyboardInterrupt as e:
            raise e
        except Exception:
            pass
def cleanup(self): def cleanup(self):
''' '''
Cleans up any tempfiles created by the class instance. Cleans up any tempfiles created by the class instance.
Returns None. Returns None.
''' '''
self.rm_magic_files() self.rm_magic_files()
def file_from_string(self, signature_string, offset=0, display_name=DEFAULT_DISPLAY_NAME): def file_from_string(self, signature_string, offset=0, display_name=DEFAULT_DISPLAY_NAME):
''' '''
Generates a magic file from a signature string. Generates a magic file from a signature string.
This method is intended to be used once per instance. This method is intended to be used once per instance.
If invoked multiple times, any previously created magic files will be closed and deleted. If invoked multiple times, any previously created magic files will be closed and deleted.
@signature_string - The string signature to search for. @signature_string - The string signature to search for.
@offset - The offset at which the signature should occur. @offset - The offset at which the signature should occur.
@display_name - The text to display when the signature is found. @display_name - The text to display when the signature is found.
Returns the name of the generated temporary magic file. Returns the name of the generated temporary magic file.
''' '''
self.raw_fd = tempfile.NamedTemporaryFile() self.raw_fd = tempfile.NamedTemporaryFile()
self.raw_fd.write(self.MAGIC_STRING_FORMAT % (offset, signature_string, display_name)) self.raw_fd.write(self.MAGIC_STRING_FORMAT % (offset, signature_string, display_name))
self.raw_fd.seek(0) self.raw_fd.seek(0)
return self.raw_fd.name return self.raw_fd.name
def parse(self, file_name): def parse(self, file_name):
''' '''
Parses magic file(s) and contatenates them into a single temporary magic file Parses magic file(s) and contatenates them into a single temporary magic file
while simultaneously removing filtered signatures. while simultaneously removing filtered signatures.
@file_name - Magic file, or list of magic files, to parse. @file_name - Magic file, or list of magic files, to parse.
Returns the name of the generated temporary magic file, which will be automatically Returns the name of the generated temporary magic file, which will be automatically
deleted when the class deconstructor is called. deleted when the class deconstructor is called.
''' '''
self.matches = set([]) self.matches = set([])
self.signatures = {} self.signatures = {}
self.signature_count = 0 self.signature_count = 0
self.fd = tempfile.NamedTemporaryFile() self.fd = tempfile.NamedTemporaryFile()
if isinstance(file_name, type([])): if isinstance(file_name, type([])):
files = file_name files = file_name
else: else:
files = [file_name] files = [file_name]
for fname in files: for fname in files:
if os.path.exists(fname): if os.path.exists(fname):
self.parse_file(fname) self.parse_file(fname)
else: else:
sys.stdout.write("WARNING: Magic file '%s' does not exist!\n" % fname) sys.stdout.write("WARNING: Magic file '%s' does not exist!\n" % fname)
self.fd.seek(0) self.fd.seek(0)
return self.fd.name return self.fd.name
def parse_file(self, file_name):
    '''
    Parses a magic file and appends valid signatures to the temporary magic file, as allowed
    by the existing filter rules.

    @file_name - Magic file to parse.

    Returns None.
    '''
    # Default to not including signature entries until we've
    # found what looks like a valid entry.
    include = False
    line_count = 0

    try:
        # BUGFIX: use a context manager so the magic file handle is closed even on
        # error; the original open(...).readlines() leaked the file object.
        with open(file_name, 'r') as fp:
            for line in fp:
                line_count += 1

                # Check if this is the first line of a signature entry
                entry = self._parse_line(line)

                if entry is not None:
                    # If this signature is marked for inclusion, include it.
                    if self.filter.filter(entry.description) == FilterType.FILTER_INCLUDE:
                        include = True
                        self.signature_count += 1
                        if not has_key(self.signatures, entry.offset):
                            self.signatures[entry.offset] = []
                        if entry.condition not in self.signatures[entry.offset]:
                            self.signatures[entry.offset].append(entry.condition)
                    else:
                        include = False

                # Keep writing lines of the signature to the temporary magic file until
                # we detect a signature that should not be included.
                if include:
                    self.fd.write(str2bytes(line))

        self.build_signature_set()
    except KeyboardInterrupt as e:
        raise e
    except Exception as e:
        raise Exception("Error parsing magic file '%s' on line %d: %s" % (file_name, line_count, str(e)))
def _parse_line(self, line):
    '''
    Parses a signature line into its four parts (offset, type, condition and description),
    looking for the first line of a given signature.

    @line - The signature line to parse.

    Returns a MagicSignature object with its fields populated if the line is the first of a signature.
    Returns None if the line is not the first line of a signature.
    '''
    entry = MagicSignature()

    # Quick and dirty pre-filter. We are only concerned with the first line of a
    # signature, which will always start with a number. Make sure the first byte of
    # the line is a number; if not, don't process.
    if line[:1] < '0' or line[:1] > '9':
        return None

    try:
        # Split the line into white-space separated parts.
        # For this to work properly, replace escaped spaces ('\ ') with '\x20'.
        # This means the same thing, but doesn't confuse split().
        line_parts = line.replace('\\ ', '\\x20').split()
        entry.offset = line_parts[0]
        entry.type = line_parts[1]
        # The condition line may contain escaped sequences, so be sure to decode it properly.
        entry.condition = string_decode(line_parts[2])
        entry.description = ' '.join(line_parts[3:])
    except KeyboardInterrupt as e:
        raise e
    except Exception as e:
        # BUGFIX: the format string and its arguments were previously passed as two
        # separate constructor arguments, so the message was never actually formatted.
        raise Exception("%s :: %s" % (str(e), line))

    # We've already verified that the first character in this line is a number, so this *shouldn't*
    # throw an exception, but let's catch it just in case...
    try:
        entry.offset = int(entry.offset, 0)
    except KeyboardInterrupt as e:
        raise e
    except Exception as e:
        # BUGFIX: same formatting fix as above.
        raise Exception("%s :: %s" % (str(e), line))

    # If this is a string, get the length of the string
    if 'string' in entry.type or entry.condition == self.WILDCARD:
        entry.length = len(entry.condition)
    # Else, we need to jump through a few more hoops...
    else:
        # Default to little endian, unless the type field starts with 'be'.
        # This assumes that we're running on a little endian system...
        if entry.type.startswith('be'):
            endianess = self.BIG_ENDIAN
        else:
            endianess = self.LITTLE_ENDIAN

        # Try to convert the condition to an integer. This does not allow
        # for more advanced conditions for the first line of a signature,
        # but needing that is rare.
        try:
            intval = int(entry.condition.strip('L'), 0)
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            # BUGFIX: entry is an object, not a dictionary; the old entry['...']
            # subscript lookups raised TypeError and masked the real parse error.
            raise Exception("Failed to evaluate condition for '%s' type: '%s', condition: '%s', error: %s" % (entry.description, entry.type, entry.condition, str(e)))

        # How long is the field type?
        if entry.type == 'byte':
            entry.length = 1
        elif 'short' in entry.type:
            entry.length = 2
        elif 'long' in entry.type:
            entry.length = 4
        elif 'quad' in entry.type:
            entry.length = 8

        # Convert the integer value to a string of the appropriate endianess
        entry.condition = self._to_string(intval, entry.length, endianess)

    return entry
def build_signature_set(self):
    '''
    Compiles every unique signature condition into a regular expression.

    Returns a set of tuples in the format: [(<signature offset>, <compiled signature regex>)].
    '''
    # A wildcard condition matches any single byte; every other condition is
    # matched as a literal string.
    self.signature_set = set(
        (offset, re.compile('.' if condition == self.WILDCARD else re.escape(condition)))
        for (offset, conditions) in iterator(self.signatures)
        for condition in conditions
    )
    return self.signature_set
def find_signature_candidates(self, data, end):
    '''
    Finds candidate signatures inside of the data buffer.
    Called internally by Binwalk.single_scan.

    @data - Data to scan for candidate signatures.
    @end  - Don't look for signatures beyond this offset.

    Returns an ordered list of unique offsets inside of data at which
    candidate signatures were found.
    '''
    candidates = set()

    for (sig_offset, pattern) in self.signature_set:
        for match in pattern.finditer(data):
            # The condition may not sit at the very start of its signature,
            # so back the match location up by the signature's offset.
            candidate = match.start() - sig_offset
            if 0 <= candidate < end:
                candidates.add(candidate)

    return sorted(candidates)
def _to_string(self, value, size, endianess):
    '''
    Converts an integer value into a raw string.

    @value     - The integer value to convert.
    @size      - Size, in bytes, of the integer value.
    @endianess - One of self.LITTLE_ENDIAN | self.BIG_ENDIAN.

    Returns a raw string containing value.
    '''
    # Emit the bytes least-significant first...
    byte_chars = [chr((value >> (8 * i)) & 0xFF) for i in range(0, size)]

    # ...then flip the order for big endian output.
    if endianess != self.LITTLE_ENDIAN:
        byte_chars.reverse()

    return ''.join(byte_chars)
def split(self, data):
    '''
    Splits multiple libmagic results in the data string into a list of separate results.

    @data - Data string returned from libmagic.

    Returns a list of result strings (empty if data cannot be split).
    '''
    try:
        results = data.split(self.RESULT_SEPERATOR)
    except KeyboardInterrupt as e:
        raise e
    except Exception:
        results = []
    return results
...@@ -5,180 +5,180 @@ import binwalk.core.settings ...@@ -5,180 +5,180 @@ import binwalk.core.settings
from binwalk.core.compat import * from binwalk.core.compat import *
class Plugins:
    '''
    Class to load and call plugin callback functions, handled automatically by Binwalk.scan / Binwalk.single_scan.
    An instance of this class is available during a scan via the Binwalk.plugins object.

    Each plugin must be placed in the user or system plugins directories, and must define a class named 'Plugin'.
    The Plugin class constructor (__init__) is passed one argument, which is the current instance of the Binwalk class.
    The Plugin class constructor is called once prior to scanning a file or set of files.
    The Plugin class destructor (__del__) is called once after scanning all files.

    The Plugin class can define one or all of the following callback methods:

        o pre_scan(self, fd)
          This method is called prior to running a scan against a file. It is passed the file object of
          the file about to be scanned.

        o pre_parser(self, result)
          This method is called every time any result - valid or invalid - is found in the file being scanned.
          It is passed a dictionary with one key ('description'), which contains the raw string returned by libmagic.
          The contents of this dictionary key may be modified as necessary by the plugin.

        o callback(self, results)
          This method is called every time a valid result is found in the file being scanned. It is passed a
          dictionary of results. This dictionary is identical to that passed to Binwalk.single_scan's callback
          function, and its contents may be modified as necessary by the plugin.

        o post_scan(self, fd)
          This method is called after running a scan against a file, but before the file has been closed.
          It is passed the file object of the scanned file.

    Values returned by pre_scan affect all results during the scan of that particular file.
    Values returned by callback affect only that specific scan result.
    Values returned by post_scan are ignored since the scan of that file has already been completed.

    By default, all plugins are loaded during binwalk signature scans. Plugins that wish to be disabled by
    default may create a class variable named 'ENABLED' and set it to False. If ENABLED is set to False, the
    plugin will only be loaded if it is explicitly named in the plugins whitelist.
    '''

    # Names of the optional callback methods probed on each plugin instance.
    SCAN = 'scan'
    PRESCAN = 'pre_scan'
    POSTSCAN = 'post_scan'
    # Name of the class every plugin module must define.
    PLUGIN = 'Plugin'
    # Only files with this extension are treated as plugin modules.
    MODULE_EXTENSION = '.py'

    def __init__(self, parent=None):
        '''
        Class constructor.

        @parent - The object (the Binwalk instance) handed to each plugin's constructor.

        Returns None.
        '''
        # Lists of plugin callback methods, populated by load_plugins.
        self.scan = []
        self.pre_scan = []
        self.post_scan = []
        self.parent = parent
        self.settings = binwalk.core.settings.Settings()

    def __del__(self):
        pass

    def __enter__(self):
        return self

    def __exit__(self, t, v, traceback):
        pass

    def _call_plugins(self, callback_list, arg):
        # Invoke every callback in callback_list with arg; a failing plugin only
        # produces a warning so one bad plugin can't abort the scan.
        for callback in callback_list:
            try:
                callback(arg)
            except KeyboardInterrupt as e:
                raise e
            except Exception as e:
                sys.stderr.write("WARNING: %s.%s failed: %s\n" % (callback.__module__, callback.__name__, e))

    def list_plugins(self):
        '''
        Obtain a list of all user and system plugin modules.

        Returns a dictionary of:

            {
                'user'   : {
                            'modules'      : [list, of, module, names],
                            'descriptions' : {'module_name' : 'module pydoc string'},
                            'enabled'      : {'module_name' : True},
                            'path'         : "path/to/module/plugin/directory"
                           },
                'system' : {
                            'modules'      : [list, of, module, names],
                            'descriptions' : {'module_name' : 'module pydoc string'},
                            'enabled'      : {'module_name' : True},
                            'path'         : "path/to/module/plugin/directory"
                           }
            }
        '''
        plugins = {
            'user' : {
                'modules' : [],
                'descriptions' : {},
                'enabled' : {},
                'path' : None,
            },
            'system' : {
                'modules' : [],
                'descriptions' : {},
                'enabled' : {},
                'path' : None,
            }
        }

        for key in plugins.keys():
            # Plugin directory for this scope (user or system) comes from settings.
            plugins[key]['path'] = self.settings.paths[key][self.settings.PLUGINS]

            for file_name in os.listdir(plugins[key]['path']):
                if file_name.endswith(self.MODULE_EXTENSION):
                    module = file_name[:-len(self.MODULE_EXTENSION)]
                    # NOTE(review): each plugin module is imported here just to read its
                    # docstring; importing executes the module's top-level code.
                    plugin = imp.load_source(module, os.path.join(plugins[key]['path'], file_name))
                    plugin_class = getattr(plugin, self.PLUGIN)

                    plugins[key]['enabled'][module] = True
                    plugins[key]['modules'].append(module)

                    try:
                        # Use the first line of the Plugin class docstring as the description.
                        plugins[key]['descriptions'][module] = plugin_class.__doc__.strip().split('\n')[0]
                    except KeyboardInterrupt as e:
                        raise e
                    except Exception as e:
                        # Missing or empty docstring.
                        plugins[key]['descriptions'][module] = 'No description'
        return plugins

    def load_plugins(self):
        # Discover all plugin modules, then load user plugins followed by system plugins.
        plugins = self.list_plugins()
        self._load_plugin_modules(plugins['user'])
        self._load_plugin_modules(plugins['system'])

    def _load_plugin_modules(self, plugins):
        # Import each plugin module, instantiate its Plugin class, and register
        # whichever of the scan/pre_scan/post_scan callbacks it defines.
        for module in plugins['modules']:
            file_path = os.path.join(plugins['path'], module + self.MODULE_EXTENSION)

            try:
                plugin = imp.load_source(module, file_path)
                plugin_class = getattr(plugin, self.PLUGIN)

                class_instance = plugin_class(self.parent)

                # Each callback is optional; a plugin without it is simply skipped.
                try:
                    self.scan.append(getattr(class_instance, self.SCAN))
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    pass

                try:
                    self.pre_scan.append(getattr(class_instance, self.PRESCAN))
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    pass

                try:
                    self.post_scan.append(getattr(class_instance, self.POSTSCAN))
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    pass

            except KeyboardInterrupt as e:
                raise e
            except Exception as e:
                sys.stderr.write("WARNING: Failed to load plugin module '%s': %s\n" % (module, str(e)))

    def pre_scan_callbacks(self, obj):
        # Fire all registered pre_scan callbacks.
        return self._call_plugins(self.pre_scan, obj)

    def post_scan_callbacks(self, obj):
        # Fire all registered post_scan callbacks.
        return self._call_plugins(self.post_scan, obj)

    def scan_callbacks(self, obj):
        # Fire all registered scan callbacks.
        return self._call_plugins(self.scan, obj)
...@@ -4,310 +4,310 @@ from binwalk.core.compat import * ...@@ -4,310 +4,310 @@ from binwalk.core.compat import *
from binwalk.core.common import get_quoted_strings, MathExpression from binwalk.core.common import get_quoted_strings, MathExpression
class Tag(object):
    # Delimiters used to embed smart tags inside libmagic description
    # strings, e.g. "{file-size:...}".
    TAG_DELIM_START = "{"
    TAG_DELIM_END = "}"
    TAG_ARG_SEPERATOR = ":"

    def __init__(self, **kwargs):
        '''
        Build a smart tag definition from keyword arguments
        (name, keyword, type, handler, tag, default).
        '''
        # Start every known attribute at None, then overlay caller-supplied values.
        for attribute in ('name', 'keyword', 'type', 'handler', 'tag', 'default'):
            setattr(self, attribute, None)
        for (attribute, value) in iterator(kwargs):
            setattr(self, attribute, value)

        # Typed tags get a type-appropriate default value.
        type_defaults = {int: 0, str: ''}
        if self.type in type_defaults:
            self.default = type_defaults[self.type]

        # Pre-build the literal tag text to search for in result strings:
        # tags without an argument close immediately, tags with an argument
        # end in the argument separator.
        if self.keyword is not None:
            if self.type is None:
                suffix = self.TAG_DELIM_END
            else:
                suffix = self.TAG_ARG_SEPERATOR
            self.tag = self.TAG_DELIM_START + self.keyword + suffix

        # Choose a default handler based on the tag's argument type.
        if self.handler is None:
            self.handler = {int: 'get_math_arg', str: 'get_keyword_arg'}.get(self.type, None)
class Signature(object): class Signature(object):
''' '''
Class for parsing smart signature tags in libmagic result strings. Class for parsing smart signature tags in libmagic result strings.
This class is intended for internal use only, but a list of supported 'smart keywords' that may be used This class is intended for internal use only, but a list of supported 'smart keywords' that may be used
in magic files is available via the SmartSignature.KEYWORDS dictionary: in magic files is available via the SmartSignature.KEYWORDS dictionary:
from binwalk import SmartSignature from binwalk import SmartSignature
for tag in SmartSignature.TAGS: for tag in SmartSignature.TAGS:
print tag.keyword print tag.keyword
''' '''
TAGS = [ TAGS = [
Tag(name='raw-string', keyword='raw-string', type=str, handler='parse_raw_string'), Tag(name='raw-string', keyword='raw-string', type=str, handler='parse_raw_string'),
Tag(name='string-len', keyword='string-len', type=str, handler='parse_string_len'), Tag(name='string-len', keyword='string-len', type=str, handler='parse_string_len'),
Tag(name='math', keyword='math', type=int, handler='parse_math'), Tag(name='math', keyword='math', type=int, handler='parse_math'),
Tag(name='one-of-many', keyword='one-of-many', handler='one_of_many'), Tag(name='one-of-many', keyword='one-of-many', handler='one_of_many'),
Tag(name='jump', keyword='jump-to-offset', type=int), Tag(name='jump', keyword='jump-to-offset', type=int),
Tag(name='name', keyword='file-name', type=str), Tag(name='name', keyword='file-name', type=str),
Tag(name='size', keyword='file-size', type=int), Tag(name='size', keyword='file-size', type=int),
Tag(name='adjust', keyword='offset-adjust', type=int), Tag(name='adjust', keyword='offset-adjust', type=int),
Tag(name='delay', keyword='extract-delay', type=str), Tag(name='delay', keyword='extract-delay', type=str),
Tag(name='year', keyword='file-year', type=str), Tag(name='year', keyword='file-year', type=str),
Tag(name='epoch', keyword='file-epoch', type=int), Tag(name='epoch', keyword='file-epoch', type=int),
Tag(name='raw-size', keyword='raw-string-length', type=int), Tag(name='raw-size', keyword='raw-string-length', type=int),
Tag(name='raw-replace', keyword='raw-replace'), Tag(name='raw-replace', keyword='raw-replace'),
Tag(name='string-len-replace', keyword='string-len'), Tag(name='string-len-replace', keyword='string-len'),
] ]
def __init__(self, filter, ignore_smart_signatures=False):
    '''
    Class constructor.

    @filter                  - Instance of the MagicFilter class.
    @ignore_smart_signatures - Set to True to ignore smart signature keywords.

    Returns None.
    '''
    self.filter = filter
    self.ignore_smart_signatures = ignore_smart_signatures
    # Parsing state: cleared when a tag handler rejects a result.
    self.valid = True
    # Tracks the most recent "one of many" result so follow-ups can be suppressed.
    self.last_one_of_many = None
def parse(self, data):
    '''
    Parse a given data string for smart signature keywords. If any are found, interpret them and strip them.

    @data - String to parse, as returned by libmagic.

    Returns a binwalk.core.module.Result built from the parsed tag values.
    '''
    results = {}
    self.valid = True

    if data:
        for tag in self.TAGS:
            if tag.handler is not None:
                # Each handler returns the (possibly modified) data string and
                # the value it extracted for this tag.
                (d, arg) = getattr(self, tag.handler)(data, tag)
                if not self.ignore_smart_signatures:
                    data = d

                # A literal False from a handler (e.g. one_of_many) marks the
                # whole result invalid; otherwise, typed tags store their value.
                if isinstance(arg, type(False)) and arg == False and not self.ignore_smart_signatures:
                    self.valid = False
                elif tag.type is not None:
                    if self.ignore_smart_signatures:
                        # Tags are being ignored, so record the tag's default value instead.
                        results[tag.name] = tag.default
                    else:
                        results[tag.name] = arg

        if self.ignore_smart_signatures:
            results['description'] = data
        else:
            results['description'] = self.strip_tags(data)
    else:
        # No data at all cannot be a valid result.
        self.valid = False

    results['valid'] = self.valid

    return binwalk.core.module.Result(**results)
def tag_lookup(self, keyword):
    '''
    Finds the Tag definition for a given keyword.

    @keyword - The tag keyword to search for.

    Returns the first matching Tag object, or None if no tag uses that keyword.
    '''
    return next((tag for tag in self.TAGS if tag.keyword == keyword), None)
def is_valid(self, data):
    '''
    Validates that result data does not contain smart keywords in file-supplied strings.

    @data - Data string to validate.

    Returns True if data is OK.
    Returns False if data is not OK.
    '''
    # All strings printed from the target file should be placed in strings, else there is
    # no way to distinguish between intended keywords and unintended keywords. Get all the
    # quoted strings.
    quoted_data = get_quoted_strings(data)

    # If the quoted data can't even contain a tag opener, there is nothing to check.
    if not quoted_data or Tag.TAG_DELIM_START not in quoted_data:
        return True

    # Any of our keywords appearing inside file-supplied (quoted) data is invalid.
    return not any(tag.tag in quoted_data for tag in self.TAGS)
def safe_string(self, data):
    '''
    Strips out quoted data (i.e., data taken directly from a file).
    '''
    quoted = get_quoted_strings(data)
    return data.replace(quoted, "") if quoted else data
def one_of_many(self, data, tag):
    '''
    Determines if a given data string is one result of many.

    @data - String result data.

    Returns (data, False) if the string result is one of many and should not be displayed.
    Returns (data, True) if the string result is not one of many and should be displayed.
    '''
    if self.filter.valid_result(data):
        # Suppress this result if it continues the previous one-of-many series.
        previous = self.last_one_of_many
        if previous is not None and data.startswith(previous):
            return (data, False)

        if tag.tag in data:
            # Only match on the data before the first comma, as that is typically unique and static
            self.last_one_of_many = data.split(',')[0]
        else:
            self.last_one_of_many = None

    return (data, True)
def get_keyword_arg(self, data, tag):
    '''
    Retrieves the argument for keywords that specify arguments.

    @data - String result data, as returned by libmagic.
    @tag  - The tag whose argument should be extracted.

    Returns a (data, argument) tuple; argument is an empty string if the
    keyword was not found outside of quoted data.
    '''
    argument = ''
    # Only look for the keyword outside of quoted (file-derived) data.
    unquoted = self.safe_string(data)

    if tag.tag in unquoted:
        # The argument is everything between the tag opening and the tag delimiter.
        argument = unquoted.split(tag.tag)[1].split(tag.TAG_DELIM_END)[0]

    return (data, argument)
def get_math_arg(self, data, tag):
    '''
    Retrieves the argument for keywords that specify mathematical expressions as arguments.

    @data - String result data, as returned by libmagic.
    @tag  - The tag whose expression should be evaluated.

    Returns a (data, value) tuple with the calculated value (0 on failure).
    '''
    (data, expression) = self.get_keyword_arg(data, tag)

    if not expression:
        return (data, 0)

    value = MathExpression(expression).value
    if value is None:
        # The expression could not be evaluated; mark this result invalid.
        value = 0
        self.valid = False

    return (data, value)
def parse_math(self, data, tag):
    '''
    Replace math keywords with the requested values.

    @data - String result data.
    @tag  - The math tag to process.

    Returns a (data, None) tuple with the modified string result data.
    '''
    while tag.keyword in data:
        # BUGFIX: get_keyword_arg/get_math_arg expect the tag object itself
        # (they access tag.tag); the original passed tag.name, a string.
        (data, arg) = self.get_keyword_arg(data, tag)
        keyword_str = '%s%s%s' % (tag.keyword, arg, tag.TAG_DELIM_END)

        # BUGFIX: get_math_arg returns a (data, value) tuple; the original
        # formatted the whole tuple with "%d", raising a TypeError.
        (data, math_value) = self.get_math_arg(data, tag)

        if keyword_str not in data:
            # The keyword argument could not be reconstructed (e.g., it was
            # inside quoted data); bail out rather than loop forever on a
            # no-op replacement.
            break

        data = data.replace(keyword_str, "%d" % math_value)

    return (data, None)
def parse_raw_string(self, data, raw_str_tag):
    '''
    Process strings that aren't NULL byte terminated, but for which we know the string length.
    This should be called prior to any other smart parsing functions.

    @data        - String to parse.
    @raw_str_tag - The raw-string tag.

    Returns a (data, True) tuple containing the parsed string.
    '''
    if not self.is_valid(data):
        return (data, True)

    length_tag = self.tag_lookup('raw-string-length')
    replace_tag = self.tag_lookup('raw-replace')

    # Pull out the raw string keyword argument, if one was specified.
    (data, raw_string) = self.get_keyword_arg(data, raw_str_tag)

    if raw_string:
        # Get the raw string length argument.
        (data, raw_size) = self.get_math_arg(data, length_tag)

        # Replace all instances of raw-replace in data with raw_string[:raw_size].
        # Also strip out everything after the raw-string keyword, including the
        # keyword itself. Failure to do so may (will) result in non-printable
        # characters and this string will be marked as invalid when it shouldn't be.
        replacement = '"' + raw_string[:raw_size] + '"'
        data = data[:data.find(raw_str_tag.tag)].replace(replace_tag.tag, replacement)

    return (data, True)
def parse_string_len(self, data, str_len_tag):
    '''
    Process {string-len} macros.

    @data        - String to parse.
    @str_len_tag - The string-len tag.

    Returns a (data, True) tuple containing the parsed data string.
    '''
    if not self.ignore_smart_signatures and self.is_valid(data):
        replace_tag = self.tag_lookup('string-len-replace')

        # Pull out the string-len keyword argument, if one was specified.
        (data, target_string) = self.get_keyword_arg(data, str_len_tag)

        if target_string:
            # Get the string length.
            try:
                string_length = '%d' % len(target_string)
            except KeyboardInterrupt as e:
                raise e
            except Exception:
                string_length = '0'

            # Strip out *everything* after the string-len keyword, including the
            # keyword itself. Failure to do so can potentially allow keyword
            # injection from a maliciously created file.
            data = data.split(str_len_tag.tag)[0].replace(replace_tag.tag, string_length)

    return (data, True)
def strip_tags(self, data):
    '''
    Strips the smart tags from a result string.

    @data - String result data.

    Returns a sanitized string.
    '''
    if self.ignore_smart_signatures:
        return data

    for tag in self.TAGS:
        opening = data.find(tag.tag)
        if opening == -1:
            continue

        closing = data[opening:].find(tag.TAG_DELIM_END)
        if closing != -1:
            # Remove every occurrence of this exact tag instance.
            data = data.replace(data[opening:opening+closing+1], "")

    return data
0 belong x Hex: 0x%.8X
#0 string x String: %s
#0 lequad x Little Endian Quad: %lld
#0 bequad x Big Endian Quad: %lld
0 lelong x Little Endian Long: %d
0 belong x Big Endian Long: %d
0 leshort x Little Endian Short: %d
0 beshort x Big Endian Short: %d
0 ledate x Little Endian Date: %s
0 bedate x Big Endian Date: %s
...@@ -4,300 +4,300 @@ from binwalk.core.common import BlockFile ...@@ -4,300 +4,300 @@ from binwalk.core.common import BlockFile
from binwalk.core.module import Module, Option, Kwarg from binwalk.core.module import Module, Option, Kwarg
class Plotter(Module):
    '''
    Base class for visualizing binaries in Qt.
    Other plotter classes are derived from this.
    '''

    # Default camera distance from the plotted point cloud.
    VIEW_DISTANCE = 1024
    # Upper bounds on the number of plotted points in 2D/3D mode.
    MAX_2D_PLOT_POINTS = 12500
    MAX_3D_PLOT_POINTS = 25000

    TITLE = "Binary Visualization"

    CLI = [
            Option(short='3',
                   long='3D',
                   kwargs={'axis' : 3, 'enabled' : True},
                   description='Generate a 3D binary visualization'),
            Option(short='2',
                   long='2D',
                   kwargs={'axis' : 2, 'enabled' : True},
                   description='Project data points onto 3D cube walls only'),
            Option(short='Z',
                   long='max-points',
                   type=int,
                   kwargs={'max_points' : 0},
                   description='Set the maximum number of plotted data points'),
            Option(short='V',
                   long='show-grids',
                   kwargs={'show_grids' : True},
                   description='Display the x-y-z grids in the resulting plot'),
    ]

    KWARGS = [
            Kwarg(name='axis', default=3),
            Kwarg(name='max_points', default=0),
            Kwarg(name='show_grids', default=False),
            Kwarg(name='enabled', default=False),
    ]

    # There isn't really any useful data to print to console. Disable header and result output.
    HEADER = None
    RESULT = None

    def init(self):
        import pyqtgraph.opengl as gl
        from pyqtgraph.Qt import QtGui

        self.verbose = self.config.verbose
        self.offset = self.config.offset
        self.length = self.config.length
        self.plane_count = -1
        self.plot_points = None

        # Pick the data point generator and plot point limit for the requested axis count.
        if self.axis == 2:
            self.MAX_PLOT_POINTS = self.MAX_2D_PLOT_POINTS
            self._generate_data_point = self._generate_2d_data_point
        elif self.axis == 3:
            self.MAX_PLOT_POINTS = self.MAX_3D_PLOT_POINTS
            self._generate_data_point = self._generate_3d_data_point
        else:
            raise Exception("Invalid Plotter axis specified: %d. Must be one of: [2,3]" % self.axis)

        if not self.max_points:
            self.max_points = self.MAX_PLOT_POINTS

        self.app = QtGui.QApplication([])
        self.window = gl.GLViewWidget()
        self.window.opts['distance'] = self.VIEW_DISTANCE

        if len(self.config.target_files) == 1:
            self.window.setWindowTitle(self.config.target_files[0].name)

    def _print(self, message):
        '''
        Print console messages. For internal use only.
        '''
        if self.verbose:
            print(message)

    def _generate_plot_points(self, data_points):
        '''
        Generates plot points from a list of data points.

        @data_points - A dictionary containing each unique point and its frequency of occurrence.

        Returns a dictionary of plot points and their frequencies.
        '''
        total = 0
        min_weight = 0
        weightings = {}
        plot_points = {}

        # If the number of data points exceeds the maximum number of allowed data points, use a
        # weighting system to eliminate data points that occur less frequently.
        #
        # BUGFIX: the original used data_points.itervalues(), which only exists on
        # Python 2 dictionaries; values() behaves identically for this purpose.
        if sum(data_points.values()) > self.max_points:
            # First, generate a set of weight values 1 - 10
            for i in range(1, 11):
                weightings[i] = 0

            # Go through every data point and how many times that point occurs
            for (point, count) in iterator(data_points):
                # For each data point, compare it to each remaining weight value
                for w in get_keys(weightings):

                    # If the number of times this data point occurred is >= the weight value,
                    # then increment the weight value. Since weight values are ordered lowest
                    # to highest, this means that more frequent data points also increment lower
                    # weight values. Thus, the more high-frequency data points there are, the
                    # more lower-frequency data points are eliminated.
                    if count >= w:
                        weightings[w] += 1
                    else:
                        break

                    # Throw out weight values that exceed the maximum number of data points
                    if weightings[w] > self.max_points:
                        del weightings[w]

                # If there's only one weight value left, no sense in continuing the loop...
                if len(weightings) == 1:
                    break

            # The least weighted value is our minimum weight
            min_weight = min(weightings)

            # Get rid of all data points that occur less frequently than our minimum weight
            for point in get_keys(data_points):
                if data_points[point] < min_weight:
                    del data_points[point]

        # Report the most frequent points first, up to the configured cap.
        for point in sorted(data_points, key=data_points.get, reverse=True):
            plot_points[point] = data_points[point]
            self.result(point=point)
            total += 1
            if total >= self.max_points:
                break

        return plot_points

    def _generate_data_point(self, data):
        '''
        Subclasses must override this to return the appropriate data point.

        @data - A string of data self.axis in length.

        Returns a data point tuple.
        '''
        return (0,0,0)

    def _generate_data_points(self, fp):
        '''
        Generates a dictionary of data points and their frequency of occurrence.

        @fp - The BlockFile object to generate data points from.

        Returns a dictionary.
        '''
        i = 0
        data_points = {}

        self._print("Generating data points for %s" % fp.name)

        # We don't need any extra data from BlockFile
        fp.set_block_size(peek=0)

        while True:
            (data, dlen) = fp.read_block()
            if not data or not dlen:
                break

            i = 0
            while (i+(self.axis-1)) < dlen:
                point = self._generate_data_point(data[i:i+self.axis])
                if has_key(data_points, point):
                    data_points[point] += 1
                else:
                    data_points[point] = 1

                # NOTE(review): the stride is a fixed 3 bytes even in 2D mode
                # (self.axis == 2); presumably intentional sub-sampling, but
                # confirm before changing to self.axis.
                i += 3

        return data_points

    def _generate_plot(self, plot_points):
        import numpy as np
        import pyqtgraph.opengl as gl

        # BUGFIX: numpy array shapes must be integers; the original passed
        # float(len(plot_points)), which modern numpy versions reject.
        nitems = len(plot_points)

        pos = np.empty((nitems, 3))
        size = np.empty((nitems))
        color = np.empty((nitems, 4))

        i = 0
        for (point, weight) in iterator(plot_points):
            r = 0.0
            g = 0.0
            b = 0.0

            pos[i] = point
            frequency_percentage = (weight / float(nitems))

            # Give points that occur more frequently a brighter color and larger point size.
            # Frequency is determined as a percentage of total unique data points.
            if frequency_percentage > .005:
                size[i] = .20
                r = 1.0
            elif frequency_percentage > .002:
                size[i] = .10
                g = 1.0
                r = 1.0
            else:
                size[i] = .05
                g = 1.0

            color[i] = (r, g, b, 1.0)
            i += 1

        scatter_plot = gl.GLScatterPlotItem(pos=pos, size=size, color=color, pxMode=False)
        # Center the 256^3 byte-value cube around the origin.
        scatter_plot.translate(-127.5, -127.5, -127.5)

        return scatter_plot

    def plot(self, wait=True):
        import pyqtgraph.opengl as gl

        self.window.show()

        if self.show_grids:
            xgrid = gl.GLGridItem()
            ygrid = gl.GLGridItem()
            zgrid = gl.GLGridItem()

            self.window.addItem(xgrid)
            self.window.addItem(ygrid)
            self.window.addItem(zgrid)

            # Rotate x and y grids to face the correct direction
            xgrid.rotate(90, 0, 1, 0)
            ygrid.rotate(90, 1, 0, 0)

            # Scale grids to the appropriate dimensions
            xgrid.scale(12.8, 12.8, 12.8)
            ygrid.scale(12.8, 12.8, 12.8)
            zgrid.scale(12.8, 12.8, 12.8)

        for fd in iter(self.next_file, None):
            data_points = self._generate_data_points(fd)

            self._print("Generating plot points from %d data points" % len(data_points))
            self.plot_points = self._generate_plot_points(data_points)
            del data_points

            self._print("Generating graph from %d plot points" % len(self.plot_points))
            self.window.addItem(self._generate_plot(self.plot_points))

        if wait:
            self.wait()

    def wait(self):
        from pyqtgraph.Qt import QtCore, QtGui

        # Keep the Qt event loop alive until the window is closed.
        t = QtCore.QTimer()
        t.start(50)
        QtGui.QApplication.instance().exec_()

    def _generate_3d_data_point(self, data):
        '''
        Plot data points within a 3D cube.
        '''
        # NOTE(review): ord() assumes str elements (Python 2 style); on Python 3
        # bytes, indexing already yields ints — confirm the BlockFile data type.
        return (ord(data[0]), ord(data[1]), ord(data[2]))

    def _generate_2d_data_point(self, data):
        '''
        Plot data points projected on each cube face, cycling through the six
        faces one point at a time.
        '''
        self.plane_count += 1
        if self.plane_count > 5:
            self.plane_count = 0

        if self.plane_count == 0:
            return (0, ord(data[0]), ord(data[1]))
        elif self.plane_count == 1:
            return (ord(data[0]), 0, ord(data[1]))
        elif self.plane_count == 2:
            return (ord(data[0]), ord(data[1]), 0)
        elif self.plane_count == 3:
            return (255, ord(data[0]), ord(data[1]))
        elif self.plane_count == 4:
            return (ord(data[0]), 255, ord(data[1]))
        elif self.plane_count == 5:
            return (ord(data[0]), ord(data[1]), 255)

    def run(self):
        self.plot()
        return True
...@@ -5,87 +5,87 @@ import binwalk.core.C ...@@ -5,87 +5,87 @@ import binwalk.core.C
from binwalk.core.module import Option, Kwarg, Module from binwalk.core.module import Option, Kwarg, Module
class Deflate(object):
    '''
    Finds and extracts raw deflate compression streams.
    '''

    ENABLED = False
    BLOCK_SIZE = 33*1024
    # To prevent many false positives, only show data that decompressed to a reasonable size and didn't just result in a bunch of NULL bytes
    MIN_DECOMP_SIZE = 32*1024
    DESCRIPTION = "Raw deflate compression stream"

    TINFL_NAME = "tinfl"

    TINFL_FUNCTIONS = [
        binwalk.core.C.Function(name="is_deflated", type=int),
        binwalk.core.C.Function(name="inflate_raw_file", type=None),
    ]

    def __init__(self, module):
        '''
        @module - The parent module, used for extractor registration.
        '''
        self.module = module

        # The tinfl library is built and installed with binwalk
        self.tinfl = binwalk.core.C.Library(self.TINFL_NAME, self.TINFL_FUNCTIONS)

        # Register an extraction rule for results matching our description
        if self.module.extractor.enabled:
            self.module.extractor.add_rule(regex='^%s' % self.DESCRIPTION.lower(),
                                           extension="deflate",
                                           cmd=self._extractor)

    def _extractor(self, file_name):
        # Inflate the carved stream into a file of the same name, minus the extension.
        inflated_name = os.path.splitext(file_name)[0]
        self.tinfl.inflate_raw_file(file_name, inflated_name)

    def decompress(self, data):
        '''
        Checks whether data looks like a raw deflate stream.

        @data - Block of data to check.

        Returns a description string if the data inflates to at least
        MIN_DECOMP_SIZE bytes; otherwise None.
        '''
        inflated_size = self.tinfl.is_deflated(data, len(data), 0)
        if inflated_size < self.MIN_DECOMP_SIZE:
            return None

        return self.DESCRIPTION + ', uncompressed size >= %d' % inflated_size
class RawCompression(Module):
    '''
    Scans files for raw (headerless) compression streams.
    '''

    DECOMPRESSORS = {
            'deflate' : Deflate,
    }

    TITLE = 'Raw Compression'

    CLI = [
            Option(short='X',
                   long='deflate',
                   kwargs={'enabled' : True, 'decompressor_class' : 'deflate'},
                   description='Scan for raw deflate compression streams'),
    ]

    KWARGS = [
            Kwarg(name='enabled', default=False),
            Kwarg(name='decompressor_class', default=None),
    ]

    def init(self):
        # Instantiate the decompressor selected on the command line.
        self.decompressor = self.DECOMPRESSORS[self.decompressor_class](self)

    def run(self):
        for fp in iter(self.next_file, None):
            fp.set_block_size(peek=self.decompressor.BLOCK_SIZE)
            self.header()

            while True:
                (data, dlen) = fp.read_block()
                if not data:
                    break

                # Attempt a decompression at every offset within the block.
                for offset in range(0, dlen):
                    candidate = data[offset:offset+self.decompressor.BLOCK_SIZE]
                    description = self.decompressor.decompress(candidate)
                    if description:
                        self.result(description=description, file=fp,
                                    offset=fp.tell()-dlen+offset)

                self.status.completed = fp.tell() - fp.offset

            self.footer()
...@@ -6,192 +6,192 @@ from binwalk.core.module import Module, Option, Kwarg ...@@ -6,192 +6,192 @@ from binwalk.core.module import Module, Option, Kwarg
class Entropy(Module):

    # Plot axis labels and unit abbreviations used by the entropy graph.
    XLABEL = 'Offset'
    YLABEL = 'Entropy'

    XUNITS = 'B'
    YUNITS = 'E'

    # Output dimensions/format used when saving the plot to a file.
    FILE_WIDTH = 1024
    FILE_FORMAT = 'png'

    # Colors cycled through when overlaying other modules' results on the graph.
    COLORS = ['r', 'g', 'c', 'b', 'm']

    # Entropy block size (bytes) used when none is configured.
    DEFAULT_BLOCK_SIZE = 1024

    TITLE = "Entropy"
    ORDER = 8

    CLI = [
            Option(short='E',
                   long='entropy',
                   kwargs={'enabled' : True},
                   description='Calculate file entropy'),
            Option(short='J',
                   long='save-plot',
                   kwargs={'save_plot' : True},
                   description='Save plot as a PNG'),
            Option(short='N',
                   long='no-plot',
                   kwargs={'do_plot' : False},
                   description='Do not generate an entropy plot graph'),
            Option(short='Q',
                   long='no-legend',
                   kwargs={'show_legend' : False},
                   description='Omit the legend from the entropy plot graph'),
    ]

    KWARGS = [
            Kwarg(name='enabled', default=False),
            Kwarg(name='save_plot', default=False),
            Kwarg(name='display_results', default=True),
            Kwarg(name='do_plot', default=True),
            Kwarg(name='show_legend', default=True),
            Kwarg(name='block_size', default=0),
    ]

    # Run this module last so that it can process all other module's results and overlay them on the entropy graph
    PRIORITY = 0
def init(self):
    '''
    Collects results from all previously-run modules (so they can be marked
    on the entropy graph) and selects the entropy block size.
    '''
    # Replace the last header column title with the entropy value column.
    self.HEADER[-1] = "ENTROPY"
    self.algorithm = self.shannon
    self.max_description_length = 0
    self.file_markers = {}

    # Get a list of all other module's results to mark on the entropy graph
    for (module, obj) in iterator(self.modules):
        for result in obj.results:
            if not (result.file and result.description):
                continue

            description = result.description.split(',')[0]

            if not has_key(self.file_markers, result.file.name):
                self.file_markers[result.file.name] = []

            if len(description) > self.max_description_length:
                self.max_description_length = len(description)

            self.file_markers[result.file.name].append((result.offset, description))

    # If other modules have been run and they produced results, don't spam the terminal with entropy results
    if self.file_markers:
        self.display_results = False

    # Fall back to the global --block setting, then to the built-in default.
    if not self.block_size:
        self.block_size = self.config.block if self.config.block else self.DEFAULT_BLOCK_SIZE
def run(self):
    '''
    Calculates entropy for each target file, then hands control to the Qt
    event loop unless plots are only being saved to disk.
    '''
    from pyqtgraph.Qt import QtGui

    for fd in iter(self.next_file, None):
        if self.display_results:
            self.header()

        self.calculate_file_entropy(fd)

        if self.display_results:
            self.footer()

    if self.do_plot and not self.save_plot:
        QtGui.QApplication.instance().exec_()
def calculate_file_entropy(self, fp):
    '''
    Computes entropy for one file, block by block, reporting one result
    per block of self.block_size bytes.

    @fp - File object to analyze.
    '''
    # Clear results from any previously analyzed files
    self.clear(results=True)

    while True:
        block_offset = fp.tell()
        (block, block_len) = fp.read_block()
        if not block:
            break

        offset = 0
        while offset < block_len:
            entropy = self.algorithm(block[offset:offset+self.block_size])
            self.result(offset=(block_offset + offset),
                        file=fp,
                        entropy=entropy,
                        description=("%f" % entropy),
                        display=self.display_results)
            offset += self.block_size

    if self.do_plot:
        self.plot_entropy(fp.name)
def shannon(self, data):
    '''
    Performs a Shannon entropy analysis on a given block of data.

    Returns a value in the range 0.0 - 1.0 (the raw Shannon entropy
    in bits-per-byte, divided by 8).
    '''
    total = 0

    if data:
        block_len = len(data)

        # Sum -p*log2(p) over the probability of each possible byte value
        for byte_value in range(0, 256):
            p = float(data.count(chr(byte_value))) / block_len
            if p > 0:
                total += -p * math.log(p, 2)

    return (total / 8)
def gzip(self, data, truncate=True):
    '''
    Performs an entropy analysis based on zlib compression ratio.
    This is faster than the shannon entropy analysis, but not as accurate.

    @data     - The block of data to analyze.
    @truncate - If True, clamp the reported ratio to a maximum of 1.0.

    Returns a float entropy estimate (compressed size / original size).
    '''
    # An empty block has no entropy; this also prevents the
    # ZeroDivisionError that the ratio below would otherwise raise.
    if not data:
        return 0.0

    # Entropy is a simple ratio of: <zlib compressed size> / <original size>
    e = float(float(len(zlib.compress(data, 9))) / float(len(data)))

    # Incompressible data can "compress" to slightly larger than its
    # original size; optionally clamp such values into the 0.0 - 1.0 range.
    if truncate and e > 1.0:
        e = 1.0

    return e
def plot_entropy(self, fname):
    '''
    Plots the entropy results for a given file, with colored vertical
    markers for any signature results recorded against that file.

    @fname - Path to the file whose entropy results should be plotted.

    Returns None.
    '''
    # NOTE: the original code also imported numpy here, but never used it;
    # the unused import has been removed.
    import pyqtgraph as pg

    i = 0
    x = []
    y = []
    plotted_colors = {}

    # Build the (offset, entropy) data points from the accumulated results
    for r in self.results:
        x.append(r.offset)
        y.append(r.entropy)

    plt = pg.plot(title=fname, clear=True)

    if self.show_legend and has_key(self.file_markers, fname):
        plt.addLegend(size=(self.max_description_length*10, 0))

        for (offset, description) in self.file_markers[fname]:
            # If this description has already been plotted at a different offset, we need to
            # use the same color for the marker, but set the description to None to prevent
            # duplicate entries in the graph legend.
            #
            # Else, get the next color and use it to mark descriptions of this type.
            if has_key(plotted_colors, description):
                color = plotted_colors[description]
                description = None
            else:
                color = self.COLORS[i]
                plotted_colors[description] = color

                i += 1
                if i >= len(self.COLORS):
                    i = 0

            plt.plot(x=[offset,offset], y=[0,1.1], name=description, pen=pg.mkPen(color, width=2.5))

    # Plot data points
    plt.plot(x, y, pen='y')

    # TODO: legend is not displayed properly when saving plots to disk
    if self.save_plot:
        exporter = pg.exporters.ImageExporter.ImageExporter(plt.plotItem)
        exporter.parameters()['width'] = self.FILE_WIDTH
        exporter.export(binwalk.core.common.unique_file_name(os.path.basename(fname), self.FILE_FORMAT))
    else:
        plt.setLabel('left', self.YLABEL, units=self.YUNITS)
        plt.setLabel('bottom', self.XLABEL, units=self.XUNITS)
...@@ -9,560 +9,560 @@ from binwalk.core.module import Module, Option, Kwarg ...@@ -9,560 +9,560 @@ from binwalk.core.module import Module, Option, Kwarg
from binwalk.core.common import file_size, unique_file_name, BlockFile from binwalk.core.common import file_size, unique_file_name, BlockFile
class Extractor(Module):
    '''
    Extractor class, responsible for extracting files from the target file and executing external applications, if requested.
    '''

    # Extraction rules are colon-delimited:
    #   <case insensitive matching string>:<file extension>[:<command to run>]
    RULE_DELIM = ':'

    # Lines in extract.conf files starting with a pound sign are comments
    COMMENT_DELIM ='#'

    # Token replaced with the extracted file's name inside rule commands
    FILE_NAME_PLACEHOLDER = '%e'

    TITLE = 'Extraction'
    ORDER = 9
    PRIMARY = False

    # Command-line options exposed by this module
    CLI = [
            Option(short='e',
                   long='extract',
                   kwargs={'load_default_rules' : True, 'enabled' : True},
                   description='Automatically extract known file types'),
            Option(short='D',
                   long='dd',
                   type=[],
                   dtype='type:ext:cmd',
                   kwargs={'manual_rules' : [], 'enabled' : True},
                   description='Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>'),
            Option(short='M',
                   long='matryoshka',
                   kwargs={'matryoshka' : 8},
                   description='Recursively scan extracted files'),
            Option(short='d',
                   long='depth',
                   type=int,
                   kwargs={'matryoshka' : 0},
                   description='Limit matryoshka recursion depth (default: 8 levels deep)'),
            Option(short='j',
                   long='max-size',
                   type=int,
                   kwargs={'max_size' : 0},
                   description='Limit the size of each extracted file'),
            Option(short='r',
                   long='rm',
                   kwargs={'remove_after_execute' : True},
                   description='Cleanup extracted / zero-size files after extraction'),
            Option(short='z',
                   long='carve',
                   kwargs={'run_extractors' : False},
                   description="Carve data from files, but don't execute extraction utilities"),
          ]

    # Keyword arguments (and their defaults) accepted by this module
    KWARGS = [
                Kwarg(name='max_size', default=None),
                Kwarg(name='remove_after_execute', default=False),
                Kwarg(name='load_default_rules', default=False),
                Kwarg(name='run_extractors', default=True),
                Kwarg(name='manual_rules', default=[]),
                Kwarg(name='matryoshka', default=0),
                Kwarg(name='enabled', default=False),
             ]
def load(self):
    """Populate the extraction rule list from defaults and/or manual rules."""
    # Rules loaded from config files or specified manually on the command line
    self.extract_rules = []

    if self.load_default_rules:
        self.load_defaults()

    for rule in self.manual_rules:
        self.add_rule(rule)
def reset(self):
    """Reset the per-scan state tracked by the extractor."""
    # Files queued for scanning; only populated when self.matryoshka is set
    self.pending = []
    # Maps each scanned file to the directory its extractions are written to
    self.extraction_directories = {}
    # Previous directory listings per directory; used to identify newly
    # extracted files that should be appended to self.pending
    self.last_directory_listing = {}
    # Path of the first extraction directory; recursion depth is measured
    # relative to this directory
    self.base_recursion_dir = ""
def callback(self, r):
    '''
    Scan-result callback: extracts matching data and queues newly created
    files for recursive scanning when matryoshka mode is enabled.

    @r - A scan result object (expected to expose file, offset, size,
         description, name, and valid attributes).

    Returns None.
    '''
    # Make sure the file attribute is set to a compatible instance of binwalk.core.common.BlockFile
    try:
        r.file.size
    except KeyboardInterrupt as e:
        # Propagate user interrupts instead of swallowing them; this matches
        # the KeyboardInterrupt handling used everywhere else in this module.
        raise e
    except Exception as e:
        return

    if not r.size:
        size = r.file.size - r.offset
    else:
        size = r.size

    # Only extract valid results
    if r.valid:
        # Do the extraction
        (extraction_directory, dd_file) = self.extract(r.offset, r.description, r.file.name, size, r.name)

        # If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file
        if extraction_directory and dd_file:
            # Get the full path to the dd'd file
            dd_file_path = os.path.join(extraction_directory, dd_file)

            # Do a directory listing of the output directory
            directory_listing = set(os.listdir(extraction_directory))

            # If this is a newly created output directory, self.last_directory_listing won't have a record of it.
            # If we've extracted other files to this directory before, it will.
            if not has_key(self.last_directory_listing, extraction_directory):
                self.last_directory_listing[extraction_directory] = set()

            # Loop through a list of newly created files (i.e., files that weren't listed in the last directory listing)
            for f in directory_listing.difference(self.last_directory_listing[extraction_directory]):
                # Build the full file path and add it to the extractor results
                file_path = os.path.join(extraction_directory, f)
                real_file_path = os.path.realpath(file_path)
                self.result(description=file_path, display=False)

                # If recursion was specified, and the file is not the same one we just dd'd, and if it is not a directory
                if self.matryoshka and file_path != dd_file_path and not os.path.isdir(file_path):
                    # If the recursion level of this file is less than or equal to our desired recursion level
                    if len(real_file_path.split(self.base_recursion_dir)[1].split(os.path.sep)) <= self.matryoshka:
                        # Add the file to our list of pending files
                        self.pending.append(file_path)

            # Update the last directory listing for the next time we extract a file to this same output directory
            self.last_directory_listing[extraction_directory] = directory_listing
def append_rule(self, r):
    # Store a shallow copy so later mutation of the caller's dict cannot
    # alter rules that have already been registered.
    self.extract_rules.append(dict(r))
def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None):
    '''
    Adds a set of rules to the extraction rule list.

    @txtrule   - Rule string, or list of rule strings, in the format <regular expression>:<file extension>[:<command to run>]
    @regex     - If rule string is not specified, this is the regular expression string to use.
    @extension - If rule string is not specified, this is the file extension to use.
    @cmd       - If rule string is not specified, this is the command to run.
                 Alternatively a callable object may be specified, which will be passed one argument: the path to the file to extract.

    Returns None.
    '''
    rules = []
    r = {
        'extension' : '',
        'cmd' : '',
        'regex' : None
    }

    # Process single explicitly specified rule
    if not txtrule and regex and extension:
        r['extension'] = extension
        r['regex'] = re.compile(regex)
        if cmd:
            r['cmd'] = cmd

        self.append_rule(r)
        return

    # Process rule string, or list of rule strings
    if not isinstance(txtrule, type([])):
        rules = [txtrule]
    else:
        rules = txtrule

    for rule in rules:
        # Reset all per-rule state at the top of each iteration. Previously
        # 'match' and r['regex'] carried over from the prior iteration, so a
        # malformed rule following a valid one could be appended to the rule
        # list with the preceding rule's regex attached to it.
        match = False
        r['cmd'] = ''
        r['extension'] = ''
        r['regex'] = None

        try:
            values = self._parse_rule(rule)
            match = values[0]
            r['regex'] = re.compile(values[0])
            r['extension'] = values[1]
            r['cmd'] = values[2]
        except KeyboardInterrupt as e:
            raise e
        except Exception:
            pass

        # Verify that the match string was retrieved.
        if match:
            self.append_rule(r)
def remove_rule(self, text):
    '''
    Remove all rules that match a specified text.

    @text - The text to match against.

    Returns the number of rules removed.
    '''
    rm = []

    for i in range(0, len(self.extract_rules)):
        if self.extract_rules[i]['regex'].match(text):
            rm.append(i)

    # Pop in reverse (descending) order so earlier indices stay valid.
    # Popping in ascending order shifts all subsequent entries down by one,
    # causing the wrong rules to be removed (or an IndexError) whenever
    # more than one rule matches.
    for i in reversed(rm):
        self.extract_rules.pop(i)

    return len(rm)
def clear_rules(self):
    '''
    Deletes all extraction rules.

    Returns None.
    '''
    # Replace, rather than mutate, so any external references to the old
    # list are unaffected (matches the original assignment semantics).
    self.extract_rules = []
def get_rules(self):
    '''
    Returns a list of all extraction rules.
    '''
    return self.extract_rules
def load_from_file(self, fname):
    '''
    Loads extraction rules from the specified file.

    @fname - Path to the extraction rule file.

    Returns None.
    '''
    try:
        # Each non-comment line in the extract file defines one rule;
        # anything after the comment delimiter is ignored.
        with open(fname, 'r') as f:
            for line in f.readlines():
                self.add_rule(line.split(self.COMMENT_DELIM, 1)[0])
    except KeyboardInterrupt as e:
        raise e
    except Exception as e:
        raise Exception("Extractor.load_from_file failed to load file '%s': %s" % (fname, str(e)))
def load_defaults(self):
    '''
    Loads default extraction rules from the user and system extract.conf files.

    Returns None.
    '''
    # The user extract file is loaded first so its rules take precedence.
    extract_files = [
        self.config.settings.paths['user'][self.config.settings.EXTRACT_FILE],
        self.config.settings.paths['system'][self.config.settings.EXTRACT_FILE],
    ]

    for conf_path in extract_files:
        try:
            self.load_from_file(conf_path)
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            # Missing config files are ignored unless verbose mode is on
            if self.config.verbose:
                raise Exception("Extractor.load_defaults failed to load file '%s': %s" % (conf_path, str(e)))
def build_output_directory(self, path):
    '''
    Set the output directory for extracted files.

    @path - The path to the file that data will be extracted from.

    Returns the output directory path.
    '''
    if has_key(self.extraction_directories, path):
        # Reuse the directory already created for this target file
        output_directory = self.extraction_directories[path]
    else:
        # First extraction from this target; create a unique output directory
        output_directory = os.path.join(os.path.dirname(path), unique_file_name('_' + os.path.basename(path), extension='extracted'))

        if not os.path.exists(output_directory):
            os.mkdir(output_directory)

        self.extraction_directories[path] = output_directory

    # Record the first extraction directory so the level of recursion can be
    # measured against it later.
    if not self.base_recursion_dir:
        self.base_recursion_dir = os.path.realpath(output_directory) + os.path.sep

    return output_directory
def cleanup_extracted_files(self, tf=None):
    '''
    Set the action to take after a file is extracted.

    @tf - If set to True, extracted files will be cleaned up after running a command against them.
          If set to False, extracted files will not be cleaned up after running a command against them.
          If set to None or not specified, the current setting will not be changed.

    Returns the current cleanup status (True/False).
    '''
    # None means "query only": leave the current setting untouched.
    if tf is None:
        return self.remove_after_execute

    self.remove_after_execute = tf
    return self.remove_after_execute
def extract(self, offset, description, file_name, size, name=None):
    '''
    Extract an embedded file from the target file, if it matches an extract rule.
    Called automatically by Binwalk.scan().

    @offset      - Offset inside the target file to begin the extraction.
    @description - Description of the embedded file to extract, as returned by libmagic.
    @file_name   - Path to the target file.
    @size        - Number of bytes to extract.
    @name        - Name to save the file as.

    Returns a (output directory, extracted file name) tuple, or (None, None)
    if no extraction rule matched.
    '''
    fname = ''
    cleanup_extracted_fname = True
    saved_cwd = os.getcwd()

    rules = self._match(description)
    file_path = os.path.realpath(file_name)

    # No extraction rules for this file
    if not rules:
        return (None, None)

    output_directory = self.build_output_directory(file_name)

    # Extract to end of file if no size was specified
    if not size:
        size = file_size(file_path) - offset

    if os.path.isfile(file_path):
        # Commands are executed from inside the output directory
        os.chdir(output_directory)

        # Loop through each extraction rule until one succeeds
        for n in range(0, len(rules)):
            rule = rules[n]

            # Copy out the data to disk, if we haven't already
            fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)

            # If there was a command specified for this rule, try to execute it.
            # If execution fails, the next rule will be attempted.
            if rule['cmd']:

                # Many extraction utilities will extract the file to a new file, just without
                # the file extension (i.e., myfile.7z -> myfile). If the presumed resulting
                # file name already exists before executing the extract command, do not attempt
                # to clean it up even if its resulting file size is 0.
                if self.remove_after_execute:
                    extracted_fname = os.path.splitext(fname)[0]
                    if os.path.exists(extracted_fname):
                        cleanup_extracted_fname = False

                # Execute the specified command against the extracted file
                if self.run_extractors:
                    extract_ok = self.execute(rule['cmd'], fname)
                else:
                    extract_ok = True

                # Only clean up files if remove_after_execute was specified
                if extract_ok and self.remove_after_execute:

                    # Remove the original file that we extracted
                    try:
                        os.unlink(fname)
                    except KeyboardInterrupt as e:
                        raise e
                    except Exception as e:
                        pass

                    # If the command worked, assume it removed the file extension from the extracted file.
                    # If the extracted file name file exists and is empty, remove it.
                    if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0:
                        try:
                            os.unlink(extracted_fname)
                        except KeyboardInterrupt as e:
                            raise e
                        except Exception as e:
                            pass

                # If the command executed OK, don't try any more rules
                if extract_ok:
                    break
                # Else, remove the extracted file if this isn't the last rule in the list.
                # If it is the last rule, leave the file on disk for the user to examine.
                elif n != (len(rules)-1):
                    try:
                        os.unlink(fname)
                    except KeyboardInterrupt as e:
                        raise e
                    except Exception as e:
                        pass

            # If there was no command to execute, just use the first rule
            else:
                break

        os.chdir(saved_cwd)

    return (output_directory, fname)
def _entry_offset(self, index, entries, description): def _entry_offset(self, index, entries, description):
''' '''
Gets the offset of the first entry that matches the description. Gets the offset of the first entry that matches the description.
@index - Index into the entries list to begin searching. @index - Index into the entries list to begin searching.
@entries - Dictionary of result entries. @entries - Dictionary of result entries.
@description - Case insensitive description. @description - Case insensitive description.
Returns the offset, if a matching description is found. Returns the offset, if a matching description is found.
Returns -1 if a matching description is not found. Returns -1 if a matching description is not found.
''' '''
description = description.lower() description = description.lower()
for (offset, infos) in entries[index:]: for (offset, infos) in entries[index:]:
for info in infos: for info in infos:
if info['description'].lower().startswith(description): if info['description'].lower().startswith(description):
return offset return offset
return -1 return -1
def _match(self, description):
    '''
    Check to see if the provided description string matches an extract rule.
    Called internally by self.extract().

    @description - Description string to check.

    Returns a list of all rule dictionaries whose regex matches the
    (lower-cased) description; the list is empty if nothing matched.
    '''
    needle = description.lower()
    return [rule for rule in self.extract_rules if rule['regex'].search(needle)]
def _parse_rule(self, rule):
    '''
    Parses an extraction rule.

    @rule - Rule string.

    Returns an array of ['<case insensitive matching string>', '<file extension>', '<command to run>'].
    '''
    # Split on at most two delimiters so the command portion may itself
    # contain the delimiter character.
    return rule.strip().split(self.RULE_DELIM, 2)
def _dd(self, file_name, offset, size, extension, output_file_name=None):
    '''
    Extracts a file embedded inside the target file.

    @file_name        - Path to the target file.
    @offset           - Offset inside the target file where the embedded file begins.
    @size             - Number of bytes to extract.
    @extension        - The file extension to assign to the extracted file on disk.
    @output_file_name - The requested name of the output file.

    Returns the extracted file name.
    '''
    total_size = 0
    # Default extracted file name is <hex offset>.<extension>
    default_bname = "%X" % offset

    # Honor the global extraction size limit, if one was configured
    if self.max_size and size > self.max_size:
        size = self.max_size

    if output_file_name:
        # Strip the output file name of invalid/dangerous characters (like file paths)
        bname = os.path.basename(output_file_name)
    else:
        # No name requested (None or empty); fall back to the default name
        bname = default_bname

    fname = unique_file_name(bname, extension)

    try:
        # Open the target file and seek to the offset
        fdin = self.config.open_file(file_name, length=size, offset=offset)

        # Open the output file
        try:
            fdout = BlockFile(fname, 'w')
        except KeyboardInterrupt as e:
            raise e
        except Exception as e:
            # Fall back to the default name if the requested name fails
            fname = unique_file_name(default_bname, extension)
            fdout = BlockFile(fname, 'w')

        # Copy up to size bytes from the input to the output, block by block
        while total_size < size:
            (data, dlen) = fdin.read_block()
            if not data:
                break
            else:
                fdout.write(str2bytes(data[:dlen]))
                total_size += dlen

        # Cleanup
        fdout.close()
        fdin.close()
    except KeyboardInterrupt as e:
        raise e
    except Exception as e:
        raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))

    return fname
def execute(self, cmd, fname):
    '''
    Execute a command against the specified file.

    @cmd   - Command to execute; either a callable object, or a command string
             that may contain self.FILE_NAME_PLACEHOLDER.
    @fname - File to run command against.

    Returns True on success, False on failure.
    '''
    tmp = None
    retval = True

    try:
        if callable(cmd):
            try:
                cmd(fname)
            except KeyboardInterrupt as e:
                raise e
            except Exception as e:
                sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e)))
        else:
            # If not in verbose mode, create a temporary file to redirect stdout and stderr to
            if not self.config.verbose:
                tmp = tempfile.TemporaryFile()

            # Replace all instances of FILE_NAME_PLACEHOLDER in the command with fname
            cmd = cmd.replace(self.FILE_NAME_PLACEHOLDER, fname)

            # Execute.
            if subprocess.call(shlex.split(cmd), stdout=tmp, stderr=tmp) != 0:
                retval = False
    except KeyboardInterrupt as e:
        raise e
    except Exception as e:
        # Silently ignore no such file or directory errors. Why? Because these will inevitably be raised when
        # making the switch to the new firmware mod kit directory structure. We handle this elsewhere, but it's
        # annoying to see this spammed out to the console every time.
        #
        # Only OSError/IOError carry an errno attribute; use getattr so that handling
        # any other exception type doesn't itself raise an AttributeError.
        if getattr(e, 'errno', None) != 2:
            sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e)))
            retval = False

    if tmp is not None:
        tmp.close()

    return retval
...@@ -10,177 +10,177 @@ from binwalk.core.module import Module, Option, Kwarg, show_help ...@@ -10,177 +10,177 @@ from binwalk.core.module import Module, Option, Kwarg, show_help
class General(Module):
    '''
    Module providing the general command line options and global scan settings
    (offsets, lengths, filters, logging, and the list of target files).
    '''

    TITLE = "General"
    ORDER = 0

    DEFAULT_DEPENDS = []

    CLI = [
        Option(long='length',
               short='l',
               type=int,
               kwargs={'length' : 0},
               description='Number of bytes to scan'),
        Option(long='offset',
               short='o',
               type=int,
               kwargs={'offset' : 0},
               description='Start scan at this file offset'),
        Option(long='block',
               short='K',
               type=int,
               kwargs={'block' : 0},
               description='Set file block size'),
        Option(long='swap',
               short='g',
               type=int,
               kwargs={'swap_size' : 0},
               description='Reverse every n bytes before scanning'),
        Option(short='I',
               long='show-invalid',
               kwargs={'show_invalid' : True},
               description='Show results marked as invalid'),
        Option(short='x',
               long='exclude',
               kwargs={'exclude_filters' : []},
               type=list,
               dtype=str.__name__,
               description='Exclude results that match <str>'),
        Option(short='y',
               long='include',
               kwargs={'include_filters' : []},
               type=list,
               dtype=str.__name__,
               description='Only show results that match <str>'),
        Option(long='log',
               short='f',
               type=argparse.FileType,
               kwargs={'log_file' : None},
               description='Log results to file'),
        Option(long='csv',
               short='c',
               kwargs={'csv' : True},
               description='Log results to file in CSV format'),
        Option(long='term',
               short='t',
               kwargs={'format_to_terminal' : True},
               description='Format output to fit the terminal window'),
        Option(long='quiet',
               short='q',
               kwargs={'quiet' : True},
               # Fixed typo in user-facing help text ("Supress" -> "Suppress")
               description='Suppress output to stdout'),
        Option(long='verbose',
               short='v',
               kwargs={'verbose' : True},
               description='Enable verbose output'),
        Option(short='h',
               long='help',
               kwargs={'show_help' : True},
               description='Show help output'),
        # Catch-all for positional (file name) arguments
        Option(long=None,
               short=None,
               type=binwalk.core.common.BlockFile,
               kwargs={'files' : []}),
    ]

    KWARGS = [
        Kwarg(name='length', default=0),
        Kwarg(name='offset', default=0),
        Kwarg(name='block', default=0),
        Kwarg(name='swap_size', default=0),
        Kwarg(name='show_invalid', default=False),
        Kwarg(name='include_filters', default=[]),
        Kwarg(name='exclude_filters', default=[]),
        Kwarg(name='log_file', default=None),
        Kwarg(name='csv', default=False),
        Kwarg(name='format_to_terminal', default=False),
        Kwarg(name='quiet', default=False),
        Kwarg(name='verbose', default=False),
        Kwarg(name='files', default=[]),
        Kwarg(name='show_help', default=False),
    ]

    PRIMARY = False

    def load(self):
        '''
        Module initialization: opens target files, configures verbosity,
        filters, settings and display, and handles --help.
        '''
        self.target_files = []

        # Order is important with these two methods
        self._open_target_files()
        self._set_verbosity()

        self.filter = binwalk.core.filter.Filter(self.show_invalid)

        # Set any specified include/exclude filters
        for regex in self.exclude_filters:
            self.filter.exclude(regex)
        for regex in self.include_filters:
            self.filter.include(regex)

        self.settings = binwalk.core.settings.Settings()
        self.display = binwalk.core.display.Display(log=self.log_file,
                                                    csv=self.csv,
                                                    quiet=self.quiet,
                                                    verbose=self.verbose,
                                                    filter=self.filter,
                                                    fit_to_screen=self.format_to_terminal)

        if self.show_help:
            show_help()
            sys.exit(0)

    def reset(self):
        # Rewind all open target files so the next module starts from the beginning
        for fp in self.target_files:
            fp.reset()

    def __del__(self):
        self._cleanup()

    def __exit__(self, a, b, c):
        self._cleanup()

    def _cleanup(self):
        # hasattr guard: __del__ may run before load() ever set target_files
        if hasattr(self, 'target_files'):
            for fp in self.target_files:
                fp.close()

    def _set_verbosity(self):
        '''
        Sets the appropriate verbosity.
        Must be called after self._open_target_files so that self.target_files is properly set.
        '''
        # If more than one target file was specified, enable verbose mode; else, there is
        # nothing in some outputs to indicate which scan corresponds to which file.
        if len(self.target_files) > 1 and not self.verbose:
            self.verbose = True

    def open_file(self, fname, length=None, offset=None, swap=None, block=None, peek=None):
        '''
        Opens the specified file with all pertinent configuration settings.
        '''
        # Fall back to the module-wide settings for any unspecified parameter
        if length is None:
            length = self.length
        if offset is None:
            offset = self.offset
        if swap is None:
            swap = self.swap_size

        return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek)

    def _open_target_files(self):
        '''
        Checks if the target files can be opened.
        Any files that cannot be opened are removed from the self.target_files list.
        '''
        # Validate the target files listed in target_files
        for tfile in self.files:
            # Ignore directories.
            if not os.path.isdir(tfile):
                # Make sure we can open the target files
                try:
                    self.target_files.append(self.open_file(tfile))
                except KeyboardInterrupt as e:
                    raise e
                except Exception as e:
                    self.error(description="Cannot open file : %s" % str(e))
...@@ -8,311 +8,311 @@ from binwalk.core.compat import * ...@@ -8,311 +8,311 @@ from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg from binwalk.core.module import Module, Option, Kwarg
class HashResult(object):
    '''
    Container class for a single libfuzzy hash result.
    For internal use only.
    '''

    def __init__(self, name, hash=None, strings=None):
        # File name the result was generated from
        self.name = name
        # Fuzzy hash buffer, if one has been computed
        self.hash = hash
        # Extracted printable strings, if they have been collected
        self.strings = strings
class HashMatch(Module):
    '''
    Class for fuzzy hash matching of files and directories.
    '''

    DEFAULT_CUTOFF = 0
    CONSERVATIVE_CUTOFF = 90

    TITLE = "Fuzzy Hash"

    CLI = [
        Option(short='F',
               long='fuzzy',
               kwargs={'enabled' : True},
               description='Perform fuzzy hash matching on files/directories'),
        Option(short='u',
               long='cutoff',
               priority=100,
               type=int,
               kwargs={'cutoff' : DEFAULT_CUTOFF},
               description='Set the cutoff percentage'),
        Option(short='S',
               long='strings',
               kwargs={'strings' : True},
               description='Diff strings inside files instead of the entire file'),
        Option(short='s',
               long='same',
               kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF},
               description='Only show files that are the same'),
        Option(short='p',
               long='diff',
               kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF},
               description='Only show files that are different'),
        Option(short='n',
               long='name',
               kwargs={'filter_by_name' : True},
               description='Only compare files whose base names are the same'),
        Option(short='L',
               long='symlinks',
               kwargs={'symlinks' : True},
               description="Don't ignore symlinks"),
    ]

    # NOTE: a duplicate Kwarg(name='symlinks') entry was removed from this list
    KWARGS = [
        Kwarg(name='cutoff', default=DEFAULT_CUTOFF),
        Kwarg(name='strings', default=False),
        Kwarg(name='same', default=True),
        Kwarg(name='symlinks', default=False),
        Kwarg(name='name', default=False),
        Kwarg(name='max_results', default=None),
        Kwarg(name='abspath', default=False),
        Kwarg(name='filter_by_name', default=False),
        Kwarg(name='enabled', default=False),
    ]

    # Requires libfuzzy.so
    LIBRARY_NAME = "fuzzy"
    LIBRARY_FUNCTIONS = [
        binwalk.core.C.Function(name="fuzzy_hash_buf", type=int),
        binwalk.core.C.Function(name="fuzzy_hash_filename", type=int),
        binwalk.core.C.Function(name="fuzzy_compare", type=int),
    ]

    # Max result is 148 (http://ssdeep.sourceforge.net/api/html/fuzzy_8h.html)
    FUZZY_MAX_RESULT = 150
    # Files smaller than this won't produce meaningful fuzzy results (from ssdeep.h)
    FUZZY_MIN_FILE_SIZE = 4096

    HEADER_FORMAT = "\n%s" + " " * 11 + "%s\n"
    RESULT_FORMAT = "%4d%%" + " " * 16 + "%s\n"
    HEADER = ["SIMILARITY", "FILE NAME"]
    RESULT = ["percentage", "description"]

    def init(self):
        self.total = 0
        # Cache the most recently hashed file on each side of the comparison
        self.last_file1 = HashResult(None)
        self.last_file2 = HashResult(None)

        self.lib = binwalk.core.C.Library(self.LIBRARY_NAME, self.LIBRARY_FUNCTIONS)

    def _get_strings(self, fname):
        # Concatenate all printable strings (length >= 10) found in the file
        return ''.join(list(binwalk.core.common.strings(fname, minimum=10)))

    def _show_result(self, match, fname):
        if self.abspath:
            fname = os.path.abspath(fname)
        self.result(percentage=match, description=fname)

    def _compare_files(self, file1, file2):
        '''
        Fuzzy diff two files.

        @file1 - The first file to diff.
        @file2 - The second file to diff.

        Returns the match percentage.
        Returns None on error.
        '''
        status = 0
        file1_dup = False
        file2_dup = False

        if not self.filter_by_name or os.path.basename(file1) == os.path.basename(file2):
            if os.path.exists(file1) and os.path.exists(file2):

                hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
                hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)

                # Check if the last file1 or file2 matches this file1 or file2; no need to re-hash if they match.
                if file1 == self.last_file1.name and self.last_file1.hash:
                    file1_dup = True
                else:
                    self.last_file1.name = file1

                if file2 == self.last_file2.name and self.last_file2.hash:
                    file2_dup = True
                else:
                    self.last_file2.name = file2

                try:
                    if self.strings:
                        # String mode: compare the printable strings from each file
                        if file1_dup:
                            file1_strings = self.last_file1.strings
                        else:
                            self.last_file1.strings = file1_strings = self._get_strings(file1)

                        if file2_dup:
                            file2_strings = self.last_file2.strings
                        else:
                            self.last_file2.strings = file2_strings = self._get_strings(file2)

                        if file1_strings == file2_strings:
                            return 100
                        else:
                            if file1_dup:
                                hash1 = self.last_file1.hash
                            else:
                                status |= self.lib.fuzzy_hash_buf(file1_strings, len(file1_strings), hash1)

                            if file2_dup:
                                hash2 = self.last_file2.hash
                            else:
                                status |= self.lib.fuzzy_hash_buf(file2_strings, len(file2_strings), hash2)
                    else:
                        # Whole-file mode: hash the file contents directly
                        if file1_dup:
                            hash1 = self.last_file1.hash
                        else:
                            status |= self.lib.fuzzy_hash_filename(file1, hash1)

                        if file2_dup:
                            hash2 = self.last_file2.hash
                        else:
                            status |= self.lib.fuzzy_hash_filename(file2, hash2)

                    if status == 0:
                        # Cache the freshly computed hashes for the next comparison
                        if not file1_dup:
                            self.last_file1.hash = hash1
                        if not file2_dup:
                            self.last_file2.hash = hash2

                        if hash1.raw == hash2.raw:
                            return 100
                        else:
                            return self.lib.fuzzy_compare(hash1, hash2)
                except Exception as e:
                    print("WARNING: Exception while doing fuzzy hash: %s" % e)

        return None

    def is_match(self, match):
        '''
        Returns True if this is a good match.
        Returns False if this is not a good match.
        '''
        return (match is not None and ((match >= self.cutoff and self.same) or (match < self.cutoff and not self.same)))

    def _get_file_list(self, directory):
        '''
        Generates a directory tree.

        @directory - The root directory to start from.

        Returns a set of file paths, excluding the root directory.
        '''
        file_list = []

        # Normalize directory path so that we can exclude it from each individual file path
        directory = os.path.abspath(directory) + os.path.sep

        for (root, dirs, files) in os.walk(directory):
            # Don't include the root directory in the file paths
            relative_root = ''.join(root.split(directory, 1)[1:])

            for f in files:
                # Bug fix: the symlink check must be run against the file's actual
                # on-disk path; the original code tested only the bare file name,
                # so symlinks were never filtered out.
                if not self.symlinks and os.path.islink(os.path.join(root, f)):
                    continue
                file_list.append(os.path.join(relative_root, f))

        return set(file_list)

    def hash_files(self, needle, haystack):
        '''
        Compare one file against a list of other files.

        Displays (via self._show_result) each match; returns None.
        '''
        self.total = 0

        for f in haystack:
            m = self._compare_files(needle, f)
            if m is not None and self.is_match(m):
                self._show_result(m, f)

                self.total += 1
                if self.max_results and self.total >= self.max_results:
                    break

    def hash_file(self, needle, haystack):
        '''
        Search for one file inside one or more directories.

        Returns a list of tuple results.
        '''
        matching_files = []
        self.total = 0
        done = False

        for directory in haystack:
            for f in self._get_file_list(directory):
                f = os.path.join(directory, f)
                m = self._compare_files(needle, f)
                if m is not None and self.is_match(m):
                    self._show_result(m, f)
                    matching_files.append((m, f))

                    self.total += 1
                    if self.max_results and self.total >= self.max_results:
                        done = True
                        break
            if done:
                break

        return matching_files

    def hash_directories(self, needle, haystack):
        '''
        Compare the contents of one directory with the contents of other directories.

        Displays (via self._show_result) each match; returns None.
        '''
        done = False
        self.total = 0

        source_files = self._get_file_list(needle)

        for directory in haystack:
            dir_files = self._get_file_list(directory)

            # Only compare files that exist (by relative path) in both trees
            for f in source_files:
                if f in dir_files:
                    file1 = os.path.join(needle, f)
                    file2 = os.path.join(directory, f)

                    m = self._compare_files(file1, file2)
                    if m is not None and self.is_match(m):
                        self._show_result(m, file2)

                        self.total += 1
                        if self.max_results and self.total >= self.max_results:
                            done = True
                            break
            if done:
                break

    def run(self):
        '''
        Main module method.
        '''
        # The first target file is the needle; all remaining targets are the haystack
        needle = self.next_file().name
        haystack = []

        self.header()

        for fp in iter(self.next_file, None):
            haystack.append(fp.name)

        if os.path.isfile(needle):
            if os.path.isfile(haystack[0]):
                self.hash_files(needle, haystack)
            else:
                self.hash_file(needle, haystack)
        else:
            self.hash_directories(needle, haystack)

        self.footer()

        return True
...@@ -8,186 +8,186 @@ from binwalk.core.compat import * ...@@ -8,186 +8,186 @@ from binwalk.core.compat import *
from binwalk.core.module import Module, Kwarg, Option, Dependency from binwalk.core.module import Module, Kwarg, Option, Dependency
class ChiSquare(object):
    '''
    Performs a Chi Squared test against the provided data.
    '''

    IDEAL = 256.0

    def __init__(self):
        '''
        Class constructor.

        Returns None.
        '''
        self.bytes = {}
        self.freedom = self.IDEAL - 1

        # One counter for every possible byte value (0 - 255)
        for value in range(int(self.IDEAL)):
            self.bytes[chr(value)] = 0

        self.reset()

    def reset(self):
        # Clear the running statistic and all byte counters
        self.xc2 = 0.0
        self.byte_count = 0

        for key in self.bytes:
            self.bytes[key] = 0

    def update(self, data):
        '''
        Updates the current byte counts with new data.

        @data - String of bytes to update.

        Returns None.
        '''
        # Tally the number of occurrences of each byte value
        for byte in data:
            self.bytes[byte] += 1

        self.byte_count += len(data)

    def chisq(self):
        '''
        Calculate the Chi Square critical value.

        Returns the critical value.
        '''
        expected = self.byte_count / self.IDEAL

        if expected:
            for observed in self.bytes.values():
                self.xc2 += ((observed - expected) ** 2) / expected

        return self.xc2
class EntropyBlock(object):
    '''
    Simple container describing one contiguous block of entropy data.
    '''

    def __init__(self, **kwargs):
        self.start = None
        self.end = None
        self.length = None

        # Any attribute may be overridden via keyword arguments
        for (key, value) in iterator(kwargs):
            setattr(self, key, value)
class HeuristicCompressionAnalyzer(Module):
    '''
    Performs analysis and attempts to interpret the results.

    High-entropy regions reported by the Entropy module are re-scanned in
    small blocks with a chi-square test; data that frequently fails the
    test is classified as compressed, otherwise as encrypted.
    '''

    # Chi-square scan block size (bytes), unless overridden via --block.
    BLOCK_SIZE = 32
    # Chi-square critical value above which a block is "low entropy".
    CHI_CUTOFF = 512
    # Default entropy level that marks the start of an interesting region.
    ENTROPY_TRIGGER = .90
    # Regions smaller than this are ignored.
    MIN_BLOCK_SIZE = 4096
    # Margin added/subtracted around region boundaries.
    BLOCK_OFFSET = 1024
    # Block size handed to the Entropy dependency.
    ENTROPY_BLOCK_SIZE = 1024

    TITLE = "Heuristic Compression"

    DEPENDS = [
        Dependency(name='Entropy',
                   attribute='entropy',
                   kwargs={'enabled' : True, 'do_plot' : False, 'display_results' : False, 'block_size' : ENTROPY_BLOCK_SIZE}),
    ]

    CLI = [
        Option(short='H',
               long='heuristic',
               kwargs={'enabled' : True},
               description='Heuristically classify high entropy data'),
        Option(short='a',
               long='trigger',
               kwargs={'trigger_level' : 0},
               type=float,
               description='Set the entropy trigger level (0.0 - 1.0)'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='trigger_level', default=ENTROPY_TRIGGER),
    ]

    def init(self):
        # Maps file name -> list of EntropyBlock regions to analyze.
        self.blocks = {}

        self.HEADER[-1] = "HEURISTIC ENTROPY ANALYSIS"

        # Honor a user-specified block size; otherwise use the default.
        self.block_size = self.config.block if self.config.block else self.BLOCK_SIZE

        # Walk the entropy results and coalesce consecutive high-entropy
        # offsets into EntropyBlock spans, one list per scanned file.
        for result in self.entropy.results:
            if not has_key(self.blocks, result.file.name):
                self.blocks[result.file.name] = []

            file_blocks = self.blocks[result.file.name]

            if result.entropy >= self.trigger_level and (not file_blocks or file_blocks[-1].end is not None):
                # Entropy crossed above the trigger: open a new region.
                file_blocks.append(EntropyBlock(start=result.offset + self.BLOCK_OFFSET))
            elif result.entropy < self.trigger_level and file_blocks and file_blocks[-1].end is None:
                # Entropy dropped below the trigger: close the open region.
                file_blocks[-1].end = result.offset - self.BLOCK_OFFSET

    def run(self):
        for fp in iter(self.next_file, None):
            if not has_key(self.blocks, fp.name):
                continue

            self.header()

            for block in self.blocks[fp.name]:
                # A region with no recorded end runs to the end of the file.
                if block.end is None:
                    block.length = fp.offset + fp.length - block.start
                else:
                    block.length = block.end - block.start

                if block.length >= self.MIN_BLOCK_SIZE:
                    self.analyze(fp, block)

            self.footer()

    def analyze(self, fp, block):
        '''
        Perform analysis and interpretation.
        '''
        position = 0
        low_entropy_blocks = 0
        chi = ChiSquare()

        fp.seek(block.start)

        # Chi-square test each self.block_size chunk inside the region.
        while position < block.length:
            (data, dlen) = fp.read_block()
            if not data:
                break

            offset = 0
            while offset < dlen:
                chi.reset()

                chunk = data[offset:offset + self.block_size]
                if len(chunk) < self.block_size:
                    break

                chi.update(chunk)

                # A high critical value means the chunk deviates from the
                # uniform distribution, i.e., is not ideally random.
                if chi.chisq() >= self.CHI_CUTOFF:
                    low_entropy_blocks += 1

                offset += self.block_size

                if (offset + position) > block.length:
                    break

            position += dlen

        # Truly encrypted data should look uniformly random throughout;
        # any failing chunk suggests compression instead.
        if low_entropy_blocks > 0:
            verdict = 'Moderate entropy data, best guess: compressed'
        else:
            verdict = 'High entropy data, best guess: encrypted'

        desc = '%s, size: %d, %d low entropy blocks' % (verdict, block.length, low_entropy_blocks)
        self.result(offset=block.start, description=desc, file=fp)
...@@ -2,32 +2,32 @@ import binwalk.core.C ...@@ -2,32 +2,32 @@ import binwalk.core.C
from binwalk.core.common import * from binwalk.core.common import *
class Plugin(object):
    '''
    Searches for and validates compress'd data.
    '''

    # Number of bytes read from the file for validation.
    READ_SIZE = 64

    # Native library name and the functions used from it.
    COMPRESS42 = "compress42"
    COMPRESS42_FUNCTIONS = [
        binwalk.core.C.Function(name="is_compressed", type=bool),
    ]

    def __init__(self, module):
        self.fd = None
        self.comp = None

        # Only load the native validator for signature scans.
        if module.name == 'Signature':
            self.comp = binwalk.core.C.Library(self.COMPRESS42, self.COMPRESS42_FUNCTIONS)

    def scan(self, result):
        # Nothing to do unless the native library was loaded.
        if not self.comp:
            return

        if result.file and result.description.lower().startswith("compress'd data"):
            # Read the suspect bytes at the reported offset...
            fd = BlockFile(result.file.name, "r", offset=result.offset, length=self.READ_SIZE)
            compressed_data = fd.read(self.READ_SIZE)
            fd.close()

            # ...and invalidate the result if the native check rejects them.
            if not self.comp.is_compressed(compressed_data, len(compressed_data)):
                result.valid = False
class Plugin(object):
    '''
    Ensures that ASCII CPIO archive entries only get extracted once.
    '''

    def __init__(self, module):
        '''
        @module - The module instance this plugin is attached to.
        '''
        # True while inside an archive: first entry seen, trailer not yet seen.
        self.found_archive = False
        self.enabled = (module.name == 'Signature')

    def pre_scan(self, module):
        # Be sure to re-set this at the beginning of every scan
        self.found_archive = False

    def scan(self, result):
        if self.enabled and result.valid:
            # ASCII CPIO archives consist of multiple entries, ending with an entry named 'TRAILER!!!'.
            # Displaying each entry is useful, as it shows what files are contained in the archive,
            # but we only want to extract the archive when the first entry is found.
            if result.description.startswith('ASCII cpio archive'):
                if not self.found_archive:
                    # This is the first entry. Set found_archive and allow the scan to continue normally.
                    self.found_archive = True
                    result.extract = True
                    # Bug fix: return here so the first entry keeps extract=True;
                    # falling through would immediately clear it below.
                    return
                elif 'TRAILER!!!' in result.description:
                    # This is the last entry, un-set found_archive.
                    # Bug fix: this previously read results['description'], which
                    # raised a NameError ('results' is undefined).
                    self.found_archive = False

                # The first entry has already been found and this is the last entry, or the last entry
                # has not yet been found. Don't extract.
                result.extract = False
...@@ -4,61 +4,61 @@ from binwalk.core.compat import * ...@@ -4,61 +4,61 @@ from binwalk.core.compat import *
from binwalk.core.common import BlockFile from binwalk.core.common import BlockFile
class Plugin(object):
    '''
    Finds and extracts modified LZMA files commonly found in cable modems.
    Based on Bernardo Rodrigues' work: http://w00tsec.blogspot.com/2013/11/unpacking-firmware-images-from-cable.html
    '''

    # Fake 64-bit "uncompressed size" field injected after the 5-byte LZMA
    # properties header of modified cable modem images.
    FAKE_LZMA_SIZE = "\x00\x00\x00\x10\x00\x00\x00\x00"
    SIGNATURE = "lzma compressed data"

    def __init__(self, module):
        self.original_cmd = ''
        self.enabled = (module.name == 'Signature')
        self.module = module

        if self.enabled:
            # Replace the existing LZMA extraction command with our own
            for rule in self.module.extractor.get_rules():
                if rule['regex'].match(self.SIGNATURE):
                    self.original_cmd = rule['cmd']
                    rule['cmd'] = self.lzma_cable_extractor
                    break

    def lzma_cable_extractor(self, fname):
        # Try extracting the LZMA file without modification first
        if self.module.extractor.execute(self.original_cmd, fname):
            return

        # Standard extraction failed; rewrite the file with a fake
        # uncompressed-size field and try again.
        (base, ext) = os.path.splitext(fname)
        out_name = base + '-patched' + ext
        fp_out = BlockFile(out_name, 'w')
        # Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored
        fp_in = self.module.config.open_file(fname, offset=0, length=0)
        fp_in.set_block_size(peek=0)

        i = 0
        while i < fp_in.length:
            (data, dlen) = fp_in.read_block()

            if i == 0:
                # Splice the fake size field in after the 5-byte LZMA header.
                out_data = data[0:5] + self.FAKE_LZMA_SIZE + data[5:]
            else:
                out_data = data

            fp_out.write(out_data)
            i += dlen

        fp_in.close()
        fp_out.close()

        # Overwrite the original file so that it can be cleaned up if -r was specified
        shutil.move(out_name, fname)
        self.module.extractor.execute(self.original_cmd, fname)

    def scan(self, result):
        # The modified cable modem LZMA headers all have valid dictionary sizes and a properties byte of 0x5D.
        if self.enabled and result.description.lower().startswith(self.SIGNATURE) and "invalid uncompressed size" in result.description:
            if "properties: 0x5D" in result.description and "invalid dictionary size" not in result.description:
                # Looks like a cable modem LZMA variant: re-validate the result
                # and reword the size complaint.
                result.valid = True
                result.description = result.description.split("invalid uncompressed size")[0] + "missing uncompressed size"
...@@ -2,39 +2,39 @@ import binwalk.core.C ...@@ -2,39 +2,39 @@ import binwalk.core.C
from binwalk.core.common import BlockFile from binwalk.core.common import BlockFile
class Plugin(object):
    '''
    Searches for and validates zlib compressed data.
    '''

    MIN_DECOMP_SIZE = 16 * 1024
    # Amount of data read and handed to the native inflater.
    MAX_DATA_SIZE = 33 * 1024

    # Native library name and the functions used from it.
    TINFL = "tinfl"
    TINFL_FUNCTIONS = [
        binwalk.core.C.Function(name="is_deflated", type=int),
    ]

    def __init__(self, module):
        self.tinfl = None
        self.module = module

        # Only initialize this plugin if this is a signature scan
        if module.name == 'Signature':
            # Load libtinfl.so
            self.tinfl = binwalk.core.C.Library(self.TINFL, self.TINFL_FUNCTIONS)

    def scan(self, result):
        # If this result is a zlib signature match, try to decompress the data
        if not (self.tinfl and result.file and result.description.lower().startswith('zlib')):
            return

        # Seek to and read the suspected zlib data
        fd = self.module.config.open_file(result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE)
        data = fd.read(self.MAX_DATA_SIZE)
        fd.close()

        # Check if this is valid zlib data
        decomp_size = self.tinfl.is_deflated(data, len(data), 1)
        if decomp_size > 0:
            result.description += ", uncompressed size >= %d" % decomp_size
        else:
            result.valid = False
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment