Commit a5217d62 by devttys0

s/\t/ /g

parent 84e83d0f
......@@ -6,126 +6,126 @@ import ctypes.util
from binwalk.core.compat import *
class Function(object):
Container class for defining library functions.
def __init__(self, **kwargs): = None
self.type = int
Container class for defining library functions.
def __init__(self, **kwargs): = None
self.type = int
for (k, v) in iterator(kwargs):
setattr(self, k, v)
for (k, v) in iterator(kwargs):
setattr(self, k, v)
class FunctionHandler(object):
Class for abstracting function calls via ctypes and handling Python 2/3 compatibility issues.
bytes : ctypes.c_char_p,
str : ctypes.c_char_p,
int : ctypes.c_int,
float : ctypes.c_float,
bool : ctypes.c_int,
None : ctypes.c_int,
None : int,
int : int,
float : float,
bool : bool,
str : bytes2str,
bytes : str2bytes,
def __init__(self, library, function):
Class constructor.
@library - Library handle as returned by ctypes.cdll.LoadLibrary.
@function - An instance of the binwalk.core.C.Function class.
Returns None.
self.retype = function.type
self.function = getattr(library,
if has_key(self.PY2CTYPES, self.retype):
self.function.restype = self.PY2CTYPES[self.retype]
self.retval_converter = self.RETVAL_CONVERTERS[self.retype]
raise Exception("Unknown return type: '%s'" % self.retype)
def run(self, *args):
Executes the library function, handling Python 2/3 compatibility and properly converting the return type.
@*args - Library function arguments.
Returns the return value from the library function.
args = list(args)
# Python3 expects a bytes object for char *'s, not a str.
# This allows us to pass either, regardless of the Python version.
for i in range(0, len(args)):
if isinstance(args[i], str):
args[i] = str2bytes(args[i])
return self.retval_converter(self.function(*args))
Class for abstracting function calls via ctypes and handling Python 2/3 compatibility issues.
bytes : ctypes.c_char_p,
str : ctypes.c_char_p,
int : ctypes.c_int,
float : ctypes.c_float,
bool : ctypes.c_int,
None : ctypes.c_int,
None : int,
int : int,
float : float,
bool : bool,
str : bytes2str,
bytes : str2bytes,
def __init__(self, library, function):
Class constructor.
@library - Library handle as returned by ctypes.cdll.LoadLibrary.
@function - An instance of the binwalk.core.C.Function class.
Returns None.
self.retype = function.type
self.function = getattr(library,
if has_key(self.PY2CTYPES, self.retype):
self.function.restype = self.PY2CTYPES[self.retype]
self.retval_converter = self.RETVAL_CONVERTERS[self.retype]
raise Exception("Unknown return type: '%s'" % self.retype)
def run(self, *args):
Executes the library function, handling Python 2/3 compatibility and properly converting the return type.
@*args - Library function arguments.
Returns the return value from the library function.
args = list(args)
# Python3 expects a bytes object for char *'s, not a str.
# This allows us to pass either, regardless of the Python version.
for i in range(0, len(args)):
if isinstance(args[i], str):
args[i] = str2bytes(args[i])
return self.retval_converter(self.function(*args))
class Library(object):
Class for loading the specified library via ctypes.
Class for loading the specified library via ctypes.
def __init__(self, library, functions):
Class constructor.
def __init__(self, library, functions):
Class constructor.
@library - Library name (e.g., 'magic' for libmagic).
@functions - A dictionary of function names and their return types (e.g., {'magic_buffer' : str})
@library - Library name (e.g., 'magic' for libmagic).
@functions - A dictionary of function names and their return types (e.g., {'magic_buffer' : str})
Returns None.
self.library = ctypes.cdll.LoadLibrary(self.find_library(library))
if not self.library:
raise Exception("Failed to load library '%s'" % library)
Returns None.
self.library = ctypes.cdll.LoadLibrary(self.find_library(library))
if not self.library:
raise Exception("Failed to load library '%s'" % library)
for function in functions:
f = FunctionHandler(self.library, function)
for function in functions:
f = FunctionHandler(self.library, function)
def find_library(self, library):
Locates the specified library.
def find_library(self, library):
Locates the specified library.
@library - Library name (e.g., 'magic' for libmagic).
@library - Library name (e.g., 'magic' for libmagic).
Returns a string to be passed to ctypes.cdll.LoadLibrary.
lib_path = None
system_paths = {
'linux' : ['/usr/local/lib/' % library],
'linux2' : ['/usr/local/lib/' % library],
'linux3' : ['/usr/local/lib/' % library],
'darwin' : ['/opt/local/lib/lib%s.dylib' % library,
'/usr/local/lib/lib%s.dylib' % library,
] + glob.glob('/usr/local/Cellar/lib%s/*/lib/lib%s.dylib' % (library, library)),
'win32' : ['%s.dll' % library]
lib_path = ctypes.util.find_library(library)
if not lib_path:
for path in system_paths[sys.platform]:
if os.path.exists(path):
lib_path = path
if not lib_path:
raise Exception("Failed to locate library '%s'" % library)
return lib_path
Returns a string to be passed to ctypes.cdll.LoadLibrary.
lib_path = None
system_paths = {
'linux' : ['/usr/local/lib/' % library],
'linux2' : ['/usr/local/lib/' % library],
'linux3' : ['/usr/local/lib/' % library],
'darwin' : ['/opt/local/lib/lib%s.dylib' % library,
'/usr/local/lib/lib%s.dylib' % library,
] + glob.glob('/usr/local/Cellar/lib%s/*/lib/lib%s.dylib' % (library, library)),
'win32' : ['%s.dll' % library]
lib_path = ctypes.util.find_library(library)
if not lib_path:
for path in system_paths[sys.platform]:
if os.path.exists(path):
lib_path = path
if not lib_path:
raise Exception("Failed to locate library '%s'" % library)
return lib_path
......@@ -9,383 +9,383 @@ from binwalk.core.compat import *
# This allows other modules/scripts to subclass BlockFile from a custom class. Defaults to io.FileIO.
if has_key(__builtins__, 'BLOCK_FILE_PARENT_CLASS'):
def file_md5(file_name):
Generate an MD5 hash of the specified file.
@file_name - The file to hash.
Generate an MD5 hash of the specified file.
@file_name - The file to hash.
Returns an MD5 hex digest string.
md5 = hashlib.md5()
Returns an MD5 hex digest string.
md5 = hashlib.md5()
with open(file_name, 'rb') as f:
for chunk in iter(lambda:*md5.block_size), b''):
with open(file_name, 'rb') as f:
for chunk in iter(lambda:*md5.block_size), b''):
return md5.hexdigest()
return md5.hexdigest()
def file_size(filename):
Obtains the size of a given file.
@filename - Path to the file.
Returns the size of the file.
# Using open/lseek works on both regular files and block devices
fd =, os.O_RDONLY)
return os.lseek(fd, 0, os.SEEK_END)
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e)))
Obtains the size of a given file.
@filename - Path to the file.
Returns the size of the file.
# Using open/lseek works on both regular files and block devices
fd =, os.O_RDONLY)
return os.lseek(fd, 0, os.SEEK_END)
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e)))
def strip_quoted_strings(string):
Strips out data in between double quotes.
@string - String to strip.
Returns a sanitized string.
# This regex removes all quoted data from string.
# Note that this removes everything in between the first and last double quote.
# This is intentional, as printed (and quoted) strings from a target file may contain
# double quotes, and this function should ignore those. However, it also means that any
# data between two quoted strings (ex: '"quote 1" you won't see me "quote 2"') will also be stripped.
return re.sub(r'\"(.*)\"', "", string)
Strips out data in between double quotes.
@string - String to strip.
Returns a sanitized string.
# This regex removes all quoted data from string.
# Note that this removes everything in between the first and last double quote.
# This is intentional, as printed (and quoted) strings from a target file may contain
# double quotes, and this function should ignore those. However, it also means that any
# data between two quoted strings (ex: '"quote 1" you won't see me "quote 2"') will also be stripped.
return re.sub(r'\"(.*)\"', "", string)
def get_quoted_strings(string):
Returns a string comprised of all data in between double quotes.
@string - String to get quoted data from.
Returns a string of quoted data on success.
Returns a blank string if no quoted data is present.
# This regex grabs all quoted data from string.
# Note that this gets everything in between the first and last double quote.
# This is intentional, as printed (and quoted) strings from a target file may contain
# double quotes, and this function should ignore those. However, it also means that any
# data between two quoted strings (ex: '"quote 1" non-quoted data "quote 2"') will also be included.
return re.findall(r'\"(.*)\"', string)[0]
except KeyboardInterrupt as e:
raise e
except Exception:
return ''
Returns a string comprised of all data in between double quotes.
@string - String to get quoted data from.
Returns a string of quoted data on success.
Returns a blank string if no quoted data is present.
# This regex grabs all quoted data from string.
# Note that this gets everything in between the first and last double quote.
# This is intentional, as printed (and quoted) strings from a target file may contain
# double quotes, and this function should ignore those. However, it also means that any
# data between two quoted strings (ex: '"quote 1" non-quoted data "quote 2"') will also be included.
return re.findall(r'\"(.*)\"', string)[0]
except KeyboardInterrupt as e:
raise e
except Exception:
return ''
def unique_file_name(base_name, extension=''):
Creates a unique file name based on the specified base name.
Creates a unique file name based on the specified base name.
@base_name - The base name to use for the unique file name.
@extension - The file extension to use for the unique file name.
@base_name - The base name to use for the unique file name.
@extension - The file extension to use for the unique file name.
Returns a unique file string.
idcount = 0
if extension and not extension.startswith('.'):
extension = '.%s' % extension
Returns a unique file string.
idcount = 0
if extension and not extension.startswith('.'):
extension = '.%s' % extension
fname = base_name + extension
fname = base_name + extension
while os.path.exists(fname):
fname = "%s-%d%s" % (base_name, idcount, extension)
idcount += 1
while os.path.exists(fname):
fname = "%s-%d%s" % (base_name, idcount, extension)
idcount += 1
return fname
return fname
def strings(filename, minimum=4):
A strings generator, similar to the Unix strings utility.
@filename - The file to search for strings in.
@minimum - The minimum string length to search for.
Yields printable ASCII strings from filename.
result = ""
with BlockFile(filename) as f:
while True:
(data, dlen) = f.read_block()
if not data:
for c in data:
if c in string.printable:
result += c
elif len(result) >= minimum:
yield result
result = ""
result = ""
A strings generator, similar to the Unix strings utility.
@filename - The file to search for strings in.
@minimum - The minimum string length to search for.
Yields printable ASCII strings from filename.
result = ""
with BlockFile(filename) as f:
while True:
(data, dlen) = f.read_block()
if not data:
for c in data:
if c in string.printable:
result += c
elif len(result) >= minimum:
yield result
result = ""
result = ""
class MathExpression(object):
Class for safely evaluating mathematical expressions from a string.
Stolen from:
ast.Add: op.add,
ast.Sub: op.sub,
ast.Mult: op.mul,
ast.Div: op.truediv,
ast.Pow: op.pow,
ast.BitXor: op.xor
def __init__(self, expression):
self.expression = expression
self.value = None
if expression:
self.value = self.evaluate(self.expression)
except KeyboardInterrupt as e:
raise e
except Exception:
def evaluate(self, expr):
return self._eval(ast.parse(expr).body[0].value)
def _eval(self, node):
if isinstance(node, ast.Num): # <number>
return node.n
elif isinstance(node, ast.operator): # <operator>
return self.OPERATORS[type(node)]
elif isinstance(node, ast.BinOp): # <left> <operator> <right>
return self._eval(node.op)(self._eval(node.left), self._eval(node.right))
raise TypeError(node)
Class for safely evaluating mathematical expressions from a string.
Stolen from:
ast.Add: op.add,
ast.Sub: op.sub,
ast.Mult: op.mul,
ast.Div: op.truediv,
ast.Pow: op.pow,
ast.BitXor: op.xor
def __init__(self, expression):
self.expression = expression
self.value = None
if expression:
self.value = self.evaluate(self.expression)
except KeyboardInterrupt as e:
raise e
except Exception:
def evaluate(self, expr):
return self._eval(ast.parse(expr).body[0].value)
def _eval(self, node):
if isinstance(node, ast.Num): # <number>
return node.n
elif isinstance(node, ast.operator): # <operator>
return self.OPERATORS[type(node)]
elif isinstance(node, ast.BinOp): # <left> <operator> <right>
return self._eval(node.op)(self._eval(node.left), self._eval(node.right))
raise TypeError(node)
Abstraction class for accessing binary files.
This class overrides io.FileIO's read and write methods. This guarantees two things:
1. All requested data will be read/written via the read and write methods.
2. All reads return a str object and all writes can accept either a str or a
bytes object, regardless of the Python interpreter version.
However, the downside is that other io.FileIO methods won't work properly in Python 3,
namely things that are wrappers around read (e.g., readline, readlines, etc).
This class also provides a read_block method, which is used by binwalk to read in a
block of data, plus some additional data (MAX_TRAILING_SIZE), but on the next block read
pick up at the end of the previous data block (not the end of the additional data). This
is necessary for scans where a signature may span a block boundary.
The decision to force read to return a str object instead of a bytes object is questionable
for Python 3, it seemed the best way to abstract differences in Python 2/3 from the rest
of the code (especially for people writing plugins) and to add Python 3 support with
minimal code change.
# The MAX_TRAILING_SIZE limits the amount of data available to a signature.
# While most headers/signatures are far less than this value, some may reference
# pointers in the header structure which may point well beyond the header itself.
# Passing the entire remaining buffer to libmagic is resource intensive and will
# significantly slow the scan; this value represents a reasonable buffer size to
# pass to libmagic which will not drastically affect scan time.
# Max number of bytes to process at one time. This needs to be large enough to
# limit disk I/O, but small enough to limit the size of processed data blocks.
DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
Class constructor.
@fname - Path to the file to be opened.
@mode - Mode to open the file in (default: 'r').
@length - Maximum number of bytes to read from the file via self.block_read().
@offset - Offset at which to start reading from the file.
@block - Size of data block to read (excluding any trailing size),
@peek - Size of trailing data to append to the end of each block.
@swap - Swap every n bytes of data.
Returns None.
self.total_read = 0
self.swap_size = swap
self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE
# Python 2.6 doesn't like modes like 'rb' or 'wb'
mode = mode.replace('b', '')
self.size = file_size(fname)
except KeyboardInterrupt as e:
raise e
except Exception:
self.size = 0
if offset < 0:
self.offset = self.size + offset
self.offset = offset
if self.offset < 0:
self.offset = 0
elif self.offset > self.size:
self.offset = self.size
if offset < 0:
self.length = offset * -1
elif length:
self.length = length
self.length = self.size - offset
if self.length < 0:
self.length = 0
elif self.length > self.size:
self.length = self.size
if block is not None:
self.block_read_size = block
self.base_block_size = self.block_read_size
if peek is not None:
self.block_peek_size = peek
self.base_peek_size = self.block_peek_size
super(self.__class__, self).__init__(fname, mode)
# Work around for python 2.6 where FileIO._name is not defined
except AttributeError:
self._name = fname
def _swap_data_block(self, block):
Reverses every self.swap_size bytes inside the specified data block.
Size of data block must be a multiple of self.swap_size.
@block - The data block to swap.
Returns a swapped string.
i = 0
data = ""
if self.swap_size > 0:
while i < len(block):
data += block[i:i+self.swap_size][::-1]
i += self.swap_size
data = block
return data
def reset(self):
self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
def set_block_size(self, block=None, peek=None):
if block is not None:
self.block_read_size = block
if peek is not None:
self.block_peek_size = peek
def write(self, data):
Writes data to the opened file.
io.FileIO.write does not guarantee that all data will be written;
this method overrides io.FileIO.write and does guarantee that all data will be written.
Returns the number of bytes written.
n = 0
l = len(data)
data = str2bytes(data)
while n < l:
n += super(self.__class__, self).write(data[n:])
return n
def read(self, n=-1):
Reads up to n bytes of data (or to EOF if n is not specified).
Will not read more than self.length bytes. io.FileIO.read does not guarantee that all requested data will be read;
this method overrides io.FileIO.read and does guarantee that all data will be read.
Returns a str object containing the read data.
l = 0
data = b''
if self.total_read < self.length:
# Don't read more than self.length bytes from the file
if (self.total_read + n) > self.length:
n = self.length - self.total_read
while n < 0 or l < n:
tmp = super(self.__class__, self).read(n-l)
if tmp:
data += tmp
l += len(tmp)
self.total_read += len(data)
return self._swap_data_block(bytes2str(data))
def peek(self, n=-1):
Peeks at data in file.
pos = self.tell()
data =
return data
def seek(self, n, whence=os.SEEK_SET):
if whence == os.SEEK_SET:
self.total_read = n - self.offset
elif whence == os.SEEK_CUR:
self.total_read += n
elif whence == os.SEEK_END:
self.total_read = self.size + n
super(self.__class__, self).seek(n, whence)
def read_block(self):
Reads in a block of data from the target file.
Returns a tuple of (str(file block data), block data length).
data =
dlen = len(data)
data += self.peek(self.block_peek_size)
return (data, dlen)
Abstraction class for accessing binary files.
This class overrides io.FileIO's read and write methods. This guarantees two things:
1. All requested data will be read/written via the read and write methods.
2. All reads return a str object and all writes can accept either a str or a
bytes object, regardless of the Python interpreter version.
However, the downside is that other io.FileIO methods won't work properly in Python 3,
namely things that are wrappers around read (e.g., readline, readlines, etc).
This class also provides a read_block method, which is used by binwalk to read in a
block of data, plus some additional data (MAX_TRAILING_SIZE), but on the next block read
pick up at the end of the previous data block (not the end of the additional data). This
is necessary for scans where a signature may span a block boundary.
The decision to force read to return a str object instead of a bytes object is questionable
for Python 3, it seemed the best way to abstract differences in Python 2/3 from the rest
of the code (especially for people writing plugins) and to add Python 3 support with
minimal code change.
# The MAX_TRAILING_SIZE limits the amount of data available to a signature.
# While most headers/signatures are far less than this value, some may reference
# pointers in the header structure which may point well beyond the header itself.
# Passing the entire remaining buffer to libmagic is resource intensive and will
# significantly slow the scan; this value represents a reasonable buffer size to
# pass to libmagic which will not drastically affect scan time.
# Max number of bytes to process at one time. This needs to be large enough to
# limit disk I/O, but small enough to limit the size of processed data blocks.
DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
Class constructor.
@fname - Path to the file to be opened.
@mode - Mode to open the file in (default: 'r').
@length - Maximum number of bytes to read from the file via self.block_read().
@offset - Offset at which to start reading from the file.
@block - Size of data block to read (excluding any trailing size),
@peek - Size of trailing data to append to the end of each block.
@swap - Swap every n bytes of data.
Returns None.
self.total_read = 0
self.swap_size = swap
self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE
# Python 2.6 doesn't like modes like 'rb' or 'wb'
mode = mode.replace('b', '')
self.size = file_size(fname)
except KeyboardInterrupt as e:
raise e
except Exception:
self.size = 0
if offset < 0:
self.offset = self.size + offset
self.offset = offset
if self.offset < 0:
self.offset = 0
elif self.offset > self.size:
self.offset = self.size
if offset < 0:
self.length = offset * -1
elif length:
self.length = length
self.length = self.size - offset
if self.length < 0:
self.length = 0
elif self.length > self.size:
self.length = self.size
if block is not None:
self.block_read_size = block
self.base_block_size = self.block_read_size
if peek is not None:
self.block_peek_size = peek
self.base_peek_size = self.block_peek_size
super(self.__class__, self).__init__(fname, mode)
# Work around for python 2.6 where FileIO._name is not defined
except AttributeError:
self._name = fname
def _swap_data_block(self, block):
Reverses every self.swap_size bytes inside the specified data block.
Size of data block must be a multiple of self.swap_size.
@block - The data block to swap.
Returns a swapped string.
i = 0
data = ""
if self.swap_size > 0:
while i < len(block):
data += block[i:i+self.swap_size][::-1]
i += self.swap_size
data = block
return data
def reset(self):
self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
def set_block_size(self, block=None, peek=None):
if block is not None:
self.block_read_size = block
if peek is not None:
self.block_peek_size = peek
def write(self, data):
Writes data to the opened file.
io.FileIO.write does not guarantee that all data will be written;
this method overrides io.FileIO.write and does guarantee that all data will be written.
Returns the number of bytes written.
n = 0
l = len(data)
data = str2bytes(data)
while n < l:
n += super(self.__class__, self).write(data[n:])
return n
def read(self, n=-1):
Reads up to n bytes of data (or to EOF if n is not specified).
Will not read more than self.length bytes. io.FileIO.read does not guarantee that all requested data will be read;
this method overrides io.FileIO.read and does guarantee that all data will be read.
Returns a str object containing the read data.
l = 0
data = b''
if self.total_read < self.length:
# Don't read more than self.length bytes from the file
if (self.total_read + n) > self.length:
n = self.length - self.total_read
while n < 0 or l < n:
tmp = super(self.__class__, self).read(n-l)
if tmp:
data += tmp
l += len(tmp)
self.total_read += len(data)
return self._swap_data_block(bytes2str(data))
def peek(self, n=-1):
Peeks at data in file.
pos = self.tell()
data =
return data
def seek(self, n, whence=os.SEEK_SET):
if whence == os.SEEK_SET:
self.total_read = n - self.offset
elif whence == os.SEEK_CUR:
self.total_read += n
elif whence == os.SEEK_END:
self.total_read = self.size + n
super(self.__class__, self).seek(n, whence)
def read_block(self):
Reads in a block of data from the target file.
Returns a tuple of (str(file block data), block data length).
data =
dlen = len(data)
data += self.peek(self.block_peek_size)
return (data, dlen)
......@@ -7,68 +7,68 @@ import string
PY_MAJOR_VERSION = sys.version_info[0]
string.letters = string.ascii_letters
string.letters = string.ascii_letters
def iterator(dictionary):
For cross compatibility between Python 2 and Python 3 dictionaries.
return dictionary.items()
return dictionary.iteritems()
For cross compatibility between Python 2 and Python 3 dictionaries.
return dictionary.items()
return dictionary.iteritems()
def has_key(dictionary, key):
For cross compatibility between Python 2 and Python 3 dictionaries.
return key in dictionary
return dictionary.has_key(key)
For cross compatibility between Python 2 and Python 3 dictionaries.
return key in dictionary
return dictionary.has_key(key)
def get_keys(dictionary):
For cross compatibility between Python 2 and Python 3 dictionaries.
return list(dictionary.keys())
return dictionary.keys()
For cross compatibility between Python 2 and Python 3 dictionaries.
return list(dictionary.keys())
return dictionary.keys()
def str2bytes(string):
For cross compatibility between Python 2 and Python 3 strings.
if isinstance(string, type('')) and PY_MAJOR_VERSION > 2:
return bytes(string, 'latin1')
return string
For cross compatibility between Python 2 and Python 3 strings.
if isinstance(string, type('')) and PY_MAJOR_VERSION > 2:
return bytes(string, 'latin1')
return string
def bytes2str(bs):
For cross compatibility between Python 2 and Python 3 strings.
if isinstance(bs, type(b'')) and PY_MAJOR_VERSION > 2:
return bs.decode('latin1')
return bs
For cross compatibility between Python 2 and Python 3 strings.
if isinstance(bs, type(b'')) and PY_MAJOR_VERSION > 2:
return bs.decode('latin1')
return bs
def string_decode(string):
For cross compatibility between Python 2 and Python 3 strings.
return bytes(string, 'utf-8').decode('unicode_escape')
return string.decode('string_escape')
For cross compatibility between Python 2 and Python 3 strings.
return bytes(string, 'utf-8').decode('unicode_escape')
return string.decode('string_escape')
def user_input(prompt=''):
For getting raw user input in Python 2 and 3.
return input(prompt)
return raw_input(prompt)
For getting raw user input in Python 2 and 3.
return input(prompt)
return raw_input(prompt)
......@@ -6,171 +6,171 @@ from binwalk.core.compat import *
class Display(object):
def __init__(self, quiet=False, verbose=False, log=None, csv=False, fit_to_screen=False, filter=None):
self.quiet = quiet
self.filter = filter
self.verbose = verbose
self.fit_to_screen = fit_to_screen
self.fp = None
self.csv = None
self.num_columns = 0
self.custom_verbose_format = ""
self.custom_verbose_args = []
if log:
self.fp = open(log, "w")
if csv:
self.csv = pycsv.writer(self.fp)
def format_strings(self, header, result):
self.result_format = result
self.header_format = header
if self.num_columns == 0:
self.num_columns = len(header.split())
def log(self, fmt, columns):
if self.fp:
if self.csv:
self.fp.write(fmt % tuple(columns))
def add_custom_header(self, fmt, args):
self.custom_verbose_format = fmt
self.custom_verbose_args = args
def header(self, *args, **kwargs):
file_name = None
self.num_columns = len(args)
if has_key(kwargs, 'file_name'):
file_name = kwargs['file_name']
if self.verbose and file_name:
md5sum = binwalk.core.common.file_md5(file_name)
timestamp ="%Y-%m-%d %H:%M:%S")
if self.csv:
self.log("", ["FILE", "MD5SUM", "TIMESTAMP"])
self.log("", [file_name, md5sum, timestamp])
self._fprint("%s", "\n", csv=False)
self._fprint("Scan Time: %s\n", [timestamp], csv=False, filter=False)
self._fprint("Target File: %s\n", [file_name], csv=False, filter=False)
self._fprint("MD5 Checksum: %s\n", [md5sum], csv=False, filter=False)
if self.custom_verbose_format and self.custom_verbose_args:
self._fprint(self.custom_verbose_format, self.custom_verbose_args, csv=False, filter=False)
self._fprint("%s", "\n", csv=False, filter=False)
self._fprint(self.header_format, args, filter=False)
self._fprint("%s", ["-" * self.HEADER_WIDTH + "\n"], csv=False, filter=False)
def result(self, *args):
# Convert to list for item assignment
args = list(args)
# Replace multiple spaces with single spaces. This is to prevent accidentally putting
# four spaces in the description string, which would break auto-formatting.
for i in range(len(args)):
if isinstance(args[i], str):
while " " in args[i]:
args[i] = args[i].replace(" " , " ")
self._fprint(self.result_format, tuple(args))
def footer(self):
self._fprint("%s", "\n", csv=False, filter=False)
def _fprint(self, fmt, columns, csv=True, stdout=True, filter=True):
line = fmt % tuple(columns)
if not filter or self.filter.valid_result(line):
if not self.quiet and stdout:
sys.stdout.write(self._format_line(line.strip()) + "\n")
if self.fp and not (self.csv and not csv):
self.log(fmt, columns)
def _append_to_data_parts(self, data, start, end):
Intelligently appends data to self.string_parts.
For use by self._format.
while data[start] == ' ':
start += 1
if start == end:
end = len(data[start:])
except KeyboardInterrupt as e:
raise e
except Exception:
except KeyboardInterrupt as e:
raise e
except Exception:
return start
def _format_line(self, line):
Formats a line of text to fit in the terminal window.
For Tim.
offset = 0
space_offset = 0
self.string_parts = []
delim = '\n'
if self.fit_to_screen and len(line) > self.SCREEN_WIDTH:
line_columns = line.split(None, self.num_columns-1)
if line_columns:
delim = '\n' + ' ' * line.rfind(line_columns[-1])
while len(line[offset:]) > self.SCREEN_WIDTH:
space_offset = line[offset:offset+self.HEADER_WIDTH].rfind(' ')
if space_offset == -1 or space_offset == 0:
space_offset = self.SCREEN_WIDTH
self._append_to_data_parts(line, offset, offset+space_offset)
offset += space_offset
self._append_to_data_parts(line, offset, offset+len(line[offset:]))
return delim.join(self.string_parts)
def _configure_formatting(self):
Configures output formatting, and fitting output to the current terminal width.
Returns None.
self.format_strings(self.DEFAULT_FORMAT, self.DEFAULT_FORMAT)
if self.fit_to_screen:
import fcntl
import struct
import termios
# Get the terminal window width
hw = struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234'))
self.SCREEN_WIDTH = self.HEADER_WIDTH = hw[1]
except KeyboardInterrupt as e:
raise e
except Exception:
def __init__(self, quiet=False, verbose=False, log=None, csv=False, fit_to_screen=False, filter=None):
self.quiet = quiet
self.filter = filter
self.verbose = verbose
self.fit_to_screen = fit_to_screen
self.fp = None
self.csv = None
self.num_columns = 0
self.custom_verbose_format = ""
self.custom_verbose_args = []
if log:
self.fp = open(log, "w")
if csv:
self.csv = pycsv.writer(self.fp)
def format_strings(self, header, result):
self.result_format = result
self.header_format = header
if self.num_columns == 0:
self.num_columns = len(header.split())
def log(self, fmt, columns):
if self.fp:
if self.csv:
self.fp.write(fmt % tuple(columns))
def add_custom_header(self, fmt, args):
self.custom_verbose_format = fmt
self.custom_verbose_args = args
def header(self, *args, **kwargs):
file_name = None
self.num_columns = len(args)
if has_key(kwargs, 'file_name'):
file_name = kwargs['file_name']
if self.verbose and file_name:
md5sum = binwalk.core.common.file_md5(file_name)
timestamp ="%Y-%m-%d %H:%M:%S")
if self.csv:
self.log("", ["FILE", "MD5SUM", "TIMESTAMP"])
self.log("", [file_name, md5sum, timestamp])
self._fprint("%s", "\n", csv=False)
self._fprint("Scan Time: %s\n", [timestamp], csv=False, filter=False)
self._fprint("Target File: %s\n", [file_name], csv=False, filter=False)
self._fprint("MD5 Checksum: %s\n", [md5sum], csv=False, filter=False)
if self.custom_verbose_format and self.custom_verbose_args:
self._fprint(self.custom_verbose_format, self.custom_verbose_args, csv=False, filter=False)
self._fprint("%s", "\n", csv=False, filter=False)
self._fprint(self.header_format, args, filter=False)
self._fprint("%s", ["-" * self.HEADER_WIDTH + "\n"], csv=False, filter=False)
def result(self, *args):
# Convert to list for item assignment
args = list(args)
# Replace multiple spaces with single spaces. This is to prevent accidentally putting
# four spaces in the description string, which would break auto-formatting.
for i in range(len(args)):
if isinstance(args[i], str):
while " " in args[i]:
args[i] = args[i].replace(" " , " ")
self._fprint(self.result_format, tuple(args))
def footer(self):
self._fprint("%s", "\n", csv=False, filter=False)
def _fprint(self, fmt, columns, csv=True, stdout=True, filter=True):
line = fmt % tuple(columns)
if not filter or self.filter.valid_result(line):
if not self.quiet and stdout:
sys.stdout.write(self._format_line(line.strip()) + "\n")
if self.fp and not (self.csv and not csv):
self.log(fmt, columns)
def _append_to_data_parts(self, data, start, end):
Intelligently appends data to self.string_parts.
For use by self._format.
while data[start] == ' ':
start += 1
if start == end:
end = len(data[start:])
except KeyboardInterrupt as e:
raise e
except Exception:
except KeyboardInterrupt as e:
raise e
except Exception:
return start
def _format_line(self, line):
Formats a line of text to fit in the terminal window.
For Tim.
offset = 0
space_offset = 0
self.string_parts = []
delim = '\n'
if self.fit_to_screen and len(line) > self.SCREEN_WIDTH:
line_columns = line.split(None, self.num_columns-1)
if line_columns:
delim = '\n' + ' ' * line.rfind(line_columns[-1])
while len(line[offset:]) > self.SCREEN_WIDTH:
space_offset = line[offset:offset+self.HEADER_WIDTH].rfind(' ')
if space_offset == -1 or space_offset == 0:
space_offset = self.SCREEN_WIDTH
self._append_to_data_parts(line, offset, offset+space_offset)
offset += space_offset
self._append_to_data_parts(line, offset, offset+len(line[offset:]))
return delim.join(self.string_parts)
def _configure_formatting(self):
Configures output formatting, and fitting output to the current terminal width.
Returns None.
self.format_strings(self.DEFAULT_FORMAT, self.DEFAULT_FORMAT)
if self.fit_to_screen:
import fcntl
import struct
import termios
# Get the terminal window width
hw = struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234'))
self.SCREEN_WIDTH = self.HEADER_WIDTH = hw[1]
except KeyboardInterrupt as e:
raise e
except Exception:
......@@ -5,205 +5,205 @@ from binwalk.core.compat import *
class FilterType(object):
def __init__(self, **kwargs):
self.type = None
self.filter = None
self.regex = None
def __init__(self, **kwargs):
self.type = None
self.filter = None
self.regex = None
for (k,v) in iterator(kwargs):
setattr(self, k, v)
for (k,v) in iterator(kwargs):
setattr(self, k, v)
if self.regex is None:
self.regex = re.compile(self.filter)
if self.regex is None:
self.regex = re.compile(self.filter)
class FilterInclude(FilterType):
def __init__(self, **kwargs):
super(FilterInclude, self).__init__(**kwargs)
self.type = self.FILTER_INCLUDE
def __init__(self, **kwargs):
super(FilterInclude, self).__init__(**kwargs)
self.type = self.FILTER_INCLUDE
class FilterExclude(FilterType):
def __init__(self, **kwargs):
super(FilterExclude, self).__init__(**kwargs)
self.type = self.FILTER_EXCLUDE
def __init__(self, **kwargs):
super(FilterExclude, self).__init__(**kwargs)
self.type = self.FILTER_EXCLUDE
class Filter(object):
Class to filter results based on include/exclude rules and false positive detection.
An instance of this class is available via the Binwalk.filter object.
Note that all filter strings should be in lower case.
# If the result returned by libmagic is "data" or contains the text
# 'invalid' or a backslash are known to be invalid/false positives.
DATA_RESULT = "data"
INVALID_RESULTS = ["invalid", "\\"]
INVALID_RESULT = "invalid"
def __init__(self, show_invalid_results=False):
Class constructor.
@show_invalid_results - Set to True to display results marked as invalid.
Returns None.
self.filters = []
self.grep_filters = []
self.show_invalid_results = show_invalid_results
self.exclusive_filter = False = Signature(self)
def include(self, match, exclusive=True):
Adds a new filter which explicitly includes results that contain
the specified matching text.
@match - Regex, or list of regexs, to match.
@exclusive - If True, then results that do not explicitly contain
a FILTER_INCLUDE match will be excluded. If False,
signatures that contain the FILTER_INCLUDE match will
be included in the scan, but will not cause non-matching
results to be excluded.
Returns None.
if not isinstance(match, type([])):
matches = [match]
matches = match
for m in matches:
if m:
if exclusive and not self.exclusive_filter:
self.exclusive_filter = True
def exclude(self, match):
Adds a new filter which explicitly excludes results that contain
the specified matching text.
@match - Regex, or list of regexs, to match.
Returns None.
if not isinstance(match, type([])):
matches = [match]
matches = match
for m in matches:
if m:
def filter(self, data):
Checks to see if a given string should be excluded from or included in the results.
Called internally by Binwalk.scan().
@data - String to check.
Returns FILTER_INCLUDE if the string should be included.
Returns FILTER_EXCLUDE if the string should be excluded.
data = data.lower()
# Loop through the filters to see if any of them are a match.
# If so, return the registered type for the matching filter (FILTER_INCLUDE || FILTER_EXCLUDE).
for f in self.filters:
return f.type
# If there was not explicit match and exclusive filtering is enabled, return FILTER_EXCLUDE.
if self.exclusive_filter:
return FilterType.FILTER_EXCLUDE
return FilterType.FILTER_INCLUDE
def valid_result(self, data):
Checks if the given string contains invalid data.
@data - String to validate.
Returns True if data is valid, False if invalid.
# A result of 'data' is never ever valid (for libmagic results)
if data == self.DATA_RESULT:
return False
# Make sure this result wasn't filtered
if self.filter(data) == FilterType.FILTER_EXCLUDE:
return False
# If showing invalid results, just return True without further checking.
if self.show_invalid_results:
return True
# Don't include quoted strings or keyword arguments in this search, as
# strings from the target file may legitimately contain the INVALID_RESULT text.
if self.INVALID_RESULT in common.strip_quoted_strings(
return False
# There should be no non-printable characters in any of the data
if self.NON_PRINTABLE_RESULT in data:
return False
return True
def grep(self, data=None, filters=[]):
Add or check case-insensitive grep filters against the supplied data string.
@data - Data string to check grep filters against. Not required if filters is specified.
@filters - Regex, or list of regexs, to add to the grep filters list. Not required if data is specified.
Returns None if data is not specified.
If data is specified, returns True if the data contains a grep filter, or if no grep filters exist.
If data is specified, returns False if the data does not contain any grep filters.
# Add any specified filters to self.grep_filters
if filters:
if not isinstance(filters, type([])):
gfilters = [filters]
gfilters = filters
for gfilter in gfilters:
# Filters are case insensitive
# Check the data against all grep filters until one is found
if data is not None:
# If no grep filters have been created, always return True
if not self.grep_filters:
return True
# Filters are case insensitive
data = data.lower()
# If a filter exists in data, return True
for gfilter in self.grep_filters:
return True
# Else, return False
return False
return None
def clear(self):
Clears all include, exclude and grep filters.
Retruns None.
self.filters = []
self.grep_filters = []
Class to filter results based on include/exclude rules and false positive detection.
An instance of this class is available via the Binwalk.filter object.
Note that all filter strings should be in lower case.
# If the result returned by libmagic is "data" or contains the text
# 'invalid' or a backslash are known to be invalid/false positives.
DATA_RESULT = "data"
INVALID_RESULTS = ["invalid", "\\"]
INVALID_RESULT = "invalid"
def __init__(self, show_invalid_results=False):
Class constructor.
@show_invalid_results - Set to True to display results marked as invalid.
Returns None.
self.filters = []
self.grep_filters = []
self.show_invalid_results = show_invalid_results
self.exclusive_filter = False = Signature(self)
def include(self, match, exclusive=True):
Adds a new filter which explicitly includes results that contain
the specified matching text.
@match - Regex, or list of regexs, to match.
@exclusive - If True, then results that do not explicitly contain
a FILTER_INCLUDE match will be excluded. If False,
signatures that contain the FILTER_INCLUDE match will
be included in the scan, but will not cause non-matching
results to be excluded.
Returns None.
if not isinstance(match, type([])):
matches = [match]
matches = match
for m in matches:
if m:
if exclusive and not self.exclusive_filter:
self.exclusive_filter = True
def exclude(self, match):
Adds a new filter which explicitly excludes results that contain
the specified matching text.
@match - Regex, or list of regexs, to match.
Returns None.
if not isinstance(match, type([])):
matches = [match]
matches = match
for m in matches:
if m:
def filter(self, data):
Checks to see if a given string should be excluded from or included in the results.
Called internally by Binwalk.scan().
@data - String to check.
Returns FILTER_INCLUDE if the string should be included.
Returns FILTER_EXCLUDE if the string should be excluded.
data = data.lower()
# Loop through the filters to see if any of them are a match.
# If so, return the registered type for the matching filter (FILTER_INCLUDE || FILTER_EXCLUDE).
for f in self.filters:
return f.type
# If there was not explicit match and exclusive filtering is enabled, return FILTER_EXCLUDE.
if self.exclusive_filter:
return FilterType.FILTER_EXCLUDE
return FilterType.FILTER_INCLUDE
def valid_result(self, data):
Checks if the given string contains invalid data.
@data - String to validate.
Returns True if data is valid, False if invalid.
# A result of 'data' is never ever valid (for libmagic results)
if data == self.DATA_RESULT:
return False
# Make sure this result wasn't filtered
if self.filter(data) == FilterType.FILTER_EXCLUDE:
return False
# If showing invalid results, just return True without further checking.
if self.show_invalid_results:
return True
# Don't include quoted strings or keyword arguments in this search, as
# strings from the target file may legitimately contain the INVALID_RESULT text.
if self.INVALID_RESULT in common.strip_quoted_strings(
return False
# There should be no non-printable characters in any of the data
if self.NON_PRINTABLE_RESULT in data:
return False
return True
def grep(self, data=None, filters=[]):
Add or check case-insensitive grep filters against the supplied data string.
@data - Data string to check grep filters against. Not required if filters is specified.
@filters - Regex, or list of regexs, to add to the grep filters list. Not required if data is specified.
Returns None if data is not specified.
If data is specified, returns True if the data contains a grep filter, or if no grep filters exist.
If data is specified, returns False if the data does not contain any grep filters.
# Add any specified filters to self.grep_filters
if filters:
if not isinstance(filters, type([])):
gfilters = [filters]
gfilters = filters
for gfilter in gfilters:
# Filters are case insensitive
# Check the data against all grep filters until one is found
if data is not None:
# If no grep filters have been created, always return True
if not self.grep_filters:
return True
# Filters are case insensitive
data = data.lower()
# If a filter exists in data, return True
for gfilter in self.grep_filters:
return True
# Else, return False
return False
return None
def clear(self):
Clears all include, exclude and grep filters.
Retruns None.
self.filters = []
self.grep_filters = []
......@@ -10,777 +10,777 @@ import binwalk.core.plugin
from binwalk.core.compat import *
class Option(object):
A container class that allows modules to declare command line options.
def __init__(self, kwargs={}, priority=0, description="", short="", long="", type=None, dtype=None):
Class constructor.
@kwargs - A dictionary of kwarg key-value pairs affected by this command line option.
@priority - A value from 0 to 100. Higher priorities will override kwarg values set by lower priority options.
@description - A description to be displayed in the help output.
@short - The short option to use (optional).
@long - The long option to use (if None, this option will not be displayed in help output).
@type - The accepted data type (one of: io.FileIO/argparse.FileType/binwalk.core.common.BlockFile, list, str, int, float).
@dtype - The displayed accepted type string, to be shown in help output.
Returns None.
self.kwargs = kwargs
self.priority = priority
self.description = description
self.short = short
self.long = long
self.type = type
self.dtype = dtype
if not self.dtype and self.type:
if self.type in [io.FileIO, argparse.FileType, binwalk.core.common.BlockFile]:
self.dtype = 'file'
elif self.type in [int, float, str]:
self.dtype = self.type.__name__
self.dtype = str.__name__
A container class that allows modules to declare command line options.
def __init__(self, kwargs={}, priority=0, description="", short="", long="", type=None, dtype=None):
Class constructor.
@kwargs - A dictionary of kwarg key-value pairs affected by this command line option.
@priority - A value from 0 to 100. Higher priorities will override kwarg values set by lower priority options.
@description - A description to be displayed in the help output.
@short - The short option to use (optional).
@long - The long option to use (if None, this option will not be displayed in help output).
@type - The accepted data type (one of: io.FileIO/argparse.FileType/binwalk.core.common.BlockFile, list, str, int, float).
@dtype - The displayed accepted type string, to be shown in help output.
Returns None.
self.kwargs = kwargs
self.priority = priority
self.description = description
self.short = short
self.long = long
self.type = type
self.dtype = dtype
if not self.dtype and self.type:
if self.type in [io.FileIO, argparse.FileType, binwalk.core.common.BlockFile]:
self.dtype = 'file'
elif self.type in [int, float, str]:
self.dtype = self.type.__name__
self.dtype = str.__name__
class Kwarg(object):
A container class allowing modules to specify their expected __init__ kwarg(s).
def __init__(self, name="", default=None, description=""):
Class constructor.
@name - Kwarg name.
@default - Default kwarg value.
@description - Description string.
Return None.
''' = name
self.default = default
self.description = description
A container class allowing modules to specify their expected __init__ kwarg(s).
def __init__(self, name="", default=None, description=""):
Class constructor.
@name - Kwarg name.
@default - Default kwarg value.
@description - Description string.
Return None.
''' = name
self.default = default
self.description = description
class Dependency(object):
A container class for declaring module dependencies.
A container class for declaring module dependencies.
def __init__(self, attribute="", name="", kwargs={}):
self.attribute = attribute = name
self.kwargs = kwargs
self.module = None
def __init__(self, attribute="", name="", kwargs={}):
self.attribute = attribute = name
self.kwargs = kwargs
self.module = None
class Result(object):
Generic class for storing and accessing scan results.
def __init__(self, **kwargs):
Class constructor.
@offset - The file offset of the result.
@size - Size of the result, if known.
@description - The result description, as displayed to the user.
@module - Name of the module that generated the result.
@file - The file object of the scanned file.
@valid - Set to True if the result if value, False if invalid.
@display - Set to True to display the result to the user, False to hide it.
@extract - Set to True to flag this result for extraction.
@plot - Set to Flase to exclude this result from entropy plots.
@name - Name of the result found (None if not applicable or unknown).
Provide additional kwargs as necessary.
Returns None.
self.offset = 0
self.size = 0
self.description = ''
self.module = ''
self.file = None
self.valid = True
self.display = True
self.extract = True
self.plot = True = None
for (k, v) in iterator(kwargs):
setattr(self, k, v)
Generic class for storing and accessing scan results.
def __init__(self, **kwargs):
Class constructor.
@offset - The file offset of the result.
@size - Size of the result, if known.
@description - The result description, as displayed to the user.
@module - Name of the module that generated the result.
@file - The file object of the scanned file.
@valid - Set to True if the result if value, False if invalid.
@display - Set to True to display the result to the user, False to hide it.
@extract - Set to True to flag this result for extraction.
@plot - Set to Flase to exclude this result from entropy plots.
@name - Name of the result found (None if not applicable or unknown).
Provide additional kwargs as necessary.
Returns None.
self.offset = 0
self.size = 0
self.description = ''
self.module = ''
self.file = None
self.valid = True
self.display = True
self.extract = True
self.plot = True = None
for (k, v) in iterator(kwargs):
setattr(self, k, v)
class Error(Result):
A subclass of binwalk.core.module.Result.
def __init__(self, **kwargs):
Accepts all the same kwargs as binwalk.core.module.Result, but the following are also added:
A subclass of binwalk.core.module.Result.
def __init__(self, **kwargs):
Accepts all the same kwargs as binwalk.core.module.Result, but the following are also added:
@exception - In case of an exception, this is the exception object.
@exception - In case of an exception, this is the exception object.
Returns None.
self.exception = None
Result.__init__(self, **kwargs)
Returns None.
self.exception = None
Result.__init__(self, **kwargs)
class Module(object):
All module classes must be subclassed from this.
# The module title, as displayed in help output
TITLE = ""
# A list of binwalk.core.module.Option command line options
CLI = []
# A list of binwalk.core.module.Kwargs accepted by __init__
# A list of default dependencies for all modules; do not override this unless you
# understand the consequences of doing so.
# A list of dependencies that can be filled in as needed by each individual module.
# Format string for printing the header during a scan.
# Must be set prior to calling self.header.
HEADER_FORMAT = "%-12s %-12s %s\n"
# Format string for printing each result during a scan.
# Must be set prior to calling self.result.
RESULT_FORMAT = "%-12d 0x%-12X %s\n"
# Format string for printing custom information in the verbose header output.
# Must be set prior to calling self.header.
# The header to print during a scan.
# Set to None to not print a header.
# Note that this will be formatted per the HEADER_FORMAT format string.
# Must be set prior to calling self.header.
# The Result attribute names to print during a scan, as provided to the self.results method.
# Set to None to not print any results.
# Note that these will be formatted per the RESULT_FORMAT format string.
# Must be set prior to calling self.result.
RESULT = ["offset", "offset", "description"]
# The custom data to print in the verbose header output.
# Note that these will be formatted per the VERBOSE_FORMAT format string.
# Must be set prior to calling self.header.
# If set to True, the progress status will be automatically updated for each result
# containing valid file and offset attributes.
# Modules with higher priorities are executed first
# Modules with a higher order are displayed first in help output
# Set to False if this is not a primary module (e.g., General, Extractor modules)
def __init__(self, **kwargs):
self.errors = []
self.results = []
self.target_file_list = []
self.status = None
self.enabled = False
self.current_target_file_name = None = self.__class__.__name__
self.plugins = binwalk.core.plugin.Plugins(self)
self.dependencies = self.DEFAULT_DEPENDS + self.DEPENDS
process_kwargs(self, kwargs)
except KeyboardInterrupt as e:
raise e
except Exception as e:
self.target_file_list = list(self.config.target_files)
except AttributeError as e:
def __del__(self):
return None
def __enter__(self):
return self
def __exit__(self, x, z, y):
return None
def load(self):
Invoked at module load time.
May be overridden by the module sub-class.
return None
def reset(self):
Invoked only for dependency modules immediately prior to starting a new primary module.
return None
def init(self):
Invoked prior to
May be overridden by the module sub-class.
Returns None.
return None
def run(self):
Executes the main module routine.
Must be overridden by the module sub-class.
Returns True on success, False on failure.
return False
def callback(self, r):
Processes the result from all modules. Called for all dependency modules when a valid result is found.
@r - The result, an instance of binwalk.core.module.Result.
Returns None.
return None
def validate(self, r):
Validates the result.
May be overridden by the module sub-class.
@r - The result, an instance of binwalk.core.module.Result.
Returns None.
r.valid = True
return None
def _plugins_pre_scan(self):
def _plugins_post_scan(self):
def _plugins_result(self, r):
def _build_display_args(self, r):
args = []
if self.RESULT:
if type(self.RESULT) != type([]):
result = [self.RESULT]
result = self.RESULT
for name in result:
args.append(getattr(r, name))
return args
def next_file(self):
Gets the next file to be scanned (including pending extracted files, if applicable).
Also re/initializes self.status.
All modules should access the target file list through this method.
fp = None
# Add any pending extracted files to the target_files list and reset the extractor's pending file list
self.target_file_list += [self.config.open_file(f) for f in self.extractor.pending]
self.extractor.pending = []
if self.target_file_list:
fp = self.target_file_list.pop(0)
self.status.clear() = fp.length
if fp is not None:
self.current_target_file_name =
self.current_target_file_name = None
return fp
def clear(self, results=True, errors=True):
Clears results and errors lists.
if results:
self.results = []
if errors:
self.errors = []
def result(self, r=None, **kwargs):
Validates a result, stores it in self.results and prints it.
Accepts the same kwargs as the binwalk.core.module.Result class.
@r - An existing instance of binwalk.core.module.Result.
Returns an instance of binwalk.core.module.Result.
if r is None:
r = Result(**kwargs)
r.module = self.__class__.__name__
# Any module that is reporting results, valid or not, should be marked as enabled
if not self.enabled:
self.enabled = True
for dependency in self.dependencies:
getattr(self, dependency.attribute).callback(r)
except AttributeError:
if r.valid:
# Update the progress status automatically if it is not being done manually by the module
if r.offset and r.file and self.AUTO_UPDATE_STATUS: = r.file.length
self.status.completed = r.offset
if r.display:
display_args = self._build_display_args(r)
if display_args:
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
return r
def error(self, **kwargs):
Stores the specified error in self.errors.
Accepts the same kwargs as the binwalk.core.module.Error class.
Returns None.
exception_header_width = 100
e = Error(**kwargs)
e.module = self.__class__.__name__
if e.exception:
sys.stderr.write("\n" + e.module + " Exception: " + str(e.exception) + "\n")
sys.stderr.write("-" * exception_header_width + "\n")
sys.stderr.write("-" * exception_header_width + "\n\n")
elif e.description:
sys.stderr.write("\n" + e.module + " Error: " + e.description + "\n\n")
def header(self):
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
self.config.display.add_custom_header(self.VERBOSE_FORMAT, self.VERBOSE)
if type(self.HEADER) == type([]):
self.config.display.header(*self.HEADER, file_name=self.current_target_file_name)
elif self.HEADER:
self.config.display.header(self.HEADER, file_name=self.current_target_file_name)
def footer(self):
def main(self, parent):
Responsible for calling self.init, initializing self.config.display, and calling
Returns the value returned from
self.status = parent.status
self.modules = parent.loaded_modules
# Reset all dependency modules
for dependency in self.dependencies:
if hasattr(self, dependency.attribute):
getattr(self, dependency.attribute).reset()
except KeyboardInterrupt as e:
raise e
except Exception as e:
return False
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
except KeyboardInterrupt as e:
raise e
except Exception as e:
return False
retval =
except KeyboardInterrupt as e:
raise e
except Exception as e:
return False
return retval
All module classes must be subclassed from this.
# The module title, as displayed in help output
TITLE = ""
# A list of binwalk.core.module.Option command line options
CLI = []
# A list of binwalk.core.module.Kwargs accepted by __init__
# A list of default dependencies for all modules; do not override this unless you
# understand the consequences of doing so.
# A list of dependencies that can be filled in as needed by each individual module.
# Format string for printing the header during a scan.
# Must be set prior to calling self.header.
HEADER_FORMAT = "%-12s %-12s %s\n"
# Format string for printing each result during a scan.
# Must be set prior to calling self.result.
RESULT_FORMAT = "%-12d 0x%-12X %s\n"
# Format string for printing custom information in the verbose header output.
# Must be set prior to calling self.header.
# The header to print during a scan.
# Set to None to not print a header.
# Note that this will be formatted per the HEADER_FORMAT format string.
# Must be set prior to calling self.header.
# The Result attribute names to print during a scan, as provided to the self.results method.
# Set to None to not print any results.
# Note that these will be formatted per the RESULT_FORMAT format string.
# Must be set prior to calling self.result.
RESULT = ["offset", "offset", "description"]
# The custom data to print in the verbose header output.
# Note that these will be formatted per the VERBOSE_FORMAT format string.
# Must be set prior to calling self.header.
# If set to True, the progress status will be automatically updated for each result
# containing valid file and offset attributes.
# Modules with higher priorities are executed first
# Modules with a higher order are displayed first in help output
# Set to False if this is not a primary module (e.g., General, Extractor modules)
def __init__(self, **kwargs):
self.errors = []
self.results = []
self.target_file_list = []
self.status = None
self.enabled = False
self.current_target_file_name = None = self.__class__.__name__
self.plugins = binwalk.core.plugin.Plugins(self)
self.dependencies = self.DEFAULT_DEPENDS + self.DEPENDS
process_kwargs(self, kwargs)
except KeyboardInterrupt as e:
raise e
except Exception as e:
self.target_file_list = list(self.config.target_files)
except AttributeError as e:
def __del__(self):
return None
def __enter__(self):
return self
def __exit__(self, x, z, y):
return None
def load(self):
Invoked at module load time.
May be overridden by the module sub-class.
return None
def reset(self):
Invoked only for dependency modules immediately prior to starting a new primary module.
return None
def init(self):
Invoked prior to
May be overridden by the module sub-class.
Returns None.
return None
def run(self):
Executes the main module routine.
Must be overridden by the module sub-class.
Returns True on success, False on failure.
return False
def callback(self, r):
Processes the result from all modules. Called for all dependency modules when a valid result is found.
@r - The result, an instance of binwalk.core.module.Result.
Returns None.
return None
def validate(self, r):
Validates the result.
May be overridden by the module sub-class.
@r - The result, an instance of binwalk.core.module.Result.
Returns None.
r.valid = True
return None
def _plugins_pre_scan(self):
def _plugins_post_scan(self):
def _plugins_result(self, r):
def _build_display_args(self, r):
args = []
if self.RESULT:
if type(self.RESULT) != type([]):
result = [self.RESULT]
result = self.RESULT
for name in result:
args.append(getattr(r, name))
return args
def next_file(self):
Gets the next file to be scanned (including pending extracted files, if applicable).
Also re/initializes self.status.
All modules should access the target file list through this method.
fp = None
# Add any pending extracted files to the target_files list and reset the extractor's pending file list
self.target_file_list += [self.config.open_file(f) for f in self.extractor.pending]
self.extractor.pending = []
if self.target_file_list:
fp = self.target_file_list.pop(0)
self.status.clear() = fp.length
if fp is not None:
self.current_target_file_name =
self.current_target_file_name = None
return fp
def clear(self, results=True, errors=True):
Clears results and errors lists.
if results:
self.results = []
if errors:
self.errors = []
def result(self, r=None, **kwargs):
Validates a result, stores it in self.results and prints it.
Accepts the same kwargs as the binwalk.core.module.Result class.
@r - An existing instance of binwalk.core.module.Result.
Returns an instance of binwalk.core.module.Result.
if r is None:
r = Result(**kwargs)
r.module = self.__class__.__name__
# Any module that is reporting results, valid or not, should be marked as enabled
if not self.enabled:
self.enabled = True
for dependency in self.dependencies:
getattr(self, dependency.attribute).callback(r)
except AttributeError:
if r.valid:
# Update the progress status automatically if it is not being done manually by the module
if r.offset and r.file and self.AUTO_UPDATE_STATUS: = r.file.length
self.status.completed = r.offset
if r.display:
display_args = self._build_display_args(r)
if display_args:
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
return r
def error(self, **kwargs):
Stores the specified error in self.errors.
Accepts the same kwargs as the binwalk.core.module.Error class.
Returns None.
exception_header_width = 100
e = Error(**kwargs)
e.module = self.__class__.__name__
if e.exception:
sys.stderr.write("\n" + e.module + " Exception: " + str(e.exception) + "\n")
sys.stderr.write("-" * exception_header_width + "\n")
sys.stderr.write("-" * exception_header_width + "\n\n")
elif e.description:
sys.stderr.write("\n" + e.module + " Error: " + e.description + "\n\n")
def header(self):
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
self.config.display.add_custom_header(self.VERBOSE_FORMAT, self.VERBOSE)
if type(self.HEADER) == type([]):
self.config.display.header(*self.HEADER, file_name=self.current_target_file_name)
elif self.HEADER:
self.config.display.header(self.HEADER, file_name=self.current_target_file_name)
def footer(self):
def main(self, parent):
Responsible for calling self.init, initializing self.config.display, and calling
Returns the value returned from
self.status = parent.status
self.modules = parent.loaded_modules
# Reset all dependency modules
for dependency in self.dependencies:
if hasattr(self, dependency.attribute):
getattr(self, dependency.attribute).reset()
except KeyboardInterrupt as e:
raise e
except Exception as e:
return False
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
except KeyboardInterrupt as e:
raise e
except Exception as e:
return False
retval =
except KeyboardInterrupt as e:
raise e
except Exception as e:
return False
return retval
class Status(object):
Class used for tracking module status (e.g., % complete).
Class used for tracking module status (e.g., % complete).
def __init__(self, **kwargs):
self.kwargs = kwargs
def __init__(self, **kwargs):
self.kwargs = kwargs
def clear(self):
for (k,v) in iterator(self.kwargs):
setattr(self, k, v)
def clear(self):
for (k,v) in iterator(self.kwargs):
setattr(self, k, v)
class ModuleException(Exception):
Module exception class.
Nothing special here except the name.
Module exception class.
Nothing special here except the name.
class Modules(object):
Main class used for running and managing modules.
def __init__(self, *argv, **kargv):
Class constructor.
@argv - List of command line options. Must not include the program name (e.g., sys.argv[1:]).
@kargv - Keyword dictionary of command line options.
Returns None.
self.arguments = []
self.loaded_modules = {}
self.default_dependency_modules = {}
self.status = Status(completed=0, total=0)
self._set_arguments(list(argv), kargv)
def _set_arguments(self, argv=None, kargv=None):
    '''
    Builds self.arguments from a list of command line arguments and/or a keyword dictionary.

    @argv  - List of command line arguments. Entries generated from kargv are appended to it.
    @kargv - Dictionary of option names and values. Each name is converted with self._parse_api_opt.

    Returns None.
    '''
    # Fresh containers per call; default [] / {} objects would be shared between calls.
    if argv is None:
        argv = []
    if kargv is None:
        kargv = {}

    for (k, v) in kargv.items():
        k = self._parse_api_opt(k)
        # Test for real booleans/None explicitly. A membership test against
        # [True, False, None] also matches 0 and 1 (0 == False, 1 == True),
        # which silently discarded numeric option values.
        if not (isinstance(v, bool) or v is None):
            argv.append("%s %s" % (k, v))

    # Fall back to the process command line only if nothing was supplied now or earlier.
    if not argv and not self.arguments:
        self.arguments = sys.argv[1:]
    elif argv:
        self.arguments = argv
def _parse_api_opt(self, opt):
    '''
    Converts an API keyword name into its command line option form.

    @opt - The option name, with or without leading hyphens.

    Returns opt unchanged if it already starts with a hyphen, '-' + opt for
    single character names, and '--' + opt for everything else.
    '''
    # If the argument already starts with a hyphen, don't add hyphens in front of it
    if opt.startswith('-'):
        return opt

    # Short options are only 1 character; anything else is a long option
    prefix = '-' if len(opt) == 1 else '--'
    return prefix + opt
def list(self, attribute="run"):
Finds all modules with the specified attribute.
@attribute - The desired module attribute.
Returns a list of modules that contain the specified attribute, in the order they should be executed.
import binwalk.modules
modules = {}
for (name, module) in inspect.getmembers(binwalk.modules):
if inspect.isclass(module) and hasattr(module, attribute):
modules[module] = module.PRIORITY
return sorted(modules, key=modules.get, reverse=True)
def help(self):
Generates formatted help output.
Returns the help string.
modules = {}
help_string = "\nBinwalk v%s\nCraig Heffner,\n" % binwalk.core.settings.Settings.VERSION
# Build a dictionary of modules and their ORDER attributes.
# This makes it easy to sort modules by their ORDER attribute for display.
for module in self.list(attribute="CLI"):
if module.CLI:
modules[module] = module.ORDER
for module in sorted(modules, key=modules.get, reverse=True):
help_string += "\n%s Options:\n" % module.TITLE
for module_option in module.CLI:
if module_option.long:
long_opt = '--' + module_option.long
if module_option.dtype:
optargs = "=<%s>" % module_option.dtype
optargs = ""
if module_option.short:
short_opt = "-" + module_option.short + ","
short_opt = " "
fmt = " %%s %%s%%-%ds%%s\n" % (32-len(long_opt))
help_string += fmt % (short_opt, long_opt, optargs, module_option.description)
return help_string + "\n"
def execute(self, *args, **kwargs):
Executes all appropriate modules according to the options specified in args/kwargs.
Returns a list of executed module objects.
run_modules = []
orig_arguments = self.arguments
if args or kwargs:
self._set_arguments(list(args), kwargs)
# Run all modules
for module in self.list():
obj =
# Add all loaded modules that marked themselves as enabled to the run_modules list
for (module, obj) in iterator(self.loaded_modules):
# Report the results if the module is enabled and if it is a primary module or if it reported any results/errors
if obj.enabled and (obj.PRIMARY or obj.results or obj.errors):
self.arguments = orig_arguments
return run_modules
def run(self, module, dependency=False, kwargs={}):
Runs a specific module.
obj = self.load(module, kwargs)
if isinstance(obj, binwalk.core.module.Module) and obj.enabled:
# If the module is not being loaded as a dependency, add it to the loaded modules dictionary
if not dependency:
self.loaded_modules[module] = obj
return obj
def load(self, module, kwargs={}):
argv = self.argv(module, argv=self.arguments)
argv.update(self.dependencies(module, argv['enabled']))
return module(**argv)
def dependencies(self, module, module_enabled):
import binwalk.modules
attributes = {}
for dependency in module.DEFAULT_DEPENDS+module.DEPENDS:
# The dependency module must be imported by
if hasattr(binwalk.modules,
dependency.module = getattr(binwalk.modules,
raise ModuleException("%s depends on %s which was not found in\n" % (str(module),
# No recursive dependencies, thanks
if dependency.module == module:
# Only load dependencies with custom kwargs from modules that are enabled, else madness ensues.
# Example: Heuristic module depends on entropy module, and sets entropy kwargs to contain 'enabled' : True.
# Without this check, an entropy scan would always be run, even if -H or -E weren't specified!
# Modules that are not enabled (e.g., extraction module) can load any dependency as long as they don't
# set any custom kwargs for those dependencies.
if module_enabled or not dependency.kwargs:
depobj =, dependency=True, kwargs=dependency.kwargs)
# If a dependency failed, consider this a non-recoverable error and raise an exception
if depobj.errors:
raise ModuleException("Failed to load " +
attributes[dependency.attribute] = depobj
return attributes
def argv(self, module, argv=sys.argv[1:]):
Processes argv for any options specific to the specified module.
@module - The module to process argv for.
@argv - A list of command line arguments (excluding argv[0]).
Returns a dictionary of kwargs for the specified module.
kwargs = {'enabled' : False}
last_priority = {}
longs = []
shorts = ""
parser = argparse.ArgumentParser(add_help=False)
# Must build arguments from all modules so that:
# 1) Any conflicting arguments will raise an exception
# 2) The only unknown arguments will be the target files, making them easy to identify
for m in self.list(attribute="CLI"):
for module_option in m.CLI:
if not module_option.long:
if module_option.type is None:
action = 'store_true'
action = None
if module_option.short:
parser.add_argument('-' + module_option.short, '--' + module_option.long, action=action, dest=module_option.long)
parser.add_argument('--' + module_option.long, action=action, dest=module_option.long)
args, unknown = parser.parse_known_args(argv)
args = args.__dict__
# Only add parsed options pertinent to the requested module
for module_option in module.CLI:
if module_option.type == binwalk.core.common.BlockFile:
for k in get_keys(module_option.kwargs):
kwargs[k] = []
for unk in unknown:
elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]:
for (name, value) in iterator(module_option.kwargs):
if not has_key(last_priority, name) or last_priority[name] <= module_option.priority:
if module_option.type is not None:
value = args[module_option.long]
last_priority[name] = module_option.priority
# Do this manually as argparse doesn't seem to be able to handle hexadecimal values
if module_option.type == int:
kwargs[name] = int(value, 0)
elif module_option.type == float:
kwargs[name] = float(value)
elif module_option.type == dict:
if not has_key(kwargs, name):
kwargs[name] = {}
kwargs[name][len(kwargs[name])] = value
elif module_option.type == list:
if not has_key(kwargs, name):
kwargs[name] = []
kwargs[name] = value
return kwargs
def kwargs(self, obj, kwargs):
Processes a module's kwargs. All modules should use this for kwarg processing.
@obj - An instance of the module (e.g., self)
@kwargs - The kwargs passed to the module
Returns None.
if hasattr(obj, "KWARGS"):
for module_argument in obj.KWARGS:
if has_key(kwargs,
arg_value = kwargs[]
arg_value = module_argument.default
setattr(obj,, arg_value)
for (k, v) in iterator(kwargs):
if not hasattr(obj, k):
setattr(obj, k, v)
raise Exception("binwalk.core.module.Modules.process_kwargs: %s has no attribute 'KWARGS'" % str(obj))
Main class used for running and managing modules.
def __init__(self, *argv, **kargv):
Class constructor.
@argv - List of command line options. Must not include the program name (e.g., sys.argv[1:]).
@kargv - Keyword dictionary of command line options.
Returns None.
self.arguments = []
self.loaded_modules = {}
self.default_dependency_modules = {}
self.status = Status(completed=0, total=0)
self._set_arguments(list(argv), kargv)
def _set_arguments(self, argv=None, kargv=None):
    '''
    Builds self.arguments from a list of command line arguments and/or a keyword dictionary.

    @argv  - List of command line arguments. Entries generated from kargv are appended to it.
    @kargv - Dictionary of option names and values. Each name is converted with self._parse_api_opt.

    Returns None.
    '''
    # Fresh containers per call; default [] / {} objects would be shared between calls.
    if argv is None:
        argv = []
    if kargv is None:
        kargv = {}

    for (k, v) in kargv.items():
        k = self._parse_api_opt(k)
        # Test for real booleans/None explicitly. A membership test against
        # [True, False, None] also matches 0 and 1 (0 == False, 1 == True),
        # which silently discarded numeric option values.
        if not (isinstance(v, bool) or v is None):
            argv.append("%s %s" % (k, v))

    # Fall back to the process command line only if nothing was supplied now or earlier.
    if not argv and not self.arguments:
        self.arguments = sys.argv[1:]
    elif argv:
        self.arguments = argv
def _parse_api_opt(self, opt):
    '''
    Converts an API keyword name into its command line option form.

    @opt - The option name, with or without leading hyphens.

    Returns opt unchanged if it already starts with a hyphen, '-' + opt for
    single character names, and '--' + opt for everything else.
    '''
    # If the argument already starts with a hyphen, don't add hyphens in front of it
    if opt.startswith('-'):
        return opt

    # Short options are only 1 character; anything else is a long option
    prefix = '-' if len(opt) == 1 else '--'
    return prefix + opt
def list(self, attribute="run"):
Finds all modules with the specified attribute.
@attribute - The desired module attribute.
Returns a list of modules that contain the specified attribute, in the order they should be executed.
import binwalk.modules
modules = {}
for (name, module) in inspect.getmembers(binwalk.modules):
if inspect.isclass(module) and hasattr(module, attribute):
modules[module] = module.PRIORITY
return sorted(modules, key=modules.get, reverse=True)
def help(self):
Generates formatted help output.
Returns the help string.
modules = {}
help_string = "\nBinwalk v%s\nCraig Heffner,\n" % binwalk.core.settings.Settings.VERSION
# Build a dictionary of modules and their ORDER attributes.
# This makes it easy to sort modules by their ORDER attribute for display.
for module in self.list(attribute="CLI"):
if module.CLI:
modules[module] = module.ORDER
for module in sorted(modules, key=modules.get, reverse=True):
help_string += "\n%s Options:\n" % module.TITLE
for module_option in module.CLI:
if module_option.long:
long_opt = '--' + module_option.long
if module_option.dtype:
optargs = "=<%s>" % module_option.dtype
optargs = ""
if module_option.short:
short_opt = "-" + module_option.short + ","
short_opt = " "
fmt = " %%s %%s%%-%ds%%s\n" % (32-len(long_opt))
help_string += fmt % (short_opt, long_opt, optargs, module_option.description)
return help_string + "\n"
def execute(self, *args, **kwargs):
Executes all appropriate modules according to the options specified in args/kwargs.
Returns a list of executed module objects.
run_modules = []
orig_arguments = self.arguments
if args or kwargs:
self._set_arguments(list(args), kwargs)
# Run all modules
for module in self.list():
obj =
# Add all loaded modules that marked themselves as enabled to the run_modules list
for (module, obj) in iterator(self.loaded_modules):
# Report the results if the module is enabled and if it is a primary module or if it reported any results/errors
if obj.enabled and (obj.PRIMARY or obj.results or obj.errors):
self.arguments = orig_arguments
return run_modules
def run(self, module, dependency=False, kwargs={}):
Runs a specific module.
obj = self.load(module, kwargs)
if isinstance(obj, binwalk.core.module.Module) and obj.enabled:
# If the module is not being loaded as a dependency, add it to the loaded modules dictionary
if not dependency:
self.loaded_modules[module] = obj
return obj
def load(self, module, kwargs={}):
argv = self.argv(module, argv=self.arguments)
argv.update(self.dependencies(module, argv['enabled']))
return module(**argv)
def dependencies(self, module, module_enabled):
import binwalk.modules
attributes = {}
for dependency in module.DEFAULT_DEPENDS+module.DEPENDS:
# The dependency module must be imported by
if hasattr(binwalk.modules,
dependency.module = getattr(binwalk.modules,
raise ModuleException("%s depends on %s which was not found in\n" % (str(module),
# No recursive dependencies, thanks
if dependency.module == module:
# Only load dependencies with custom kwargs from modules that are enabled, else madness ensues.
# Example: Heuristic module depends on entropy module, and sets entropy kwargs to contain 'enabled' : True.
# Without this check, an entropy scan would always be run, even if -H or -E weren't specified!
# Modules that are not enabled (e.g., extraction module) can load any dependency as long as they don't
# set any custom kwargs for those dependencies.
if module_enabled or not dependency.kwargs:
depobj =, dependency=True, kwargs=dependency.kwargs)
# If a dependency failed, consider this a non-recoverable error and raise an exception
if depobj.errors:
raise ModuleException("Failed to load " +
attributes[dependency.attribute] = depobj
return attributes
def argv(self, module, argv=sys.argv[1:]):
Processes argv for any options specific to the specified module.
@module - The module to process argv for.
@argv - A list of command line arguments (excluding argv[0]).
Returns a dictionary of kwargs for the specified module.
kwargs = {'enabled' : False}
last_priority = {}
longs = []
shorts = ""
parser = argparse.ArgumentParser(add_help=False)
# Must build arguments from all modules so that:
# 1) Any conflicting arguments will raise an exception
# 2) The only unknown arguments will be the target files, making them easy to identify
for m in self.list(attribute="CLI"):
for module_option in m.CLI:
if not module_option.long:
if module_option.type is None:
action = 'store_true'
action = None
if module_option.short:
parser.add_argument('-' + module_option.short, '--' + module_option.long, action=action, dest=module_option.long)
parser.add_argument('--' + module_option.long, action=action, dest=module_option.long)
args, unknown = parser.parse_known_args(argv)
args = args.__dict__
# Only add parsed options pertinent to the requested module
for module_option in module.CLI:
if module_option.type == binwalk.core.common.BlockFile:
for k in get_keys(module_option.kwargs):
kwargs[k] = []
for unk in unknown:
elif has_key(args, module_option.long) and args[module_option.long] not in [None, False]:
for (name, value) in iterator(module_option.kwargs):
if not has_key(last_priority, name) or last_priority[name] <= module_option.priority:
if module_option.type is not None:
value = args[module_option.long]
last_priority[name] = module_option.priority
# Do this manually as argparse doesn't seem to be able to handle hexadecimal values
if module_option.type == int:
kwargs[name] = int(value, 0)
elif module_option.type == float:
kwargs[name] = float(value)
elif module_option.type == dict:
if not has_key(kwargs, name):
kwargs[name] = {}
kwargs[name][len(kwargs[name])] = value
elif module_option.type == list:
if not has_key(kwargs, name):
kwargs[name] = []
kwargs[name] = value
return kwargs
def kwargs(self, obj, kwargs):
Processes a module's kwargs. All modules should use this for kwarg processing.
@obj - An instance of the module (e.g., self)
@kwargs - The kwargs passed to the module
Returns None.
if hasattr(obj, "KWARGS"):
for module_argument in obj.KWARGS:
if has_key(kwargs,
arg_value = kwargs[]
arg_value = module_argument.default
setattr(obj,, arg_value)
for (k, v) in iterator(kwargs):
if not hasattr(obj, k):
setattr(obj, k, v)
raise Exception("binwalk.core.module.Modules.process_kwargs: %s has no attribute 'KWARGS'" % str(obj))
def process_kwargs(obj, kwargs):
Convenience wrapper around binwalk.core.module.Modules.kwargs.
Convenience wrapper around binwalk.core.module.Modules.kwargs.
@obj - The class object (an instance of a sub-class of binwalk.core.module.Module).
@kwargs - The kwargs provided to the object's __init__ method.
@obj - The class object (an instance of a sub-class of binwalk.core.module.Module).
@kwargs - The kwargs provided to the object's __init__ method.
Returns None.
return Modules().kwargs(obj, kwargs)
Returns None.
return Modules().kwargs(obj, kwargs)
def show_help(fd=sys.stdout):
Convenience wrapper around
Convenience wrapper around
@fd - An object with a write method (e.g., sys.stdout, sys.stderr, etc).
@fd - An object with a write method (e.g., sys.stdout, sys.stderr, etc).
Returns None.
Returns None.
......@@ -7,350 +7,350 @@ from binwalk.core.filter import FilterType
class MagicSignature(object):
    '''
    Container for the parts of a single magic signature line.

    Attributes default to offset=0, type='', condition='', description='' and length=0,
    and may be overridden (or extended) through keyword arguments.
    '''

    def __init__(self, **kwargs):
        '''
        Class constructor.

        @kwargs - Attribute names and values to set on the instance. String values that
                  int(value, 0) accepts (e.g. '16', '0x10') are stored as integers;
                  every other value is stored unchanged.

        Returns None.
        '''
        self.offset = 0
        self.type = ''
        self.condition = ''
        self.description = ''
        self.length = 0

        for (k, v) in kwargs.items():
            # Best-effort numeric conversion: int() with an explicit base raises
            # TypeError for non-string values and ValueError for non-numeric strings.
            try:
                v = int(v, 0)
            except (TypeError, ValueError):
                pass

            setattr(self, k, v)
class MagicParser(object):
Class for loading, parsing and creating libmagic-compatible magic files.
This class is primarily used internally by the Binwalk class, and a class instance of it is available via the Binwalk.parser object.
One useful method however, is file_from_string(), which will generate a temporary magic file from a given signature string:
import binwalk
bw = binwalk.Binwalk()
# Create a temporary magic file that contains a single entry with a signature of '\\x00FOOBAR\\xFF', and append the resulting
# temporary file name to the list of magic files in the Binwalk class instance.
bw.magic_files.append(bw.parser.file_from_string('\\x00FOOBAR\\xFF', display_name='My custom signature'))
All magic files generated by this class will be deleted when the class deconstructor is called.
BIG_ENDIAN = 'big'
LITTLE_ENDIAN = 'little'
MAGIC_STRING_FORMAT = "%d\tstring\t%s\t%s\n"
DEFAULT_DISPLAY_NAME = "Raw string signature"
# If libmagic returns multiple results, they are delimited with this string.
def __init__(self, filter=None, smart=None):
Class constructor.
@filter - Instance of the MagicFilter class. May be None if the parse/parse_file methods are not used.
@smart - Instance of the SmartSignature class. May be None if the parse/parse_file methods are not used.
Returns None.
self.matches = set([])
self.signatures = {}
self.filter = filter = smart
self.raw_fd = None
self.signature_count = 0
def __del__(self):
except KeyboardInterrupt as e:
raise e
except Exception:
def rm_magic_files(self):
Cleans up the temporary magic file(s).
Returns None.
except KeyboardInterrupt as e:
raise e
except Exception:
except KeyboardInterrupt as e:
raise e
except Exception:
def cleanup(self):
Cleans up any tempfiles created by the class instance.
Returns None.
def file_from_string(self, signature_string, offset=0, display_name=DEFAULT_DISPLAY_NAME):
Generates a magic file from a signature string.
This method is intended to be used once per instance.
If invoked multiple times, any previously created magic files will be closed and deleted.
@signature_string - The string signature to search for.
@offset - The offset at which the signature should occur.
@display_name - The text to display when the signature is found.
Returns the name of the generated temporary magic file.
self.raw_fd = tempfile.NamedTemporaryFile()
self.raw_fd.write(self.MAGIC_STRING_FORMAT % (offset, signature_string, display_name))
def parse(self, file_name):
Parses magic file(s) and concatenates them into a single temporary magic file
while simultaneously removing filtered signatures.
@file_name - Magic file, or list of magic files, to parse.
Returns the name of the generated temporary magic file, which will be automatically
deleted when the class deconstructor is called.
self.matches = set([])
self.signatures = {}
self.signature_count = 0
self.fd = tempfile.NamedTemporaryFile()
if isinstance(file_name, type([])):
files = file_name
files = [file_name]
for fname in files:
if os.path.exists(fname):
sys.stdout.write("WARNING: Magic file '%s' does not exist!\n" % fname)
def parse_file(self, file_name):
Parses a magic file and appends valid signatures to the temporary magic file, as allowed
by the existing filter rules.
@file_name - Magic file to parse.
Returns None.
# Default to not including signature entries until we've
# found what looks like a valid entry.
include = False
line_count = 0
for line in open(file_name, 'r').readlines():
line_count += 1
# Check if this is the first line of a signature entry
entry = self._parse_line(line)
if entry is not None:
# If this signature is marked for inclusion, include it.
if self.filter.filter(entry.description) == FilterType.FILTER_INCLUDE:
include = True
self.signature_count += 1
if not has_key(self.signatures, entry.offset):
self.signatures[entry.offset] = []
if entry.condition not in self.signatures[entry.offset]:
include = False
# Keep writing lines of the signature to the temporary magic file until
# we detect a signature that should not be included.
if include:
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("Error parsing magic file '%s' on line %d: %s" % (file_name, line_count, str(e)))
def _parse_line(self, line):
    '''
    Parses a signature line into its four parts (offset, type, condition and description),
    looking for the first line of a given signature.

    @line - The signature line to parse.

    Returns a MagicSignature instance with offset, type, condition, description and length
    populated if the line is the first line of a signature.
    Returns None if the line does not start with a digit.
    Raises an Exception if the line starts with a digit but cannot be parsed.
    '''
    entry = MagicSignature()

    # Quick and dirty pre-filter. We are only concerned with the first line of a
    # signature, which will always start with a number. Make sure the first byte of
    # the line is a number; if not, don't process.
    if line[:1] < '0' or line[:1] > '9':
        return None

    try:
        # Split the line into white-space separated parts.
        # For this to work properly, replace escaped spaces ('\ ') with '\x20'.
        # This means the same thing, but doesn't confuse split().
        line_parts = line.replace('\\ ', '\\x20').split()
        entry.offset = line_parts[0]
        entry.type = line_parts[1]
        # The condition line may contain escaped sequences, so be sure to decode it properly.
        entry.condition = string_decode(line_parts[2])
        entry.description = ' '.join(line_parts[3:])
    except Exception as e:
        # Format the message with %; passing the tuple as a second argument
        # left the placeholders unexpanded.
        raise Exception("%s :: %s" % (str(e), line))

    # We've already verified that the first character in this line is a number, so this *shouldn't*
    # throw an exception, but let's catch it just in case...
    try:
        entry.offset = int(entry.offset, 0)
    except Exception as e:
        raise Exception("%s :: %s" % (str(e), line))

    # If this is a string, get the length of the string
    if 'string' in entry.type or entry.condition == self.WILDCARD:
        entry.length = len(entry.condition)
    # Else, we need to jump through a few more hoops...
    else:
        # Default to little endian, unless the type field starts with 'be'.
        # This assumes that we're running on a little endian system...
        if entry.type.startswith('be'):
            endianess = self.BIG_ENDIAN
        else:
            endianess = self.LITTLE_ENDIAN

        # Try to convert the condition to an integer. This does not allow
        # for more advanced conditions for the first line of a signature,
        # but needing that is rare.
        try:
            intval = int(entry.condition.strip('L'), 0)
        except Exception as e:
            # entry is an object, not a dictionary: use attribute access in the message.
            raise Exception("Failed to evaluate condition for '%s' type: '%s', condition: '%s', error: %s" % (entry.description, entry.type, entry.condition, str(e)))

        # How long is the field type?
        if entry.type == 'byte':
            entry.length = 1
        elif 'short' in entry.type:
            entry.length = 2
        elif 'long' in entry.type:
            entry.length = 4
        elif 'quad' in entry.type:
            entry.length = 8

        # Convert the integer value to a string of the appropriate endianess
        entry.condition = self._to_string(intval, entry.length, endianess)

    return entry
def build_signature_set(self):
    '''
    Builds a set of signature tuples from self.signatures.

    Returns a set of (<signature offset>, <compiled signature regex>) tuples,
    which is also stored in self.signature_set.
    '''
    self.signature_set = set()

    for (offset, sigs) in self.signatures.items():
        for sig in sigs:
            if sig == self.WILDCARD:
                # A wildcard signature matches any single character
                regex = re.compile('.')
            else:
                # Everything else is matched literally
                regex = re.compile(re.escape(sig))

            self.signature_set.add((offset, regex))

    return self.signature_set
def find_signature_candidates(self, data, end):
    '''
    Finds candidate signatures inside of the data buffer.
    Called internally by Binwalk.single_scan.

    @data - Data to scan for candidate signatures.
    @end  - Don't look for signatures beyond this offset.

    Returns an ordered (ascending) list of unique offsets inside of data at which
    candidate signatures were found.
    '''
    candidate_offsets = set()

    for (offset, regex) in self.signature_set:
        for match in regex.finditer(data):
            # The signature begins 'offset' bytes before the matched bytes
            start = match.start() - offset
            if 0 <= start < end:
                candidate_offsets.add(start)

    # A set has no defined order; sort so that callers get the documented ordering.
    return sorted(candidate_offsets)
def _to_string(self, value, size, endianess):
    '''
    Converts an integer value into a raw string.

    @value     - The integer value to convert.
    @size      - Size, in bytes, of the integer value.
    @endianess - One of self.LITTLE_ENDIAN | self.BIG_ENDIAN.

    Returns a raw string containing value.
    '''
    # Emit the least significant byte first (little endian order)
    data = "".join([chr((value >> (8 * i)) & 0xFF) for i in range(0, size)])

    # Anything other than little endian gets the byte order reversed
    if endianess != self.LITTLE_ENDIAN:
        data = data[::-1]

    return data
def split(self, data):
    '''
    Splits multiple libmagic results in the data string into a list of separate results.

    @data - Data string returned from libmagic.

    Returns a list of result strings, or an empty list if data could not be split.
    '''
    try:
        return data.split(self.RESULT_SEPERATOR)
    except Exception:
        # Deliberate best effort: anything that can't be split yields no results.
        # KeyboardInterrupt is not an Exception subclass, so it still propagates.
        return []
Class for loading, parsing and creating libmagic-compatible magic files.
This class is primarily used internally by the Binwalk class, and a class instance of it is available via the Binwalk.parser object.
One useful method however, is file_from_string(), which will generate a temporary magic file from a given signature string:
import binwalk
bw = binwalk.Binwalk()
# Create a temporary magic file that contains a single entry with a signature of '\\x00FOOBAR\\xFF', and append the resulting
# temporary file name to the list of magic files in the Binwalk class instance.
bw.magic_files.append(bw.parser.file_from_string('\\x00FOOBAR\\xFF', display_name='My custom signature'))
All magic files generated by this class will be deleted when the class deconstructor is called.
BIG_ENDIAN = 'big'
LITTLE_ENDIAN = 'little'
MAGIC_STRING_FORMAT = "%d\tstring\t%s\t%s\n"
DEFAULT_DISPLAY_NAME = "Raw string signature"
# If libmagic returns multiple results, they are delimited with this string.
def __init__(self, filter=None, smart=None):
Class constructor.
@filter - Instance of the MagicFilter class. May be None if the parse/parse_file methods are not used.
@smart - Instance of the SmartSignature class. May be None if the parse/parse_file methods are not used.
Returns None.
self.matches = set([])
self.signatures = {}
self.filter = filter = smart
self.raw_fd = None
self.signature_count = 0
def __del__(self):
except KeyboardInterrupt as e:
raise e
except Exception:
def rm_magic_files(self):
Cleans up the temporary magic file(s).
Returns None.
except KeyboardInterrupt as e:
raise e
except Exception:
except KeyboardInterrupt as e:
raise e
except Exception:
def cleanup(self):
Cleans up any tempfiles created by the class instance.
Returns None.
def file_from_string(self, signature_string, offset=0, display_name=DEFAULT_DISPLAY_NAME):
Generates a magic file from a signature string.
This method is intended to be used once per instance.
If invoked multiple times, any previously created magic files will be closed and deleted.
@signature_string - The string signature to search for.
@offset - The offset at which the signature should occur.
@display_name - The text to display when the signature is found.
Returns the name of the generated temporary magic file.
self.raw_fd = tempfile.NamedTemporaryFile()
self.raw_fd.write(self.MAGIC_STRING_FORMAT % (offset, signature_string, display_name))
def parse(self, file_name):
Parses magic file(s) and concatenates them into a single temporary magic file
while simultaneously removing filtered signatures.
@file_name - Magic file, or list of magic files, to parse.
Returns the name of the generated temporary magic file, which will be automatically
deleted when the class deconstructor is called.
self.matches = set([])
self.signatures = {}
self.signature_count = 0
self.fd = tempfile.NamedTemporaryFile()
if isinstance(file_name, type([])):
files = file_name
files = [file_name]
for fname in files:
if os.path.exists(fname):
sys.stdout.write("WARNING: Magic file '%s' does not exist!\n" % fname)
def parse_file(self, file_name):
Parses a magic file and appends valid signatures to the temporary magic file, as allowed
by the existing filter rules.
@file_name - Magic file to parse.
Returns None.
# Default to not including signature entries until we've
# found what looks like a valid entry.
include = False
line_count = 0
for line in open(file_name, 'r').readlines():
line_count += 1
# Check if this is the first line of a signature entry
entry = self._parse_line(line)
if entry is not None:
# If this signature is marked for inclusion, include it.
if self.filter.filter(entry.description) == FilterType.FILTER_INCLUDE:
include = True
self.signature_count += 1
if not has_key(self.signatures, entry.offset):
self.signatures[entry.offset] = []
if entry.condition not in self.signatures[entry.offset]:
include = False
# Keep writing lines of the signature to the temporary magic file until
# we detect a signature that should not be included.
if include:
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("Error parsing magic file '%s' on line %d: %s" % (file_name, line_count, str(e)))
def _parse_line(self, line):
    '''
    Parses a signature line into its four parts (offset, type, condition and description),
    looking for the first line of a given signature.

    @line - The signature line to parse.

    Returns a MagicSignature instance with offset, type, condition, description and length
    populated if the line is the first line of a signature.
    Returns None if the line does not start with a digit.
    Raises an Exception if the line starts with a digit but cannot be parsed.
    '''
    entry = MagicSignature()

    # Quick and dirty pre-filter. We are only concerned with the first line of a
    # signature, which will always start with a number. Make sure the first byte of
    # the line is a number; if not, don't process.
    if line[:1] < '0' or line[:1] > '9':
        return None

    try:
        # Split the line into white-space separated parts.
        # For this to work properly, replace escaped spaces ('\ ') with '\x20'.
        # This means the same thing, but doesn't confuse split().
        line_parts = line.replace('\\ ', '\\x20').split()
        entry.offset = line_parts[0]
        entry.type = line_parts[1]
        # The condition line may contain escaped sequences, so be sure to decode it properly.
        entry.condition = string_decode(line_parts[2])
        entry.description = ' '.join(line_parts[3:])
    except Exception as e:
        # Format the message with %; passing the tuple as a second argument
        # left the placeholders unexpanded.
        raise Exception("%s :: %s" % (str(e), line))

    # We've already verified that the first character in this line is a number, so this *shouldn't*
    # throw an exception, but let's catch it just in case...
    try:
        entry.offset = int(entry.offset, 0)
    except Exception as e:
        raise Exception("%s :: %s" % (str(e), line))

    # If this is a string, get the length of the string
    if 'string' in entry.type or entry.condition == self.WILDCARD:
        entry.length = len(entry.condition)
    # Else, we need to jump through a few more hoops...
    else:
        # Default to little endian, unless the type field starts with 'be'.
        # This assumes that we're running on a little endian system...
        if entry.type.startswith('be'):
            endianess = self.BIG_ENDIAN
        else:
            endianess = self.LITTLE_ENDIAN

        # Try to convert the condition to an integer. This does not allow
        # for more advanced conditions for the first line of a signature,
        # but needing that is rare.
        try:
            intval = int(entry.condition.strip('L'), 0)
        except Exception as e:
            # entry is an object, not a dictionary: use attribute access in the message.
            raise Exception("Failed to evaluate condition for '%s' type: '%s', condition: '%s', error: %s" % (entry.description, entry.type, entry.condition, str(e)))

        # How long is the field type?
        if entry.type == 'byte':
            entry.length = 1
        elif 'short' in entry.type:
            entry.length = 2
        elif 'long' in entry.type:
            entry.length = 4
        elif 'quad' in entry.type:
            entry.length = 8

        # Convert the integer value to a string of the appropriate endianess
        entry.condition = self._to_string(intval, entry.length, endianess)

    return entry
def build_signature_set(self):
    '''
    Builds a set of signature tuples from self.signatures.

    Returns a set of (<signature offset>, <compiled signature regex>) tuples,
    which is also stored in self.signature_set.
    '''
    self.signature_set = set()

    for (offset, sigs) in self.signatures.items():
        for sig in sigs:
            if sig == self.WILDCARD:
                # A wildcard signature matches any single character
                regex = re.compile('.')
            else:
                # Everything else is matched literally
                regex = re.compile(re.escape(sig))

            self.signature_set.add((offset, regex))

    return self.signature_set
def find_signature_candidates(self, data, end):
    '''
    Finds candidate signatures inside of the data buffer.
    Called internally by Binwalk.single_scan.

    @data - Data to scan for candidate signatures.
    @end  - Don't look for signatures beyond this offset.

    Returns an ordered (ascending, de-duplicated) list of offsets inside of data
    at which candidate signatures were found.
    '''
    candidate_offsets = set()

    for (offset, regex) in self.signature_set:
        for match in regex.finditer(data):
            # A signature located `offset` bytes into a file format means the
            # candidate start of that format is `offset` bytes before the match.
            candidate = match.start() - offset
            if 0 <= candidate < end:
                candidate_offsets.add(candidate)

    # Sets are unordered; sort so callers really get the documented ordered list.
    return sorted(candidate_offsets)
def _to_string(self, value, size, endianess):
    '''
    Converts an integer value into a raw string.

    @value     - The integer value to convert.
    @size      - Size, in bytes, of the integer value.
    @endianess - One of self.LITTLE_ENDIAN | self.BIG_ENDIAN.

    Returns a raw string containing value.
    '''
    # Emit the least significant byte first, i.e. little endian order.
    raw = "".join([chr((value >> (8 * shift)) & 0xFF) for shift in range(size)])

    if endianess == self.LITTLE_ENDIAN:
        return raw

    # Any other endianess is treated as big endian: reverse the byte order.
    return raw[::-1]
def split(self, data):
    '''
    Splits multiple libmagic results in the data string into a list of separate results.

    @data - Data string returned from libmagic.

    Returns a list of result strings.
    Returns an empty list if data cannot be split (e.g., it is not a string).
    '''
    try:
        return data.split(self.RESULT_SEPERATOR)
    except KeyboardInterrupt:
        # Never swallow a user interrupt.
        raise
    except Exception:
        # Best effort: unusable input simply yields no results.
        return []
......@@ -5,180 +5,180 @@ import binwalk.core.settings
from binwalk.core.compat import *
class Plugins:
Class to load and call plugin callback functions, handled automatically by Binwalk.scan / Binwalk.single_scan.
An instance of this class is available during a scan via the Binwalk.plugins object.
Each plugin must be placed in the user or system plugins directories, and must define a class named 'Plugin'.
The Plugin class constructor (__init__) is passed one argument, which is the current instance of the Binwalk class.
The Plugin class constructor is called once prior to scanning a file or set of files.
The Plugin class destructor (__del__) is called once after scanning all files.
The Plugin class can define one or all of the following callback methods:
o pre_scan(self, fd)
This method is called prior to running a scan against a file. It is passed the file object of
the file about to be scanned.
o pre_parser(self, result)
This method is called every time any result - valid or invalid - is found in the file being scanned.
It is passed a dictionary with one key ('description'), which contains the raw string returned by libmagic.
The contents of this dictionary key may be modified as necessary by the plugin.
o callback(self, results)
This method is called every time a valid result is found in the file being scanned. It is passed a
dictionary of results. This dictionary is identical to that passed to Binwalk.single_scan's callback
function, and its contents may be modified as necessary by the plugin.
o post_scan(self, fd)
This method is called after running a scan against a file, but before the file has been closed.
It is passed the file object of the scanned file.
Values returned by pre_scan affect all results during the scan of that particular file.
Values returned by callback affect only that specific scan result.
Values returned by post_scan are ignored since the scan of that file has already been completed.
By default, all plugins are loaded during binwalk signature scans. Plugins that wish to be disabled by
default may create a class variable named 'ENABLED' and set it to False. If ENABLED is set to False, the
plugin will only be loaded if it is explicitly named in the plugins whitelist.
SCAN = 'scan'
PRESCAN = 'pre_scan'
POSTSCAN = 'post_scan'
PLUGIN = 'Plugin'
def __init__(self, parent=None):
self.scan = []
self.pre_scan = []
self.post_scan = []
self.parent = parent
self.settings = binwalk.core.settings.Settings()
def __del__(self):
def __enter__(self):
return self
def __exit__(self, t, v, traceback):
def _call_plugins(self, callback_list, arg):
for callback in callback_list:
except KeyboardInterrupt as e:
raise e
except Exception as e:
sys.stderr.write("WARNING: %s.%s failed: %s\n" % (callback.__module__, callback.__name__, e))
def list_plugins(self):
Obtain a list of all user and system plugin modules.
Returns a dictionary of:
'user' : {
'modules' : [list, of, module, names],
'descriptions' : {'module_name' : 'module pydoc string'},
'enabled' : {'module_name' : True},
'path' : "path/to/module/plugin/directory"
'system' : {
'modules' : [list, of, module, names],
'descriptions' : {'module_name' : 'module pydoc string'},
'enabled' : {'module_name' : True},
'path' : "path/to/module/plugin/directory"
plugins = {
'user' : {
'modules' : [],
'descriptions' : {},
'enabled' : {},
'path' : None,
'system' : {
'modules' : [],
'descriptions' : {},
'enabled' : {},
'path' : None,
for key in plugins.keys():
plugins[key]['path'] = self.settings.paths[key][self.settings.PLUGINS]
for file_name in os.listdir(plugins[key]['path']):
if file_name.endswith(self.MODULE_EXTENSION):
module = file_name[:-len(self.MODULE_EXTENSION)]
plugin = imp.load_source(module, os.path.join(plugins[key]['path'], file_name))
plugin_class = getattr(plugin, self.PLUGIN)
plugins[key]['enabled'][module] = True
plugins[key]['descriptions'][module] = plugin_class.__doc__.strip().split('\n')[0]
except KeyboardInterrupt as e:
raise e
except Exception as e:
plugins[key]['descriptions'][module] = 'No description'
return plugins
def load_plugins(self):
plugins = self.list_plugins()
def _load_plugin_modules(self, plugins):
for module in plugins['modules']:
file_path = os.path.join(plugins['path'], module + self.MODULE_EXTENSION)
plugin = imp.load_source(module, file_path)
plugin_class = getattr(plugin, self.PLUGIN)
class_instance = plugin_class(self.parent)
self.scan.append(getattr(class_instance, self.SCAN))
except KeyboardInterrupt as e:
raise e
except Exception as e:
self.pre_scan.append(getattr(class_instance, self.PRESCAN))
except KeyboardInterrupt as e:
raise e
except Exception as e:
self.post_scan.append(getattr(class_instance, self.POSTSCAN))
except KeyboardInterrupt as e:
raise e
except Exception as e:
except KeyboardInterrupt as e:
raise e
except Exception as e:
sys.stderr.write("WARNING: Failed to load plugin module '%s': %s\n" % (module, str(e)))
def pre_scan_callbacks(self, obj):
return self._call_plugins(self.pre_scan, obj)
def post_scan_callbacks(self, obj):
return self._call_plugins(self.post_scan, obj)
def scan_callbacks(self, obj):
return self._call_plugins(self.scan, obj)
Class to load and call plugin callback functions, handled automatically by Binwalk.scan / Binwalk.single_scan.
An instance of this class is available during a scan via the Binwalk.plugins object.
Each plugin must be placed in the user or system plugins directories, and must define a class named 'Plugin'.
The Plugin class constructor (__init__) is passed one argument, which is the current instance of the Binwalk class.
The Plugin class constructor is called once prior to scanning a file or set of files.
The Plugin class destructor (__del__) is called once after scanning all files.
The Plugin class can define one or all of the following callback methods:
o pre_scan(self, fd)
This method is called prior to running a scan against a file. It is passed the file object of
the file about to be scanned.
o pre_parser(self, result)
This method is called every time any result - valid or invalid - is found in the file being scanned.
It is passed a dictionary with one key ('description'), which contains the raw string returned by libmagic.
The contents of this dictionary key may be modified as necessary by the plugin.
o callback(self, results)
This method is called every time a valid result is found in the file being scanned. It is passed a
dictionary of results. This dictionary is identical to that passed to Binwalk.single_scan's callback
function, and its contents may be modified as necessary by the plugin.
o post_scan(self, fd)
This method is called after running a scan against a file, but before the file has been closed.
It is passed the file object of the scanned file.
Values returned by pre_scan affect all results during the scan of that particular file.
Values returned by callback affect only that specific scan result.
Values returned by post_scan are ignored since the scan of that file has already been completed.
By default, all plugins are loaded during binwalk signature scans. Plugins that wish to be disabled by
default may create a class variable named 'ENABLED' and set it to False. If ENABLED is set to False, the
plugin will only be loaded if it is explicitly named in the plugins whitelist.
SCAN = 'scan'
PRESCAN = 'pre_scan'
POSTSCAN = 'post_scan'
PLUGIN = 'Plugin'
def __init__(self, parent=None):
self.scan = []
self.pre_scan = []
self.post_scan = []
self.parent = parent
self.settings = binwalk.core.settings.Settings()
def __del__(self):
def __enter__(self):
return self
def __exit__(self, t, v, traceback):
def _call_plugins(self, callback_list, arg):
    '''
    Invokes each callback in callback_list with a single argument.

    @callback_list - List of callables (e.g., self.scan, self.pre_scan, self.post_scan).
    @arg           - The object to pass to each callback.

    A callback that raises is reported on stderr and skipped; the remaining
    callbacks still run. KeyboardInterrupt is always propagated.

    Returns None.
    '''
    for callback in callback_list:
        try:
            callback(arg)
        except KeyboardInterrupt:
            raise
        except Exception as e:
            sys.stderr.write("WARNING: %s.%s failed: %s\n" % (callback.__module__, callback.__name__, e))
def list_plugins(self):
Obtain a list of all user and system plugin modules.
Returns a dictionary of:
'user' : {
'modules' : [list, of, module, names],
'descriptions' : {'module_name' : 'module pydoc string'},
'enabled' : {'module_name' : True},
'path' : "path/to/module/plugin/directory"
'system' : {
'modules' : [list, of, module, names],
'descriptions' : {'module_name' : 'module pydoc string'},
'enabled' : {'module_name' : True},
'path' : "path/to/module/plugin/directory"
plugins = {
'user' : {
'modules' : [],
'descriptions' : {},
'enabled' : {},
'path' : None,
'system' : {
'modules' : [],
'descriptions' : {},
'enabled' : {},
'path' : None,
for key in plugins.keys():
plugins[key]['path'] = self.settings.paths[key][self.settings.PLUGINS]
for file_name in os.listdir(plugins[key]['path']):
if file_name.endswith(self.MODULE_EXTENSION):
module = file_name[:-len(self.MODULE_EXTENSION)]
plugin = imp.load_source(module, os.path.join(plugins[key]['path'], file_name))
plugin_class = getattr(plugin, self.PLUGIN)
plugins[key]['enabled'][module] = True
plugins[key]['descriptions'][module] = plugin_class.__doc__.strip().split('\n')[0]
except KeyboardInterrupt as e:
raise e
except Exception as e:
plugins[key]['descriptions'][module] = 'No description'
return plugins
def load_plugins(self):
plugins = self.list_plugins()
def _load_plugin_modules(self, plugins):
for module in plugins['modules']:
file_path = os.path.join(plugins['path'], module + self.MODULE_EXTENSION)
plugin = imp.load_source(module, file_path)
plugin_class = getattr(plugin, self.PLUGIN)
class_instance = plugin_class(self.parent)
self.scan.append(getattr(class_instance, self.SCAN))
except KeyboardInterrupt as e:
raise e
except Exception as e:
self.pre_scan.append(getattr(class_instance, self.PRESCAN))
except KeyboardInterrupt as e:
raise e
except Exception as e:
self.post_scan.append(getattr(class_instance, self.POSTSCAN))
except KeyboardInterrupt as e:
raise e
except Exception as e:
except KeyboardInterrupt as e:
raise e
except Exception as e:
sys.stderr.write("WARNING: Failed to load plugin module '%s': %s\n" % (module, str(e)))
# Passes obj, together with the list of registered pre_scan plugin callbacks
# (self.pre_scan), to self._call_plugins and returns its result.
def pre_scan_callbacks(self, obj):
return self._call_plugins(self.pre_scan, obj)
# Passes obj, together with the list of registered post_scan plugin callbacks
# (self.post_scan), to self._call_plugins and returns its result.
def post_scan_callbacks(self, obj):
return self._call_plugins(self.post_scan, obj)
# Passes obj, together with the list of registered scan plugin callbacks
# (self.scan), to self._call_plugins and returns its result.
def scan_callbacks(self, obj):
return self._call_plugins(self.scan, obj)
......@@ -4,310 +4,310 @@ from binwalk.core.compat import *
from binwalk.core.common import get_quoted_strings, MathExpression
class Tag(object):
def __init__(self, **kwargs): = None
self.keyword = None
self.type = None
self.handler = None
self.tag = None
self.default = None
for (k,v) in iterator(kwargs):
setattr(self, k, v)
if self.type == int:
self.default = 0
elif self.type == str:
self.default = ''
if self.keyword is not None:
self.tag = self.TAG_DELIM_START + self.keyword
if self.type is None:
self.tag += self.TAG_DELIM_END
self.tag += self.TAG_ARG_SEPERATOR
if self.handler is None:
if self.type == int:
self.handler = 'get_math_arg'
elif self.type == str:
self.handler = 'get_keyword_arg'
def __init__(self, **kwargs):
    '''
    Class constructor.

    @kwargs - Attribute values for this tag; each key is set as an instance attribute.
              Recognized attributes: name, keyword, type, handler, tag, default.

    Returns None.
    '''
    self.name = None
    self.keyword = None
    self.type = None
    self.handler = None
    self.tag = None
    self.default = None

    for (k, v) in kwargs.items():
        setattr(self, k, v)

    # The default value is derived from the tag's type.
    if self.type == int:
        self.default = 0
    elif self.type == str:
        self.default = ''

    if self.keyword is not None:
        self.tag = self.TAG_DELIM_START + self.keyword
        if self.type is None:
            # No argument expected: close the tag immediately.
            self.tag += self.TAG_DELIM_END
        else:
            # An argument follows the keyword.
            self.tag += self.TAG_ARG_SEPERATOR

    # Pick a handler based on the argument type unless one was given explicitly.
    if self.handler is None:
        if self.type == int:
            self.handler = 'get_math_arg'
        elif self.type == str:
            self.handler = 'get_keyword_arg'
class Signature(object):
Class for parsing smart signature tags in libmagic result strings.
This class is intended for internal use only, but a list of supported 'smart keywords' that may be used
in magic files is available via the SmartSignature.KEYWORDS dictionary:
from binwalk import SmartSignature
for tag in SmartSignature.TAGS:
print tag.keyword
TAGS = [
Tag(name='raw-string', keyword='raw-string', type=str, handler='parse_raw_string'),
Tag(name='string-len', keyword='string-len', type=str, handler='parse_string_len'),
Tag(name='math', keyword='math', type=int, handler='parse_math'),
Tag(name='one-of-many', keyword='one-of-many', handler='one_of_many'),
Tag(name='jump', keyword='jump-to-offset', type=int),
Tag(name='name', keyword='file-name', type=str),
Tag(name='size', keyword='file-size', type=int),
Tag(name='adjust', keyword='offset-adjust', type=int),
Tag(name='delay', keyword='extract-delay', type=str),
Tag(name='year', keyword='file-year', type=str),
Tag(name='epoch', keyword='file-epoch', type=int),
Tag(name='raw-size', keyword='raw-string-length', type=int),
Tag(name='raw-replace', keyword='raw-replace'),
Tag(name='string-len-replace', keyword='string-len'),
def __init__(self, filter, ignore_smart_signatures=False):
Class constructor.
@filter - Instance of the MagicFilter class.
@ignore_smart_signatures - Set to True to ignore smart signature keywords.
Returns None.
self.filter = filter
self.valid = True
self.last_one_of_many = None
self.ignore_smart_signatures = ignore_smart_signatures
def parse(self, data):
Parse a given data string for smart signature keywords. If any are found, interpret them and strip them.
@data - String to parse, as returned by libmagic.
Returns a dictionary of parsed values.
results = {}
self.valid = True
if data:
for tag in self.TAGS:
if tag.handler is not None:
(d, arg) = getattr(self, tag.handler)(data, tag)
if not self.ignore_smart_signatures:
data = d
if isinstance(arg, type(False)) and arg == False and not self.ignore_smart_signatures:
self.valid = False
elif tag.type is not None:
if self.ignore_smart_signatures:
results[] = tag.default
results[] = arg
if self.ignore_smart_signatures:
results['description'] = data
results['description'] = self.strip_tags(data)
self.valid = False
results['valid'] = self.valid
return binwalk.core.module.Result(**results)
def tag_lookup(self, keyword):
for tag in self.TAGS:
if tag.keyword == keyword:
return tag
return None
def is_valid(self, data):
Validates that result data does not contain smart keywords in file-supplied strings.
@data - Data string to validate.
Returns True if data is OK.
Returns False if data is not OK.
# All strings printed from the target file should be placed in strings, else there is
# no way to distinguish between intended keywords and unintended keywords. Get all the
# quoted strings.
quoted_data = get_quoted_strings(data)
# Check to see if there was any quoted data, and if so, if it contained the keyword starting delimiter
if quoted_data and Tag.TAG_DELIM_START in quoted_data:
# If so, check to see if the quoted data contains any of our keywords.
# If any keywords are found inside of quoted data, consider the keywords invalid.
for tag in self.TAGS:
if tag.tag in quoted_data:
return False
return True
def safe_string(self, data):
Strips out quoted data (i.e., data taken directly from a file).
quoted_string = get_quoted_strings(data)
if quoted_string:
data = data.replace(quoted_string, "")
return data
def one_of_many(self, data, tag):
Determines if a given data string is one result of many.
@data - String result data.
Returns False if the string result is one of many and should not be displayed.
Returns True if the string result is not one of many and should be displayed.
if self.filter.valid_result(data):
if self.last_one_of_many is not None and data.startswith(self.last_one_of_many):
return (data, False)
if tag.tag in data:
# Only match on the data before the first comma, as that is typically unique and static
self.last_one_of_many = data.split(',')[0]
self.last_one_of_many = None
return (data, True)
def get_keyword_arg(self, data, tag):
Retrieves the argument for keywords that specify arguments.
@data - String result data, as returned by libmagic.
@keyword - Keyword index in KEYWORDS.
Returns the argument string value on success.
Returns a blank string on failure.
arg = ''
safe_data = self.safe_string(data)
if tag.tag in safe_data:
arg = safe_data.split(tag.tag)[1].split(tag.TAG_DELIM_END)[0]
return (data, arg)
def get_math_arg(self, data, tag):
Retrieves the argument for keywords that specifiy mathematical expressions as arguments.
@data - String result data, as returned by libmagic.
@keyword - Keyword index in KEYWORDS.
Returns the resulting calculated value.
value = 0
(data, arg) = self.get_keyword_arg(data, tag)
if arg:
value = MathExpression(arg).value
if value is None:
value = 0
self.valid = False
return (data, value)
def parse_math(self, data, tag):
Replace math keywords with the requested values.
@data - String result data.
Returns the modified string result data.
while tag.keyword in data:
(data, arg) = self.get_keyword_arg(data,
v = '%s%s%s' % (tag.keyword, arg, self.TAG_DELIM_END)
math_value = "%d" % self.get_math_arg(data,
data = data.replace(v, math_value)
return (data, None)
def parse_raw_string(self, data, raw_str_tag):
Process strings that aren't NULL byte terminated, but for which we know the string length.
This should be called prior to any other smart parsing functions.
@data - String to parse.
Returns a parsed string.
if self.is_valid(data):
raw_str_length_tag = self.tag_lookup('raw-string-length')
raw_replace_tag = self.tag_lookup('raw-replace')
# Get the raw string keyword arg
(data, raw_string) = self.get_keyword_arg(data, raw_str_tag)
# Was a raw string keyword specified?
if raw_string:
# Get the raw string length arg
(data, raw_size) = self.get_math_arg(data, raw_str_length_tag)
# Replace all instances of raw-replace in data with raw_string[:raw_size]
# Also strip out everything after the raw-string keyword, including the keyword itself.
# Failure to do so may (will) result in non-printable characters and this string will be
# marked as invalid when it shouldn't be.
data = data[:data.find(raw_str_tag.tag)].replace(raw_replace_tag.tag, '"' + raw_string[:raw_size] + '"')
return (data, True)
def parse_string_len(self, data, str_len_tag):
Process {string-len} macros.
@data - String to parse.
Returns parsed data string.
if not self.ignore_smart_signatures and self.is_valid(data):
str_len_replace_tag = self.tag_lookup('string-len-replace')
# Get the raw string keyword arg
(data, raw_string) = self.get_keyword_arg(data, str_len_tag)
# Was a string-len keyword specified?
if raw_string:
# Get the string length
string_length = '%d' % len(raw_string)
except KeyboardInterrupt as e:
raise e
except Exception:
string_length = '0'
# Strip out *everything* after the string-len keyword, including the keyword itself.
# Failure to do so can potentially allow keyword injection from a maliciously created file.
data = data.split(str_len_tag.tag)[0].replace(str_len_replace_tag.tag, string_length)
return (data, True)
def strip_tags(self, data):
Strips the smart tags from a result string.
@data - String result data.
Returns a sanitized string.
if not self.ignore_smart_signatures:
for tag in self.TAGS:
start = data.find(tag.tag)
if start != -1:
end = data[start:].find(tag.TAG_DELIM_END)
if end != -1:
data = data.replace(data[start:start+end+1], "")
return data
Class for parsing smart signature tags in libmagic result strings.
This class is intended for internal use only, but a list of supported 'smart keywords' that may be used
in magic files is available via the SmartSignature.KEYWORDS dictionary:
from binwalk import SmartSignature
for tag in SmartSignature.TAGS:
print tag.keyword
TAGS = [
Tag(name='raw-string', keyword='raw-string', type=str, handler='parse_raw_string'),
Tag(name='string-len', keyword='string-len', type=str, handler='parse_string_len'),
Tag(name='math', keyword='math', type=int, handler='parse_math'),
Tag(name='one-of-many', keyword='one-of-many', handler='one_of_many'),
Tag(name='jump', keyword='jump-to-offset', type=int),
Tag(name='name', keyword='file-name', type=str),
Tag(name='size', keyword='file-size', type=int),
Tag(name='adjust', keyword='offset-adjust', type=int),
Tag(name='delay', keyword='extract-delay', type=str),
Tag(name='year', keyword='file-year', type=str),
Tag(name='epoch', keyword='file-epoch', type=int),
Tag(name='raw-size', keyword='raw-string-length', type=int),
Tag(name='raw-replace', keyword='raw-replace'),
Tag(name='string-len-replace', keyword='string-len'),
def __init__(self, filter, ignore_smart_signatures=False):
    '''
    Class constructor.

    @filter                  - Instance of the MagicFilter class.
    @ignore_smart_signatures - Set to True to ignore smart signature keywords.

    Returns None.
    '''
    # Caller-supplied configuration.
    self.filter, self.ignore_smart_signatures = filter, ignore_smart_signatures

    # Per-result parsing state.
    self.valid, self.last_one_of_many = True, None
def parse(self, data):
    '''
    Parse a given data string for smart signature keywords. If any are found, interpret them and strip them.

    @data - String to parse, as returned by libmagic.

    Returns a binwalk.core.module.Result built from the parsed values. It always
    carries 'valid', carries 'description' when data is non-empty, and carries one
    entry per typed tag (keyed by the tag's name).
    '''
    results = {}
    self.valid = True

    if data:
        for tag in self.TAGS:
            if tag.handler is None:
                continue

            (d, arg) = getattr(self, tag.handler)(data, tag)

            # Handlers may rewrite the description; only honor that when smart
            # signatures are enabled.
            if not self.ignore_smart_signatures:
                data = d

            if arg is False and not self.ignore_smart_signatures:
                # A handler returning exactly False marks the result as invalid.
                self.valid = False
            elif tag.type is not None:
                if self.ignore_smart_signatures:
                    results[tag.name] = tag.default
                else:
                    results[tag.name] = arg

        if self.ignore_smart_signatures:
            results['description'] = data
        else:
            results['description'] = self.strip_tags(data)
    else:
        # Nothing to parse means there is no valid result.
        self.valid = False

    results['valid'] = self.valid

    return binwalk.core.module.Result(**results)
def tag_lookup(self, keyword):
    '''
    Finds the first tag in self.TAGS whose keyword equals the given keyword.

    @keyword - Keyword string to search for.

    Returns the matching tag, or None if no tag uses that keyword.
    '''
    return next((candidate for candidate in self.TAGS if candidate.keyword == keyword), None)
def is_valid(self, data):
    '''
    Validates that result data does not contain smart keywords in file-supplied strings.

    @data - Data string to validate.

    Returns True if data is OK.
    Returns False if data is not OK.
    '''
    # Strings printed from the target file are expected to be quoted; otherwise
    # intended keywords cannot be told apart from file-supplied ones.
    quoted = get_quoted_strings(data)

    # No quoted data, or no tag opening delimiter inside it: nothing to reject.
    if not quoted or Tag.TAG_DELIM_START not in quoted:
        return True

    # Any known tag found inside quoted data makes the result invalid.
    return not any(tag.tag in quoted for tag in self.TAGS)
def safe_string(self, data):
    '''
    Strips out quoted data (i.e., data taken directly from a file).

    @data - String result data.

    Returns data with the quoted portion removed, or data unchanged if
    get_quoted_strings finds nothing.
    '''
    quoted = get_quoted_strings(data)
    return data.replace(quoted, "") if quoted else data
def one_of_many(self, data, tag):
    '''
    Determines if a given data string is one result of many.

    @data - String result data.
    @tag  - The one-of-many tag object.

    Returns a tuple of (data, display), where data is unmodified and display is:
        False if the string result is one of many and should not be displayed.
        True if the string result is not one of many and should be displayed.
    '''
    if self.filter.valid_result(data):
        # A repeat of the previously seen one-of-many result is suppressed.
        if self.last_one_of_many is not None and data.startswith(self.last_one_of_many):
            return (data, False)

        if tag.tag in data:
            # Only match on the data before the first comma, as that is typically unique and static
            self.last_one_of_many = data.split(',')[0]
        else:
            # A different kind of result ends the current one-of-many run.
            self.last_one_of_many = None

    return (data, True)
def get_keyword_arg(self, data, tag):
    '''
    Retrieves the argument for keywords that specify arguments.

    @data - String result data, as returned by libmagic.
    @tag  - The tag object whose argument should be extracted.

    Returns a tuple of (data, arg), where data is unmodified and arg is the
    argument string on success or a blank string on failure.
    '''
    # Only look for the tag outside of quoted (file-supplied) data.
    unquoted = self.safe_string(data)

    if tag.tag not in unquoted:
        return (data, '')

    # The argument is everything between the tag and the closing delimiter.
    after_tag = unquoted.split(tag.tag)[1]
    return (data, after_tag.split(tag.TAG_DELIM_END)[0])
def get_math_arg(self, data, tag):
    '''
    Retrieves the argument for keywords that specify mathematical expressions as arguments.

    @data - String result data, as returned by libmagic.
    @tag  - The tag object whose argument should be evaluated.

    Returns a tuple of (data, value), where value is the calculated result,
    or 0 if there was no argument or it could not be evaluated.
    '''
    (data, expression) = self.get_keyword_arg(data, tag)

    if not expression:
        return (data, 0)

    value = MathExpression(expression).value
    if value is None:
        # The expression could not be evaluated; flag the whole result as invalid.
        value = 0
        self.valid = False

    return (data, value)
def parse_math(self, data, tag):
    '''
    Replace math keywords with the requested values.

    @data - String result data.
    @tag  - The math tag object.

    Returns a tuple of (data, None), where data has every complete math tag
    replaced by its evaluated integer value.
    '''
    # Look for the full opening tag, not the bare keyword, so that ordinary
    # words containing the keyword do not trigger (or endlessly repeat) parsing.
    while tag.tag in data:
        (data, arg) = self.get_keyword_arg(data, tag)
        expression = tag.tag + arg + tag.TAG_DELIM_END

        # The remaining tag is unterminated or sits inside quoted data; there is
        # nothing safe to replace, so stop rather than loop forever.
        if expression not in data:
            break

        (data, value) = self.get_math_arg(data, tag)
        data = data.replace(expression, "%d" % value)

    return (data, None)
def parse_raw_string(self, data, raw_str_tag):
    '''
    Process strings that aren't NULL byte terminated, but for which we know the string length.
    This should be called prior to any other smart parsing functions.

    @data        - String to parse.
    @raw_str_tag - The raw-string tag object.

    Returns a tuple of (parsed string, True).
    '''
    if not self.is_valid(data):
        return (data, True)

    length_tag = self.tag_lookup('raw-string-length')
    replace_tag = self.tag_lookup('raw-replace')

    # Get the raw string keyword arg
    (data, raw_string) = self.get_keyword_arg(data, raw_str_tag)

    # Only act if a raw string keyword was actually specified
    if raw_string:
        # Get the raw string length arg
        (data, raw_size) = self.get_math_arg(data, length_tag)

        # Drop everything from the raw-string keyword onward (keyword included);
        # leaving it in place may put non-printable characters into the result
        # and get the string wrongly marked as invalid.
        head = data[:data.find(raw_str_tag.tag)]

        # Substitute every raw-replace marker with the quoted, truncated string.
        data = head.replace(replace_tag.tag, '"' + raw_string[:raw_size] + '"')

    return (data, True)
def parse_string_len(self, data, str_len_tag):
    '''
    Process {string-len} macros.

    @data        - String to parse.
    @str_len_tag - The string-len tag object.

    Returns a tuple of (parsed data string, True).
    '''
    if not self.ignore_smart_signatures and self.is_valid(data):
        # Get the string-len keyword arg
        (data, raw_string) = self.get_keyword_arg(data, str_len_tag)

        # Was a string-len keyword specified?
        if raw_string:
            string_length = '%d' % len(raw_string)

            # tag_lookup() matches on keyword, and the replacement tag shares the
            # 'string-len' keyword, so this lookup may well return None. Fall back
            # to the argument-less form of the tag in that case.
            replace_tag = self.tag_lookup('string-len-replace')
            if replace_tag is not None and replace_tag.tag:
                marker = replace_tag.tag
            else:
                marker = str_len_tag.TAG_DELIM_START + str_len_tag.keyword + str_len_tag.TAG_DELIM_END

            # Strip out *everything* after the string-len keyword, including the keyword itself.
            # Failure to do so can potentially allow keyword injection from a maliciously created file.
            data = data.split(str_len_tag.tag)[0].replace(marker, string_length)

    return (data, True)
def strip_tags(self, data):
    '''
    Strips the smart tags from a result string.

    @data - String result data.

    Returns a sanitized string. Data is returned untouched when smart
    signatures are being ignored.
    '''
    if self.ignore_smart_signatures:
        return data

    for tag in self.TAGS:
        begin = data.find(tag.tag)
        if begin == -1:
            continue

        # Distance from the tag start to its closing delimiter.
        span = data[begin:].find(tag.TAG_DELIM_END)
        if span == -1:
            continue

        # Remove every occurrence of this exact tag text (delimiters included).
        data = data.replace(data[begin:begin + span + 1], "")

    return data
0 belong x Hex: 0x%.8X
#0 string x String: %s
#0 lequad x Little Endian Quad: %lld
#0 bequad x Big Endian Quad: %lld
0 lelong x Little Endian Long: %d
0 belong x Big Endian Long: %d
0 leshort x Little Endian Short: %d
0 beshort x Big Endian Short: %d
0 ledate x Little Endian Date: %s
0 bedate x Big Endian Date: %s
......@@ -4,300 +4,300 @@ from binwalk.core.common import BlockFile
from binwalk.core.module import Module, Option, Kwarg
class Plotter(Module):
Base class for visualizing binaries in Qt.
Other plotter classes are derived from this.
TITLE = "Binary Visualization"
CLI = [
kwargs={'axis' : 3, 'enabled' : True},
description='Generate a 3D binary visualization'),
kwargs={'axis' : 2, 'enabled' : True},
description='Project data points onto 3D cube walls only'),
kwargs={'max_points' : 0},
description='Set the maximum number of plotted data points'),
kwargs={'show_grids' : True},
description='Display the x-y-z grids in the resulting plot'),
Kwarg(name='axis', default=3),
Kwarg(name='max_points', default=0),
Kwarg(name='show_grids', default=False),
Kwarg(name='enabled', default=False),
# There isn't really any useful data to print to console. Disable header and result output.
def init(self):
import pyqtgraph.opengl as gl
from pyqtgraph.Qt import QtGui
self.verbose = self.config.verbose
self.offset = self.config.offset
self.length = self.config.length
self.plane_count = -1
self.plot_points = None
if self.axis == 2:
self._generate_data_point = self._generate_2d_data_point
elif self.axis == 3:
self._generate_data_point = self._generate_3d_data_point
raise Exception("Invalid Plotter axis specified: %d. Must be one of: [2,3]" % self.axis)
if not self.max_points:
self.max_points = self.MAX_PLOT_POINTS = QtGui.QApplication([])
self.window = gl.GLViewWidget()
self.window.opts['distance'] = self.VIEW_DISTANCE
if len(self.config.target_files) == 1:
def _print(self, message):
Print console messages. For internal use only.
if self.verbose:
def _generate_plot_points(self, data_points):
Generates plot points from a list of data points.
@data_points - A dictionary containing each unique point and its frequency of occurance.
Returns a set of plot points.
total = 0
min_weight = 0
weightings = {}
plot_points = {}
# If the number of data points exceeds the maximum number of allowed data points, use a
# weighting system to eliminate data points that occur less freqently.
if sum(data_points.itervalues()) > self.max_points:
# First, generate a set of weight values 1 - 10
for i in range(1, 11):
weightings[i] = 0
# Go through every data point and how many times that point occurs
for (point, count) in iterator(data_points):
# For each data point, compare it to each remaining weight value
for w in get_keys(weightings):
# If the number of times this data point occurred is >= the weight value,
# then increment the weight value. Since weight values are ordered lowest
# to highest, this means that more frequent data points also increment lower
# weight values. Thus, the more high-frequency data points there are, the
# more lower-frequency data points are eliminated.
if count >= w:
weightings[w] += 1
# Throw out weight values that exceed the maximum number of data points
if weightings[w] > self.max_points:
del weightings[w]
# If there's only one weight value left, no sense in continuing the loop...
if len(weightings) == 1:
# The least weighted value is our minimum weight
min_weight = min(weightings)
# Get rid of all data points that occur less frequently than our minimum weight
for point in get_keys(data_points):
if data_points[point] < min_weight:
del data_points[point]
for point in sorted(data_points, key=data_points.get, reverse=True):
plot_points[point] = data_points[point]
total += 1
if total >= self.max_points:
return plot_points
def _generate_data_point(self, data):
    '''
    Placeholder data point generator; subclasses must override this to return
    the appropriate data point.

    @data - A string of data self.axis in length.

    Returns a data point tuple (always the origin in this base implementation).
    '''
    origin = (0, 0, 0)
    return origin
def _generate_data_points(self, fp):
Generates a dictionary of data points and their frequency of occurrance.
@fp - The BlockFile object to generate data points from.
Returns a dictionary.
i = 0
data_points = {}
self._print("Generating data points for %s" %
# We don't need any extra data from BlockFile
while True:
(data, dlen) = fp.read_block()
if not data or not dlen:
i = 0
while (i+(self.axis-1)) < dlen:
point = self._generate_data_point(data[i:i+self.axis])
if has_key(data_points, point):
data_points[point] += 1
data_points[point] = 1
i += 3
return data_points
def _generate_plot(self, plot_points):
import numpy as np
import pyqtgraph.opengl as gl
nitems = float(len(plot_points))
pos = np.empty((nitems, 3))
size = np.empty((nitems))
color = np.empty((nitems, 4))
i = 0
for (point, weight) in iterator(plot_points):
r = 0.0
g = 0.0
b = 0.0
pos[i] = point
frequency_percentage = (weight / nitems)
# Give points that occur more frequently a brighter color and larger point size.
# Frequency is determined as a percentage of total unique data points.
if frequency_percentage > .005:
size[i] = .20
r = 1.0
elif frequency_percentage > .002:
size[i] = .10
g = 1.0
r = 1.0
size[i] = .05
g = 1.0
color[i] = (r, g, b, 1.0)
i += 1
scatter_plot = gl.GLScatterPlotItem(pos=pos, size=size, color=color, pxMode=False)
scatter_plot.translate(-127.5, -127.5, -127.5)
return scatter_plot
def plot(self, wait=True):
import pyqtgraph.opengl as gl
if self.show_grids:
xgrid = gl.GLGridItem()
ygrid = gl.GLGridItem()
zgrid = gl.GLGridItem()
# Rotate x and y grids to face the correct direction
xgrid.rotate(90, 0, 1, 0)
ygrid.rotate(90, 1, 0, 0)
# Scale grids to the appropriate dimensions
xgrid.scale(12.8, 12.8, 12.8)
ygrid.scale(12.8, 12.8, 12.8)
zgrid.scale(12.8, 12.8, 12.8)
for fd in iter(self.next_file, None):
data_points = self._generate_data_points(fd)
self._print("Generating plot points from %d data points" % len(data_points))
self.plot_points = self._generate_plot_points(data_points)
del data_points
self._print("Generating graph from %d plot points" % len(self.plot_points))
if wait:
def wait(self):
from pyqtgraph.Qt import QtCore, QtGui
t = QtCore.QTimer()
def _generate_3d_data_point(self, data):
Plot data points within a 3D cube.
return (ord(data[0]), ord(data[1]), ord(data[2]))
def _generate_2d_data_point(self, data):
Plot data points projected on each cube face.
self.plane_count += 1
if self.plane_count > 5:
self.plane_count = 0
if self.plane_count == 0:
return (0, ord(data[0]), ord(data[1]))
elif self.plane_count == 1:
return (ord(data[0]), 0, ord(data[1]))
elif self.plane_count == 2:
return (ord(data[0]), ord(data[1]), 0)
elif self.plane_count == 3:
return (255, ord(data[0]), ord(data[1]))
elif self.plane_count == 4:
return (ord(data[0]), 255, ord(data[1]))
elif self.plane_count == 5:
return (ord(data[0]), ord(data[1]), 255)
def run(self):
return True
Base class for visualizing binaries in Qt.
Other plotter classes are derived from this.
TITLE = "Binary Visualization"
CLI = [
kwargs={'axis' : 3, 'enabled' : True},
description='Generate a 3D binary visualization'),
kwargs={'axis' : 2, 'enabled' : True},
description='Project data points onto 3D cube walls only'),
kwargs={'max_points' : 0},
description='Set the maximum number of plotted data points'),
kwargs={'show_grids' : True},
description='Display the x-y-z grids in the resulting plot'),
Kwarg(name='axis', default=3),
Kwarg(name='max_points', default=0),
Kwarg(name='show_grids', default=False),
Kwarg(name='enabled', default=False),
# There isn't really any useful data to print to console. Disable header and result output.
def init(self):
import pyqtgraph.opengl as gl
from pyqtgraph.Qt import QtGui
self.verbose = self.config.verbose
self.offset = self.config.offset
self.length = self.config.length
self.plane_count = -1
self.plot_points = None
if self.axis == 2:
self._generate_data_point = self._generate_2d_data_point
elif self.axis == 3:
self._generate_data_point = self._generate_3d_data_point
raise Exception("Invalid Plotter axis specified: %d. Must be one of: [2,3]" % self.axis)
if not self.max_points:
self.max_points = self.MAX_PLOT_POINTS = QtGui.QApplication([])
self.window = gl.GLViewWidget()
self.window.opts['distance'] = self.VIEW_DISTANCE
if len(self.config.target_files) == 1:
def _print(self, message):
Print console messages. For internal use only.
if self.verbose:
def _generate_plot_points(self, data_points):
    '''
    Generates plot points from a dictionary of data points.

    @data_points - A dictionary containing each unique point and its frequency of occurrence.
                   NOTE: when filtering is needed, low-frequency entries are deleted from
                   this dictionary in place.

    Returns a dictionary of plot points (point -> frequency). Points are taken in order of
    decreasing frequency and collection stops once self.max_points entries have been added.
    '''
    total = 0
    min_weight = 0
    weightings = {}
    plot_points = {}

    # If the number of data points exceeds the maximum number of allowed data points, use a
    # weighting system to eliminate data points that occur less frequently.
    # (dict.values() is used rather than itervalues() so this runs on Python 2 and 3.)
    if sum(data_points.values()) > self.max_points:

        # First, generate a set of weight values 1 - 10
        for i in range(1, 11):
            weightings[i] = 0

        # Go through every data point and how many times that point occurs
        for count in list(data_points.values()):
            # Compare the count to each remaining weight value, explicitly lowest to highest.
            for w in sorted(weightings):
                # Weight values are visited in ascending order, so once the count is below
                # one weight it is below all the remaining ones as well.
                if count < w:
                    break

                # More frequent data points also increment lower weight values. Thus, the
                # more high-frequency data points there are, the more lower-frequency data
                # points are eliminated.
                weightings[w] += 1

                # Throw out weight values that exceed the maximum number of data points,
                # but always keep the last one so that min() below has a value to return.
                if weightings[w] > self.max_points and len(weightings) > 1:
                    del weightings[w]

            # If there's only one weight value left, no sense in continuing the loop...
            if len(weightings) == 1:
                break

        # The least weighted value is our minimum weight
        min_weight = min(weightings)

        # Get rid of all data points that occur less frequently than our minimum weight.
        # Iterate over a snapshot of the keys since entries are deleted along the way.
        for point in list(data_points):
            if data_points[point] < min_weight:
                del data_points[point]

    # Keep the most frequent points first, stopping at the configured maximum.
    for point in sorted(data_points, key=data_points.get, reverse=True):
        plot_points[point] = data_points[point]
        total += 1
        if total >= self.max_points:
            break

    return plot_points
def _generate_data_point(self, data):
    '''
    Subclasses must override this to return the appropriate data point.

    @data - A string of data self.axis in length.

    Returns a data point tuple. This default implementation ignores @data and
    always returns the origin, (0,0,0).
    '''
    return (0,0,0)
def _generate_data_points(self, fp):
Generates a dictionary of data points and their frequency of occurrence.
@fp - The BlockFile object to generate data points from.
Returns a dictionary.
i = 0
data_points = {}
self._print("Generating data points for %s" %
# We don't need any extra data from BlockFile
while True:
(data, dlen) = fp.read_block()
if not data or not dlen:
i = 0
while (i+(self.axis-1)) < dlen:
point = self._generate_data_point(data[i:i+self.axis])
if has_key(data_points, point):
data_points[point] += 1
data_points[point] = 1
i += 3
return data_points
def _generate_plot(self, plot_points):
    '''
    Builds a pyqtgraph scatter plot item from a set of plot points.

    @plot_points - A dictionary mapping each (x, y, z) point to its frequency.

    Returns a gl.GLScatterPlotItem, translated by -127.5 on every axis.
    '''
    import numpy as np
    import pyqtgraph.opengl as gl

    # Array shapes must be integers; a float count is kept only for the division below.
    nitems = len(plot_points)
    total = float(nitems)

    pos = np.empty((nitems, 3))
    size = np.empty((nitems))
    color = np.empty((nitems, 4))

    for (i, (point, weight)) in enumerate(plot_points.items()):
        r = 0.0
        g = 0.0
        b = 0.0

        pos[i] = point
        frequency_percentage = (weight / total)

        # Give points that occur more frequently a brighter color and larger point size.
        # Frequency is determined as a percentage of total unique data points.
        if frequency_percentage > .005:
            size[i] = .20
            r = 1.0
        elif frequency_percentage > .002:
            size[i] = .10
            g = 1.0
            r = 1.0
        else:
            size[i] = .05
            g = 1.0

        color[i] = (r, g, b, 1.0)

    scatter_plot = gl.GLScatterPlotItem(pos=pos, size=size, color=color, pxMode=False)
    # NOTE(review): -127.5 is half of 255, presumably to centre the 0-255 byte cube on the origin.
    scatter_plot.translate(-127.5, -127.5, -127.5)

    return scatter_plot
def plot(self, wait=True):
import pyqtgraph.opengl as gl
if self.show_grids:
xgrid = gl.GLGridItem()
ygrid = gl.GLGridItem()
zgrid = gl.GLGridItem()
# Rotate x and y grids to face the correct direction
xgrid.rotate(90, 0, 1, 0)
ygrid.rotate(90, 1, 0, 0)
# Scale grids to the appropriate dimensions
xgrid.scale(12.8, 12.8, 12.8)
ygrid.scale(12.8, 12.8, 12.8)
zgrid.scale(12.8, 12.8, 12.8)
for fd in iter(self.next_file, None):
data_points = self._generate_data_points(fd)
self._print("Generating plot points from %d data points" % len(data_points))
self.plot_points = self._generate_plot_points(data_points)
del data_points
self._print("Generating graph from %d plot points" % len(self.plot_points))
if wait:
def wait(self):
from pyqtgraph.Qt import QtCore, QtGui
t = QtCore.QTimer()
def _generate_3d_data_point(self, data):
    '''
    Plot data points within a 3D cube.

    @data - A string of at least three characters.

    Returns an (x, y, z) tuple made of the ordinal values of the first three characters.
    '''
    return (ord(data[0]), ord(data[1]), ord(data[2]))
def _generate_2d_data_point(self, data):
    '''
    Plot data points projected on each cube face.

    Every call advances self.plane_count to the next of six faces (wrapping from 5 back
    to 0), so consecutive points are spread across all faces of the cube.

    @data - A string of at least two characters.

    Returns an (x, y, z) tuple with one coordinate pinned to 0 or 255.
    '''
    self.plane_count += 1
    if self.plane_count > 5:
        self.plane_count = 0

    # Mirrors the original if/elif chain, which returned None for any other value.
    if not 0 <= self.plane_count <= 5:
        return None

    a = ord(data[0])
    b = ord(data[1])

    # One entry per cube face, indexed by plane_count.
    faces = (
        (0, a, b),
        (a, 0, b),
        (a, b, 0),
        (255, a, b),
        (a, 255, b),
        (a, b, 255),
    )
    return faces[self.plane_count]
def run(self):
return True
......@@ -5,87 +5,87 @@ import binwalk.core.C
from binwalk.core.module import Option, Kwarg, Module
class Deflate(object):
Finds and extracts raw deflate compression streams.
Finds and extracts raw deflate compression streams.
BLOCK_SIZE = 33*1024
# To prevent many false positives, only show data that decompressed to a reasonable size and didn't just result in a bunch of NULL bytes
DESCRIPTION = "Raw deflate compression stream"
BLOCK_SIZE = 33*1024
# To prevent many false positives, only show data that decompressed to a reasonable size and didn't just result in a bunch of NULL bytes
DESCRIPTION = "Raw deflate compression stream"
TINFL_NAME = "tinfl"
TINFL_NAME = "tinfl"
binwalk.core.C.Function(name="is_deflated", type=int),
binwalk.core.C.Function(name="inflate_raw_file", type=None),
binwalk.core.C.Function(name="is_deflated", type=int),
binwalk.core.C.Function(name="inflate_raw_file", type=None),
def __init__(self, module):
self.module = module
def __init__(self, module):
self.module = module
# The tinfl library is built and installed with binwalk
self.tinfl = binwalk.core.C.Library(self.TINFL_NAME, self.TINFL_FUNCTIONS)
# Add an extraction rule
if self.module.extractor.enabled:
self.module.extractor.add_rule(regex='^%s' % self.DESCRIPTION.lower(), extension="deflate", cmd=self._extractor)
# The tinfl library is built and installed with binwalk
self.tinfl = binwalk.core.C.Library(self.TINFL_NAME, self.TINFL_FUNCTIONS)
# Add an extraction rule
if self.module.extractor.enabled:
self.module.extractor.add_rule(regex='^%s' % self.DESCRIPTION.lower(), extension="deflate", cmd=self._extractor)
def _extractor(self, file_name):
out_file = os.path.splitext(file_name)[0]
self.tinfl.inflate_raw_file(file_name, out_file)
def _extractor(self, file_name):
out_file = os.path.splitext(file_name)[0]
self.tinfl.inflate_raw_file(file_name, out_file)
def decompress(self, data):
    '''
    Checks whether a block of data looks like a raw deflate stream.

    @data - The data to test.

    Returns a description string if self.tinfl.is_deflated reports a size of at least
    self.MIN_DECOMP_SIZE for @data.
    Returns None otherwise.
    '''
    description = None

    decomp_size = self.tinfl.is_deflated(data, len(data), 0)
    if decomp_size >= self.MIN_DECOMP_SIZE:
        description = self.DESCRIPTION + ', uncompressed size >= %d' % decomp_size

    return description
class RawCompression(Module):
'deflate' : Deflate,
'deflate' : Deflate,
TITLE = 'Raw Compression'
TITLE = 'Raw Compression'
CLI = [
kwargs={'enabled' : True, 'decompressor_class' : 'deflate'},
description='Scan for raw deflate compression streams'),
CLI = [
kwargs={'enabled' : True, 'decompressor_class' : 'deflate'},
description='Scan for raw deflate compression streams'),
Kwarg(name='enabled', default=False),
Kwarg(name='decompressor_class', default=None),
Kwarg(name='enabled', default=False),
Kwarg(name='decompressor_class', default=None),
def init(self):
self.decompressor = self.DECOMPRESSORS[self.decompressor_class](self)
def init(self):
self.decompressor = self.DECOMPRESSORS[self.decompressor_class](self)
def run(self):
for fp in iter(self.next_file, None):
def run(self):
for fp in iter(self.next_file, None):
while True:
(data, dlen) = fp.read_block()
if not data:
while True:
(data, dlen) = fp.read_block()
if not data:
for i in range(0, dlen):
description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE])
if description:
self.result(description=description, file=fp, offset=fp.tell()-dlen+i)
for i in range(0, dlen):
description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE])
if description:
self.result(description=description, file=fp, offset=fp.tell()-dlen+i)
self.status.completed = fp.tell() - fp.offset
self.status.completed = fp.tell() - fp.offset
......@@ -6,192 +6,192 @@ from binwalk.core.module import Module, Option, Kwarg
class Entropy(Module):
XLABEL = 'Offset'
YLABEL = 'Entropy'
COLORS = ['r', 'g', 'c', 'b', 'm']
TITLE = "Entropy"
CLI = [
kwargs={'enabled' : True},
description='Calculate file entropy'),
kwargs={'save_plot' : True},
description='Save plot as a PNG'),
kwargs={'do_plot' : False},
description='Do not generate an entropy plot graph'),
kwargs={'show_legend' : False},
description='Omit the legend from the entropy plot graph'),
Kwarg(name='enabled', default=False),
Kwarg(name='save_plot', default=False),
Kwarg(name='display_results', default=True),
Kwarg(name='do_plot', default=True),
Kwarg(name='show_legend', default=True),
Kwarg(name='block_size', default=0),
# Run this module last so that it can process all other module's results and overlay them on the entropy graph
def init(self):
self.HEADER[-1] = "ENTROPY"
self.algorithm = self.shannon
self.max_description_length = 0
self.file_markers = {}
# Get a list of all other module's results to mark on the entropy graph
for (module, obj) in iterator(self.modules):
for result in obj.results:
if result.file and result.description:
description = result.description.split(',')[0]
if not has_key(self.file_markers,
self.file_markers[] = []
if len(description) > self.max_description_length:
self.max_description_length = len(description)
self.file_markers[].append((result.offset, description))
# If other modules have been run and they produced results, don't spam the terminal with entropy results
if self.file_markers:
self.display_results = False
if not self.block_size:
if self.config.block:
self.block_size = self.config.block
self.block_size = self.DEFAULT_BLOCK_SIZE
def run(self):
from pyqtgraph.Qt import QtGui
for fp in iter(self.next_file, None):
if self.display_results:
if self.display_results:
if self.do_plot and not self.save_plot:
def calculate_file_entropy(self, fp):
# Clear results from any previously analyzed files
while True:
file_offset = fp.tell()
(data, dlen) = fp.read_block()
if not data:
i = 0
while i < dlen:
entropy = self.algorithm(data[i:i+self.block_size])
r = self.result(offset=(file_offset + i), file=fp, entropy=entropy, description=("%f" % entropy), display=self.display_results)
i += self.block_size
if self.do_plot:
def shannon(self, data):
Performs a Shannon entropy analysis on a given block of data.
entropy = 0
if data:
for x in range(0, 256):
p_x = float(data.count(chr(x))) / len(data)
if p_x > 0:
entropy += - p_x*math.log(p_x, 2)
return (entropy / 8)
def gzip(self, data, truncate=True):
Performs an entropy analysis based on zlib compression ratio.
This is faster than the shannon entropy analysis, but not as accurate.
# Entropy is a simple ratio of: <zlib compressed size> / <original size>
e = float(float(len(zlib.compress(data, 9))) / float(len(data)))
if truncate and e > 1.0:
e = 1.0
return e
def plot_entropy(self, fname):
import numpy as np
import pyqtgraph as pg
i = 0
x = []
y = []
plotted_colors = {}
for r in self.results:
plt = pg.plot(title=fname, clear=True)
if self.show_legend and has_key(self.file_markers, fname):
plt.addLegend(size=(self.max_description_length*10, 0))
for (offset, description) in self.file_markers[fname]:
# If this description has already been plotted at a different offset, we need to
# use the same color for the marker, but set the description to None to prevent
# duplicate entries in the graph legend.
# Else, get the next color and use it to mark descriptions of this type.
if has_key(plotted_colors, description):
color = plotted_colors[description]
description = None
color = self.COLORS[i]
plotted_colors[description] = color
i += 1
if i >= len(self.COLORS):
i = 0
plt.plot(x=[offset,offset], y=[0,1.1], name=description, pen=pg.mkPen(color, width=2.5))
# Plot data points
plt.plot(x, y, pen='y')
# TODO: legend is not displayed properly when saving plots to disk
if self.save_plot:
exporter = pg.exporters.ImageExporter.ImageExporter(plt.plotItem)
exporter.parameters()['width'] = self.FILE_WIDTH
exporter.export(binwalk.core.common.unique_file_name(os.path.basename(fname), self.FILE_FORMAT))
plt.setLabel('left', self.YLABEL, units=self.YUNITS)
plt.setLabel('bottom', self.XLABEL, units=self.XUNITS)
XLABEL = 'Offset'
YLABEL = 'Entropy'
COLORS = ['r', 'g', 'c', 'b', 'm']
TITLE = "Entropy"
CLI = [
kwargs={'enabled' : True},
description='Calculate file entropy'),
kwargs={'save_plot' : True},
description='Save plot as a PNG'),
kwargs={'do_plot' : False},
description='Do not generate an entropy plot graph'),
kwargs={'show_legend' : False},
description='Omit the legend from the entropy plot graph'),
Kwarg(name='enabled', default=False),
Kwarg(name='save_plot', default=False),
Kwarg(name='display_results', default=True),
Kwarg(name='do_plot', default=True),
Kwarg(name='show_legend', default=True),
Kwarg(name='block_size', default=0),
# Run this module last so that it can process all other module's results and overlay them on the entropy graph
def init(self):
self.HEADER[-1] = "ENTROPY"
self.algorithm = self.shannon
self.max_description_length = 0
self.file_markers = {}
# Get a list of all other module's results to mark on the entropy graph
for (module, obj) in iterator(self.modules):
for result in obj.results:
if result.file and result.description:
description = result.description.split(',')[0]
if not has_key(self.file_markers,
self.file_markers[] = []
if len(description) > self.max_description_length:
self.max_description_length = len(description)
self.file_markers[].append((result.offset, description))
# If other modules have been run and they produced results, don't spam the terminal with entropy results
if self.file_markers:
self.display_results = False
if not self.block_size:
if self.config.block:
self.block_size = self.config.block
self.block_size = self.DEFAULT_BLOCK_SIZE
def run(self):
from pyqtgraph.Qt import QtGui
for fp in iter(self.next_file, None):
if self.display_results:
if self.display_results:
if self.do_plot and not self.save_plot:
def calculate_file_entropy(self, fp):
# Clear results from any previously analyzed files
while True:
file_offset = fp.tell()
(data, dlen) = fp.read_block()
if not data:
i = 0
while i < dlen:
entropy = self.algorithm(data[i:i+self.block_size])
r = self.result(offset=(file_offset + i), file=fp, entropy=entropy, description=("%f" % entropy), display=self.display_results)
i += self.block_size
if self.do_plot:
def shannon(self, data):
    '''
    Performs a Shannon entropy analysis on a given block of data.

    @data - The block of data to analyze, as a string (or a bytes object).

    Returns the entropy scaled to the range 0 - 1 (bits per byte divided by 8).
    Returns 0 for empty data.
    '''
    from collections import Counter

    entropy = 0

    if data:
        # Count every symbol in a single pass instead of scanning the data 256 times.
        counts = Counter(data)
        length = len(data)

        for x in range(0, 256):
            # Strings yield one-character keys; Python 3 bytes objects yield integer keys.
            occurrences = counts[chr(x)] or counts[x]
            p_x = float(occurrences) / length
            if p_x > 0:
                entropy += - p_x*math.log(p_x, 2)

    return (entropy / 8)
def gzip(self, data, truncate=True):
    '''
    Performs an entropy analysis based on zlib compression ratio.
    This is faster than the shannon entropy analysis, but not as accurate.

    @data     - The block of data to analyze.
    @truncate - If True, ratios larger than 1.0 are capped at 1.0.

    Returns the ratio <zlib compressed size> / <original size>.
    Returns 0.0 for empty data.
    '''
    # Nothing to compress; also avoids dividing by a zero length below.
    if not data:
        return 0.0

    # zlib requires a bytes-like object on Python 3.
    # NOTE(review): assumes text input holds one character per byte (latin-1) - confirm against callers.
    if not isinstance(data, bytes):
        data = data.encode('latin-1')

    # Entropy is a simple ratio of: <zlib compressed size> / <original size>
    e = float(len(zlib.compress(data, 9))) / float(len(data))

    if truncate and e > 1.0:
        e = 1.0

    return e
def plot_entropy(self, fname):
import numpy as np
import pyqtgraph as pg
i = 0
x = []
y = []
plotted_colors = {}
for r in self.results:
plt = pg.plot(title=fname, clear=True)
if self.show_legend and has_key(self.file_markers, fname):
plt.addLegend(size=(self.max_description_length*10, 0))
for (offset, description) in self.file_markers[fname]:
# If this description has already been plotted at a different offset, we need to
# use the same color for the marker, but set the description to None to prevent
# duplicate entries in the graph legend.
# Else, get the next color and use it to mark descriptions of this type.
if has_key(plotted_colors, description):
color = plotted_colors[description]
description = None
color = self.COLORS[i]
plotted_colors[description] = color
i += 1
if i >= len(self.COLORS):
i = 0
plt.plot(x=[offset,offset], y=[0,1.1], name=description, pen=pg.mkPen(color, width=2.5))
# Plot data points
plt.plot(x, y, pen='y')
# TODO: legend is not displayed properly when saving plots to disk
if self.save_plot:
exporter = pg.exporters.ImageExporter.ImageExporter(plt.plotItem)
exporter.parameters()['width'] = self.FILE_WIDTH
exporter.export(binwalk.core.common.unique_file_name(os.path.basename(fname), self.FILE_FORMAT))
plt.setLabel('left', self.YLABEL, units=self.YUNITS)
plt.setLabel('bottom', self.XLABEL, units=self.XUNITS)
......@@ -9,560 +9,560 @@ from binwalk.core.module import Module, Option, Kwarg
from binwalk.core.common import file_size, unique_file_name, BlockFile
class Extractor(Module):
Extractor class, responsible for extracting files from the target file and executing external applications, if requested.
# Extract rules are delimited with a colon.
# <case insensitive matching string>:<file extension>[:<command to run>]
# Comments in the extract.conf files start with a pound
# Place holder for the extracted file name in the command
TITLE = 'Extraction'
CLI = [
kwargs={'load_default_rules' : True, 'enabled' : True},
description='Automatically extract known file types'),
kwargs={'manual_rules' : [], 'enabled' : True},
description='Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>'),
kwargs={'matryoshka' : 8},
description='Recursively scan extracted files'),
kwargs={'matryoshka' : 0},
description='Limit matryoshka recursion depth (default: 8 levels deep)'),
kwargs={'max_size' : 0},
description='Limit the size of each extracted file'),
kwargs={'remove_after_execute' : True},
description='Cleanup extracted / zero-size files after extraction'),
kwargs={'run_extractors' : False},
description="Carve data from files, but don't execute extraction utilities"),
Kwarg(name='max_size', default=None),
Kwarg(name='remove_after_execute', default=False),
Kwarg(name='load_default_rules', default=False),
Kwarg(name='run_extractors', default=True),
Kwarg(name='manual_rules', default=[]),
Kwarg(name='matryoshka', default=0),
Kwarg(name='enabled', default=False),
def load(self):
# Holds a list of extraction rules loaded either from a file or when manually specified.
self.extract_rules = []
if self.load_default_rules:
for manual_rule in self.manual_rules:
def reset(self):
    '''
    Resets the per-scan extraction state.

    Returns None.
    '''
    # Holds a list of pending files that should be scanned; only populated if self.matryoshka == True
    self.pending = []
    # Holds a dictionary of extraction directories created for each scanned file.
    self.extraction_directories = {}
    # Holds a dictionary of the last directory listing for a given directory; used for identifying
    # newly created/extracted files that need to be appended to self.pending.
    self.last_directory_listing = {}
    # Set to the directory path of the first extracted directory; this allows us to track recursion depth.
    self.base_recursion_dir = ""
def callback(self, r):
# Make sure the file attribute is set to a compatible instance of binwalk.core.common.BlockFile
except KeyboardInterrupt as e:
except Exception as e:
if not r.size:
size = r.file.size - r.offset
size = r.size
# Only extract valid results
if r.valid:
# Do the extraction
(extraction_directory, dd_file) = self.extract(r.offset, r.description,, size,
# If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file
if extraction_directory and dd_file:
# Get the full path to the dd'd file
dd_file_path = os.path.join(extraction_directory, dd_file)
# Do a directory listing of the output directory
directory_listing = set(os.listdir(extraction_directory))
# If this is a newly created output directory, self.last_directory_listing won't have a record of it.
# If we've extracted other files to this directory before, it will.
if not has_key(self.last_directory_listing, extraction_directory):
self.last_directory_listing[extraction_directory] = set()
# Loop through a list of newly created files (i.e., files that weren't listed in the last directory listing)
for f in directory_listing.difference(self.last_directory_listing[extraction_directory]):
# Build the full file path and add it to the extractor results
file_path = os.path.join(extraction_directory, f)
real_file_path = os.path.realpath(file_path)
self.result(description=file_path, display=False)
# If recursion was specified, and the file is not the same one we just dd'd, and if it is not a directory
if self.matryoshka and file_path != dd_file_path and not os.path.isdir(file_path):
# If the recursion level of this file is less than or equal to our desired recursion level
if len(real_file_path.split(self.base_recursion_dir)[1].split(os.path.sep)) <= self.matryoshka:
# Add the file to our list of pending files
# Update the last directory listing for the next time we extract a file to this same output directory
self.last_directory_listing[extraction_directory] = directory_listing
def append_rule(self, r):
def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None):
Adds a set of rules to the extraction rule list.
@txtrule - Rule string, or list of rule strings, in the format <regular expression>:<file extension>[:<command to run>]
@regex - If rule string is not specified, this is the regular expression string to use.
@extension - If rule string is not specified, this is the file extension to use.
@cmd - If rule string is not specified, this is the command to run.
Alternatively a callable object may be specified, which will be passed one argument: the path to the file to extract.
Returns None.
rules = []
match = False
r = {
'extension' : '',
'cmd' : '',
'regex' : None
# Process single explicitly specified rule
if not txtrule and regex and extension:
r['extension'] = extension
r['regex'] = re.compile(regex)
if cmd:
r['cmd'] = cmd
# Process rule string, or list of rule strings
if not isinstance(txtrule, type([])):
rules = [txtrule]
rules = txtrule
for rule in rules:
r['cmd'] = ''
r['extension'] = ''
values = self._parse_rule(rule)
match = values[0]
r['regex'] = re.compile(values[0])
r['extension'] = values[1]
r['cmd'] = values[2]
except KeyboardInterrupt as e:
raise e
except Exception:
# Verify that the match string was retrieved.
if match:
def remove_rule(self, text):
    '''
    Remove all rules that match a specified text.

    @text - The text to match against.

    Returns the number of rules removed.
    '''
    kept = []
    removed = 0

    # Build the list of surviving rules rather than popping by index: popping ascending
    # indexes shifts the remaining entries and ends up removing the wrong rules.
    for rule in self.extract_rules:
        if rule['regex'].match(text):
            removed += 1
        else:
            kept.append(rule)

    # Update the existing list object in place so other references to it stay valid.
    self.extract_rules[:] = kept

    return removed
def clear_rules(self):
    '''
    Deletes all extraction rules.

    self.extract_rules is rebound to a new, empty list.

    Returns None.
    '''
    self.extract_rules = []
def get_rules(self):
    '''
    Returns a list of all extraction rules.

    The list returned is self.extract_rules itself, not a copy.
    '''
    return self.extract_rules
def load_from_file(self, fname):
    '''
    Loads extraction rules from the specified file.

    @fname - Path to the extraction rule file.

    Raises an Exception naming the file if it cannot be read or a rule cannot be added.

    Returns None.
    '''
    try:
        # Process each line from the extract file, ignoring comments
        with open(fname, 'r') as f:
            for rule in f:
                # Everything after the comment delimiter is dropped before the rule is added.
                self.add_rule(rule.split(self.COMMENT_DELIM, 1)[0])
    except KeyboardInterrupt:
        # Never hide a user interrupt behind the generic error below.
        raise
    except Exception as e:
        raise Exception("Extractor.load_from_file failed to load file '%s': %s" % (fname, str(e)))
def load_defaults(self):
Loads default extraction rules from the user and system extract.conf files.
Returns None.
# Load the user extract file first to ensure its rules take precedence.
extract_files = [
for extract_file in extract_files:
except KeyboardInterrupt as e:
raise e
except Exception as e:
if self.config.verbose:
raise Exception("Extractor.load_defaults failed to load file '%s': %s" % (extract_file, str(e)))
def build_output_directory(self, path):
Set the output directory for extracted files.
@path - The path to the file that data will be extracted from.
Returns None.
# If we have not already created an output directory for this target file, create one now
if not has_key(self.extraction_directories, path):
output_directory = os.path.join(os.path.dirname(path), unique_file_name('_' + os.path.basename(path), extension='extracted'))
if not os.path.exists(output_directory):
self.extraction_directories[path] = output_directory
# Else, just use the already created directory
output_directory = self.extraction_directories[path]
# Set the initial base extraction directory for later determining the level of recursion
if not self.base_recursion_dir:
self.base_recursion_dir = os.path.realpath(output_directory) + os.path.sep
return output_directory
def cleanup_extracted_files(self, tf=None):
    '''
    Set the action to take after a file is extracted.

    @tf - If set to True, extracted files will be cleaned up after running a command against them.
          If set to False, extracted files will not be cleaned up after running a command against them.
          If set to None or not specified, the current setting will not be changed.

    Returns the current cleanup status (True/False).
    '''
    # None means "query only"; any other value replaces the current setting.
    if tf is not None:
        self.remove_after_execute = tf

    return self.remove_after_execute
def extract(self, offset, description, file_name, size, name=None):
Extract an embedded file from the target file, if it matches an extract rule.
Called automatically by Binwalk.scan().
@offset - Offset inside the target file to begin the extraction.
@description - Description of the embedded file to extract, as returned by libmagic.
@file_name - Path to the target file.
@size - Number of bytes to extract.
@name - Name to save the file as.
Returns the name of the extracted file (blank string if nothing was extracted).
fname = ''
cleanup_extracted_fname = True
original_dir = os.getcwd()
rules = self._match(description)
file_path = os.path.realpath(file_name)
# No extraction rules for this file
if not rules:
return (None, None)
output_directory = self.build_output_directory(file_name)
# Extract to end of file if no size was specified
if not size:
size = file_size(file_path) - offset
if os.path.isfile(file_path):
# Loop through each extraction rule until one succeeds
for i in range(0, len(rules)):
rule = rules[i]
# Copy out the data to disk, if we haven't already
fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)
# If there was a command specified for this rule, try to execute it.
# If execution fails, the next rule will be attempted.
if rule['cmd']:
# Many extraction utilities will extract the file to a new file, just without
# the file extension (i.e., myfile.7z -> myfile). If the presumed resulting
# file name already exists before executing the extract command, do not attempt
# to clean it up even if its resulting file size is 0.
if self.remove_after_execute:
extracted_fname = os.path.splitext(fname)[0]
if os.path.exists(extracted_fname):
cleanup_extracted_fname = False
# Execute the specified command against the extracted file
if self.run_extractors:
extract_ok = self.execute(rule['cmd'], fname)
extract_ok = True
# Only clean up files if remove_after_execute was specified
if extract_ok and self.remove_after_execute:
# Remove the original file that we extracted
except KeyboardInterrupt as e:
raise e
except Exception as e:
# If the command worked, assume it removed the file extension from the extracted file
# If the extracted file name file exists and is empty, remove it
if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0:
except KeyboardInterrupt as e:
raise e
except Exception as e:
# If the command executed OK, don't try any more rules
if extract_ok:
# Else, remove the extracted file if this isn't the last rule in the list.
# If it is the last rule, leave the file on disk for the user to examine.
elif i != (len(rules)-1):
except KeyboardInterrupt as e:
raise e
except Exception as e:
# If there was no command to execute, just use the first rule
return (output_directory, fname)
def _entry_offset(self, index, entries, description):
    '''
    Gets the offset of the first entry that matches the description.

    @index       - Index into the entries sequence to begin searching.
    @entries     - Sliceable sequence of (offset, infos) pairs, where infos is an
                   iterable of dictionaries that each have a 'description' key.
    @description - Case insensitive description prefix to look for.

    Returns the offset, if a matching description is found.
    Returns -1 if a matching description is not found.
    '''
    # Lower-case once up front; every candidate is lower-cased before comparing.
    description = description.lower()

    for (offset, infos) in entries[index:]:
        for info in infos:
            # Prefix match only: the description must start with the search text.
            if info['description'].lower().startswith(description):
                return offset

    return -1
def _match(self, description):
    '''
    Check to see if the provided description string matches an extract rule.
    Called internally by self.extract().

    @description - Description string to check.

    Returns a list of every rule dictionary in self.extract_rules whose compiled
    'regex' is found in the lower-cased description, in rule order.
    Returns an empty list if no rule matches.
    '''
    # Rules are searched against the lower-cased description.
    description = description.lower()

    return [rule for rule in self.extract_rules if rule['regex'].search(description)]
def _parse_rule(self, rule):
    '''
    Parses an extraction rule.

    @rule - Rule string.

    Returns an array of ['<case insensitive matching string>', '<file extension>', '<command to run>'].
    The list is shorter when the rule string has fewer delimited fields (e.g., no command).
    '''
    # Split on at most two delimiters so the command field may itself contain the delimiter.
    return rule.strip().split(self.RULE_DELIM, 2)
def _dd(self, file_name, offset, size, extension, output_file_name=None):
Extracts a file embedded inside the target file.
@file_name - Path to the target file.
@offset - Offset inside the target file where the embedded file begins.
@size - Number of bytes to extract.
@extension - The file extension to assign to the extracted file on disk.
@output_file_name - The requested name of the output file.
Returns the extracted file name.
total_size = 0
# Default extracted file name is <hex offset>.<extension>
default_bname = "%X" % offset
if self.max_size and size > self.max_size:
size = self.max_size
if not output_file_name or output_file_name is None:
bname = default_bname
# Strip the output file name of invalid/dangerous characters (like file paths)
bname = os.path.basename(output_file_name)
fname = unique_file_name(bname, extension)
# Open the target file and seek to the offset
fdin = self.config.open_file(file_name, length=size, offset=offset)
# Open the output file
fdout = BlockFile(fname, 'w')
except KeyboardInterrupt as e:
raise e
except Exception as e:
# Fall back to the default name if the requested name fails
fname = unique_file_name(default_bname, extension)
fdout = BlockFile(fname, 'w')
while total_size < size:
(data, dlen) = fdin.read_block()
if not data:
total_size += dlen
# Cleanup
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))
return fname
def execute(self, cmd, fname):
Execute a command against the specified file.
@cmd - Command to execute.
@fname - File to run command against.
Returns True on success, False on failure.
tmp = None
retval = True
if callable(cmd):
except KeyboardInterrupt as e:
raise e
except Exception as e:
sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e)))
# If not in verbose mode, create a temporary file to redirect stdout and stderr to
if not self.config.verbose:
tmp = tempfile.TemporaryFile()
# Replace all instances of FILE_NAME_PLACEHOLDER in the command with fname
cmd = cmd.replace(self.FILE_NAME_PLACEHOLDER, fname)
# Execute.
if, stdout=tmp, stderr=tmp) != 0:
retval = False
except KeyboardInterrupt as e:
raise e
except Exception as e:
# Silently ignore no such file or directory errors. Why? Because these will inevitably be raised when
# making the switch to the new firmware mod kit directory structure. We handle this elsewhere, but it's
# annoying to see this spammed out to the console every time.
if e.errno != 2:
sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e)))
retval = False
if tmp is not None:
return retval
Extractor class, responsible for extracting files from the target file and executing external applications, if requested.
# Extract rules are delimited with a colon.
# <case insensitive matching string>:<file extension>[:<command to run>]
# Comments in the extract.conf files start with a pound
# Place holder for the extracted file name in the command
TITLE = 'Extraction'
CLI = [
kwargs={'load_default_rules' : True, 'enabled' : True},
description='Automatically extract known file types'),
kwargs={'manual_rules' : [], 'enabled' : True},
description='Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>'),
kwargs={'matryoshka' : 8},
description='Recursively scan extracted files'),
kwargs={'matryoshka' : 0},
description='Limit matryoshka recursion depth (default: 8 levels deep)'),
kwargs={'max_size' : 0},
description='Limit the size of each extracted file'),
kwargs={'remove_after_execute' : True},
description='Cleanup extracted / zero-size files after extraction'),
kwargs={'run_extractors' : False},
description="Carve data from files, but don't execute extraction utilities"),
Kwarg(name='max_size', default=None),
Kwarg(name='remove_after_execute', default=False),
Kwarg(name='load_default_rules', default=False),
Kwarg(name='run_extractors', default=True),
Kwarg(name='manual_rules', default=[]),
Kwarg(name='matryoshka', default=0),
Kwarg(name='enabled', default=False),
def load(self):
# Holds a list of extraction rules loaded either from a file or when manually specified.
self.extract_rules = []
if self.load_default_rules:
for manual_rule in self.manual_rules:
def reset(self):
    '''
    Re-initializes the extraction bookkeeping attributes to their empty state.

    Returns None.
    '''
    # Holds a list of pending files that should be scanned; only populated if self.matryoshka == True
    self.pending = []
    # Holds a dictionary of extraction directories created for each scanned file.
    self.extraction_directories = {}
    # Holds a dictionary of the last directory listing for a given directory; used for identifying
    # newly created/extracted files that need to be appended to self.pending.
    self.last_directory_listing = {}
    # Set to the directory path of the first extracted directory; this allows us to track recursion depth.
    self.base_recursion_dir = ""
def callback(self, r):
# Make sure the file attribute is set to a compatible instance of binwalk.core.common.BlockFile
except KeyboardInterrupt as e:
except Exception as e:
if not r.size:
size = r.file.size - r.offset
size = r.size
# Only extract valid results
if r.valid:
# Do the extraction
(extraction_directory, dd_file) = self.extract(r.offset, r.description,, size,
# If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file
if extraction_directory and dd_file:
# Get the full path to the dd'd file
dd_file_path = os.path.join(extraction_directory, dd_file)
# Do a directory listing of the output directory
directory_listing = set(os.listdir(extraction_directory))
# If this is a newly created output directory, self.last_directory_listing won't have a record of it.
# If we've extracted other files to this directory before, it will.
if not has_key(self.last_directory_listing, extraction_directory):
self.last_directory_listing[extraction_directory] = set()
# Loop through a list of newly created files (i.e., files that weren't listed in the last directory listing)
for f in directory_listing.difference(self.last_directory_listing[extraction_directory]):
# Build the full file path and add it to the extractor results
file_path = os.path.join(extraction_directory, f)
real_file_path = os.path.realpath(file_path)
self.result(description=file_path, display=False)
# If recursion was specified, and the file is not the same one we just dd'd, and if it is not a directory
if self.matryoshka and file_path != dd_file_path and not os.path.isdir(file_path):
# If the recursion level of this file is less than or equal to our desired recursion level
if len(real_file_path.split(self.base_recursion_dir)[1].split(os.path.sep)) <= self.matryoshka:
# Add the file to our list of pending files
# Update the last directory listing for the next time we extract a file to this same output directory
self.last_directory_listing[extraction_directory] = directory_listing
def append_rule(self, r):
def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None):
Adds a set of rules to the extraction rule list.
@txtrule - Rule string, or list of rule strings, in the format <regular expression>:<file extension>[:<command to run>]
@regex - If rule string is not specified, this is the regular expression string to use.
@extension - If rule string is not specified, this is the file extension to use.
@cmd - If rule string is not specified, this is the command to run.
Alternatively a callable object may be specified, which will be passed one argument: the path to the file to extract.
Returns None.
rules = []
match = False
r = {
'extension' : '',
'cmd' : '',
'regex' : None
# Process single explicitly specified rule
if not txtrule and regex and extension:
r['extension'] = extension
r['regex'] = re.compile(regex)
if cmd:
r['cmd'] = cmd
# Process rule string, or list of rule strings
if not isinstance(txtrule, type([])):
rules = [txtrule]
rules = txtrule
for rule in rules:
r['cmd'] = ''
r['extension'] = ''
values = self._parse_rule(rule)
match = values[0]
r['regex'] = re.compile(values[0])
r['extension'] = values[1]
r['cmd'] = values[2]
except KeyboardInterrupt as e:
raise e
except Exception:
# Verify that the match string was retrieved.
if match:
def remove_rule(self, text):
Remove all rules that match a specified text.
@text - The text to match against.
Returns the number of rules removed.
rm = []
for i in range(0, len(self.extract_rules)):
if self.extract_rules[i]['regex'].match(text):
for i in rm:
return len(rm)
def clear_rules(self):
    '''
    Deletes all extraction rules.

    Returns None.
    '''
    # Rebinds the attribute to a new list; a list object obtained earlier is left untouched.
    self.extract_rules = []
def get_rules(self):
    '''
    Returns a list of all extraction rules.

    The list returned is self.extract_rules itself, not a copy.
    '''
    return self.extract_rules
def load_from_file(self, fname):
    '''
    Loads extraction rules from the specified file.

    @fname - Path to the extraction rule file.

    Returns None.
    Raises an Exception naming the file if it cannot be read or a rule cannot be added.
    '''
    try:
        # Process each line from the extract file, ignoring comments
        with open(fname, 'r') as f:
            for rule in f:
                # Everything after the first comment delimiter on a line is discarded.
                self.add_rule(rule.split(self.COMMENT_DELIM, 1)[0])
    except KeyboardInterrupt:
        raise
    except Exception as e:
        raise Exception("Extractor.load_from_file failed to load file '%s': %s" % (fname, str(e)))
def load_defaults(self):
Loads default extraction rules from the user and system extract.conf files.
Returns None.
# Load the user extract file first to ensure its rules take precedence.
extract_files = [
for extract_file in extract_files:
except KeyboardInterrupt as e:
raise e
except Exception as e:
if self.config.verbose:
raise Exception("Extractor.load_defaults failed to load file '%s': %s" % (extract_file, str(e)))
def build_output_directory(self, path):
Set the output directory for extracted files.
@path - The path to the file that data will be extracted from.
Returns None.
# If we have not already created an output directory for this target file, create one now
if not has_key(self.extraction_directories, path):
output_directory = os.path.join(os.path.dirname(path), unique_file_name('_' + os.path.basename(path), extension='extracted'))
if not os.path.exists(output_directory):
self.extraction_directories[path] = output_directory
# Else, just use the already created directory
output_directory = self.extraction_directories[path]
# Set the initial base extraction directory for later determining the level of recursion
if not self.base_recursion_dir:
self.base_recursion_dir = os.path.realpath(output_directory) + os.path.sep
return output_directory
def cleanup_extracted_files(self, tf=None):
    '''
    Set the action to take after a file is extracted.

    @tf - If set to True, extracted files will be cleaned up after running a command against them.
          If set to False, extracted files will not be cleaned up after running a command against them.
          If set to None or not specified, the current setting will not be changed.

    Returns the current cleanup status (True/False).
    '''
    # Only an explicit value changes the setting; None makes this a pure getter.
    if tf is not None:
        self.remove_after_execute = tf

    return self.remove_after_execute
def extract(self, offset, description, file_name, size, name=None):
Extract an embedded file from the target file, if it matches an extract rule.
Called automatically by Binwalk.scan().
@offset - Offset inside the target file to begin the extraction.
@description - Description of the embedded file to extract, as returned by libmagic.
@file_name - Path to the target file.
@size - Number of bytes to extract.
@name - Name to save the file as.
Returns the name of the extracted file (blank string if nothing was extracted).
fname = ''
cleanup_extracted_fname = True
original_dir = os.getcwd()
rules = self._match(description)
file_path = os.path.realpath(file_name)
# No extraction rules for this file
if not rules:
return (None, None)
output_directory = self.build_output_directory(file_name)
# Extract to end of file if no size was specified
if not size:
size = file_size(file_path) - offset
if os.path.isfile(file_path):
# Loop through each extraction rule until one succeeds
for i in range(0, len(rules)):
rule = rules[i]
# Copy out the data to disk, if we haven't already
fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)
# If there was a command specified for this rule, try to execute it.
# If execution fails, the next rule will be attempted.
if rule['cmd']:
# Many extraction utilities will extract the file to a new file, just without
# the file extension (i.e., myfile.7z -> myfile). If the presumed resulting
# file name already exists before executing the extract command, do not attempt
# to clean it up even if its resulting file size is 0.
if self.remove_after_execute:
extracted_fname = os.path.splitext(fname)[0]
if os.path.exists(extracted_fname):
cleanup_extracted_fname = False
# Execute the specified command against the extracted file
if self.run_extractors:
extract_ok = self.execute(rule['cmd'], fname)
extract_ok = True
# Only clean up files if remove_after_execute was specified
if extract_ok and self.remove_after_execute:
# Remove the original file that we extracted
except KeyboardInterrupt as e:
raise e
except Exception as e:
# If the command worked, assume it removed the file extension from the extracted file
# If the extracted file name file exists and is empty, remove it
if cleanup_extracted_fname and os.path.exists(extracted_fname) and file_size(extracted_fname) == 0:
except KeyboardInterrupt as e:
raise e
except Exception as e:
# If the command executed OK, don't try any more rules
if extract_ok:
# Else, remove the extracted file if this isn't the last rule in the list.
# If it is the last rule, leave the file on disk for the user to examine.
elif i != (len(rules)-1):
except KeyboardInterrupt as e:
raise e
except Exception as e:
# If there was no command to execute, just use the first rule
return (output_directory, fname)
def _entry_offset(self, index, entries, description):
    '''
    Gets the offset of the first entry that matches the description.

    @index       - Index into the entries sequence to begin searching.
    @entries     - Sliceable sequence of (offset, infos) pairs, where infos is an
                   iterable of dictionaries that each have a 'description' key.
    @description - Case insensitive description prefix to look for.

    Returns the offset, if a matching description is found.
    Returns -1 if a matching description is not found.
    '''
    # Lower-case once up front; every candidate is lower-cased before comparing.
    description = description.lower()

    for (offset, infos) in entries[index:]:
        for info in infos:
            # Prefix match only: the description must start with the search text.
            if info['description'].lower().startswith(description):
                return offset

    return -1
def _match(self, description):
    '''
    Check to see if the provided description string matches an extract rule.
    Called internally by self.extract().

    @description - Description string to check.

    Returns a list of every rule dictionary in self.extract_rules whose compiled
    'regex' is found in the lower-cased description, in rule order.
    Returns an empty list if no rule matches.
    '''
    # Rules are searched against the lower-cased description.
    description = description.lower()

    return [rule for rule in self.extract_rules if rule['regex'].search(description)]
def _parse_rule(self, rule):
    '''
    Parses an extraction rule.

    @rule - Rule string.

    Returns an array of ['<case insensitive matching string>', '<file extension>', '<command to run>'].
    The list is shorter when the rule string has fewer delimited fields (e.g., no command).
    '''
    # Split on at most two delimiters so the command field may itself contain the delimiter.
    return rule.strip().split(self.RULE_DELIM, 2)
def _dd(self, file_name, offset, size, extension, output_file_name=None):
Extracts a file embedded inside the target file.
@file_name - Path to the target file.
@offset - Offset inside the target file where the embedded file begins.
@size - Number of bytes to extract.
@extension - The file extension to assign to the extracted file on disk.
@output_file_name - The requested name of the output file.
Returns the extracted file name.
total_size = 0
# Default extracted file name is <hex offset>.<extension>
default_bname = "%X" % offset
if self.max_size and size > self.max_size:
size = self.max_size
if not output_file_name or output_file_name is None:
bname = default_bname
# Strip the output file name of invalid/dangerous characters (like file paths)
bname = os.path.basename(output_file_name)
fname = unique_file_name(bname, extension)
# Open the target file and seek to the offset
fdin = self.config.open_file(file_name, length=size, offset=offset)
# Open the output file
fdout = BlockFile(fname, 'w')
except KeyboardInterrupt as e:
raise e
except Exception as e:
# Fall back to the default name if the requested name fails
fname = unique_file_name(default_bname, extension)
fdout = BlockFile(fname, 'w')
while total_size < size:
(data, dlen) = fdin.read_block()
if not data:
total_size += dlen
# Cleanup
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))
return fname
def execute(self, cmd, fname):
Execute a command against the specified file.
@cmd - Command to execute.
@fname - File to run command against.
Returns True on success, False on failure.
tmp = None
retval = True
if callable(cmd):
except KeyboardInterrupt as e:
raise e
except Exception as e:
sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e)))
# If not in verbose mode, create a temporary file to redirect stdout and stderr to
if not self.config.verbose:
tmp = tempfile.TemporaryFile()
# Replace all instances of FILE_NAME_PLACEHOLDER in the command with fname
cmd = cmd.replace(self.FILE_NAME_PLACEHOLDER, fname)
# Execute.
if, stdout=tmp, stderr=tmp) != 0:
retval = False
except KeyboardInterrupt as e:
raise e
except Exception as e:
# Silently ignore no such file or directory errors. Why? Because these will inevitably be raised when
# making the switch to the new firmware mod kit directory structure. We handle this elsewhere, but it's
# annoying to see this spammed out to the console every time.
if e.errno != 2:
sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e)))
retval = False
if tmp is not None:
return retval
......@@ -10,177 +10,177 @@ from binwalk.core.module import Module, Option, Kwarg, show_help
class General(Module):
TITLE = "General"
CLI = [
kwargs={'length' : 0},
description='Number of bytes to scan'),
kwargs={'offset' : 0},
description='Start scan at this file offset'),
kwargs={'block' : 0},
description='Set file block size'),
kwargs={'swap_size' : 0},
description='Reverse every n bytes before scanning'),
kwargs={'show_invalid' : True},
description='Show results marked as invalid'),
kwargs={'exclude_filters' : []},
description='Exclude results that match <str>'),
kwargs={'include_filters' : []},
description='Only show results that match <str>'),
kwargs={'log_file' : None},
description='Log results to file'),
kwargs={'csv' : True},
description='Log results to file in CSV format'),
kwargs={'format_to_terminal' : True},
description='Format output to fit the terminal window'),
kwargs={'quiet' : True},
description='Supress output to stdout'),
kwargs={'verbose' : True},
description='Enable verbose output'),
kwargs={'show_help' : True},
description='Show help output'),
kwargs={'files' : []}),
Kwarg(name='length', default=0),
Kwarg(name='offset', default=0),
Kwarg(name='block', default=0),
Kwarg(name='swap_size', default=0),
Kwarg(name='show_invalid', default=False),
Kwarg(name='include_filters', default=[]),
Kwarg(name='exclude_filters', default=[]),
Kwarg(name='log_file', default=None),
Kwarg(name='csv', default=False),
Kwarg(name='format_to_terminal', default=False),
Kwarg(name='quiet', default=False),
Kwarg(name='verbose', default=False),
Kwarg(name='files', default=[]),
Kwarg(name='show_help', default=False),
def load(self):
self.target_files = []
# Order is important with these two methods
self.filter = binwalk.core.filter.Filter(self.show_invalid)
# Set any specified include/exclude filters
for regex in self.exclude_filters:
for regex in self.include_filters:
self.settings = binwalk.core.settings.Settings()
self.display = binwalk.core.display.Display(log=self.log_file,
if self.show_help:
def reset(self):
for fp in self.target_files:
def __del__(self):
def __exit__(self, a, b, c):
def _cleanup(self):
if hasattr(self, 'target_files'):
for fp in self.target_files:
def _set_verbosity(self):
    '''
    Sets the appropriate verbosity.
    Must be called after self.target_files has been populated.

    Returns None.
    '''
    # If more than one target file was specified, enable verbose mode; else, there is
    # nothing in some outputs to indicate which scan corresponds to which file.
    if len(self.target_files) > 1 and not self.verbose:
        self.verbose = True
def open_file(self, fname, length=None, offset=None, swap=None, block=None, peek=None):
    '''
    Opens the specified file with all pertinent configuration settings.

    @fname  - Path of the file to open.
    @length - Passed to BlockFile as length; defaults to self.length if None.
    @offset - Passed to BlockFile as offset; defaults to self.offset if None.
    @swap   - Passed to BlockFile as swap; defaults to self.swap_size if None.
    @block  - Passed through to BlockFile unchanged.
    @peek   - Passed through to BlockFile unchanged.

    Returns the binwalk.core.common.BlockFile instance.
    '''
    # Only an explicit None falls back to the module setting; an explicit 0 is honored.
    if length is None:
        length = self.length
    if offset is None:
        offset = self.offset
    if swap is None:
        swap = self.swap_size

    return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek)
def _open_target_files(self):
Checks if the target files can be opened.
Any files that cannot be opened are removed from the self.target_files list.
# Validate the target files listed in target_files
for tfile in self.files:
# Ignore directories.
if not os.path.isdir(tfile):
# Make sure we can open the target files
except KeyboardInterrupt as e:
raise e
except Exception as e:
self.error(description="Cannot open file : %s" % str(e))
TITLE = "General"
CLI = [
kwargs={'length' : 0},
description='Number of bytes to scan'),
kwargs={'offset' : 0},
description='Start scan at this file offset'),
kwargs={'block' : 0},
description='Set file block size'),
kwargs={'swap_size' : 0},
description='Reverse every n bytes before scanning'),
kwargs={'show_invalid' : True},
description='Show results marked as invalid'),
kwargs={'exclude_filters' : []},
description='Exclude results that match <str>'),
kwargs={'include_filters' : []},
description='Only show results that match <str>'),
kwargs={'log_file' : None},
description='Log results to file'),
kwargs={'csv' : True},
description='Log results to file in CSV format'),
kwargs={'format_to_terminal' : True},
description='Format output to fit the terminal window'),
kwargs={'quiet' : True},
description='Supress output to stdout'),
kwargs={'verbose' : True},
description='Enable verbose output'),
kwargs={'show_help' : True},
description='Show help output'),
kwargs={'files' : []}),
Kwarg(name='length', default=0),
Kwarg(name='offset', default=0),
Kwarg(name='block', default=0),
Kwarg(name='swap_size', default=0),
Kwarg(name='show_invalid', default=False),
Kwarg(name='include_filters', default=[]),
Kwarg(name='exclude_filters', default=[]),
Kwarg(name='log_file', default=None),
Kwarg(name='csv', default=False),
Kwarg(name='format_to_terminal', default=False),
Kwarg(name='quiet', default=False),
Kwarg(name='verbose', default=False),
Kwarg(name='files', default=[]),
Kwarg(name='show_help', default=False),
def load(self):
self.target_files = []
# Order is important with these two methods
self.filter = binwalk.core.filter.Filter(self.show_invalid)
# Set any specified include/exclude filters
for regex in self.exclude_filters:
for regex in self.include_filters:
self.settings = binwalk.core.settings.Settings()
self.display = binwalk.core.display.Display(log=self.log_file,
if self.show_help:
def reset(self):
for fp in self.target_files:
def __del__(self):
def __exit__(self, a, b, c):
def _cleanup(self):
if hasattr(self, 'target_files'):
for fp in self.target_files:
def _set_verbosity(self):
    '''
    Sets the appropriate verbosity.
    Must be called after self.target_files has been populated.

    Returns None.
    '''
    # If more than one target file was specified, enable verbose mode; else, there is
    # nothing in some outputs to indicate which scan corresponds to which file.
    if len(self.target_files) > 1 and not self.verbose:
        self.verbose = True
def open_file(self, fname, length=None, offset=None, swap=None, block=None, peek=None):
    '''
    Opens the specified file with all pertinent configuration settings.

    @fname  - Path of the file to open.
    @length - Passed to BlockFile as length; defaults to self.length if None.
    @offset - Passed to BlockFile as offset; defaults to self.offset if None.
    @swap   - Passed to BlockFile as swap; defaults to self.swap_size if None.
    @block  - Passed through to BlockFile unchanged.
    @peek   - Passed through to BlockFile unchanged.

    Returns the binwalk.core.common.BlockFile instance.
    '''
    # Only an explicit None falls back to the module setting; an explicit 0 is honored.
    if length is None:
        length = self.length
    if offset is None:
        offset = self.offset
    if swap is None:
        swap = self.swap_size

    return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek)
def _open_target_files(self):
Checks if the target files can be opened.
Any files that cannot be opened are removed from the self.target_files list.
# Validate the target files listed in target_files
for tfile in self.files:
# Ignore directories.
if not os.path.isdir(tfile):
# Make sure we can open the target files
except KeyboardInterrupt as e:
raise e
except Exception as e:
self.error(description="Cannot open file : %s" % str(e))
......@@ -8,311 +8,311 @@ from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg
class HashResult(object):
    '''
    Class for storing libfuzzy hash results.
    For internal use only.
    '''

    def __init__(self, name, hash=None, strings=None):
        '''
        Class constructor.

        @name    - Stored as self.name.
        @hash    - Stored as self.hash; None by default.
        @strings - Stored as self.strings; None by default.

        Returns None.
        '''
        self.name = name
        self.hash = hash
        self.strings = strings
class HashMatch(Module):
Class for fuzzy hash matching of files and directories.
TITLE = "Fuzzy Hash"
CLI = [
kwargs={'enabled' : True},
description='Perform fuzzy hash matching on files/directories'),
kwargs={'cutoff' : DEFAULT_CUTOFF},
description='Set the cutoff percentage'),
kwargs={'strings' : True},
description='Diff strings inside files instead of the entire file'),
kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF},
description='Only show files that are the same'),
kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF},
description='Only show files that are different'),
kwargs={'filter_by_name' : True},
description='Only compare files whose base names are the same'),
kwargs={'symlinks' : True},
description="Don't ignore symlinks"),
Kwarg(name='cutoff', default=DEFAULT_CUTOFF),
Kwarg(name='strings', default=False),
Kwarg(name='same', default=True),
Kwarg(name='symlinks', default=False),
Kwarg(name='name', default=False),
Kwarg(name='max_results', default=None),
Kwarg(name='abspath', default=False),
Kwarg(name='filter_by_name', default=False),
Kwarg(name='symlinks', default=False),
Kwarg(name='enabled', default=False),
# Requires
LIBRARY_NAME = "fuzzy"
binwalk.core.C.Function(name="fuzzy_hash_buf", type=int),
binwalk.core.C.Function(name="fuzzy_hash_filename", type=int),
binwalk.core.C.Function(name="fuzzy_compare", type=int),
# Max result is 148 (
# Files smaller than this won't produce meaningful fuzzy results (from ssdeep.h)
HEADER_FORMAT = "\n%s" + " " * 11 + "%s\n"
RESULT_FORMAT = "%4d%%" + " " * 16 + "%s\n"
RESULT = ["percentage", "description"]
def init(self): = 0
self.last_file1 = HashResult(None)
self.last_file2 = HashResult(None)
self.lib = binwalk.core.C.Library(self.LIBRARY_NAME, self.LIBRARY_FUNCTIONS)
def _get_strings(self, fname):
    '''
    Concatenates the strings that binwalk.core.common.strings yields for a file.

    @fname - Path of the file to pull strings from.

    Returns a single string made of every yielded string joined with no separator.
    '''
    # minimum=10 is passed straight to the strings helper; join consumes the iterable directly.
    return ''.join(binwalk.core.common.strings(fname, minimum=10))
def _show_result(self, match, fname):
    '''
    Reports a single match through self.result.

    @match - Match percentage, reported as the 'percentage' field.
    @fname - File name, reported as the 'description' field.

    Returns None.
    '''
    # Report the absolute path instead of the given one when self.abspath is set.
    if self.abspath:
        fname = os.path.abspath(fname)

    self.result(percentage=match, description=fname)
def _compare_files(self, file1, file2):
Fuzzy diff two files.
@file1 - The first file to diff.
@file2 - The second file to diff.
Returns the match percentage.
Returns None on error.
status = 0
file1_dup = False
file2_dup = False
if not self.filter_by_name or os.path.basename(file1) == os.path.basename(file2):
if os.path.exists(file1) and os.path.exists(file2):
hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
# Check if the last file1 or file2 matches this file1 or file2; no need to re-hash if they match.
if file1 == and self.last_file1.hash:
file1_dup = True
else: = file1
if file2 == and self.last_file2.hash:
file2_dup = True
else: = file2
if self.strings:
if file1_dup:
file1_strings = self.last_file1.strings
self.last_file1.strings = file1_strings = self._get_strings(file1)
if file2_dup:
file2_strings = self.last_file2.strings
self.last_file2.strings = file2_strings = self._get_strings(file2)
if file1_strings == file2_strings:
return 100
if file1_dup:
hash1 = self.last_file1.hash
status |= self.lib.fuzzy_hash_buf(file1_strings, len(file1_strings), hash1)
if file2_dup:
hash2 = self.last_file2.hash
status |= self.lib.fuzzy_hash_buf(file2_strings, len(file2_strings), hash2)
if file1_dup:
hash1 = self.last_file1.hash
status |= self.lib.fuzzy_hash_filename(file1, hash1)
if file2_dup:
hash2 = self.last_file2.hash
status |= self.lib.fuzzy_hash_filename(file2, hash2)
if status == 0:
if not file1_dup:
self.last_file1.hash = hash1
if not file2_dup:
self.last_file2.hash = hash2
if hash1.raw == hash2.raw:
return 100
return self.lib.fuzzy_compare(hash1, hash2)
except Exception as e:
print ("WARNING: Exception while doing fuzzy hash: %s" % e)
return None
def is_match(self, match):
    '''
    Decides whether a match percentage should be reported.

    @match - Match percentage, or None.

    Returns True if this is a good match.
    Returns False if this is not a good match, or if match is None.
    '''
    if match is None:
        return False

    # When self.same is set, matches at or above the cutoff qualify;
    # otherwise only matches below the cutoff qualify.
    return (match >= self.cutoff) == bool(self.same)
def _get_file_list(self, directory):
    '''
    Generates a directory tree.

    @directory - The root directory to start from.

    Returns a set of file paths, relative to the root directory.
    Symlinked files are left out unless self.symlinks is set.
    '''
    file_list = set()

    # Normalize directory path so that we can exclude it from each individual file path
    directory = os.path.abspath(directory) + os.path.sep
    prefix_length = len(directory)

    for (root, dirs, files) in os.walk(directory):
        # Don't include the root directory in the file paths
        relative_root = root[prefix_length:]

        for f in files:
            # The symlink test must use the file's real location; a bare file name
            # would be resolved against the current working directory instead.
            if self.symlinks or not os.path.islink(os.path.join(root, f)):
                file_list.add(os.path.join(relative_root, f))

    return file_list
def hash_files(self, needle, haystack):
Compare one file against a list of other files.
Returns a list of tuple results.
''' = 0
for f in haystack:
m = self._compare_files(needle, f)
if m is not None and self.is_match(m):
self._show_result(m, f) += 1
if self.max_results and >= self.max_results:
def hash_file(self, needle, haystack):
Search for one file inside one or more directories.
Returns a list of tuple results.
matching_files = [] = 0
done = False
for directory in haystack:
for f in self._get_file_list(directory):
f = os.path.join(directory, f)
m = self._compare_files(needle, f)
if m is not None and self.is_match(m):
self._show_result(m, f)
matching_files.append((m, f)) += 1
if self.max_results and >= self.max_results:
done = True
if done:
return matching_files
def hash_directories(self, needle, haystack):
Compare the contents of one directory with the contents of other directories.
Returns a list of tuple results.
done = False = 0
source_files = self._get_file_list(needle)
for directory in haystack:
dir_files = self._get_file_list(directory)
for f in source_files:
if f in dir_files:
file1 = os.path.join(needle, f)
file2 = os.path.join(directory, f)
m = self._compare_files(file1, file2)
if m is not None and self.is_match(m):
self._show_result(m, file2) += 1
if self.max_results and >= self.max_results:
done = True
if done:
def run(self):
Main module method.
needle = self.next_file().name
haystack = []
for fp in iter(self.next_file, None):
if os.path.isfile(needle):
if os.path.isfile(haystack[0]):
self.hash_files(needle, haystack)
self.hash_file(needle, haystack)
self.hash_directories(needle, haystack)
return True
Class for fuzzy hash matching of files and directories.
TITLE = "Fuzzy Hash"
CLI = [
kwargs={'enabled' : True},
description='Perform fuzzy hash matching on files/directories'),
kwargs={'cutoff' : DEFAULT_CUTOFF},
description='Set the cutoff percentage'),
kwargs={'strings' : True},
description='Diff strings inside files instead of the entire file'),
kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF},
description='Only show files that are the same'),
kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF},
description='Only show files that are different'),
kwargs={'filter_by_name' : True},
description='Only compare files whose base names are the same'),
kwargs={'symlinks' : True},
description="Don't ignore symlinks"),
Kwarg(name='cutoff', default=DEFAULT_CUTOFF),
Kwarg(name='strings', default=False),
Kwarg(name='same', default=True),
Kwarg(name='symlinks', default=False),
Kwarg(name='name', default=False),
Kwarg(name='max_results', default=None),
Kwarg(name='abspath', default=False),
Kwarg(name='filter_by_name', default=False),
Kwarg(name='symlinks', default=False),
Kwarg(name='enabled', default=False),
# Requires
LIBRARY_NAME = "fuzzy"
binwalk.core.C.Function(name="fuzzy_hash_buf", type=int),
binwalk.core.C.Function(name="fuzzy_hash_filename", type=int),
binwalk.core.C.Function(name="fuzzy_compare", type=int),
# Max result is 148 (
# Files smaller than this won't produce meaningful fuzzy results (from ssdeep.h)
HEADER_FORMAT = "\n%s" + " " * 11 + "%s\n"
RESULT_FORMAT = "%4d%%" + " " * 16 + "%s\n"
RESULT = ["percentage", "description"]
def init(self): = 0
self.last_file1 = HashResult(None)
self.last_file2 = HashResult(None)
self.lib = binwalk.core.C.Library(self.LIBRARY_NAME, self.LIBRARY_FUNCTIONS)
def _get_strings(self, fname):
return ''.join(list(binwalk.core.common.strings(fname, minimum=10)))
def _show_result(self, match, fname):
if self.abspath:
fname = os.path.abspath(fname)
self.result(percentage=match, description=fname)
def _compare_files(self, file1, file2):
Fuzzy diff two files.
@file1 - The first file to diff.
@file2 - The second file to diff.
Returns the match percentage.
Returns None on error.
status = 0
file1_dup = False
file2_dup = False
if not self.filter_by_name or os.path.basename(file1) == os.path.basename(file2):
if os.path.exists(file1) and os.path.exists(file2):
hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
# Check if the last file1 or file2 matches this file1 or file2; no need to re-hash if they match.
if file1 == and self.last_file1.hash:
file1_dup = True
else: = file1
if file2 == and self.last_file2.hash:
file2_dup = True
else: = file2
if self.strings:
if file1_dup:
file1_strings = self.last_file1.strings
self.last_file1.strings = file1_strings = self._get_strings(file1)
if file2_dup:
file2_strings = self.last_file2.strings
self.last_file2.strings = file2_strings = self._get_strings(file2)
if file1_strings == file2_strings:
return 100
if file1_dup:
hash1 = self.last_file1.hash
status |= self.lib.fuzzy_hash_buf(file1_strings, len(file1_strings), hash1)
if file2_dup:
hash2 = self.last_file2.hash
status |= self.lib.fuzzy_hash_buf(file2_strings, len(file2_strings), hash2)
if file1_dup:
hash1 = self.last_file1.hash
status |= self.lib.fuzzy_hash_filename(file1, hash1)
if file2_dup:
hash2 = self.last_file2.hash
status |= self.lib.fuzzy_hash_filename(file2, hash2)
if status == 0:
if not file1_dup:
self.last_file1.hash = hash1
if not file2_dup:
self.last_file2.hash = hash2
if hash1.raw == hash2.raw:
return 100
return self.lib.fuzzy_compare(hash1, hash2)
except Exception as e:
print ("WARNING: Exception while doing fuzzy hash: %s" % e)
return None
def is_match(self, match):
Returns True if this is a good match.
Returns False if his is not a good match.
return (match is not None and ((match >= self.cutoff and self.same) or (match < self.cutoff and not self.same)))
def _get_file_list(self, directory):
Generates a directory tree.
@directory - The root directory to start from.
Returns a set of file paths, excluding the root directory.
file_list = []
# Normalize directory path so that we can exclude it from each individual file path
directory = os.path.abspath(directory) + os.path.sep
for (root, dirs, files) in os.walk(directory):
# Don't include the root directory in the file paths
root = ''.join(root.split(directory, 1)[1:])
# Get a list of files, with or without symlinks as specified during __init__
files = [os.path.join(root, f) for f in files if self.symlinks or not os.path.islink(f)]
file_list += files
return set(file_list)
def hash_files(self, needle, haystack):
Compare one file against a list of other files.
Returns a list of tuple results.
''' = 0
for f in haystack:
m = self._compare_files(needle, f)
if m is not None and self.is_match(m):
self._show_result(m, f) += 1
if self.max_results and >= self.max_results:
def hash_file(self, needle, haystack):
Search for one file inside one or more directories.
Returns a list of tuple results.
matching_files = [] = 0
done = False
for directory in haystack:
for f in self._get_file_list(directory):
f = os.path.join(directory, f)
m = self._compare_files(needle, f)
if m is not None and self.is_match(m):
self._show_result(m, f)
matching_files.append((m, f)) += 1
if self.max_results and >= self.max_results:
done = True
if done:
return matching_files
def hash_directories(self, needle, haystack):
Compare the contents of one directory with the contents of other directories.
Returns a list of tuple results.
done = False = 0
source_files = self._get_file_list(needle)
for directory in haystack:
dir_files = self._get_file_list(directory)
for f in source_files:
if f in dir_files:
file1 = os.path.join(needle, f)
file2 = os.path.join(directory, f)
m = self._compare_files(file1, file2)
if m is not None and self.is_match(m):
self._show_result(m, file2) += 1
if self.max_results and >= self.max_results:
done = True
if done:
def run(self):
Main module method.
needle = self.next_file().name
haystack = []
for fp in iter(self.next_file, None):
if os.path.isfile(needle):
if os.path.isfile(haystack[0]):
self.hash_files(needle, haystack)
self.hash_file(needle, haystack)
self.hash_directories(needle, haystack)
return True
......@@ -8,186 +8,186 @@ from binwalk.core.compat import *
from binwalk.core.module import Module, Kwarg, Option, Dependency
class ChiSquare(object):
Performs a Chi Squared test against the provided data.
Performs a Chi Squared test against the provided data.
IDEAL = 256.0
IDEAL = 256.0
def __init__(self):
Class constructor.
def __init__(self):
Class constructor.
Returns None.
self.bytes = {}
self.freedom = self.IDEAL - 1
# Initialize the self.bytes dictionary with keys for all possible byte values (0 - 255)
for i in range(0, int(self.IDEAL)):
self.bytes[chr(i)] = 0
Returns None.
self.bytes = {}
self.freedom = self.IDEAL - 1
# Initialize the self.bytes dictionary with keys for all possible byte values (0 - 255)
for i in range(0, int(self.IDEAL)):
self.bytes[chr(i)] = 0
def reset(self):
self.xc2 = 0.0
self.byte_count = 0
def reset(self):
self.xc2 = 0.0
self.byte_count = 0
for key in self.bytes.keys():
self.bytes[key] = 0
for key in self.bytes.keys():
self.bytes[key] = 0
def update(self, data):
Updates the current byte counts with new data.
def update(self, data):
Updates the current byte counts with new data.
@data - String of bytes to update.
@data - String of bytes to update.
Returns None.
# Count the number of occurances of each byte value
for i in data:
self.bytes[i] += 1
Returns None.
# Count the number of occurances of each byte value
for i in data:
self.bytes[i] += 1
self.byte_count += len(data)
self.byte_count += len(data)
def chisq(self):
Calculate the Chi Square critical value.
def chisq(self):
Calculate the Chi Square critical value.
Returns the critical value.
expected = self.byte_count / self.IDEAL
Returns the critical value.
expected = self.byte_count / self.IDEAL
if expected:
for byte in self.bytes.values():
self.xc2 += ((byte - expected) ** 2 ) / expected
if expected:
for byte in self.bytes.values():
self.xc2 += ((byte - expected) ** 2 ) / expected
return self.xc2
return self.xc2
class EntropyBlock(object):
def __init__(self, **kwargs):
self.start = None
self.end = None
self.length = None
for (k,v) in iterator(kwargs):
setattr(self, k, v)
def __init__(self, **kwargs):
self.start = None
self.end = None
self.length = None
for (k,v) in iterator(kwargs):
setattr(self, k, v)
class HeuristicCompressionAnalyzer(Module):
Performs analysis and attempts to interpret the results.
TITLE = "Heuristic Compression"
kwargs={'enabled' : True, 'do_plot' : False, 'display_results' : False, 'block_size' : ENTROPY_BLOCK_SIZE}),
CLI = [
kwargs={'enabled' : True},
description='Heuristically classify high entropy data'),
kwargs={'trigger_level' : 0},
description='Set the entropy trigger level (0.0 - 1.0)'),
Kwarg(name='enabled', default=False),
Kwarg(name='trigger_level', default=ENTROPY_TRIGGER),
def init(self):
self.blocks = {}
if self.config.block:
self.block_size = self.config.block
self.block_size = self.BLOCK_SIZE
for result in self.entropy.results:
if not has_key(self.blocks,
self.blocks[] = []
if result.entropy >= self.trigger_level and (not self.blocks[] or self.blocks[][-1].end is not None):
self.blocks[].append(EntropyBlock(start=result.offset + self.BLOCK_OFFSET))
elif result.entropy < self.trigger_level and self.blocks[] and self.blocks[][-1].end is None:
self.blocks[][-1].end = result.offset - self.BLOCK_OFFSET
def run(self):
for fp in iter(self.next_file, None):
if has_key(self.blocks,
for block in self.blocks[]:
if block.end is None:
block.length = fp.offset + fp.length - block.start
block.length = block.end - block.start
if block.length >= self.MIN_BLOCK_SIZE:
self.analyze(fp, block)
def analyze(self, fp, block):
Perform analysis and interpretation.
i = 0
num_error = 0
analyzer_results = []
chi = ChiSquare()
while i < block.length:
j = 0
(d, dlen) = fp.read_block()
if not d:
while j < dlen:
data = d[j:j+self.block_size]
if len(data) < self.block_size:
if chi.chisq() >= self.CHI_CUTOFF:
num_error += 1
j += self.block_size
if (j + i) > block.length:
i += dlen
if num_error > 0:
verdict = 'Moderate entropy data, best guess: compressed'
verdict = 'High entropy data, best guess: encrypted'
desc = '%s, size: %d, %d low entropy blocks' % (verdict, block.length, num_error)
self.result(offset=block.start, description=desc, file=fp)
Performs analysis and attempts to interpret the results.
TITLE = "Heuristic Compression"
kwargs={'enabled' : True, 'do_plot' : False, 'display_results' : False, 'block_size' : ENTROPY_BLOCK_SIZE}),
CLI = [
kwargs={'enabled' : True},
description='Heuristically classify high entropy data'),
kwargs={'trigger_level' : 0},
description='Set the entropy trigger level (0.0 - 1.0)'),
Kwarg(name='enabled', default=False),
Kwarg(name='trigger_level', default=ENTROPY_TRIGGER),
def init(self):
self.blocks = {}
if self.config.block:
self.block_size = self.config.block
self.block_size = self.BLOCK_SIZE
for result in self.entropy.results:
if not has_key(self.blocks,
self.blocks[] = []
if result.entropy >= self.trigger_level and (not self.blocks[] or self.blocks[][-1].end is not None):
self.blocks[].append(EntropyBlock(start=result.offset + self.BLOCK_OFFSET))
elif result.entropy < self.trigger_level and self.blocks[] and self.blocks[][-1].end is None:
self.blocks[][-1].end = result.offset - self.BLOCK_OFFSET
def run(self):
for fp in iter(self.next_file, None):
if has_key(self.blocks,
for block in self.blocks[]:
if block.end is None:
block.length = fp.offset + fp.length - block.start
block.length = block.end - block.start
if block.length >= self.MIN_BLOCK_SIZE:
self.analyze(fp, block)
def analyze(self, fp, block):
Perform analysis and interpretation.
i = 0
num_error = 0
analyzer_results = []
chi = ChiSquare()
while i < block.length:
j = 0
(d, dlen) = fp.read_block()
if not d:
while j < dlen:
data = d[j:j+self.block_size]
if len(data) < self.block_size:
if chi.chisq() >= self.CHI_CUTOFF:
num_error += 1
j += self.block_size
if (j + i) > block.length:
i += dlen
if num_error > 0:
verdict = 'Moderate entropy data, best guess: compressed'
verdict = 'High entropy data, best guess: encrypted'
desc = '%s, size: %d, %d low entropy blocks' % (verdict, block.length, num_error)
self.result(offset=block.start, description=desc, file=fp)
......@@ -2,32 +2,32 @@ import binwalk.core.C
from binwalk.core.common import *
class Plugin(object):
Searches for and validates compress'd data.
COMPRESS42 = "compress42"
binwalk.core.C.Function(name="is_compressed", type=bool),
def __init__(self, module):
self.fd = None
self.comp = None
if == 'Signature':
self.comp = binwalk.core.C.Library(self.COMPRESS42, self.COMPRESS42_FUNCTIONS)
def scan(self, result):
if self.comp:
if result.file and result.description.lower().startswith("compress'd data"):
fd = BlockFile(, "r", offset=result.offset, length=self.READ_SIZE)
compressed_data =
Searches for and validates compress'd data.
COMPRESS42 = "compress42"
binwalk.core.C.Function(name="is_compressed", type=bool),
def __init__(self, module):
self.fd = None
self.comp = None
if == 'Signature':
self.comp = binwalk.core.C.Library(self.COMPRESS42, self.COMPRESS42_FUNCTIONS)
def scan(self, result):
if self.comp:
if result.file and result.description.lower().startswith("compress'd data"):
fd = BlockFile(, "r", offset=result.offset, length=self.READ_SIZE)
compressed_data =
if not self.comp.is_compressed(compressed_data, len(compressed_data)):
result.valid = False
if not self.comp.is_compressed(compressed_data, len(compressed_data)):
result.valid = False
class Plugin(object):
Ensures that ASCII CPIO archive entries only get extracted once.
Ensures that ASCII CPIO archive entries only get extracted once.
def __init__(self, module):
self.found_archive = False
self.enabled = ( == 'Signature')
def pre_scan(self, module):
# Be sure to re-set this at the beginning of every scan
self.found_archive = False
def __init__(self, module):
self.found_archive = False
self.enabled = ( == 'Signature')
def pre_scan(self, module):
# Be sure to re-set this at the beginning of every scan
self.found_archive = False
def scan(self, result):
if self.enabled and result.valid:
# ASCII CPIO archives consist of multiple entries, ending with an entry named 'TRAILER!!!'.
# Displaying each entry is useful, as it shows what files are contained in the archive,
# but we only want to extract the archive when the first entry is found.
if result.description.startswith('ASCII cpio archive'):
if not self.found_archive:
# This is the first entry. Set found_archive and allow the scan to continue normally.
self.found_archive = True
result.extract = True
elif 'TRAILER!!!' in results['description']:
# This is the last entry, un-set found_archive.
self.found_archive = False
# The first entry has already been found and this is the last entry, or the last entry
# has not yet been found. Don't extract.
result.extract = False
def scan(self, result):
if self.enabled and result.valid:
# ASCII CPIO archives consist of multiple entries, ending with an entry named 'TRAILER!!!'.
# Displaying each entry is useful, as it shows what files are contained in the archive,
# but we only want to extract the archive when the first entry is found.
if result.description.startswith('ASCII cpio archive'):
if not self.found_archive:
# This is the first entry. Set found_archive and allow the scan to continue normally.
self.found_archive = True
result.extract = True
elif 'TRAILER!!!' in results['description']:
# This is the last entry, un-set found_archive.
self.found_archive = False
# The first entry has already been found and this is the last entry, or the last entry
# has not yet been found. Don't extract.
result.extract = False
......@@ -4,61 +4,61 @@ from binwalk.core.compat import *
from binwalk.core.common import BlockFile
class Plugin(object):
Finds and extracts modified LZMA files commonly found in cable modems.
Based on Bernardo Rodrigues' work:
Finds and extracts modified LZMA files commonly found in cable modems.
Based on Bernardo Rodrigues' work:
FAKE_LZMA_SIZE = "\x00\x00\x00\x10\x00\x00\x00\x00"
SIGNATURE = "lzma compressed data"
FAKE_LZMA_SIZE = "\x00\x00\x00\x10\x00\x00\x00\x00"
SIGNATURE = "lzma compressed data"
def __init__(self, module):
self.original_cmd = ''
self.enabled = ( == 'Signature')
self.module = module
def __init__(self, module):
self.original_cmd = ''
self.enabled = ( == 'Signature')
self.module = module
if self.enabled:
# Replace the existing LZMA extraction command with our own
rules = self.module.extractor.get_rules()
for i in range(0, len(rules)):
if rules[i]['regex'].match(self.SIGNATURE):
self.original_cmd = rules[i]['cmd']
rules[i]['cmd'] = self.lzma_cable_extractor
if self.enabled:
# Replace the existing LZMA extraction command with our own
rules = self.module.extractor.get_rules()
for i in range(0, len(rules)):
if rules[i]['regex'].match(self.SIGNATURE):
self.original_cmd = rules[i]['cmd']
rules[i]['cmd'] = self.lzma_cable_extractor
def lzma_cable_extractor(self, fname):
# Try extracting the LZMA file without modification first
if not self.module.extractor.execute(self.original_cmd, fname):
out_name = os.path.splitext(fname)[0] + '-patched' + os.path.splitext(fname)[1]
fp_out = BlockFile(out_name, 'w')
# Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored
fp_in = self.module.config.open_file(fname, offset=0, length=0)
i = 0
def lzma_cable_extractor(self, fname):
# Try extracting the LZMA file without modification first
if not self.module.extractor.execute(self.original_cmd, fname):
out_name = os.path.splitext(fname)[0] + '-patched' + os.path.splitext(fname)[1]
fp_out = BlockFile(out_name, 'w')
# Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored
fp_in = self.module.config.open_file(fname, offset=0, length=0)
i = 0
while i < fp_in.length:
(data, dlen) = fp_in.read_block()
if i == 0:
out_data = data[0:5] + self.FAKE_LZMA_SIZE + data[5:]
out_data = data
i += dlen
while i < fp_in.length:
(data, dlen) = fp_in.read_block()
if i == 0:
out_data = data[0:5] + self.FAKE_LZMA_SIZE + data[5:]
out_data = data
i += dlen
# Overwrite the original file so that it can be cleaned up if -r was specified
shutil.move(out_name, fname)
self.module.extractor.execute(self.original_cmd, fname)
# Overwrite the original file so that it can be cleaned up if -r was specified
shutil.move(out_name, fname)
self.module.extractor.execute(self.original_cmd, fname)
def scan(self, result):
# The modified cable modem LZMA headers all have valid dictionary sizes and a properties byte of 0x5D.
if self.enabled and result.description.lower().startswith(self.SIGNATURE) and "invalid uncompressed size" in result.description:
if "properties: 0x5D" in result.description and "invalid dictionary size" not in result.description:
result.valid = True
result.description = result.description.split("invalid uncompressed size")[0] + "missing uncompressed size"
def scan(self, result):
# The modified cable modem LZMA headers all have valid dictionary sizes and a properties byte of 0x5D.
if self.enabled and result.description.lower().startswith(self.SIGNATURE) and "invalid uncompressed size" in result.description:
if "properties: 0x5D" in result.description and "invalid dictionary size" not in result.description:
result.valid = True
result.description = result.description.split("invalid uncompressed size")[0] + "missing uncompressed size"
......@@ -2,39 +2,39 @@ import binwalk.core.C
from binwalk.core.common import BlockFile
class Plugin(object):
Searches for and validates zlib compressed data.
MIN_DECOMP_SIZE = 16 * 1024
MAX_DATA_SIZE = 33 * 1024
TINFL = "tinfl"
binwalk.core.C.Function(name="is_deflated", type=int),
def __init__(self, module):
self.tinfl = None
self.module = module
# Only initialize this plugin if this is a signature scan
if == 'Signature':
# Load
self.tinfl = binwalk.core.C.Library(self.TINFL, self.TINFL_FUNCTIONS)
def scan(self, result):
# If this result is a zlib signature match, try to decompress the data
if self.tinfl and result.file and result.description.lower().startswith('zlib'):
# Seek to and read the suspected zlib data
fd = self.module.config.open_file(, offset=result.offset, length=self.MAX_DATA_SIZE)
data =
# Check if this is valid zlib data
decomp_size = self.tinfl.is_deflated(data, len(data), 1)
if decomp_size > 0:
result.description += ", uncompressed size >= %d" % decomp_size
result.valid = False
Searches for and validates zlib compressed data.
MIN_DECOMP_SIZE = 16 * 1024
MAX_DATA_SIZE = 33 * 1024
TINFL = "tinfl"
binwalk.core.C.Function(name="is_deflated", type=int),
def __init__(self, module):
self.tinfl = None
self.module = module
# Only initialize this plugin if this is a signature scan
if == 'Signature':
# Load
self.tinfl = binwalk.core.C.Library(self.TINFL, self.TINFL_FUNCTIONS)
def scan(self, result):
# If this result is a zlib signature match, try to decompress the data
if self.tinfl and result.file and result.description.lower().startswith('zlib'):
# Seek to and read the suspected zlib data
fd = self.module.config.open_file(, offset=result.offset, length=self.MAX_DATA_SIZE)
data =
# Check if this is valid zlib data
decomp_size = self.tinfl.is_deflated(data, len(data), 1)
if decomp_size > 0:
result.description += ", uncompressed size >= %d" % decomp_size
result.valid = False
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment