Commit 4bcef6c6 by Craig Heffner

Modified code to use PEP8 formatting via autopep8

parent 1eab95fb
......@@ -4,9 +4,13 @@ from binwalk.core.module import Modules
from binwalk.core.exceptions import ModuleException
# Convenience functions
def scan(*args, **kwargs):
    '''
    Convenience wrapper: run a binwalk scan with the given module
    arguments and return the list of executed module objects.
    '''
    with Modules(*args, **kwargs) as m:
        results = m.execute()
    return results
def execute(*args, **kwargs):
    '''
    Backwards-compatible alias for scan(); forwards all arguments unchanged.
    '''
    return scan(*args, **kwargs)
......@@ -6,10 +6,13 @@ import ctypes.util
import binwalk.core.common
from binwalk.core.compat import *
class Function(object):
'''
Container class for defining library functions.
'''
def __init__(self, **kwargs):
self.name = None
self.type = int
......@@ -17,26 +20,28 @@ class Function(object):
for (k, v) in iterator(kwargs):
setattr(self, k, v)
class FunctionHandler(object):
'''
Class for abstracting function calls via ctypes and handling Python 2/3 compatibility issues.
'''
PY2CTYPES = {
bytes : ctypes.c_char_p,
str : ctypes.c_char_p,
int : ctypes.c_int,
float : ctypes.c_float,
bool : ctypes.c_int,
None : ctypes.c_int,
bytes: ctypes.c_char_p,
str: ctypes.c_char_p,
int: ctypes.c_int,
float: ctypes.c_float,
bool: ctypes.c_int,
None: ctypes.c_int,
}
RETVAL_CONVERTERS = {
None : int,
int : int,
float : float,
bool : bool,
str : bytes2str,
bytes : str2bytes,
None: int,
int: int,
float: float,
bool: bool,
str: bytes2str,
bytes: str2bytes,
}
def __init__(self, library, function):
......@@ -58,7 +63,7 @@ class FunctionHandler(object):
else:
self.function.restype = self.retype
self.retval_converter = None
#raise Exception("Unknown return type: '%s'" % self.retype)
# raise Exception("Unknown return type: '%s'" % self.retype)
def run(self, *args):
'''
......@@ -82,7 +87,9 @@ class FunctionHandler(object):
return retval
class Library(object):
'''
Class for loading the specified library via ctypes.
'''
......@@ -122,13 +129,14 @@ class Library(object):
for library in libraries:
system_paths = {
'linux' : [os.path.join(prefix, 'lib%s.so' % library), '/usr/local/lib/lib%s.so' % library],
'cygwin' : [os.path.join(prefix, 'lib%s.so' % library), '/usr/local/lib/lib%s.so' % library],
'win32' : [os.path.join(prefix, 'lib%s.dll' % library), '%s.dll' % library],
'darwin' : [os.path.join(prefix, 'lib%s.dylib' % library),
'linux': [os.path.join(prefix, 'lib%s.so' % library), '/usr/local/lib/lib%s.so' % library],
'cygwin': [os.path.join(prefix, 'lib%s.so' % library), '/usr/local/lib/lib%s.so' % library],
'win32': [os.path.join(prefix, 'lib%s.dll' % library), '%s.dll' % library],
'darwin': [os.path.join(prefix, 'lib%s.dylib' % library),
'/opt/local/lib/lib%s.dylib' % library,
'/usr/local/lib/lib%s.dylib' % library,
] + glob.glob('/usr/local/Cellar/*%s*/*/lib/lib%s.dylib' % (library, library)),
] + glob.glob(
'/usr/local/Cellar/*%s*/*/lib/lib%s.dylib' % (library, library)),
}
for i in range(2, 4):
......@@ -136,27 +144,30 @@ class Library(object):
# Search the common install directories first; these are usually not in the library search path
# Search these *first*, since a) they are the most likely locations and b) there may be a
# discrepency between where ctypes.util.find_library and ctypes.cdll.LoadLibrary search for libs.
# discrepency between where ctypes.util.find_library and
# ctypes.cdll.LoadLibrary search for libs.
for path in system_paths[sys.platform]:
binwalk.core.common.debug("Searching for '%s'" % path)
if os.path.exists(path):
lib_path = path
break
# If we failed to find the library, check the standard library search paths
# If we failed to find the library, check the standard library
# search paths
if not lib_path:
lib_path = ctypes.util.find_library(library)
# Use the first library that we can find
if lib_path:
binwalk.core.common.debug("Found library '%s' at: %s" % (library, lib_path))
binwalk.core.common.debug(
"Found library '%s' at: %s" % (library, lib_path))
break
else:
binwalk.core.common.debug("Could not find library '%s'" % library)
binwalk.core.common.debug(
"Could not find library '%s'" % library)
# If we still couldn't find the library, error out
if not lib_path:
raise Exception("Failed to locate libraries '%s'" % str(libraries))
return lib_path
......@@ -21,10 +21,12 @@ if not __debug__:
else:
DEBUG = False
def MSWindows():
    '''
    Return True when running on a Microsoft Windows OS, else False.
    '''
    return platform.system() == 'Windows'
def debug(msg):
'''
Displays debug messages to stderr only if the Python interpreter was invoked with the -O flag.
......@@ -33,27 +35,32 @@ def debug(msg):
sys.stderr.write("DEBUG: " + msg + "\n")
sys.stderr.flush()
def warning(msg):
    '''
    Display a warning message on stderr.
    '''
    text = "\nWARNING: " + msg + "\n"
    sys.stderr.write(text)
def error(msg):
    '''
    Display an error message on stderr.
    '''
    text = "\nERROR: " + msg + "\n"
    sys.stderr.write(text)
def get_module_path():
    '''
    Return the absolute path of the directory two levels above this
    module (i.e., the binwalk package root), resolving symlinks first.
    '''
    path = __file__
    # Resolve symlinks so the returned path reflects the real install location.
    if os.path.islink(path):
        path = os.path.realpath(path)
    return os.path.dirname(os.path.dirname(os.path.abspath(path)))
def get_libs_path():
    '''
    Return the path of the "libs" directory under the module root.
    '''
    module_root = get_module_path()
    return os.path.join(module_root, "libs")
def file_md5(file_name):
'''
Generate an MD5 hash of the specified file.
......@@ -65,11 +72,12 @@ def file_md5(file_name):
md5 = hashlib.md5()
with open(file_name, 'rb') as f:
for chunk in iter(lambda: f.read(128*md5.block_size), b''):
for chunk in iter(lambda: f.read(128 * md5.block_size), b''):
md5.update(chunk)
return md5.hexdigest()
def file_size(filename):
'''
Obtains the size of a given file.
......@@ -85,10 +93,12 @@ def file_size(filename):
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e)))
raise Exception(
"file_size failed to obtain the size of '%s': %s" % (filename, str(e)))
finally:
os.close(fd)
def strip_quoted_strings(string):
'''
Strips out data in between double quotes.
......@@ -101,9 +111,11 @@ def strip_quoted_strings(string):
# Note that this removes everything in between the first and last double quote.
# This is intentional, as printed (and quoted) strings from a target file may contain
# double quotes, and this function should ignore those. However, it also means that any
# data between two quoted strings (ex: '"quote 1" you won't see me "quote 2"') will also be stripped.
# data between two quoted strings (ex: '"quote 1" you won't see me "quote
# 2"') will also be stripped.
return re.sub(r'\"(.*)\"', "", string)
def get_quoted_strings(string):
'''
Returns a string comprised of all data in between double quotes.
......@@ -118,13 +130,15 @@ def get_quoted_strings(string):
# Note that this gets everything in between the first and last double quote.
# This is intentional, as printed (and quoted) strings from a target file may contain
# double quotes, and this function should ignore those. However, it also means that any
# data between two quoted strings (ex: '"quote 1" non-quoted data "quote 2"') will also be included.
# data between two quoted strings (ex: '"quote 1" non-quoted data
# "quote 2"') will also be included.
return re.findall(r'\"(.*)\"', string)[0]
except KeyboardInterrupt as e:
raise e
except Exception:
return ''
def unique_file_name(base_name, extension=''):
'''
Creates a unique file name based on the specified base name.
......@@ -147,6 +161,7 @@ def unique_file_name(base_name, extension=''):
return fname
def strings(filename, minimum=4):
'''
A strings generator, similar to the Unix strings utility.
......@@ -174,13 +189,16 @@ def strings(filename, minimum=4):
else:
result = ""
class GenericContainer(object):
    '''
    Generic container that exposes its keyword arguments as attributes.
    '''

    def __init__(self, **kwargs):
        # Diff residue left both the pre- and post-autopep8 copy of this
        # loop line; only the PEP8-formatted version is kept.
        # Promote every keyword argument to an instance attribute.
        for (k, v) in iterator(kwargs):
            setattr(self, k, v)
class MathExpression(object):
'''
Class for safely evaluating mathematical expressions from a string.
Stolen from: http://stackoverflow.com/questions/2371436/evaluating-a-mathematical-expression-in-a-string
......@@ -224,11 +242,14 @@ class MathExpression(object):
else:
raise TypeError(node)
class StringFile(object):
'''
A class to allow access to strings as if they were read from a file.
Used internally as a conditional superclass to InternalBlockFile.
'''
def __init__(self, fname, mode='r'):
self.string = fname
self.name = "String"
......@@ -238,7 +259,7 @@ class StringFile(object):
if n == -1:
data = self.string[self.total_read:]
else:
data = self.string[self.total_read:self.total_read+n]
data = self.string[self.total_read:self.total_read + n]
return data
def tell(self):
......@@ -253,10 +274,12 @@ class StringFile(object):
def close(self):
pass
def BlockFile(fname, mode='r', subclass=io.FileIO, **kwargs):
# Defining a class inside a function allows it to be dynamically subclassed
class InternalBlockFile(subclass):
'''
Abstraction class for accessing binary files.
......@@ -289,7 +312,8 @@ def BlockFile(fname, mode='r', subclass=io.FileIO, **kwargs):
DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024
# Max number of bytes to process at one time. This needs to be large enough to
# limit disk I/O, but small enough to limit the size of processed data blocks.
# limit disk I/O, but small enough to limit the size of processed data
# blocks.
DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
......@@ -310,7 +334,8 @@ def BlockFile(fname, mode='r', subclass=io.FileIO, **kwargs):
self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE
# This is so that custom parent classes can access/modify arguments as necessary
# This is so that custom parent classes can access/modify arguments
# as necessary
self.args = GenericContainer(fname=fname,
mode=mode,
length=length,
......@@ -390,7 +415,7 @@ def BlockFile(fname, mode='r', subclass=io.FileIO, **kwargs):
if self.swap_size > 0:
while i < len(block):
data += block[i:i+self.swap_size][::-1]
data += block[i:i + self.swap_size][::-1]
i += self.swap_size
else:
data = block
......@@ -398,7 +423,8 @@ def BlockFile(fname, mode='r', subclass=io.FileIO, **kwargs):
return data
def reset(self):
self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
self.set_block_size(
block=self.base_block_size, peek=self.base_peek_size)
self.seek(self.offset)
def set_block_size(self, block=None, peek=None):
......@@ -444,7 +470,7 @@ def BlockFile(fname, mode='r', subclass=io.FileIO, **kwargs):
n = self.length - self.total_read
while n < 0 or l < n:
tmp = super(self.__class__, self).read(n-l)
tmp = super(self.__class__, self).read(n - l)
if tmp:
data += tmp
l += len(tmp)
......@@ -487,4 +513,3 @@ def BlockFile(fname, mode='r', subclass=io.FileIO, **kwargs):
return (data, dlen)
return InternalBlockFile(fname, mode=mode, **kwargs)
......@@ -9,6 +9,7 @@ PY_MAJOR_VERSION = sys.version_info[0]
if PY_MAJOR_VERSION > 2:
string.letters = string.ascii_letters
def iterator(dictionary):
'''
For cross compatibility between Python 2 and Python 3 dictionaries.
......@@ -18,6 +19,7 @@ def iterator(dictionary):
else:
return dictionary.iteritems()
def has_key(dictionary, key):
'''
For cross compatibility between Python 2 and Python 3 dictionaries.
......@@ -27,6 +29,7 @@ def has_key(dictionary, key):
else:
return dictionary.has_key(key)
def get_keys(dictionary):
'''
For cross compatibility between Python 2 and Python 3 dictionaries.
......@@ -36,6 +39,7 @@ def get_keys(dictionary):
else:
return dictionary.keys()
def str2bytes(string):
'''
For cross compatibility between Python 2 and Python 3 strings.
......@@ -45,6 +49,7 @@ def str2bytes(string):
else:
return string
def bytes2str(bs):
'''
For cross compatibility between Python 2 and Python 3 strings.
......@@ -54,6 +59,7 @@ def bytes2str(bs):
else:
return bs
def string_decode(string):
'''
For cross compatibility between Python 2 and Python 3 strings.
......@@ -63,6 +69,7 @@ def string_decode(string):
else:
return string.decode('string_escape')
def user_input(prompt=''):
'''
For getting raw user input in Python 2 and 3.
......@@ -71,4 +78,3 @@ def user_input(prompt=''):
return input(prompt)
else:
return raw_input(prompt)
......@@ -7,7 +7,9 @@ import datetime
import binwalk.core.common
from binwalk.core.compat import *
class Display(object):
'''
Class to handle display of output and writing to log files.
This class is instantiated for all modules implicitly and should not need to be invoked directly by most modules.
......@@ -100,26 +102,32 @@ class Display(object):
self.log("", [file_name, md5sum, timestamp])
self._fprint("%s", "\n", csv=False)
self._fprint("Scan Time: %s\n", [timestamp], csv=False, filter=False)
self._fprint("Target File: %s\n", [file_name], csv=False, filter=False)
self._fprint("MD5 Checksum: %s\n", [md5sum], csv=False, filter=False)
self._fprint("Scan Time: %s\n", [
timestamp], csv=False, filter=False)
self._fprint("Target File: %s\n", [
file_name], csv=False, filter=False)
self._fprint(
"MD5 Checksum: %s\n", [md5sum], csv=False, filter=False)
if self.custom_verbose_format and self.custom_verbose_args:
self._fprint(self.custom_verbose_format, self.custom_verbose_args, csv=False, filter=False)
self._fprint(
self.custom_verbose_format, self.custom_verbose_args, csv=False, filter=False)
self._fprint("%s", "\n", csv=False, filter=False)
self._fprint(self.header_format, args, filter=False)
self._fprint("%s", ["-" * self.HEADER_WIDTH + "\n"], csv=False, filter=False)
self._fprint(
"%s", ["-" * self.HEADER_WIDTH + "\n"], csv=False, filter=False)
def result(self, *args):
# Convert to list for item assignment
args = list(args)
# Replace multiple spaces with single spaces. This is to prevent accidentally putting
# four spaces in the description string, which would break auto-formatting.
# four spaces in the description string, which would break
# auto-formatting.
for i in range(len(args)):
if isinstance(args[i], str):
while " " in args[i]:
args[i] = args[i].replace(" " , " ")
args[i] = args[i].replace(" ", " ")
self._fprint(self.result_format, tuple(args))
......@@ -177,13 +185,15 @@ class Display(object):
offset = 0
self.string_parts = []
# Split the line into an array of columns, e.g., ['0', '0x00000000', 'Some description here']
line_columns = line.split(None, self.num_columns-1)
# Split the line into an array of columns, e.g., ['0', '0x00000000',
# 'Some description here']
line_columns = line.split(None, self.num_columns - 1)
if line_columns:
# Find where the start of the last column (description) starts in the line of text.
# All line wraps need to be aligned to this offset.
offset = line.rfind(line_columns[-1])
# The delimiter will be a newline followed by spaces padding out the line wrap to the alignment offset.
# The delimiter will be a newline followed by spaces padding out
# the line wrap to the alignment offset.
delim += ' ' * offset
if line_columns and self.fit_to_screen and len(line) > self.SCREEN_WIDTH:
......@@ -194,19 +204,25 @@ class Display(object):
# Loop to split up line into multiple max_line_wrap_length pieces
while len(line[offset:]) > max_line_wrap_length:
# Find the nearest space to wrap the line at (so we don't split a word across two lines)
split_offset = line[offset:offset+max_line_wrap_length].rfind(' ')
# If there were no good places to split the line, just truncate it at max_line_wrap_length
# Find the nearest space to wrap the line at (so we don't split
# a word across two lines)
split_offset = line[
offset:offset + max_line_wrap_length].rfind(' ')
# If there were no good places to split the line, just truncate
# it at max_line_wrap_length
if split_offset < 1:
split_offset = max_line_wrap_length
self._append_to_data_parts(line, offset, offset+split_offset)
self._append_to_data_parts(line, offset, offset + split_offset)
offset += split_offset
# Add any remaining data (guarunteed to be max_line_wrap_length long or shorter) to self.string_parts
self._append_to_data_parts(line, offset, offset+len(line[offset:]))
# Add any remaining data (guarunteed to be max_line_wrap_length
# long or shorter) to self.string_parts
self._append_to_data_parts(
line, offset, offset + len(line[offset:]))
# Append self.string_parts to formatted_line; each part seperated by delim
# Append self.string_parts to formatted_line; each part seperated
# by delim
formatted_line += delim.join(self.string_parts)
else:
formatted_line = line
......@@ -228,10 +244,10 @@ class Display(object):
import termios
# Get the terminal window width
hw = struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234'))
hw = struct.unpack(
'hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234'))
self.SCREEN_WIDTH = self.HEADER_WIDTH = hw[1]
except KeyboardInterrupt as e:
raise e
except Exception:
pass
class ParserException(Exception):
    '''
    Raised when a magic signature file fails to parse.
    '''
    pass
class ModuleException(Exception):
    '''
    Generic exception raised by binwalk modules; carries no extra state.
    '''
    pass
class IgnoreFileException(Exception):
'''
Special exception class used by the load_file plugin method
to indicate that the file that we are attempting to load
......
......@@ -4,11 +4,14 @@ import io
import os
import logging
class ShutUpHashlib(logging.Filter):
    '''
    Logging filter that suppresses hashlib exception messages when
    running under the Python interpreter bundled with IDA.
    '''

    def filter(self, record):
        # Drop any record whose message starts with "code for hash".
        message = record.getMessage()
        return not message.startswith("code for hash")
......@@ -21,9 +24,11 @@ try:
except ImportError:
LOADED_IN_IDA = False
def start_address():
    '''
    Return the start address (startEA) of the first segment in the IDB.
    '''
    first_segment = idaapi.get_first_seg()
    return first_segment.startEA
def end_address():
last_ea = idc.BADADDR
seg = idaapi.get_first_seg()
......@@ -34,7 +39,9 @@ def end_address():
return last_ea
class IDBFileIO(io.FileIO):
'''
A custom class to override binwalk.core.common.Blockfile in order to
read data directly out of the IDB, rather than reading from the original
......@@ -98,7 +105,8 @@ class IDBFileIO(io.FileIO):
try:
data += idc.GetManyBytes(self.idb_pos, read_count)
except TypeError as e:
# This happens when trying to read from uninitialized segments (e.g., .bss)
# This happens when trying to read from uninitialized
# segments (e.g., .bss)
data += "\x00" * read_count
n -= read_count
......@@ -136,4 +144,3 @@ class IDBFileIO(io.FileIO):
return super(IDBFileIO, self).tell()
else:
return self.idb_pos
......@@ -11,10 +11,13 @@ import binwalk.core.common
import binwalk.core.compat
from binwalk.core.exceptions import ParserException
class SignatureResult(binwalk.core.module.Result):
'''
Container class for signature results.
'''
def __init__(self, **kwargs):
# These are set by signature keyword tags.
# Keyword tags can also set any other object attributes,
......@@ -36,7 +39,9 @@ class SignatureResult(binwalk.core.module.Result):
self.valid = (not self.invalid)
class SignatureLine(object):
'''
Responsible for parsing signature lines from magic signature files.
'''
......@@ -101,16 +106,19 @@ class SignatureLine(object):
self.operator = operator
# Try to convert the operator value into an integer. This works for
# simple operator values, but not for complex types (e.g., '(4.l+12)').
# simple operator values, but not for complex types (e.g.,
# '(4.l+12)').
try:
self.opvalue = int(self.opvalue, 0)
except ValueError as e:
pass
# Only one operator type is supported, so break as soon as one is found
# Only one operator type is supported, so break as soon as one
# is found
break
# If the specified type starts with 'u' (e.g., 'ubelong'), then it is unsigned; else, it is signed
# If the specified type starts with 'u' (e.g., 'ubelong'), then it is
# unsigned; else, it is signed
if self.type[0] == 'u':
self.signed = False
self.type = self.type[1:]
......@@ -118,7 +126,8 @@ class SignatureLine(object):
self.signed = True
# Big endian values start with 'be' ('belong'), little endian values start with 'le' ('lelong').
# The struct module uses '>' to denote big endian and '<' to denote little endian.
# The struct module uses '>' to denote big endian and '<' to denote
# little endian.
if self.type.startswith('be'):
self.type = self.type[2:]
self.endianess = '>'
......@@ -141,9 +150,11 @@ class SignatureLine(object):
# If this is a wildcard value, explicitly set self.value to None
if self.value == 'x':
self.value = None
# String values need to be decoded, as they may contain escape characters (e.g., '\x20')
# String values need to be decoded, as they may contain escape
# characters (e.g., '\x20')
elif self.type == 'string':
# String types support multiplication to easily match large repeating byte sequences
# String types support multiplication to easily match large
# repeating byte sequences
if '*' in self.value:
try:
p = self.value.split('*')
......@@ -153,11 +164,13 @@ class SignatureLine(object):
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise ParserException("Failed to expand string '%s' with integer '%s' in line '%s'" % (self.value, n, line))
raise ParserException(
"Failed to expand string '%s' with integer '%s' in line '%s'" % (self.value, n, line))
try:
self.value = binwalk.core.compat.string_decode(self.value)
except ValueError as e:
raise ParserException("Failed to decode string value '%s' in line '%s'" % (self.value, line))
raise ParserException(
"Failed to decode string value '%s' in line '%s'" % (self.value, line))
# If a regex was specified, compile it
elif self.type == 'regex':
self.regex = True
......@@ -167,34 +180,43 @@ class SignatureLine(object):
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise ParserException("Invalid regular expression '%s': %s" % (self.value, str(e)))
raise ParserException(
"Invalid regular expression '%s': %s" % (self.value, str(e)))
# Non-string types are integer values
else:
try:
self.value = int(self.value, 0)
except ValueError as e:
raise ParserException("Failed to convert value '%s' to an integer on line '%s'" % (self.value, line))
raise ParserException(
"Failed to convert value '%s' to an integer on line '%s'" % (self.value, line))
# Sanity check to make sure the first line of a signature has an explicit value
# Sanity check to make sure the first line of a signature has an
# explicit value
if self.level == 0 and self.value is None:
raise ParserException("First element of a signature must specify a non-wildcard value: '%s'" % (line))
raise ParserException(
"First element of a signature must specify a non-wildcard value: '%s'" % (line))
# Set the size and struct format value for the specified data type.
# This must be done, obviously, after the value has been parsed out above.
# This must be done, obviously, after the value has been parsed out
# above.
if self.type == 'string':
# Strings don't have a struct format value, since they don't have to be unpacked
# Strings don't have a struct format value, since they don't have
# to be unpacked
self.fmt = None
# If a string type has a specific value, set the comparison size to the length of that string
# If a string type has a specific value, set the comparison size to
# the length of that string
if self.value:
self.size = len(self.value)
# Else, truncate the string to self.MAX_STRING_SIZE
else:
self.size = self.MAX_STRING_SIZE
elif self.type == 'regex':
# Regular expressions don't have a struct format value, since they don't have to be unpacked
# Regular expressions don't have a struct format value, since they
# don't have to be unpacked
self.fmt = None
# The size of a matching regex is unknown until it is applied to some data
# The size of a matching regex is unknown until it is applied to
# some data
self.size = self.MAX_STRING_SIZE
elif self.type == 'byte':
self.fmt = 'b'
......@@ -210,7 +232,8 @@ class SignatureLine(object):
self.fmt = 'i'
self.size = 4
else:
raise ParserException("Unknown data type '%s' in line '%s'" % (self.type, line))
raise ParserException(
"Unknown data type '%s' in line '%s'" % (self.type, line))
# The struct module uses the same characters for specifying signed and unsigned data types,
# except that signed data types are upper case. The above if-else code sets self.fmt to the
......@@ -238,7 +261,8 @@ class SignatureLine(object):
# Get rid of the curly braces.
tag = match.group().replace('{', '').replace('}', '')
# If the tag specifies a value, it will be colon delimited (e.g., '{name:%s}')
# If the tag specifies a value, it will be colon delimited
# (e.g., '{name:%s}')
if ':' in tag:
(n, v) = tag.split(':', 1)
else:
......@@ -253,7 +277,9 @@ class SignatureLine(object):
else:
self.format = ""
class Signature(object):
'''
Class to hold signature data and generate signature regular expressions.
'''
......@@ -347,8 +373,9 @@ class Signature(object):
# spit out a warning about any self-overlapping signatures.
if not binwalk.core.compat.has_key(line.tags, 'overlap'):
for i in range(1, line.size):
if restr[i:] == restr[0:(line.size-i)]:
binwalk.core.common.warning("Signature '%s' is a self-overlapping signature!" % line.text)
if restr[i:] == restr[0:(line.size - i)]:
binwalk.core.common.warning(
"Signature '%s' is a self-overlapping signature!" % line.text)
break
return re.compile(re.escape(restr))
......@@ -361,10 +388,13 @@ class Signature(object):
Returns None.
'''
# This method is kind of useless, but may be a nice wrapper for future code.
# This method is kind of useless, but may be a nice wrapper for future
# code.
self.lines.append(line)
class Magic(object):
'''
Primary class for loading signature files and scanning
blocks of arbitrary data for matching signatures.
......@@ -380,11 +410,14 @@ class Magic(object):
Returns None.
'''
# Used to save the block of data passed to self.scan (see additional comments in self.scan)
# Used to save the block of data passed to self.scan (see additional
# comments in self.scan)
self.data = ""
# A list of Signature class objects, populated by self.parse (see also: self.load)
# A list of Signature class objects, populated by self.parse (see also:
# self.load)
self.signatures = []
# A set of signatures with the 'once' keyword that have already been displayed once
# A set of signatures with the 'once' keyword that have already been
# displayed once
self.display_once = set()
self.dirty = True
......@@ -454,53 +487,65 @@ class Magic(object):
replacements = {}
for period in [match.start() for match in self.period.finditer(expression)]:
# Separate the offset field into the integer offset and type values (o and t respsectively)
# Separate the offset field into the integer offset and type
# values (o and t respsectively)
s = expression[:period].rfind('(') + 1
# The offset address may be an evaluatable expression, such as '(4+0.L)', typically the result
# of the original offset being something like '(&0.L)'.
o = binwalk.core.common.MathExpression(expression[s:period]).value
t = expression[period+1]
o = binwalk.core.common.MathExpression(
expression[s:period]).value
t = expression[period + 1]
# Re-build just the parsed offset portion of the expression
text = "%s.%c" % (expression[s:period], t)
# Have we already evaluated this offset expression? If so, skip it.
# Have we already evaluated this offset expression? If so, skip
# it.
if binwalk.core.common.has_key(replacements, text):
continue
# The offset specified in the expression is relative to the starting offset inside self.data
# The offset specified in the expression is relative to the
# starting offset inside self.data
o += offset
# Read the value from self.data at the specified offset
try:
# Big and little endian byte format
if t in ['b', 'B']:
v = struct.unpack('b', binwalk.core.compat.str2bytes(self.data[o:o+1]))[0]
v = struct.unpack(
'b', binwalk.core.compat.str2bytes(self.data[o:o + 1]))[0]
# Little endian short format
elif t == 's':
v = struct.unpack('<h', binwalk.core.compat.str2bytes(self.data[o:o+2]))[0]
v = struct.unpack(
'<h', binwalk.core.compat.str2bytes(self.data[o:o + 2]))[0]
# Little endian long format
elif t == 'l':
v = struct.unpack('<i', binwalk.core.compat.str2bytes(self.data[o:o+4]))[0]
v = struct.unpack(
'<i', binwalk.core.compat.str2bytes(self.data[o:o + 4]))[0]
# Big endian short format
elif t == 'S':
v = struct.unpack('>h', binwalk.core.compat.str2bytes(self.data[o:o+2]))[0]
v = struct.unpack(
'>h', binwalk.core.compat.str2bytes(self.data[o:o + 2]))[0]
# Bit endian long format
elif t == 'L':
v = struct.unpack('>i', binwalk.core.compat.str2bytes(self.data[o:o+4]))[0]
# struct.error is thrown if there is not enough bytes in self.data for the specified format type
v = struct.unpack(
'>i', binwalk.core.compat.str2bytes(self.data[o:o + 4]))[0]
# struct.error is thrown if there is not enough bytes in
# self.data for the specified format type
except struct.error as e:
v = 0
# Keep track of all the recovered values from self.data
replacements[text] = v
# Finally, replace all offset expressions with their corresponding text value
# Finally, replace all offset expressions with their corresponding
# text value
v = expression
for (text, value) in binwalk.core.common.iterator(replacements):
v = v.replace(text, "%d" % value)
# If no offset, then it's just an evaluatable math expression (e.g., "(32+0x20)")
# If no offset, then it's just an evaluatable math expression (e.g.,
# "(32+0x20)")
else:
v = expression
......@@ -522,15 +567,18 @@ class Magic(object):
tag_strlen = None
max_line_level = 0
previous_line_end = 0
tags = {'id' : signature.id, 'offset' : offset, 'invalid' : False, 'once' : False}
tags = {'id': signature.id, 'offset':
offset, 'invalid': False, 'once': False}
# Apply each line of the signature to self.data, starting at the specified offset
# Apply each line of the signature to self.data, starting at the
# specified offset
for n in range(0, len(signature.lines)):
line = signature.lines[n]
# Ignore indentation levels above the current max indent level
if line.level <= max_line_level:
# If the relative offset of this signature line is just an integer value, use it
# If the relative offset of this signature line is just an
# integer value, use it
if isinstance(line.offset, int):
line_offset = line.offset
# Else, evaluate the complex expression
......@@ -541,13 +589,15 @@ class Magic(object):
ple = '%d+' % previous_line_end
# Allow users to use either the '&0' (libmagic) or '&+0' (explcit addition) sytaxes;
# replace both with the ple text.
line_offset_text = line.offset.replace('&+', ple).replace('&', ple)
line_offset_text = line.offset.replace(
'&+', ple).replace('&', ple)
# Evaluate the expression
line_offset = self._do_math(offset, line_offset_text)
# Sanity check
if not isinstance(line_offset, int):
raise ParserException("Failed to convert offset '%s' to a number: '%s'" % (line.offset, line.text))
raise ParserException(
"Failed to convert offset '%s' to a number: '%s'" % (line.offset, line.text))
# The start of the data needed by this line is at offset + line_offset.
# The end of the data will be line.size bytes later.
......@@ -557,8 +607,10 @@ class Magic(object):
# If the line has a packed format string, unpack it
if line.pkfmt:
try:
dvalue = struct.unpack(line.pkfmt, binwalk.core.compat.str2bytes(self.data[start:end]))[0]
# Not enough bytes left in self.data for the specified format size
dvalue = struct.unpack(
line.pkfmt, binwalk.core.compat.str2bytes(self.data[start:end]))[0]
# Not enough bytes left in self.data for the specified
# format size
except struct.error as e:
dvalue = 0
# Else, this is a string
......@@ -568,21 +620,25 @@ class Magic(object):
# Check to see if this is a string whose size is known and has been specified on a previous
# signature line.
if binwalk.core.compat.has_key(tags, 'strlen') and binwalk.core.compat.has_key(line.tags, 'string'):
dvalue = self.data[start:(start+tags['strlen'])]
# Else, just terminate the string at the first newline, carriage return, or NULL byte
dvalue = self.data[start:(start + tags['strlen'])]
# Else, just terminate the string at the first newline,
# carriage return, or NULL byte
else:
dvalue = self.data[start:end].split('\x00')[0].split('\r')[0].split('\n')[0]
# Non-wildcard strings have a known length, specified in the signature line
dvalue = self.data[start:end].split(
'\x00')[0].split('\r')[0].split('\n')[0]
# Non-wildcard strings have a known length, specified in
# the signature line
else:
dvalue = self.data[start:end]
# Some integer values have special operations that need to be performed on them
# before comparison (e.g., "belong&0x0000FFFF"). Complex math expressions are
# supported here as well.
#if isinstance(dvalue, int) and line.operator:
# if isinstance(dvalue, int) and line.operator:
if line.operator:
try:
# If the operator value of this signature line is just an integer value, use it
# If the operator value of this signature line is just
# an integer value, use it
if isinstance(line.opvalue, int) or isinstance(line.opvalue, long):
opval = line.opvalue
# Else, evaluate the complex expression
......@@ -609,7 +665,8 @@ class Magic(object):
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise ParserException("Operation '" + str(dvalue) + " " + str(line.operator) + "= " + str(line.opvalue) + "' failed: " + str(e))
raise ParserException("Operation '" + str(dvalue) + " " + str(
line.operator) + "= " + str(line.opvalue) + "' failed: " + str(e))
# Does the data (dvalue) match the specified comparison?
if ((line.value is None) or
......@@ -637,12 +694,14 @@ class Magic(object):
# Format the description string
desc = line.format % dvalue_tuple
# If there was any description string, append it to the list of description string parts
# If there was any description string, append it to the
# list of description string parts
if desc:
description.append(desc)
# Process tag keywords specified in the signature line. These have already been parsed out of the
# original format string so that they can be processed separately from the printed description string.
# original format string so that they can be processed
# separately from the printed description string.
for (tag_name, tag_value) in binwalk.core.compat.iterator(line.tags):
# If the tag value is a string, try to format it
if isinstance(tag_value, str):
......@@ -657,7 +716,8 @@ class Magic(object):
else:
tags[tag_name] = tag_value
# Some tag values are intended to be integer values, so try to convert them as such
# Some tag values are intended to be integer values, so
# try to convert them as such
try:
tags[tag_name] = int(tags[tag_name], 0)
except KeyboardInterrupt as e:
......@@ -667,7 +727,8 @@ class Magic(object):
# Abort processing soon as this signature is marked invalid, unless invalid results
# were explicitly requested. This means that the sooner invalid checks are made in a
# given signature, the faster the scan can filter out false positives.
# given signature, the faster the scan can filter out false
# positives.
if not self.show_invalid and tags['invalid']:
break
......@@ -676,7 +737,7 @@ class Magic(object):
# so that subsequent lines can use the '>>&0' offset syntax to specify relative offsets
# from previous lines.
try:
next_line = signature.lines[n+1]
next_line = signature.lines[n + 1]
if next_line.level > line.level:
if line.type == 'string':
previous_line_end = line_offset + len(dvalue)
......@@ -685,7 +746,8 @@ class Magic(object):
except IndexError as e:
pass
# If this line satisfied its comparison, +1 the max indentation level
# If this line satisfied its comparison, +1 the max
# indentation level
max_line_level = line.level + 1
else:
# No match on the first line, abort
......@@ -696,7 +758,8 @@ class Magic(object):
# indentation levels will not be accepted.
max_line_level = line.level
# Join the formatted description strings and remove backspace characters (plus the preceeding character as well)
# Join the formatted description strings and remove backspace
# characters (plus the preceeding character as well)
tags['description'] = self.bspace.sub('', " ".join(description))
# This should never happen
......@@ -704,7 +767,8 @@ class Magic(object):
tags['display'] = False
tags['invalid'] = True
# If the formatted string contains non-printable characters, consider it invalid
# If the formatted string contains non-printable characters, consider
# it invalid
if self.printable.match(tags['description']).group() != tags['description']:
tags['invalid'] = True
......@@ -742,7 +806,8 @@ class Magic(object):
dlen = len(data)
for signature in self.signatures:
# Use regex to search the data block for potential signature matches (fast)
# Use regex to search the data block for potential signature
# matches (fast)
for match in signature.regex.finditer(data):
# Take the offset of the start of the signature into account
offset = match.start() - signature.offset
......@@ -750,10 +815,12 @@ class Magic(object):
# Signatures are ordered based on the length of their magic bytes (largest first).
# If this offset has already been matched to a previous signature, ignore it unless
# self.show_invalid has been specified. Also ignore obviously invalid offsets (<1)
# as well as those outside the specified self.data range (dlen).
# as well as those outside the specified self.data range
# (dlen).
if (offset not in matched_offsets or self.show_invalid) and offset >= 0 and offset < dlen:
#if offset >= 0 and offset < dlen:
# Analyze the data at this offset using the current signature rule
# if offset >= 0 and offset < dlen:
# Analyze the data at this offset using the current
# signature rule
tags = self._analyze(signature, offset)
# Generate a SignatureResult object and append it to the results list if the
......@@ -803,7 +870,8 @@ class Magic(object):
signature = None
for line in lines:
# Split at the first comment delimiter (if any) and strip the result
# Split at the first comment delimiter (if any) and strip the
# result
line = line.split('#')[0].strip()
# Ignore blank lines and lines that are nothing but comments.
# We also don't support the '!mime' style line entries.
......@@ -823,18 +891,20 @@ class Magic(object):
signature = Signature(len(self.signatures), sigline)
# Else, just append this line to the existing signature
elif signature:
#signature.append(sigline)
# signature.append(sigline)
signature.lines.append(sigline)
# If this is not the first line of a signature entry and there is no other
# existing signature entry, something is very wrong with the signature file.
# existing signature entry, something is very wrong with the
# signature file.
else:
raise ParserException("Invalid signature line: '%s'" % line)
raise ParserException(
"Invalid signature line: '%s'" % line)
# Add the final signature to the signature list
if signature:
if not self._filtered(signature.lines[0].format):
self.signatures.append(signature)
# Sort signatures by confidence (aka, length of their magic bytes), largest first
# Sort signatures by confidence (aka, length of their magic bytes),
# largest first
self.signatures.sort(key=lambda x: x.confidence, reverse=True)
......@@ -19,7 +19,9 @@ from threading import Thread
from binwalk.core.compat import *
from binwalk.core.exceptions import *
class Option(object):
'''
A container class that allows modules to declare command line options.
'''
......@@ -59,7 +61,8 @@ class Option(object):
def convert(self, value, default_value):
if self.type and (self.type.__name__ == self.dtype):
# Be sure to specify a base of 0 for int() so that the base is auto-detected
# Be sure to specify a base of 0 for int() so that the base is
# auto-detected
if self.type == int:
t = self.type(value, 0)
else:
......@@ -71,7 +74,9 @@ class Option(object):
return t
class Kwarg(object):
'''
A container class allowing modules to specify their expected __init__ kwarg(s).
'''
......@@ -90,7 +95,9 @@ class Kwarg(object):
self.default = default
self.description = description
class Dependency(object):
'''
A container class for declaring module dependencies.
'''
......@@ -101,7 +108,9 @@ class Dependency(object):
self.kwargs = kwargs
self.module = None
class Result(object):
'''
Generic class for storing and accessing scan results.
'''
......@@ -138,7 +147,9 @@ class Result(object):
for (k, v) in iterator(kwargs):
setattr(self, k, v)
class Error(Result):
'''
A subclass of binwalk.core.module.Result.
'''
......@@ -154,7 +165,9 @@ class Error(Result):
self.exception = None
Result.__init__(self, **kwargs)
class Module(object):
'''
All module classes must be subclassed from this.
'''
......@@ -176,7 +189,8 @@ class Module(object):
attribute='extractor'),
]
# A list of binwalk.core.module.Dependency instances that can be filled in as needed by each individual module.
# A list of binwalk.core.module.Dependency instances that can be filled in
# as needed by each individual module.
DEPENDS = []
# Format string for printing the header during a scan.
......@@ -218,7 +232,8 @@ class Module(object):
# Modules with a higher order are displayed first in help output
ORDER = 5
# Set to False if this is not a primary module (e.g., General, Extractor modules)
# Set to False if this is not a primary module (e.g., General, Extractor
# modules)
PRIMARY = True
def __init__(self, parent, **kwargs):
......@@ -360,7 +375,8 @@ class Module(object):
# Calls the unload method for all dependency modules.
# These modules cannot be unloaded immediately after being run, as
# they must persist until the module that depends on them is finished.
# As such, this must be done separately from the Modules.run 'unload' call.
# As such, this must be done separately from the Modules.run 'unload'
# call.
for dependency in self.dependencies:
try:
getattr(self, dependency.attribute).unload()
......@@ -384,7 +400,8 @@ class Module(object):
except Exception:
pass
# Add any pending extracted files to the target_files list and reset the extractor's pending file list
# Add any pending extracted files to the target_files list and reset
# the extractor's pending file list
self.target_file_list += self.extractor.pending
# Reset all dependencies prior to continuing with another file.
......@@ -453,14 +470,16 @@ class Module(object):
# Add the name of the current module to the result
r.module = self.__class__.__name__
# Any module that is reporting results, valid or not, should be marked as enabled
# Any module that is reporting results, valid or not, should be marked
# as enabled
if not self.enabled:
self.enabled = True
self.validate(r)
self._plugins_result(r)
# Update the progress status automatically if it is not being done manually by the module
# Update the progress status automatically if it is not being done
# manually by the module
if r.offset and r.file and self.AUTO_UPDATE_STATUS:
self.status.total = r.file.length
self.status.completed = r.offset
......@@ -478,7 +497,8 @@ class Module(object):
if r.display:
display_args = self._build_display_args(r)
if display_args:
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
self.config.display.format_strings(
self.HEADER_FORMAT, self.RESULT_FORMAT)
self.config.display.result(*display_args)
return r
......@@ -499,12 +519,14 @@ class Module(object):
self.errors.append(e)
if e.exception:
sys.stderr.write("\n" + e.module + " Exception: " + str(e.exception) + "\n")
sys.stderr.write(
"\n" + e.module + " Exception: " + str(e.exception) + "\n")
sys.stderr.write("-" * exception_header_width + "\n")
traceback.print_exc(file=sys.stderr)
sys.stderr.write("-" * exception_header_width + "\n\n")
elif e.description:
sys.stderr.write("\n" + e.module + " Error: " + e.description + "\n\n")
sys.stderr.write(
"\n" + e.module + " Error: " + e.description + "\n\n")
def header(self):
'''
......@@ -512,13 +534,17 @@ class Module(object):
Returns None.
'''
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
self.config.display.add_custom_header(self.VERBOSE_FORMAT, self.VERBOSE)
self.config.display.format_strings(
self.HEADER_FORMAT, self.RESULT_FORMAT)
self.config.display.add_custom_header(
self.VERBOSE_FORMAT, self.VERBOSE)
if type(self.HEADER) == type([]):
self.config.display.header(*self.HEADER, file_name=self.current_target_file_name)
self.config.display.header(
*self.HEADER, file_name=self.current_target_file_name)
elif self.HEADER:
self.config.display.header(self.HEADER, file_name=self.current_target_file_name)
self.config.display.header(
self.HEADER, file_name=self.current_target_file_name)
def footer(self):
'''
......@@ -544,12 +570,14 @@ class Module(object):
self.modules = self.parent.executed_modules
# A special exception for the extractor module, which should be allowed to
# override the verbose setting, e.g., if --matryoshka has been specified
# override the verbose setting, e.g., if --matryoshka has been
# specified
if hasattr(self, "extractor") and self.extractor.config.verbose:
self.config.verbose = self.config.display.verbose = True
if not self.config.files:
binwalk.core.common.debug("No target files specified, module %s terminated" % self.name)
binwalk.core.common.debug(
"No target files specified, module %s terminated" % self.name)
return False
self.reset_dependencies()
......@@ -563,7 +591,8 @@ class Module(object):
return False
try:
self.config.display.format_strings(self.HEADER_FORMAT, self.RESULT_FORMAT)
self.config.display.format_strings(
self.HEADER_FORMAT, self.RESULT_FORMAT)
except KeyboardInterrupt as e:
raise e
except Exception as e:
......@@ -584,7 +613,9 @@ class Module(object):
return retval
class Status(object):
'''
Class used for tracking module status (e.g., % complete).
'''
......@@ -594,10 +625,12 @@ class Status(object):
self.clear()
def clear(self):
for (k,v) in iterator(self.kwargs):
for (k, v) in iterator(self.kwargs):
setattr(self, k, v)
class Modules(object):
'''
Main class used for running and managing modules.
'''
......@@ -614,7 +647,8 @@ class Modules(object):
self.arguments = []
self.executed_modules = {}
self.default_dependency_modules = {}
self.status = Status(completed=0, total=0, fp=None, running=False, shutdown=False, finished=False)
self.status = Status(
completed=0, total=0, fp=None, running=False, shutdown=False, finished=False)
self.status_server_started = False
self.status_service = None
......@@ -632,7 +666,7 @@ class Modules(object):
self.cleanup()
def _set_arguments(self, argv=[], kargv={}):
for (k,v) in iterator(kargv):
for (k, v) in iterator(kargv):
k = self._parse_api_opt(k)
if v not in [True, False, None]:
if not isinstance(v, list):
......@@ -651,7 +685,8 @@ class Modules(object):
self.arguments = argv
def _parse_api_opt(self, opt):
# If the argument already starts with a hyphen, don't add hyphens in front of it
# If the argument already starts with a hyphen, don't add hyphens in
# front of it
if opt.startswith('-'):
return opt
# Short options are only 1 character
......@@ -683,11 +718,13 @@ class Modules(object):
continue
module_name = file_name[:-3]
try:
user_module = imp.load_source(module_name, os.path.join(user_modules, file_name))
user_module = imp.load_source(
module_name, os.path.join(user_modules, file_name))
except KeyboardInterrupt as e:
raise e
except Exception as e:
binwalk.core.common.warning("Error loading module '%s': %s" % (file_name, str(e)))
binwalk.core.common.warning(
"Error loading module '%s': %s" % (file_name, str(e)))
for (name, module) in inspect.getmembers(user_module):
if inspect.isclass(module) and hasattr(module, attribute):
......@@ -706,7 +743,8 @@ class Modules(object):
help_string += "\nUsage: binwalk [OPTIONS] [FILE1] [FILE2] [FILE3] ...\n"
# Build a dictionary of modules and their ORDER attributes.
# This makes it easy to sort modules by their ORDER attribute for display.
# This makes it easy to sort modules by their ORDER attribute for
# display.
for module in self.list(attribute="CLI"):
if module.CLI:
modules[module] = module.ORDER
......@@ -728,8 +766,9 @@ class Modules(object):
else:
short_opt = " "
fmt = " %%s %%s%%-%ds%%s\n" % (25-len(long_opt))
help_string += fmt % (short_opt, long_opt, optargs, module_option.description)
fmt = " %%s %%s%%-%ds%%s\n" % (25 - len(long_opt))
help_string += fmt % (
short_opt, long_opt, optargs, module_option.description)
return help_string + "\n"
......@@ -749,9 +788,11 @@ class Modules(object):
for module in self.list():
obj = self.run(module)
# Add all loaded modules that marked themselves as enabled to the run_modules list
# Add all loaded modules that marked themselves as enabled to the
# run_modules list
for (module, obj) in iterator(self.executed_modules):
# Report the results if the module is enabled and if it is a primary module or if it reported any results/errors
# Report the results if the module is enabled and if it is a
# primary module or if it reported any results/errors
if obj.enabled and (obj.PRIMARY or obj.results or obj.errors):
run_modules.append(obj)
......@@ -771,16 +812,19 @@ class Modules(object):
self.status.clear()
# If the module is not being loaded as a dependency, add it to the executed modules dictionary.
# This is used later in self.execute to determine which objects should be returned.
# This is used later in self.execute to determine which objects
# should be returned.
if not dependency:
self.executed_modules[module] = obj
# The unload method tells the module that we're done with it, and gives it a chance to do
# any cleanup operations that may be necessary. We still retain the object instance in self.executed_modules.
# any cleanup operations that may be necessary. We still retain
# the object instance in self.executed_modules.
obj._unload_dependencies()
obj.unload()
except KeyboardInterrupt as e:
# Tell the status server to shut down, and give it time to clean up.
# Tell the status server to shut down, and give it time to clean
# up.
if self.status.running:
self.status.shutdown = True
while not self.status.finished:
......@@ -799,13 +843,15 @@ class Modules(object):
import binwalk.modules
attributes = {}
for dependency in module.DEFAULT_DEPENDS+module.DEPENDS:
for dependency in module.DEFAULT_DEPENDS + module.DEPENDS:
# The dependency module must be imported by binwalk.modules.__init__.py
# The dependency module must be imported by
# binwalk.modules.__init__.py
if hasattr(binwalk.modules, dependency.name):
dependency.module = getattr(binwalk.modules, dependency.name)
else:
raise ModuleException("%s depends on %s which was not found in binwalk.modules.__init__.py\n" % (str(module), dependency.name))
raise ModuleException(
"%s depends on %s which was not found in binwalk.modules.__init__.py\n" % (str(module), dependency.name))
# No recursive dependencies, thanks
if dependency.module == module:
......@@ -818,11 +864,14 @@ class Modules(object):
# Modules that are not enabled (e.g., extraction module) can load any dependency as long as they don't
# set any custom kwargs for those dependencies.
if module_enabled or not dependency.kwargs:
depobj = self.run(dependency.module, dependency=True, kwargs=dependency.kwargs)
depobj = self.run(
dependency.module, dependency=True, kwargs=dependency.kwargs)
# If a dependency failed, consider this a non-recoverable error and raise an exception
# If a dependency failed, consider this a non-recoverable error and
# raise an exception
if depobj.errors:
raise ModuleException("Failed to load " + dependency.name + " module")
raise ModuleException(
"Failed to load " + dependency.name + " module")
else:
attributes[dependency.attribute] = depobj
......@@ -837,19 +886,21 @@ class Modules(object):
Returns a dictionary of kwargs for the specified module.
'''
kwargs = {'enabled' : False}
kwargs = {'enabled': False}
last_priority = {}
longs = []
shorts = ""
parser = argparse.ArgumentParser(add_help=False)
# Hack: This allows the ListActionParser class to correllate short options to long options.
# There is probably a built-in way to do this in the argparse.ArgumentParser class?
# There is probably a built-in way to do this in the
# argparse.ArgumentParser class?
parser.short_to_long = {}
# Must build arguments from all modules so that:
#
# 1) Any conflicting arguments will raise an exception
# 2) The only unknown arguments will be the target files, making them easy to identify
# 2) The only unknown arguments will be the target files, making them
# easy to identify
for m in self.list(attribute="CLI"):
for module_option in m.CLI:
......@@ -869,7 +920,8 @@ class Modules(object):
parser_kwargs['action'] = 'store_true'
elif module_option.type is list:
parser_kwargs['action'] = 'append'
parser.short_to_long[module_option.short] = module_option.long
parser.short_to_long[
module_option.short] = module_option.long
parser.add_argument(*parser_args, **parser_kwargs)
......@@ -892,20 +944,24 @@ class Modules(object):
for (name, default_value) in iterator(module_option.kwargs):
# If this kwarg has not been previously processed, or if its priority is equal to or
# greater than the previously processed kwarg's priority, then let's process it.
# greater than the previously processed kwarg's priority,
# then let's process it.
if not has_key(last_priority, name) or last_priority[name] <= module_option.priority:
# Track the priority for future iterations that may process the same kwarg name
# Track the priority for future iterations that may
# process the same kwarg name
last_priority[name] = module_option.priority
try:
kwargs[name] = module_option.convert(args[module_option.long], default_value)
kwargs[name] = module_option.convert(
args[module_option.long], default_value)
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise ModuleException("Invalid usage: %s" % str(e))
binwalk.core.common.debug("%s :: %s => %s" % (module.TITLE, str(argv), str(kwargs)))
binwalk.core.common.debug("%s :: %s => %s" %
(module.TITLE, str(argv), str(kwargs)))
return kwargs
def kwargs(self, obj, kwargs):
......@@ -930,7 +986,8 @@ class Modules(object):
if not hasattr(obj, k):
setattr(obj, k, v)
else:
raise Exception("binwalk.core.module.Modules.process_kwargs: %s has no attribute 'KWARGS'" % str(obj))
raise Exception(
"binwalk.core.module.Modules.process_kwargs: %s has no attribute 'KWARGS'" % str(obj))
def status_server(self, port):
'''
......@@ -944,9 +1001,12 @@ class Modules(object):
if self.status_server_started == False:
self.status_server_started = True
try:
self.status_service = binwalk.core.statuserver.StatusServer(port, self)
self.status_service = binwalk.core.statuserver.StatusServer(
port, self)
except Exception as e:
binwalk.core.common.warning("Failed to start status server on port %d: %s" % (port, str(e)))
binwalk.core.common.warning(
"Failed to start status server on port %d: %s" % (port, str(e)))
def process_kwargs(obj, kwargs):
'''
......@@ -961,6 +1021,7 @@ def process_kwargs(obj, kwargs):
kwargs = m.kwargs(obj, kwargs)
return kwargs
def show_help(fd=sys.stdout):
'''
Convenience wrapper around binwalk.core.module.Modules.help.
......@@ -971,5 +1032,3 @@ def show_help(fd=sys.stdout):
'''
with Modules() as m:
fd.write(m.help())
......@@ -9,12 +9,15 @@ import binwalk.core.settings
from binwalk.core.compat import *
from binwalk.core.exceptions import IgnoreFileException
class Plugin(object):
'''
Class from which all plugin classes are based.
'''
# A list of case-sensitive module names for which this plugin should be loaded.
# If no module names are specified, the plugin will be loaded for all modules.
# If no module names are specified, the plugin will be loaded for all
# modules.
MODULES = []
def __init__(self, module):
......@@ -64,7 +67,9 @@ class Plugin(object):
'''
pass
class Plugins(object):
'''
Class to load and call plugin callback functions, handled automatically by Binwalk.scan / Binwalk.single_scan.
An instance of this class is available during a scan via the Binwalk.plugins object.
......@@ -114,7 +119,8 @@ class Plugins(object):
except IgnoreFileException as e:
raise e
except Exception as e:
binwalk.core.common.warning("%s.%s failed: %s" % (callback.__module__, callback.__name__, e))
binwalk.core.common.warning(
"%s.%s failed: %s" % (callback.__module__, callback.__name__, e))
def _find_plugin_class(self, plugin):
for (name, klass) in inspect.getmembers(plugin, inspect.isclass):
......@@ -145,17 +151,17 @@ class Plugins(object):
'''
plugins = {
'user' : {
'modules' : [],
'descriptions' : {},
'enabled' : {},
'path' : None,
'user': {
'modules': [],
'descriptions': {},
'enabled': {},
'path': None,
},
'system' : {
'modules' : [],
'descriptions' : {},
'enabled' : {},
'path' : None,
'system': {
'modules': [],
'descriptions': {},
'enabled': {},
'path': None,
}
}
......@@ -171,7 +177,8 @@ class Plugins(object):
module = file_name[:-len(self.MODULE_EXTENSION)]
try:
plugin = imp.load_source(module, os.path.join(plugins[key]['path'], file_name))
plugin = imp.load_source(
module, os.path.join(plugins[key]['path'], file_name))
plugin_class = self._find_plugin_class(plugin)
plugins[key]['enabled'][module] = True
......@@ -179,15 +186,18 @@ class Plugins(object):
except KeyboardInterrupt as e:
raise e
except Exception as e:
binwalk.core.common.warning("Error loading plugin '%s': %s" % (file_name, str(e)))
binwalk.core.common.warning(
"Error loading plugin '%s': %s" % (file_name, str(e)))
plugins[key]['enabled'][module] = False
try:
plugins[key]['descriptions'][module] = plugin_class.__doc__.strip().split('\n')[0]
plugins[key]['descriptions'][
module] = plugin_class.__doc__.strip().split('\n')[0]
except KeyboardInterrupt as e:
raise e
except Exception as e:
plugins[key]['descriptions'][module] = 'No description'
plugins[key]['descriptions'][
module] = 'No description'
return plugins
def load_plugins(self):
......@@ -198,7 +208,8 @@ class Plugins(object):
def _load_plugin_modules(self, plugins):
for module in plugins['modules']:
try:
file_path = os.path.join(plugins['path'], module + self.MODULE_EXTENSION)
file_path = os.path.join(
plugins['path'], module + self.MODULE_EXTENSION)
except KeyboardInterrupt as e:
raise e
except Exception:
......@@ -220,7 +231,8 @@ class Plugins(object):
pass
try:
self.load_file.append(getattr(class_instance, self.LOADFILE))
self.load_file.append(
getattr(class_instance, self.LOADFILE))
except KeyboardInterrupt as e:
raise e
except Exception as e:
......@@ -234,7 +246,8 @@ class Plugins(object):
pass
try:
self.post_scan.append(getattr(class_instance, self.POSTSCAN))
self.post_scan.append(
getattr(class_instance, self.POSTSCAN))
except KeyboardInterrupt as e:
raise e
except Exception as e:
......@@ -250,7 +263,8 @@ class Plugins(object):
except KeyboardInterrupt as e:
raise e
except Exception as e:
binwalk.core.common.warning("Failed to load plugin module '%s': %s" % (module, str(e)))
binwalk.core.common.warning(
"Failed to load plugin module '%s': %s" % (module, str(e)))
def pre_scan_callbacks(self, obj):
return self._call_plugins(self.pre_scan)
......@@ -266,4 +280,3 @@ class Plugins(object):
def scan_callbacks(self, obj):
return self._call_plugins(self.scan, obj)
# Code for loading and accessing binwalk settings (extraction rules, signature files, etc).
# Code for loading and accessing binwalk settings (extraction rules,
# signature files, etc).
import os
import binwalk.core.common as common
from binwalk.core.compat import *
class Settings:
'''
Binwalk settings class, used for accessing user and system file paths and general configuration settings.
......@@ -41,17 +44,25 @@ class Settings:
self.system_dir = common.get_module_path()
# Build the paths to all user-specific files
self.user = common.GenericContainer(binarch=self._user_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE),
magic=self._magic_signature_files(user_only=True),
extract=self._user_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE),
modules=self._user_path(self.BINWALK_MODULES_DIR),
self.user = common.GenericContainer(
binarch=self._user_path(
self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE),
magic=self._magic_signature_files(
user_only=True),
extract=self._user_path(
self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE),
modules=self._user_path(
self.BINWALK_MODULES_DIR),
plugins=self._user_path(self.BINWALK_PLUGINS_DIR))
# Build the paths to all system-wide files
self.system = common.GenericContainer(binarch=self._system_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE),
magic=self._magic_signature_files(system_only=True),
extract=self._system_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE),
self.system = common.GenericContainer(
binarch=self._system_path(
self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE),
magic=self._magic_signature_files(
system_only=True),
extract=self._system_path(
self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE),
plugins=self._system_path(self.BINWALK_PLUGINS_DIR))
def _magic_signature_files(self, system_only=False, user_only=False):
......@@ -64,15 +75,18 @@ class Settings:
Returns a list of user/system magic signature files.
'''
files = []
user_binarch = self._user_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
system_binarch = self._system_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
user_binarch = self._user_path(
self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
system_binarch = self._system_path(
self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
def list_files(dir_path):
# Ignore hidden dotfiles.
return [os.path.join(dir_path, x) for x in os.listdir(dir_path) if not x.startswith('.')]
if not system_only:
user_dir = os.path.join(self.user_dir, self.BINWALK_USER_DIR, self.BINWALK_MAGIC_DIR)
user_dir = os.path.join(
self.user_dir, self.BINWALK_USER_DIR, self.BINWALK_MAGIC_DIR)
files += list_files(user_dir)
if not user_only:
system_dir = os.path.join(self.system_dir, self.BINWALK_MAGIC_DIR)
......@@ -175,7 +189,7 @@ class Settings:
'''
try:
return self._file_path(os.path.join(self.user_dir, self.BINWALK_USER_DIR, subdir), basename)
except KeyboardInterrupt as e :
except KeyboardInterrupt as e:
raise e
except Exception:
return None
......@@ -191,8 +205,7 @@ class Settings:
'''
try:
return self._file_path(os.path.join(self.system_dir, subdir), basename)
except KeyboardInterrupt as e :
except KeyboardInterrupt as e:
raise e
except Exception:
return None
......@@ -13,6 +13,7 @@ try:
except ImportError:
import socketserver as SocketServer
class StatusRequestHandler(SocketServer.BaseRequestHandler):
def handle(self):
......@@ -27,17 +28,22 @@ class StatusRequestHandler(SocketServer.BaseRequestHandler):
time.sleep(0.1)
try:
self.request.send(binwalk.core.compat.str2bytes('\b' * last_status_message_len))
self.request.send(binwalk.core.compat.str2bytes(' ' * last_status_message_len))
self.request.send(binwalk.core.compat.str2bytes('\b' * last_status_message_len))
self.request.send(
binwalk.core.compat.str2bytes('\b' * last_status_message_len))
self.request.send(
binwalk.core.compat.str2bytes(' ' * last_status_message_len))
self.request.send(
binwalk.core.compat.str2bytes('\b' * last_status_message_len))
if self.server.binwalk.status.shutdown:
self.server.binwalk.status.finished = True
break
if self.server.binwalk.status.total != 0:
percentage = ((float(self.server.binwalk.status.completed) / float(self.server.binwalk.status.total)) * 100)
status_message = message_format % (self.server.binwalk.status.fp.path,
percentage = (
(float(self.server.binwalk.status.completed) / float(self.server.binwalk.status.total)) * 100)
status_message = message_format % (
self.server.binwalk.status.fp.path,
percentage,
self.server.binwalk.status.completed,
self.server.binwalk.status.total)
......@@ -47,27 +53,32 @@ class StatusRequestHandler(SocketServer.BaseRequestHandler):
continue
last_status_message_len = len(status_message)
self.request.send(binwalk.core.compat.str2bytes(status_message))
self.request.send(
binwalk.core.compat.str2bytes(status_message))
message_sent = True
except IOError as e:
if e.errno == errno.EPIPE:
break
except Exception as e:
binwalk.core.common.debug('StatusRequestHandler exception: ' + str(e) + '\n')
binwalk.core.common.debug(
'StatusRequestHandler exception: ' + str(e) + '\n')
except KeyboardInterrupt as e:
raise e
self.server.binwalk.status.running = False
return
class ThreadedStatusServer(SocketServer.ThreadingMixIn, SocketServer.TCPServer):
daemon_threads = True
allow_reuse_address = True
class StatusServer(object):
def __init__(self, port, binwalk):
self.server = ThreadedStatusServer(('127.0.0.1', port), StatusRequestHandler)
self.server = ThreadedStatusServer(
('127.0.0.1', port), StatusRequestHandler)
self.server.binwalk = binwalk
t = threading.Thread(target=self.server.serve_forever)
......
......@@ -17,6 +17,6 @@ from binwalk.modules.extractor import Extractor
from binwalk.modules.entropy import Entropy
# These are depreciated.
#from binwalk.modules.binvis import Plotter
#from binwalk.modules.hashmatch import HashMatch
#from binwalk.modules.heuristics import HeuristicCompressionAnalyzer
# from binwalk.modules.binvis import Plotter
# from binwalk.modules.hashmatch import HashMatch
# from binwalk.modules.heuristics import HeuristicCompressionAnalyzer
......@@ -5,7 +5,9 @@ from binwalk.core.compat import *
from binwalk.core.common import BlockFile
from binwalk.core.module import Module, Option, Kwarg
class Plotter(Module):
'''
Base class for visualizing binaries in Qt.
Other plotter classes are derived from this.
......@@ -19,21 +21,21 @@ class Plotter(Module):
CLI = [
Option(short='3',
long='3D',
kwargs={'axis' : 3, 'enabled' : True},
kwargs={'axis': 3, 'enabled': True},
description='Generate a 3D binary visualization'),
Option(short='2',
long='2D',
kwargs={'axis' : 2, 'enabled' : True},
kwargs={'axis': 2, 'enabled': True},
description='Project data points onto 3D cube walls only'),
Option(short='V',
long='points',
type=int,
kwargs={'max_points' : 0},
kwargs={'max_points': 0},
description='Set the maximum number of plotted data points'),
# Option(short='V',
# long='grids',
# kwargs={'show_grids' : True},
# description='Display the x-y-z grids in the resulting plot'),
# Option(short='V',
# long='grids',
# kwargs={'show_grids' : True},
# description='Display the x-y-z grids in the resulting plot'),
]
KWARGS = [
......@@ -43,7 +45,8 @@ class Plotter(Module):
Kwarg(name='enabled', default=False),
]
# There isn't really any useful data to print to console. Disable header and result output.
# There isn't really any useful data to print to console. Disable header
# and result output.
HEADER = None
RESULT = None
......@@ -64,7 +67,8 @@ class Plotter(Module):
self.MAX_PLOT_POINTS = self.MAX_3D_PLOT_POINTS
self._generate_data_point = self._generate_3d_data_point
else:
raise Exception("Invalid Plotter axis specified: %d. Must be one of: [2,3]" % self.axis)
raise Exception(
"Invalid Plotter axis specified: %d. Must be one of: [2,3]" % self.axis)
if not self.max_points:
self.max_points = self.MAX_PLOT_POINTS
......@@ -106,7 +110,8 @@ class Plotter(Module):
# Go through every data point and how many times that point occurs
for (point, count) in iterator(data_points):
# For each data point, compare it to each remaining weight value
# For each data point, compare it to each remaining weight
# value
for w in get_keys(weightings):
# If the number of times this data point occurred is >= the weight value,
......@@ -119,18 +124,21 @@ class Plotter(Module):
else:
break
# Throw out weight values that exceed the maximum number of data points
# Throw out weight values that exceed the maximum number of
# data points
if weightings[w] > self.max_points:
del weightings[w]
# If there's only one weight value left, no sense in continuing the loop...
# If there's only one weight value left, no sense in continuing
# the loop...
if len(weightings) == 1:
break
# The least weighted value is our minimum weight
min_weight = min(weightings)
# Get rid of all data points that occur less frequently than our minimum weight
# Get rid of all data points that occur less frequently than our
# minimum weight
for point in get_keys(data_points):
if data_points[point] < min_weight:
del data_points[point]
......@@ -138,7 +146,8 @@ class Plotter(Module):
for point in sorted(data_points, key=data_points.get, reverse=True):
plot_points[point] = data_points[point]
# Register this as a result in case future modules need access to the raw point information,
# but mark plot as False to prevent the entropy module from attempting to overlay this data on its graph.
# but mark plot as False to prevent the entropy module from
# attempting to overlay this data on its graph.
self.result(point=point, plot=False)
total += 1
if total >= self.max_points:
......@@ -154,7 +163,7 @@ class Plotter(Module):
Returns a data point tuple.
'''
return (0,0,0)
return (0, 0, 0)
def _generate_data_points(self, fp):
'''
......@@ -178,8 +187,8 @@ class Plotter(Module):
break
i = 0
while (i+(self.axis-1)) < dlen:
point = self._generate_data_point(data[i:i+self.axis])
while (i + (self.axis - 1)) < dlen:
point = self._generate_data_point(data[i:i + self.axis])
if has_key(data_points, point):
data_points[point] += 1
else:
......@@ -208,7 +217,8 @@ class Plotter(Module):
frequency_percentage = (weight / nitems)
# Give points that occur more frequently a brighter color and larger point size.
# Frequency is determined as a percentage of total unique data points.
# Frequency is determined as a percentage of total unique data
# points.
if frequency_percentage > .010:
size[i] = .20
r = 1.0
......@@ -227,7 +237,8 @@ class Plotter(Module):
i += 1
scatter_plot = gl.GLScatterPlotItem(pos=pos, size=size, color=color, pxMode=False)
scatter_plot = gl.GLScatterPlotItem(
pos=pos, size=size, color=color, pxMode=False)
scatter_plot.translate(-127.5, -127.5, -127.5)
return scatter_plot
......@@ -258,12 +269,14 @@ class Plotter(Module):
for fd in iter(self.next_file, None):
data_points = self._generate_data_points(fd)
self._print("Generating plot points from %d data points" % len(data_points))
self._print("Generating plot points from %d data points" %
len(data_points))
self.plot_points = self._generate_plot_points(data_points)
del data_points
self._print("Generating graph from %d plot points" % len(self.plot_points))
self._print("Generating graph from %d plot points" %
len(self.plot_points))
self.window.addItem(self._generate_plot(self.plot_points))
......@@ -307,4 +320,3 @@ class Plotter(Module):
def run(self):
self.plot()
return True
# Performs raw decompression of various compression algorithms (currently, only deflate).
# Performs raw decompression of various compression algorithms (currently,
# only deflate).
import os
import zlib
......@@ -11,17 +12,20 @@ try:
except ImportError:
from backports import lzma
class LZMAHeader(object):
def __init__(self, **kwargs):
for (k,v) in binwalk.core.compat.iterator(kwargs):
for (k, v) in binwalk.core.compat.iterator(kwargs):
setattr(self, k, v)
class LZMA(object):
DESCRIPTION = "Raw LZMA compression stream"
COMMON_PROPERTIES = [0x5D, 0x6E]
MAX_PROP = ((4 * 5 + 4) * 9 + 8)
BLOCK_SIZE = 32*1024
BLOCK_SIZE = 32 * 1024
def __init__(self, module):
self.module = module
......@@ -33,22 +37,27 @@ class LZMA(object):
# Add an extraction rule
if self.module.extractor.enabled:
self.module.extractor.add_rule(regex='^%s' % self.DESCRIPTION.lower(), extension="7z", cmd=self.extractor)
self.module.extractor.add_rule(
regex='^%s' % self.DESCRIPTION.lower(), extension="7z", cmd=self.extractor)
def extractor(self, file_name):
# Open and read the file containing the raw compressed data.
# This is not terribly efficient, especially for large files...
compressed_data = binwalk.core.common.BlockFile(file_name).read()
# Re-run self.decompress to detect the properties for this compressed data (stored in self.properties)
# Re-run self.decompress to detect the properties for this compressed
# data (stored in self.properties)
if self.decompress(compressed_data[:self.BLOCK_SIZE]):
# Build an LZMA header on top of the raw compressed data and write it back to disk.
# Header consists of the detected properties values, the largest possible dictionary size,
# and a fake output file size field.
header = chr(self.properties) + self.dictionaries[-1] + ("\xFF" * 8)
binwalk.core.common.BlockFile(file_name, "wb").write(header + compressed_data)
header = chr(self.properties) + \
self.dictionaries[-1] + ("\xFF" * 8)
binwalk.core.common.BlockFile(
file_name, "wb").write(header + compressed_data)
# Try to extract it with all the normal lzma extractors until one works
# Try to extract it with all the normal lzma extractors until one
# works
for exrule in self.module.extractor.match("lzma compressed data"):
if self.module.extractor.execute(exrule['cmd'], file_name) == True:
break
......@@ -65,16 +74,17 @@ class LZMA(object):
if prop > self.MAX_PROP:
return None
pb = prop / (9 * 5);
prop -= pb * 9 * 5;
lp = prop / 9;
lc = prop - lp * 9;
pb = prop / (9 * 5)
prop -= pb * 9 * 5
lp = prop / 9
lc = prop - lp * 9
return (pb, lp, lc)
def parse_header(self, header):
(pb, lp, lc) = self.parse_property(header[0])
dictionary = struct.unpack("<I", binwalk.core.compat.str2bytes(header[1:5]))[0]
dictionary = struct.unpack(
"<I", binwalk.core.compat.str2bytes(header[1:5]))[0]
return LZMAHeader(pb=pb, lp=lp, lc=lc, dictionary=dictionary)
def build_properties(self):
......@@ -97,10 +107,12 @@ class LZMA(object):
if self.module.partial_scan == True:
# For partial scans, only use the largest dictionary value
self.dictionaries.append(binwalk.core.compat.bytes2str(struct.pack("<I", 2**25)))
self.dictionaries.append(
binwalk.core.compat.bytes2str(struct.pack("<I", 2 ** 25)))
else:
for n in range(16, 26):
self.dictionaries.append(binwalk.core.compat.bytes2str(struct.pack("<I", 2**n)))
self.dictionaries.append(
binwalk.core.compat.bytes2str(struct.pack("<I", 2 ** n)))
def build_headers(self):
self.headers = set()
......@@ -116,7 +128,8 @@ class LZMA(object):
for header in self.headers:
i += 1
# The only acceptable exceptions are those indicating that the input data was truncated.
# The only acceptable exceptions are those indicating that the
# input data was truncated.
try:
final_data = binwalk.core.compat.str2bytes(header + data)
lzma.decompress(final_data)
......@@ -135,7 +148,8 @@ class LZMA(object):
break
if result is not None:
self.properties = self.build_property(result.pb, result.lp, result.lc)
self.properties = self.build_property(
result.pb, result.lp, result.lc)
description = "%s, properties: 0x%.2X [pb: %d, lp: %d, lc: %d], dictionary size: %d" % (self.DESCRIPTION,
self.properties,
result.pb,
......@@ -145,13 +159,15 @@ class LZMA(object):
return description
class Deflate(object):
'''
Finds and extracts raw deflate compression streams.
'''
ENABLED = False
BLOCK_SIZE = 33*1024
BLOCK_SIZE = 33 * 1024
DESCRIPTION = "Raw deflate compression stream"
def __init__(self, module):
......@@ -159,7 +175,8 @@ class Deflate(object):
# Add an extraction rule
if self.module.extractor.enabled:
self.module.extractor.add_rule(regex='^%s' % self.DESCRIPTION.lower(), extension="deflate", cmd=self.extractor)
self.module.extractor.add_rule(
regex='^%s' % self.DESCRIPTION.lower(), extension="deflate", cmd=self.extractor)
def extractor(self, file_name):
in_data = ""
......@@ -176,7 +193,8 @@ class Deflate(object):
in_data += data[:dlen]
try:
out_data = zlib.decompress(binwalk.core.compat.str2bytes(in_data), -15)
out_data = zlib.decompress(
binwalk.core.compat.str2bytes(in_data), -15)
with binwalk.core.common.BlockFile(out_file, 'w') as fp_out:
fp_out.write(out_data)
retval = True
......@@ -190,9 +208,11 @@ class Deflate(object):
valid = True
description = None
# Looking for either a valid decompression, or an error indicating truncated input data
# Looking for either a valid decompression, or an error indicating
# truncated input data
try:
# Negative window size (e.g., -15) indicates that raw decompression should be performed
# Negative window size (e.g., -15) indicates that raw decompression
# should be performed
zlib.decompress(binwalk.core.compat.str2bytes(data), -15)
except zlib.error as e:
if not str(e).startswith("Error -5"):
......@@ -201,6 +221,7 @@ class Deflate(object):
return self.DESCRIPTION
class RawCompression(Module):
TITLE = 'Raw Compression'
......@@ -208,19 +229,19 @@ class RawCompression(Module):
CLI = [
Option(short='X',
long='deflate',
kwargs={'enabled' : True, 'scan_for_deflate' : True},
kwargs={'enabled': True, 'scan_for_deflate': True},
description='Scan for raw deflate compression streams'),
Option(short='Z',
long='lzma',
kwargs={'enabled' : True, 'scan_for_lzma' : True},
kwargs={'enabled': True, 'scan_for_lzma': True},
description='Scan for raw LZMA compression streams'),
Option(short='P',
long='partial',
kwargs={'partial_scan' : True},
kwargs={'partial_scan': True},
description='Perform a superficial, but faster, scan'),
Option(short='S',
long='stop',
kwargs={'stop_on_first_hit' : True},
kwargs={'stop_on_first_hit': True},
description='Stop after the first result'),
]
......@@ -254,9 +275,11 @@ class RawCompression(Module):
for i in range(0, dlen):
for decompressor in self.decompressors:
description = decompressor.decompress(data[i:i+decompressor.BLOCK_SIZE])
description = decompressor.decompress(
data[i:i + decompressor.BLOCK_SIZE])
if description:
self.result(description=description, file=fp, offset=fp.tell()-dlen+i)
self.result(
description=description, file=fp, offset=fp.tell() - dlen + i)
if self.stop_on_first_hit:
file_done = True
break
......@@ -269,4 +292,3 @@ class RawCompression(Module):
self.status.completed = fp.tell() - fp.offset
self.footer()
......@@ -3,16 +3,21 @@ import binwalk.core.common
import binwalk.core.compat
from binwalk.core.module import Module, Option, Kwarg
class ArchResult(object):
def __init__(self, **kwargs):
for (k,v) in binwalk.core.compat.iterator(kwargs):
for (k, v) in binwalk.core.compat.iterator(kwargs):
setattr(self, k, v)
class Architecture(object):
def __init__(self, **kwargs):
for (k, v) in binwalk.core.compat.iterator(kwargs):
setattr(self, k, v)
class Disasm(Module):
THRESHOLD = 10
......@@ -24,16 +29,16 @@ class Disasm(Module):
CLI = [
Option(short='Y',
long='disasm',
kwargs={'enabled' : True},
kwargs={'enabled': True},
description='Identify the CPU architecture of a file using the capstone disassembler'),
Option(short='T',
long='minsn',
type=int,
kwargs={'min_insn_count' : 0},
kwargs={'min_insn_count': 0},
description='Minimum number of consecutive instructions to be considered valid (default: %d)' % DEFAULT_MIN_INSN_COUNT),
Option(long='continue',
short='k',
kwargs={'keep_going' : True},
kwargs={'keep_going': True},
description="Don't stop at the first match"),
]
......@@ -94,7 +99,8 @@ class Disasm(Module):
self.disasm_data_size = self.min_insn_count * 10
for arch in self.ARCHITECTURES:
self.disassemblers.append((capstone.Cs(arch.type, (arch.mode + arch.endianess)), arch.description))
self.disassemblers.append(
(capstone.Cs(arch.type, (arch.mode + arch.endianess)), arch.description))
def scan_file(self, fp):
total_read = 0
......@@ -107,34 +113,46 @@ class Disasm(Module):
break
# If this data block doesn't contain at least two different bytes, skip it
# to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
# to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in
# MIPS).
if len(set(data)) >= 2:
block_offset = 0
# Loop through the entire block, or until we're pretty sure we've found some valid code in this block
# Loop through the entire block, or until we're pretty sure
# we've found some valid code in this block
while (block_offset < dlen) and (result is None or result.count < self.THRESHOLD):
# Don't pass the entire data block into disasm_lite, it's horribly inefficient
# to pass large strings around in Python. Break it up into smaller code blocks instead.
code_block = binwalk.core.compat.str2bytes(data[block_offset:block_offset+self.disasm_data_size])
# to pass large strings around in Python. Break it up into
# smaller code blocks instead.
code_block = binwalk.core.compat.str2bytes(
data[block_offset:block_offset + self.disasm_data_size])
# If this code block doesn't contain at least two different bytes, skip it
# to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in MIPS).
# to prevent false positives (e.g., "\x00\x00\x00\x00" is a
# nop in MIPS).
if len(set(code_block)) >= 2:
for (md, description) in self.disassemblers:
insns = [insn for insn in md.disasm_lite(code_block, (total_read+block_offset))]
binwalk.core.common.debug("0x%.8X %s, at least %d valid instructions" % ((total_read+block_offset),
insns = [insn for insn in md.disasm_lite(
code_block, (total_read + block_offset))]
binwalk.core.common.debug(
"0x%.8X %s, at least %d valid instructions" % ((total_read + block_offset),
description,
len(insns)))
# Did we disassemble at least self.min_insn_count instructions?
# Did we disassemble at least self.min_insn_count
# instructions?
if len(insns) >= self.min_insn_count:
# If we've already found the same type of code in this block, simply update the result counter
# If we've already found the same type of code
# in this block, simply update the result
# counter
if result and result.description == description:
result.count += 1
if result.count >= self.THRESHOLD:
break
else:
result = ArchResult(offset=total_read+block_offset+fp.offset,
result = ArchResult(
offset=total_read +
block_offset + fp.offset,
description=description,
insns=insns,
count=1)
......@@ -150,7 +168,8 @@ class Disasm(Module):
if r.valid and r.display:
if self.config.verbose:
for (position, size, mnem, opnds) in result.insns:
self.result(offset=position, file=fp, description="%s %s" % (mnem, opnds))
self.result(
offset=position, file=fp, description="%s %s" % (mnem, opnds))
if not self.keep_going:
return
......@@ -162,4 +181,3 @@ class Disasm(Module):
self.header()
self.scan_file(fp)
self.footer()
......@@ -7,6 +7,7 @@ import binwalk.core.common
from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg
class Entropy(Module):
XLABEL = 'Offset'
......@@ -33,33 +34,33 @@ class Entropy(Module):
CLI = [
Option(short='E',
long='entropy',
kwargs={'enabled' : True},
kwargs={'enabled': True},
description='Calculate file entropy'),
Option(short='F',
long='fast',
kwargs={'use_zlib' : True},
kwargs={'use_zlib': True},
description='Use faster, but less detailed, entropy analysis'),
Option(short='J',
long='save',
kwargs={'save_plot' : True},
kwargs={'save_plot': True},
description='Save plot as a PNG'),
Option(short='Q',
long='nlegend',
kwargs={'show_legend' : False},
kwargs={'show_legend': False},
description='Omit the legend from the entropy plot graph'),
Option(short='N',
long='nplot',
kwargs={'do_plot' : False},
kwargs={'do_plot': False},
description='Do not generate an entropy plot graph'),
Option(short='H',
long='high',
type=float,
kwargs={'trigger_high' : DEFAULT_TRIGGER_HIGH},
kwargs={'trigger_high': DEFAULT_TRIGGER_HIGH},
description='Set the rising edge entropy trigger threshold (default: %.2f)' % DEFAULT_TRIGGER_HIGH),
Option(short='L',
long='low',
type=float,
kwargs={'trigger_low' : DEFAULT_TRIGGER_LOW},
kwargs={'trigger_low': DEFAULT_TRIGGER_LOW},
description='Set the falling edge entropy trigger threshold (default: %.2f)' % DEFAULT_TRIGGER_LOW),
]
......@@ -75,7 +76,8 @@ class Entropy(Module):
Kwarg(name='block_size', default=0),
]
# Run this module last so that it can process all other module's results and overlay them on the entropy graph
# Run this module last so that it can process all other module's results
# and overlay them on the entropy graph
PRIORITY = 0
def init(self):
......@@ -100,9 +102,11 @@ class Entropy(Module):
if len(description) > self.max_description_length:
self.max_description_length = len(description)
self.file_markers[result.file.name].append((result.offset, description))
self.file_markers[result.file.name].append(
(result.offset, description))
# If other modules have been run and they produced results, don't spam the terminal with entropy results
# If other modules have been run and they produced results, don't spam
# the terminal with entropy results
if self.file_markers:
self.display_results = False
......@@ -127,7 +131,8 @@ class Entropy(Module):
try:
import pyqtgraph as pg
except ImportError as e:
binwalk.core.common.warning("Failed to import pyqtgraph module, visual entropy graphing will be disabled")
binwalk.core.common.warning(
"Failed to import pyqtgraph module, visual entropy graphing will be disabled")
self.do_plot = False
for fp in iter(self.next_file, None):
......@@ -147,19 +152,23 @@ class Entropy(Module):
pg.exit()
def calculate_file_entropy(self, fp):
# Tracks the last displayed rising/falling edge (0 for falling, 1 for rising, None if nothing has been printed yet)
# Tracks the last displayed rising/falling edge (0 for falling, 1 for
# rising, None if nothing has been printed yet)
last_edge = None
# Auto-reset the trigger; if True, an entropy above/below self.trigger_high/self.trigger_low will be printed
# Auto-reset the trigger; if True, an entropy above/below
# self.trigger_high/self.trigger_low will be printed
trigger_reset = True
# Clear results from any previously analyzed files
self.clear(results=True)
# If -K was not specified, calculate the block size to create DEFAULT_DATA_POINTS data points
# If -K was not specified, calculate the block size to create
# DEFAULT_DATA_POINTS data points
if self.block_size is None:
block_size = fp.size / self.DEFAULT_DATA_POINTS
# Round up to the nearest DEFAULT_BLOCK_SIZE (1024)
block_size = int(block_size + ((self.DEFAULT_BLOCK_SIZE - block_size) % self.DEFAULT_BLOCK_SIZE))
block_size = int(
block_size + ((self.DEFAULT_BLOCK_SIZE - block_size) % self.DEFAULT_BLOCK_SIZE))
else:
block_size = self.block_size
......@@ -167,7 +176,8 @@ class Entropy(Module):
if block_size <= 0:
block_size = self.DEFAULT_BLOCK_SIZE
binwalk.core.common.debug("Entropy block size (%d data points): %d" % (self.DEFAULT_DATA_POINTS, block_size))
binwalk.core.common.debug("Entropy block size (%d data points): %d" %
(self.DEFAULT_DATA_POINTS, block_size))
while True:
file_offset = fp.tell()
......@@ -178,7 +188,7 @@ class Entropy(Module):
i = 0
while i < dlen:
entropy = self.algorithm(data[i:i+block_size])
entropy = self.algorithm(data[i:i + block_size])
display = self.display_results
description = "%f" % entropy
......@@ -238,8 +248,10 @@ class Entropy(Module):
Performs an entropy analysis based on zlib compression ratio.
This is faster than the shannon entropy analysis, but not as accurate.
'''
# Entropy is a simple ratio of: <zlib compressed size> / <original size>
e = float(float(len(zlib.compress(str2bytes(data), 9))) / float(len(data)))
# Entropy is a simple ratio of: <zlib compressed size> / <original
# size>
e = float(
float(len(zlib.compress(str2bytes(data), 9))) / float(len(data)))
if truncate and e > 1.0:
e = 1.0
......@@ -267,18 +279,19 @@ class Entropy(Module):
# Disable auto-ranging of the Y (entropy) axis, as it
# can cause some very un-intuitive graphs, particularly
#for files with only high-entropy data.
# for files with only high-entropy data.
plt.setYRange(0, 1)
if self.show_legend and has_key(self.file_markers, fname):
plt.addLegend(size=(self.max_description_length*10, 0))
plt.addLegend(size=(self.max_description_length * 10, 0))
for (offset, description) in self.file_markers[fname]:
# If this description has already been plotted at a different offset, we need to
# use the same color for the marker, but set the description to None to prevent
# duplicate entries in the graph legend.
#
# Else, get the next color and use it to mark descriptions of this type.
# Else, get the next color and use it to mark descriptions of
# this type.
if has_key(plotted_colors, description):
color = plotted_colors[description]
description = None
......@@ -290,7 +303,8 @@ class Entropy(Module):
if i >= len(self.COLORS):
i = 0
plt.plot(x=[offset,offset], y=[0,1.1], name=description, pen=pg.mkPen(color, width=2.5))
plt.plot(x=[offset, offset], y=[0, 1.1],
name=description, pen=pg.mkPen(color, width=2.5))
# Plot data points
plt.plot(x, y, pen='y')
......@@ -300,14 +314,15 @@ class Entropy(Module):
# Save graph to CWD
out_file = os.path.join(os.getcwd(), os.path.basename(fname))
# exporters.ImageExporter is different in different versions of pyqtgraph
# exporters.ImageExporter is different in different versions of
# pyqtgraph
try:
exporter = exporters.ImageExporter(plt.plotItem)
except TypeError:
exporter = exporters.ImageExporter.ImageExporter(plt.plotItem)
exporter.parameters()['width'] = self.FILE_WIDTH
exporter.export(binwalk.core.common.unique_file_name(out_file, self.FILE_FORMAT))
exporter.export(
binwalk.core.common.unique_file_name(out_file, self.FILE_FORMAT))
else:
plt.setLabel('left', self.YLABEL, units=self.YUNITS)
plt.setLabel('bottom', self.XLABEL, units=self.XUNITS)
......@@ -14,13 +14,17 @@ from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg
from binwalk.core.common import file_size, file_md5, unique_file_name, BlockFile
class ExtractInfo(object):
def __init__(self):
self.carved = {}
self.extracted = {}
self.directory = None
class Extractor(Module):
'''
Extractor class, responsible for extracting files from the target file and executing external applications, if requested.
'''
......@@ -29,13 +33,14 @@ class Extractor(Module):
RULE_DELIM = ':'
# Comments in the extract.conf files start with a pound
COMMENT_DELIM ='#'
COMMENT_DELIM = '#'
# Place holder for the extracted file name in the command
FILE_NAME_PLACEHOLDER = '%e'
# Unique path delimiter, used for generating unique output file/directory names.
# Useful when, for example, extracting two squashfs images (squashfs-root, squashfs-root-0).
# Useful when, for example, extracting two squashfs images (squashfs-root,
# squashfs-root-0).
UNIQUE_PATH_DELIMITER = '%%'
TITLE = 'Extraction'
......@@ -45,45 +50,45 @@ class Extractor(Module):
CLI = [
Option(short='e',
long='extract',
kwargs={'load_default_rules' : True, 'enabled' : True},
kwargs={'load_default_rules': True, 'enabled': True},
description='Automatically extract known file types'),
Option(short='D',
long='dd',
type=list,
dtype='type:ext:cmd',
kwargs={'manual_rules' : [], 'enabled' : True},
kwargs={'manual_rules': [], 'enabled': True},
description='Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>'),
Option(short='M',
long='matryoshka',
kwargs={'matryoshka' : 8},
kwargs={'matryoshka': 8},
description='Recursively scan extracted files'),
Option(short='d',
long='depth',
type=int,
kwargs={'matryoshka' : 0},
kwargs={'matryoshka': 0},
description='Limit matryoshka recursion depth (default: 8 levels deep)'),
Option(short='C',
long='directory',
type=str,
kwargs={'base_directory' : 0},
kwargs={'base_directory': 0},
description='Extract files/folders to a custom directory (default: current working directory)'),
Option(short='j',
long='size',
type=int,
kwargs={'max_size' : 0},
kwargs={'max_size': 0},
description='Limit the size of each extracted file'),
Option(short='n',
long='count',
type=int,
kwargs={'max_count' : 0},
kwargs={'max_count': 0},
description='Limit the number of extracted files'),
Option(short='r',
long='rm',
kwargs={'remove_after_execute' : True},
kwargs={'remove_after_execute': True},
description='Delete carved files after extraction'),
Option(short='z',
long='carve',
kwargs={'run_extractors' : False},
kwargs={'run_extractors': False},
description="Carve data from files, but don't execute extraction utilities"),
]
......@@ -100,7 +105,8 @@ class Extractor(Module):
]
def load(self):
# Holds a list of extraction rules loaded either from a file or when manually specified.
# Holds a list of extraction rules loaded either from a file or when
# manually specified.
self.extract_rules = []
# The input file specific output directory path (default to CWD)
if self.base_directory:
......@@ -149,21 +155,27 @@ class Extractor(Module):
fp.close()
self.pending.append(f)
except IOError as e:
binwalk.core.common.warning("Ignoring file '%s': %s" % (f, str(e)))
binwalk.core.common.warning(
"Ignoring file '%s': %s" % (f, str(e)))
else:
binwalk.core.common.warning("Ignoring file '%s': Not a regular file" % f)
binwalk.core.common.warning(
"Ignoring file '%s': Not a regular file" % f)
def reset(self):
# Holds a list of pending files that should be scanned; only populated if self.matryoshka == True
# Holds a list of pending files that should be scanned; only populated
# if self.matryoshka == True
self.pending = []
# Holds a dictionary of extraction directories created for each scanned file.
# Holds a dictionary of extraction directories created for each scanned
# file.
self.extraction_directories = {}
# Holds a dictionary of the last directory listing for a given directory; used for identifying
# newly created/extracted files that need to be appended to self.pending.
# newly created/extracted files that need to be appended to
# self.pending.
self.last_directory_listing = {}
def callback(self, r):
# Make sure the file attribute is set to a compatible instance of binwalk.core.common.BlockFile
# Make sure the file attribute is set to a compatible instance of
# binwalk.core.common.BlockFile
try:
r.file.size
except KeyboardInterrupt as e:
......@@ -180,20 +192,25 @@ class Extractor(Module):
# Note that r.display is still True even if --quiet has been specified; it is False if the result has been
# explicitly excluded via the -y/-x options.
if r.valid and r.extract and r.display and (not self.max_count or self.extraction_count < self.max_count):
# Create some extract output for this file, it it doesn't already exist
# Create some extract output for this file, it it doesn't already
# exist
if not binwalk.core.common.has_key(self.output, r.file.path):
self.output[r.file.path] = ExtractInfo()
# Attempt extraction
binwalk.core.common.debug("Extractor callback for %s @%d [%s]" % (r.file.name, r.offset, r.description))
(extraction_directory, dd_file, scan_extracted_files) = self.extract(r.offset, r.description, r.file.path, size, r.name)
binwalk.core.common.debug(
"Extractor callback for %s @%d [%s]" % (r.file.name, r.offset, r.description))
(extraction_directory, dd_file, scan_extracted_files) = self.extract(
r.offset, r.description, r.file.path, size, r.name)
# If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file
# If the extraction was successful, self.extract will have returned
# the output directory and name of the dd'd file
if extraction_directory and dd_file:
# Track the number of extracted files
self.extraction_count += 1
# Get the full path to the dd'd file and save it in the output info for this file
# Get the full path to the dd'd file and save it in the output
# info for this file
dd_file_path = os.path.join(extraction_directory, dd_file)
self.output[r.file.path].carved[r.offset] = dd_file_path
self.output[r.file.path].extracted[r.offset] = []
......@@ -202,41 +219,52 @@ class Extractor(Module):
directory_listing = set(os.listdir(extraction_directory))
# If this is a newly created output directory, self.last_directory_listing won't have a record of it.
# If we've extracted other files to this directory before, it will.
# If we've extracted other files to this directory before, it
# will.
if not has_key(self.last_directory_listing, extraction_directory):
self.last_directory_listing[extraction_directory] = set()
# Loop through a list of newly created files (i.e., files that weren't listed in the last directory listing)
# Loop through a list of newly created files (i.e., files that
# weren't listed in the last directory listing)
for f in directory_listing.difference(self.last_directory_listing[extraction_directory]):
# Build the full file path and add it to the extractor results
# Build the full file path and add it to the extractor
# results
file_path = os.path.join(extraction_directory, f)
real_file_path = os.path.realpath(file_path)
self.result(description=file_path, display=False)
# Also keep a list of files created by the extraction utility
# Also keep a list of files created by the extraction
# utility
if real_file_path != dd_file_path:
self.output[r.file.path].extracted[r.offset].append(real_file_path)
self.output[r.file.path].extracted[
r.offset].append(real_file_path)
# If recursion was specified, and the file is not the same one we just dd'd
# If recursion was specified, and the file is not the same
# one we just dd'd
if (self.matryoshka and
file_path != dd_file_path and
scan_extracted_files and
self.directory in real_file_path):
# If the recursion level of this file is less than or equal to our desired recursion level
# If the recursion level of this file is less than or
# equal to our desired recursion level
if len(real_file_path.split(self.directory)[1].split(os.path.sep)) <= self.matryoshka:
# If this is a directory and we are supposed to process directories for this extractor,
# then add all files under that directory to the list of pending files.
# then add all files under that directory to the
# list of pending files.
if os.path.isdir(file_path):
for root, dirs, files in os.walk(file_path):
for f in files:
full_path = os.path.join(root, f)
self.add_pending(full_path)
# If it's just a file, it to the list of pending files
# If it's just a file, it to the list of pending
# files
else:
self.add_pending(file_path)
# Update the last directory listing for the next time we extract a file to this same output directory
self.last_directory_listing[extraction_directory] = directory_listing
# Update the last directory listing for the next time we
# extract a file to this same output directory
self.last_directory_listing[
extraction_directory] = directory_listing
def append_rule(self, r):
self.extract_rules.append(r.copy())
......@@ -258,11 +286,11 @@ class Extractor(Module):
rules = []
match = False
r = {
'extension' : '',
'cmd' : '',
'regex' : None,
'codes' : codes,
'recurse' : recurse,
'extension': '',
'cmd': '',
'regex': None,
'codes': codes,
'recurse': recurse,
}
# Process single explicitly specified rule
......@@ -387,7 +415,8 @@ class Extractor(Module):
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("Extractor.load_from_file failed to load file '%s': %s" % (fname, str(e)))
raise Exception(
"Extractor.load_from_file failed to load file '%s': %s" % (fname, str(e)))
def load_defaults(self):
'''
......@@ -409,7 +438,8 @@ class Extractor(Module):
raise e
except Exception as e:
if binwalk.core.common.DEBUG:
raise Exception("Extractor.load_defaults failed to load file '%s': %s" % (extract_file, str(e)))
raise Exception(
"Extractor.load_defaults failed to load file '%s': %s" % (extract_file, str(e)))
def get_output_directory_override(self):
'''
......@@ -436,7 +466,8 @@ class Extractor(Module):
Returns None.
'''
# If we have not already created an output directory for this target file, create one now
# If we have not already created an output directory for this target
# file, create one now
if not has_key(self.extraction_directories, path):
basedir = os.path.dirname(path)
basename = os.path.basename(path)
......@@ -459,16 +490,19 @@ class Extractor(Module):
subdir = ""
if self.output_directory_override:
output_directory = os.path.join(self.directory, subdir, self.output_directory_override)
output_directory = os.path.join(
self.directory, subdir, self.output_directory_override)
else:
outdir = os.path.join(self.directory, subdir, '_' + basename)
output_directory = unique_file_name(outdir, extension='extracted')
output_directory = unique_file_name(
outdir, extension='extracted')
if not os.path.exists(output_directory):
os.mkdir(output_directory)
self.extraction_directories[path] = output_directory
self.output[path].directory = os.path.realpath(output_directory) + os.path.sep
self.output[path].directory = os.path.realpath(
output_directory) + os.path.sep
# Else, just use the already created directory
else:
output_directory = self.extraction_directories[path]
......@@ -513,9 +547,11 @@ class Extractor(Module):
if not rules:
return (None, None, False)
else:
binwalk.core.common.debug("Found %d matching extraction rules" % len(rules))
binwalk.core.common.debug(
"Found %d matching extraction rules" % len(rules))
# Generate the output directory name where extracted files will be stored
# Generate the output directory name where extracted files will be
# stored
output_directory = self.build_output_directory(file_name)
# Extract to end of file if no size was specified
......@@ -529,14 +565,16 @@ class Extractor(Module):
for i in range(0, len(rules)):
rule = rules[i]
# Make sure we don't recurse into any extracted directories if instructed not to
# Make sure we don't recurse into any extracted directories if
# instructed not to
if rule['recurse'] in [True, False]:
recurse = rule['recurse']
else:
recurse = True
# Copy out the data to disk, if we haven't already
fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)
fname = self._dd(
file_path, offset, size, rule['extension'], output_file_name=name)
# If there was a command specified for this rule, try to execute it.
# If execution fails, the next rule will be attempted.
......@@ -551,7 +589,8 @@ class Extractor(Module):
# Execute the specified command against the extracted file
if self.run_extractors:
extract_ok = self.execute(rule['cmd'], fname, rule['codes'])
extract_ok = self.execute(
rule['cmd'], fname, rule['codes'])
else:
extract_ok = True
......@@ -572,8 +611,9 @@ class Extractor(Module):
if extract_ok == True:
break
# Else, remove the extracted file if this isn't the last rule in the list.
# If it is the last rule, leave the file on disk for the user to examine.
elif i != (len(rules)-1):
# If it is the last rule, leave the file on disk for the
# user to examine.
elif i != (len(rules) - 1):
try:
os.unlink(fname)
except KeyboardInterrupt as e:
......@@ -642,7 +682,8 @@ class Extractor(Module):
try:
codes[i] = int(codes[i], 0)
except ValueError as e:
binwalk.core.common.warning("The specified return code '%s' for extractor '%s' is not a valid number!" % (codes[i], values[0]))
binwalk.core.common.warning(
"The specified return code '%s' for extractor '%s' is not a valid number!" % (codes[i], values[0]))
values[3] = codes
if len(values) >= 5:
......@@ -672,7 +713,8 @@ class Extractor(Module):
if not output_file_name or output_file_name is None:
bname = default_bname
else:
# Strip the output file name of invalid/dangerous characters (like file paths)
# Strip the output file name of invalid/dangerous characters (like
# file paths)
bname = os.path.basename(output_file_name)
fname = unique_file_name(bname, extension)
......@@ -706,7 +748,7 @@ class Extractor(Module):
if not data:
break
else:
total_size += (dlen-adjust)
total_size += (dlen - adjust)
if total_size > size:
dlen -= (total_size - size)
fdout.write(str2bytes(data[adjust:dlen]))
......@@ -718,9 +760,11 @@ class Extractor(Module):
except KeyboardInterrupt as e:
raise e
except Exception as e:
raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" % (file_name, fname, str(e)))
raise Exception("Extractor.dd failed to extract data from '%s' to '%s': %s" %
(file_name, fname, str(e)))
binwalk.core.common.debug("Carved data block 0x%X - 0x%X from '%s' to '%s'" % (offset, offset+size, file_name, fname))
binwalk.core.common.debug("Carved data block 0x%X - 0x%X from '%s' to '%s'" %
(offset, offset + size, file_name, fname))
return fname
def execute(self, cmd, fname, codes=[0, None]):
......@@ -746,50 +790,60 @@ class Extractor(Module):
except KeyboardInterrupt as e:
raise e
except Exception as e:
binwalk.core.common.warning("Internal extractor '%s' failed with exception: '%s'" % (str(cmd), str(e)))
binwalk.core.common.warning(
"Internal extractor '%s' failed with exception: '%s'" % (str(cmd), str(e)))
elif cmd:
# If not in debug mode, create a temporary file to redirect stdout and stderr to
# If not in debug mode, create a temporary file to redirect
# stdout and stderr to
if not binwalk.core.common.DEBUG:
tmp = tempfile.TemporaryFile()
# Generate unique file paths for all paths in the current command that are surrounded by UNIQUE_PATH_DELIMITER
# Generate unique file paths for all paths in the current
# command that are surrounded by UNIQUE_PATH_DELIMITER
while self.UNIQUE_PATH_DELIMITER in cmd:
need_unique_path = cmd.split(self.UNIQUE_PATH_DELIMITER)[1].split(self.UNIQUE_PATH_DELIMITER)[0]
unique_path = binwalk.core.common.unique_file_name(need_unique_path)
cmd = cmd.replace(self.UNIQUE_PATH_DELIMITER + need_unique_path + self.UNIQUE_PATH_DELIMITER, unique_path)
need_unique_path = cmd.split(self.UNIQUE_PATH_DELIMITER)[
1].split(self.UNIQUE_PATH_DELIMITER)[0]
unique_path = binwalk.core.common.unique_file_name(
need_unique_path)
cmd = cmd.replace(
self.UNIQUE_PATH_DELIMITER + need_unique_path + self.UNIQUE_PATH_DELIMITER, unique_path)
# Execute.
for command in cmd.split("&&"):
# Replace all instances of FILE_NAME_PLACEHOLDER in the command with fname
command = command.strip().replace(self.FILE_NAME_PLACEHOLDER, fname)
# Replace all instances of FILE_NAME_PLACEHOLDER in the
# command with fname
command = command.strip().replace(
self.FILE_NAME_PLACEHOLDER, fname)
binwalk.core.common.debug("subprocess.call(%s, stdout=%s, stderr=%s)" % (command, str(tmp), str(tmp)))
rval = subprocess.call(shlex.split(command), stdout=tmp, stderr=tmp)
binwalk.core.common.debug(
"subprocess.call(%s, stdout=%s, stderr=%s)" % (command, str(tmp), str(tmp)))
rval = subprocess.call(
shlex.split(command), stdout=tmp, stderr=tmp)
if rval in codes:
retval = True
else:
retval = False
binwalk.core.common.debug('External extractor command "%s" completed with return code %d (success: %s)' % (cmd, rval, str(retval)))
binwalk.core.common.debug(
'External extractor command "%s" completed with return code %d (success: %s)' % (cmd, rval, str(retval)))
# TODO: Should errors from all commands in a command string be checked? Currently we only support
# specifying one set of error codes, so at the moment, this is not done; it is up to the
# final command to return success or failure (which presumably it will if previous necessary
# commands were not successful, but this is an assumption).
#if retval == False:
# if retval == False:
# break
except KeyboardInterrupt as e:
raise e
except Exception as e:
binwalk.core.common.warning("Extractor.execute failed to run external extractor '%s': %s, '%s' might not be installed correctly" % (str(cmd), str(e), str(cmd)))
binwalk.core.common.warning(
"Extractor.execute failed to run external extractor '%s': %s, '%s' might not be installed correctly" % (str(cmd), str(e), str(cmd)))
retval = None
if tmp is not None:
tmp.close()
return retval
# Module to process general user input options (scan length, starting offset, etc).
# Module to process general user input options (scan length, starting
# offset, etc).
import io
import os
......@@ -12,6 +13,7 @@ import binwalk.core.settings
from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg, show_help
class General(Module):
TITLE = "General"
......@@ -23,77 +25,77 @@ class General(Module):
Option(long='length',
short='l',
type=int,
kwargs={'length' : 0},
kwargs={'length': 0},
description='Number of bytes to scan'),
Option(long='offset',
short='o',
type=int,
kwargs={'offset' : 0},
kwargs={'offset': 0},
description='Start scan at this file offset'),
Option(long='base',
short='O',
type=int,
kwargs={'base' : 0},
kwargs={'base': 0},
description='Add a base address to all printed offsets'),
Option(long='block',
short='K',
type=int,
kwargs={'block' : 0},
kwargs={'block': 0},
description='Set file block size'),
Option(long='swap',
short='g',
type=int,
kwargs={'swap_size' : 0},
kwargs={'swap_size': 0},
description='Reverse every n bytes before scanning'),
Option(long='log',
short='f',
type=argparse.FileType,
kwargs={'log_file' : None},
kwargs={'log_file': None},
description='Log results to file'),
Option(long='csv',
short='c',
kwargs={'csv' : True},
kwargs={'csv': True},
description='Log results to file in CSV format'),
Option(long='term',
short='t',
kwargs={'format_to_terminal' : True},
kwargs={'format_to_terminal': True},
description='Format output to fit the terminal window'),
Option(long='quiet',
short='q',
kwargs={'quiet' : True},
kwargs={'quiet': True},
description='Suppress output to stdout'),
Option(long='verbose',
short='v',
kwargs={'verbose' : True},
kwargs={'verbose': True},
description='Enable verbose output'),
Option(short='h',
long='help',
kwargs={'show_help' : True},
kwargs={'show_help': True},
description='Show help output'),
Option(short='a',
long='finclude',
type=str,
kwargs={'file_name_include_regex' : ""},
kwargs={'file_name_include_regex': ""},
description='Only scan files whose names match this regex'),
Option(short='p',
long='fexclude',
type=str,
kwargs={'file_name_exclude_regex' : ""},
kwargs={'file_name_exclude_regex': ""},
description='Do not scan files whose names match this regex'),
Option(short='s',
long='status',
type=int,
kwargs={'status_server_port' : 0},
kwargs={'status_server_port': 0},
description='Enable the status server on the specified port'),
Option(long=None,
short=None,
type=binwalk.core.common.BlockFile,
kwargs={'files' : []}),
kwargs={'files': []}),
# Hidden, API-only arguments
Option(long="string",
hidden=True,
kwargs={'subclass' : binwalk.core.common.StringFile}),
kwargs={'subclass': binwalk.core.common.StringFile}),
]
KWARGS = [
......@@ -132,9 +134,11 @@ class General(Module):
# Build file name filter regex rules
if self.file_name_include_regex:
self.file_name_include_regex = re.compile(self.file_name_include_regex)
self.file_name_include_regex = re.compile(
self.file_name_include_regex)
if self.file_name_exclude_regex:
self.file_name_exclude_regex = re.compile(self.file_name_exclude_regex)
self.file_name_exclude_regex = re.compile(
self.file_name_exclude_regex)
self.settings = binwalk.core.settings.Settings()
self.display = binwalk.core.display.Display(log=self.log_file,
......@@ -160,7 +164,8 @@ class General(Module):
Must be called after self._test_target_files so that self.target_files is properly set.
'''
# If more than one target file was specified, enable verbose mode; else, there is
# nothing in some outputs to indicate which scan corresponds to which file.
# nothing in some outputs to indicate which scan corresponds to which
# file.
if len(self.target_files) > 1 and not self.verbose:
self.verbose = True
......@@ -217,4 +222,3 @@ class General(Module):
raise e
except Exception as e:
self.error(description="Cannot open file : %s" % str(e))
......@@ -2,7 +2,8 @@
# Unlike other scans, this doesn't produce any file offsets, so its results are not applicable to
# some other scans, such as the entropy scan.
# Additionally, this module currently doesn't support certian general options (length, offset, swap, etc),
# as the libfuzzy C library is responsible for opening and scanning the specified files.
# as the libfuzzy C library is responsible for opening and scanning the
# specified files.
import os
import re
......@@ -13,7 +14,9 @@ import binwalk.core.common
from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg
class HashResult(object):
'''
Class for storing libfuzzy hash results.
For internal use only.
......@@ -24,7 +27,9 @@ class HashResult(object):
self.hash = hash
self.strings = strings
class HashMatch(Module):
'''
Class for fuzzy hash matching of files and directories.
'''
......@@ -36,33 +41,33 @@ class HashMatch(Module):
CLI = [
Option(short='F',
long='fuzzy',
kwargs={'enabled' : True},
kwargs={'enabled': True},
description='Perform fuzzy hash matching on files/directories'),
Option(short='u',
long='cutoff',
priority=100,
type=int,
kwargs={'cutoff' : DEFAULT_CUTOFF},
kwargs={'cutoff': DEFAULT_CUTOFF},
description='Set the cutoff percentage'),
Option(short='S',
long='strings',
kwargs={'strings' : True},
kwargs={'strings': True},
description='Diff strings inside files instead of the entire file'),
Option(short='s',
long='same',
kwargs={'same' : True, 'cutoff' : CONSERVATIVE_CUTOFF},
kwargs={'same': True, 'cutoff': CONSERVATIVE_CUTOFF},
description='Only show files that are the same'),
Option(short='p',
long='diff',
kwargs={'same' : False, 'cutoff' : CONSERVATIVE_CUTOFF},
kwargs={'same': False, 'cutoff': CONSERVATIVE_CUTOFF},
description='Only show files that are different'),
Option(short='n',
long='name',
kwargs={'filter_by_name' : True},
kwargs={'filter_by_name': True},
description='Only compare files whose base names are the same'),
Option(short='L',
long='symlinks',
kwargs={'symlinks' : True},
kwargs={'symlinks': True},
description="Don't ignore symlinks"),
]
......@@ -87,7 +92,8 @@ class HashMatch(Module):
# Max result is 148 (http://ssdeep.sourceforge.net/api/html/fuzzy_8h.html)
FUZZY_MAX_RESULT = 150
# Files smaller than this won't produce meaningful fuzzy results (from ssdeep.h)
# Files smaller than this won't produce meaningful fuzzy results (from
# ssdeep.h)
FUZZY_MIN_FILE_SIZE = 4096
HEADER_FORMAT = "\n%s" + " " * 11 + "%s\n"
......@@ -100,7 +106,8 @@ class HashMatch(Module):
self.last_file1 = HashResult(None)
self.last_file2 = HashResult(None)
self.lib = binwalk.core.C.Library(self.LIBRARY_NAME, self.LIBRARY_FUNCTIONS)
self.lib = binwalk.core.C.Library(
self.LIBRARY_NAME, self.LIBRARY_FUNCTIONS)
def _get_strings(self, fname):
return ''.join(list(binwalk.core.common.strings(fname, minimum=10)))
......@@ -137,7 +144,8 @@ class HashMatch(Module):
hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
# Check if the last file1 or file2 matches this file1 or file2; no need to re-hash if they match.
# Check if the last file1 or file2 matches this file1 or file2;
# no need to re-hash if they match.
if file1 == self.last_file1.name and self.last_file1.hash:
file1_dup = True
else:
......@@ -153,12 +161,14 @@ class HashMatch(Module):
if file1_dup:
file1_strings = self.last_file1.strings
else:
self.last_file1.strings = file1_strings = self._get_strings(file1)
self.last_file1.strings = file1_strings = self._get_strings(
file1)
if file2_dup:
file2_strings = self.last_file2.strings
else:
self.last_file2.strings = file2_strings = self._get_strings(file2)
self.last_file2.strings = file2_strings = self._get_strings(
file2)
if file1_strings == file2_strings:
return 100
......@@ -166,23 +176,27 @@ class HashMatch(Module):
if file1_dup:
hash1 = self.last_file1.hash
else:
status |= self.lib.fuzzy_hash_buf(file1_strings, len(file1_strings), hash1)
status |= self.lib.fuzzy_hash_buf(
file1_strings, len(file1_strings), hash1)
if file2_dup:
hash2 = self.last_file2.hash
else:
status |= self.lib.fuzzy_hash_buf(file2_strings, len(file2_strings), hash2)
status |= self.lib.fuzzy_hash_buf(
file2_strings, len(file2_strings), hash2)
else:
if file1_dup:
hash1 = self.last_file1.hash
else:
status |= self.lib.fuzzy_hash_filename(file1, hash1)
status |= self.lib.fuzzy_hash_filename(
file1, hash1)
if file2_dup:
hash2 = self.last_file2.hash
else:
status |= self.lib.fuzzy_hash_filename(file2, hash2)
status |= self.lib.fuzzy_hash_filename(
file2, hash2)
if status == 0:
if not file1_dup:
......@@ -195,7 +209,8 @@ class HashMatch(Module):
else:
return self.lib.fuzzy_compare(hash1, hash2)
except Exception as e:
binwalk.core.common.warning("Exception while doing fuzzy hash: %s" % str(e))
binwalk.core.common.warning(
"Exception while doing fuzzy hash: %s" % str(e))
return None
......@@ -216,15 +231,18 @@ class HashMatch(Module):
'''
file_list = []
# Normalize directory path so that we can exclude it from each individual file path
# Normalize directory path so that we can exclude it from each
# individual file path
directory = os.path.abspath(directory) + os.path.sep
for (root, dirs, files) in os.walk(directory):
# Don't include the root directory in the file paths
root = ''.join(root.split(directory, 1)[1:])
# Get a list of files, with or without symlinks as specified during __init__
files = [os.path.join(root, f) for f in files if self.symlinks or not os.path.islink(f)]
# Get a list of files, with or without symlinks as specified during
# __init__
files = [os.path.join(root, f)
for f in files if self.symlinks or not os.path.islink(f)]
file_list += files
......
# Routines to perform Chi Squared tests.
# Used for fingerprinting unknown areas of high entropy (e.g., is this block of high entropy data compressed or encrypted?).
# Inspired by people who actually know what they're doing: http://www.fourmilab.ch/random/
# Inspired by people who actually know what they're doing:
# http://www.fourmilab.ch/random/
import math
from binwalk.core.compat import *
from binwalk.core.module import Module, Kwarg, Option, Dependency
class ChiSquare(object):
'''
Performs a Chi Squared test against the provided data.
'''
......@@ -22,7 +25,8 @@ class ChiSquare(object):
self.bytes = {}
self.freedom = self.IDEAL - 1
# Initialize the self.bytes dictionary with keys for all possible byte values (0 - 255)
# Initialize the self.bytes dictionary with keys for all possible byte
# values (0 - 255)
for i in range(0, int(self.IDEAL)):
self.bytes[chr(i)] = 0
......@@ -59,20 +63,23 @@ class ChiSquare(object):
if expected:
for byte in self.bytes.values():
self.xc2 += ((byte - expected) ** 2 ) / expected
self.xc2 += ((byte - expected) ** 2) / expected
return self.xc2
class EntropyBlock(object):
def __init__(self, **kwargs):
self.start = None
self.end = None
self.length = None
for (k,v) in iterator(kwargs):
for (k, v) in iterator(kwargs):
setattr(self, k, v)
class HeuristicCompressionAnalyzer(Module):
'''
Performs analysis and attempts to interpret the results.
'''
......@@ -89,17 +96,18 @@ class HeuristicCompressionAnalyzer(Module):
DEPENDS = [
Dependency(name='Entropy',
attribute='entropy',
kwargs={'enabled' : True, 'do_plot' : False, 'display_results' : False, 'block_size' : ENTROPY_BLOCK_SIZE}),
kwargs={
'enabled': True, 'do_plot': False, 'display_results': False, 'block_size': ENTROPY_BLOCK_SIZE}),
]
CLI = [
Option(short='H',
long='heuristic',
kwargs={'enabled' : True},
kwargs={'enabled': True},
description='Heuristically classify high entropy data'),
Option(short='a',
long='trigger',
kwargs={'trigger_level' : 0},
kwargs={'trigger_level': 0},
type=float,
description='Set the entropy trigger level (0.0 - 1.0, default: %.2f)' % ENTROPY_TRIGGER),
]
......@@ -130,9 +138,11 @@ class HeuristicCompressionAnalyzer(Module):
self.blocks[result.file.name] = []
if result.entropy >= self.trigger_level and (not self.blocks[result.file.name] or self.blocks[result.file.name][-1].end is not None):
self.blocks[result.file.name].append(EntropyBlock(start=result.offset + self.BLOCK_OFFSET))
self.blocks[result.file.name].append(
EntropyBlock(start=result.offset + self.BLOCK_OFFSET))
elif result.entropy < self.trigger_level and self.blocks[result.file.name] and self.blocks[result.file.name][-1].end is None:
self.blocks[result.file.name][-1].end = result.offset - self.BLOCK_OFFSET
self.blocks[result.file.name][
-1].end = result.offset - self.BLOCK_OFFSET
def run(self):
for fp in iter(self.next_file, None):
......@@ -173,7 +183,7 @@ class HeuristicCompressionAnalyzer(Module):
while j < dlen:
chi.reset()
data = d[j:j+self.block_size]
data = d[j:j + self.block_size]
if len(data) < self.block_size:
break
......@@ -194,5 +204,6 @@ class HeuristicCompressionAnalyzer(Module):
else:
verdict = 'High entropy data, best guess: encrypted'
desc = '%s, size: %d, %d low entropy blocks' % (verdict, block.length, num_error)
desc = '%s, size: %d, %d low entropy blocks' % (
verdict, block.length, num_error)
self.result(offset=block.start, description=desc, file=fp)
......@@ -5,13 +5,13 @@ import binwalk.core.common as common
from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg
class HexDiff(Module):
class HexDiff(Module):
COLORS = {
'red' : '31',
'green' : '32',
'blue' : '34',
'red': '31',
'green': '32',
'blue': '34',
}
SEPERATORS = ['\\', '/']
......@@ -25,23 +25,23 @@ class HexDiff(Module):
CLI = [
Option(short='W',
long='hexdump',
kwargs={'enabled' : True},
kwargs={'enabled': True},
description='Perform a hexdump / diff of a file or files'),
Option(short='G',
long='green',
kwargs={'show_green' : True},
kwargs={'show_green': True},
description='Only show lines containing bytes that are the same among all files'),
Option(short='i',
long='red',
kwargs={'show_red' : True},
kwargs={'show_red': True},
description='Only show lines containing bytes that are different among all files'),
Option(short='U',
long='blue',
kwargs={'show_blue' : True},
kwargs={'show_blue': True},
description='Only show lines containing bytes that are different among some files'),
Option(short='w',
long='terse',
kwargs={'terse' : True},
kwargs={'terse': True},
description='Diff all files, but only display a hex dump of the first file'),
]
......@@ -98,7 +98,7 @@ class HexDiff(Module):
except IndexError as e:
diff_count += 1
if diff_count == len(target_data)-1:
if diff_count == len(target_data) - 1:
color = "red"
elif diff_count > 0:
color = "blue"
......@@ -149,7 +149,8 @@ class HexDiff(Module):
hexbyte = "XX"
asciibyte = "."
else:
(hexbyte, asciibyte) = self.hexascii(block_data, block_data[fp][i], i)
(hexbyte, asciibyte) = self.hexascii(
block_data, block_data[fp][i], i)
hexline += "%s " % hexbyte
asciiline += "%s" % asciibyte
......@@ -178,11 +179,13 @@ class HexDiff(Module):
self.status.completed += self.block
def init(self):
# To mimic expected behavior, if all options are False, we show everything
# To mimic expected behavior, if all options are False, we show
# everything
if not any([self.show_red, self.show_green, self.show_blue]):
self.show_red = self.show_green = self.show_blue = True
# Always disable terminal formatting, as it won't work properly with colorized output
# Always disable terminal formatting, as it won't work properly with
# colorized output
self.config.display.fit_to_screen = False
# Set the block size (aka, hexdump line size)
......@@ -205,7 +208,8 @@ class HexDiff(Module):
file_count = 1
else:
file_count = len(self.hex_target_files)
self.HEADER_FORMAT = "OFFSET " + (("%%-%ds " % header_width) * file_count) + "\n"
self.HEADER_FORMAT = "OFFSET " + \
(("%%-%ds " % header_width) * file_count) + "\n"
# Build the header argument list
self.HEADER = [fp.name for fp in self.hex_target_files]
......@@ -225,4 +229,3 @@ class HexDiff(Module):
self.header()
self.diff_files(self.hex_target_files)
self.footer()
# Basic signature scan module. This is the default (and primary) feature of binwalk.
# Basic signature scan module. This is the default (and primary) feature
# of binwalk.
import binwalk.core.magic
from binwalk.core.module import Module, Option, Kwarg
class Signature(Module):
TITLE = "Signature Scan"
......@@ -10,41 +12,41 @@ class Signature(Module):
CLI = [
Option(short='B',
long='signature',
kwargs={'enabled' : True, 'explicit_signature_scan' : True},
kwargs={'enabled': True, 'explicit_signature_scan': True},
description='Scan target file(s) for common file signatures'),
Option(short='R',
long='raw',
kwargs={'enabled' : True, 'raw_bytes' : []},
kwargs={'enabled': True, 'raw_bytes': []},
type=list,
dtype=str.__name__,
description='Scan target file(s) for the specified sequence of bytes'),
Option(short='A',
long='opcodes',
kwargs={'enabled' : True, 'search_for_opcodes' : True},
kwargs={'enabled': True, 'search_for_opcodes': True},
description='Scan target file(s) for common executable opcode signatures'),
Option(short='m',
long='magic',
kwargs={'enabled' : True, 'magic_files' : []},
kwargs={'enabled': True, 'magic_files': []},
type=list,
dtype='file',
description='Specify a custom magic file to use'),
Option(short='b',
long='dumb',
kwargs={'dumb_scan' : True},
kwargs={'dumb_scan': True},
description='Disable smart signature keywords'),
Option(short='I',
long='invalid',
kwargs={'show_invalid' : True},
kwargs={'show_invalid': True},
description='Show results marked as invalid'),
Option(short='x',
long='exclude',
kwargs={'exclude_filters' : []},
kwargs={'exclude_filters': []},
type=list,
dtype=str.__name__,
description='Exclude results that match <str>'),
Option(short='y',
long='include',
kwargs={'include_filters' : []},
kwargs={'include_filters': []},
type=list,
dtype=str.__name__,
description='Only show results that match <str>'),
......@@ -67,16 +69,19 @@ class Signature(Module):
def init(self):
self.one_of_many = None
# Append the user's magic file first so that those signatures take precedence
# Append the user's magic file first so that those signatures take
# precedence
if self.search_for_opcodes:
self.magic_files = [
self.config.settings.user.binarch,
self.config.settings.system.binarch,
]
# Use the system default magic file if no other was specified, or if -B was explicitly specified
# Use the system default magic file if no other was specified, or if -B
# was explicitly specified
if (not self.magic_files and not self.raw_bytes) or self.explicit_signature_scan:
self.magic_files += self.config.settings.user.magic + self.config.settings.system.magic
self.magic_files += self.config.settings.user.magic + \
self.config.settings.system.magic
# Initialize libmagic
self.magic = binwalk.core.magic.Magic(include=self.include_filters,
......@@ -87,13 +92,16 @@ class Signature(Module):
if self.raw_bytes:
raw_signatures = []
for raw_bytes in self.raw_bytes:
raw_signatures.append("0 string %s %s" % (raw_bytes, raw_bytes))
binwalk.core.common.debug("Parsing raw signatures: %s" % str(raw_signatures))
raw_signatures.append(
"0 string %s %s" % (raw_bytes, raw_bytes))
binwalk.core.common.debug(
"Parsing raw signatures: %s" % str(raw_signatures))
self.magic.parse(raw_signatures)
# Parse the magic file(s)
if self.magic_files:
binwalk.core.common.debug("Loading magic files: %s" % str(self.magic_files))
binwalk.core.common.debug(
"Loading magic files: %s" % str(self.magic_files))
for f in self.magic_files:
self.magic.load(f)
......@@ -116,7 +124,8 @@ class Signature(Module):
r.valid = False
if r.valid:
# Don't keep displaying signatures that repeat a bunch of times (e.g., JFFS2 nodes)
# Don't keep displaying signatures that repeat a bunch of times
# (e.g., JFFS2 nodes)
if r.id == self.one_of_many:
r.display = False
elif r.many:
......@@ -156,14 +165,17 @@ class Signature(Module):
r.file = fp
# Register the result for futher processing/display
# self.result automatically calls self.validate for result validation
# self.result automatically calls self.validate for result
# validation
self.result(r=r)
# Is this a valid result and did it specify a jump-to-offset keyword, and are we doing a "smart" scan?
# Is this a valid result and did it specify a jump-to-offset
# keyword, and are we doing a "smart" scan?
if r.valid and r.jump > 0 and not self.dumb_scan:
absolute_jump_offset = r.offset + r.jump
current_block_offset = relative_offset + r.jump
#print ("Jumping to: 0x%X (0x%X)..." % (absolute_jump_offset, current_block_offset))
# print ("Jumping to: 0x%X (0x%X)..." %
# (absolute_jump_offset, current_block_offset))
# If the jump-to-offset is beyond the confines of the current block, seek the file to
# that offset and quit processing this block of data.
......@@ -176,4 +188,3 @@ class Signature(Module):
self.header()
self.scan_file(fp)
self.footer()
......@@ -2,7 +2,9 @@ import os
import binwalk.core.common
import binwalk.core.plugin
class ArcadyanDeobfuscator(binwalk.core.plugin.Plugin):
'''
Deobfuscator for known Arcadyan firmware obfuscation(s).
'''
......@@ -13,7 +15,8 @@ class ArcadyanDeobfuscator(binwalk.core.plugin.Plugin):
BLOCK_SIZE = 32
BLOCK1_OFFSET = 4
BLOCK2_OFFSET = 0x68
MIN_FILE_SIZE = (OBFUSCATION_MAGIC_SIZE + BLOCK2_OFFSET + BLOCK_SIZE)
MIN_FILE_SIZE = (
OBFUSCATION_MAGIC_SIZE + BLOCK2_OFFSET + BLOCK_SIZE)
BLOCK1_START = BLOCK1_OFFSET
BLOCK1_END = BLOCK1_START + BLOCK_SIZE
......@@ -31,7 +34,8 @@ class ArcadyanDeobfuscator(binwalk.core.plugin.Plugin):
def init(self):
if self.module.extractor.enabled:
self.module.extractor.add_rule(regex="^obfuscated arcadyan firmware",
self.module.extractor.add_rule(
regex="^obfuscated arcadyan firmware",
extension="obfuscated",
cmd=self.extractor)
......@@ -55,22 +59,25 @@ class ArcadyanDeobfuscator(binwalk.core.plugin.Plugin):
# Nibble-swap each byte in block 1
nswap = ''
for i in range(self.BLOCK1_START, self.BLOCK1_END):
nswap += chr(((ord(deobfuscated[i]) & 0x0F) << 4) + ((ord(deobfuscated[i]) & 0xF0) >> 4));
deobfuscated = deobfuscated[self.P1_START:self.P1_END] + nswap + deobfuscated[self.BLOCK1_END:]
nswap += chr(((ord(deobfuscated[i]) & 0x0F) << 4) + (
(ord(deobfuscated[i]) & 0xF0) >> 4))
deobfuscated = deobfuscated[
self.P1_START:self.P1_END] + nswap + deobfuscated[self.BLOCK1_END:]
# Byte-swap each byte pair in block 1
bswap = ''
i = self.BLOCK1_START
while i < self.BLOCK1_END:
bswap += deobfuscated[i+1] + deobfuscated[i]
bswap += deobfuscated[i + 1] + deobfuscated[i]
i += 2
deobfuscated = deobfuscated[self.P1_START:self.P1_END] + bswap + deobfuscated[self.BLOCK1_END:]
deobfuscated = deobfuscated[
self.P1_START:self.P1_END] + bswap + deobfuscated[self.BLOCK1_END:]
if deobfuscated:
out = binwalk.core.common.BlockFile((os.path.splitext(fname)[0] + '.deobfuscated'), "wb")
out = binwalk.core.common.BlockFile(
(os.path.splitext(fname)[0] + '.deobfuscated'), "wb")
out.write(deobfuscated)
out.close()
return True
else:
return False
#import binwalk.core.C
# import binwalk.core.C
import binwalk.core.plugin
#from binwalk.core.common import *
# from binwalk.core.common import *
class CompressdPlugin(binwalk.core.plugin.Plugin):
# '''
......@@ -9,25 +10,25 @@ class CompressdPlugin(binwalk.core.plugin.Plugin):
MODULES = ['Signature']
#READ_SIZE = 64
# READ_SIZE = 64
#COMPRESS42 = "compress42"
#COMPRESS42_FUNCTIONS = [
# COMPRESS42 = "compress42"
# COMPRESS42_FUNCTIONS = [
# binwalk.core.C.Function(name="is_compressed", type=bool),
#]
#comp = None
# comp = None
#def init(self):
#self.comp = binwalk.core.C.Library(self.COMPRESS42, self.COMPRESS42_FUNCTIONS)
# def init(self):
# self.comp = binwalk.core.C.Library(self.COMPRESS42, self.COMPRESS42_FUNCTIONS)
# This plugin is currently disabled due to the need to move away from supporting C
# libraries and into a pure Python project, for cross-platform support and ease of
# installation / package maintenance. A Python implementation will likely need to
# be custom developed in the future, but for now, since this compression format is
# not very common, especially in firmware, simply disable it.
#self.comp = None
# self.comp = None
#def scan(self, result):
# def scan(self, result):
# if self.comp and result.file and result.description.lower().startswith("compress'd data"):
# fd = self.module.config.open_file(result.file.name, offset=result.offset, length=self.READ_SIZE)
# compressed_data = fd.read(self.READ_SIZE)
......@@ -35,5 +36,3 @@ class CompressdPlugin(binwalk.core.plugin.Plugin):
# if not self.comp.is_compressed(compressed_data, len(compressed_data)):
# result.valid = False
......@@ -2,7 +2,9 @@ import os
import subprocess
import binwalk.core.plugin
class CPIOPlugin(binwalk.core.plugin.Plugin):
'''
Ensures that ASCII CPIO archive entries only get extracted once.
Also provides an internal CPIO extraction wrapper around the Unix
......@@ -40,7 +42,8 @@ class CPIOPlugin(binwalk.core.plugin.Plugin):
return
try:
result = subprocess.call(['cpio', '-d', '-i', '--no-absolute-filenames'],
result = subprocess.call(
['cpio', '-d', '-i', '--no-absolute-filenames'],
stdin=fpin,
stderr=fperr,
stdout=fperr)
......@@ -70,7 +73,8 @@ class CPIOPlugin(binwalk.core.plugin.Plugin):
def _get_file_name_length(self, description):
length = 0
if 'file name length: "' in description:
length_string = description.split('file name length: "')[1].split('"')[0]
length_string = description.split(
'file name length: "')[1].split('"')[0]
length = int(length_string, 0)
return length
......@@ -78,12 +82,14 @@ class CPIOPlugin(binwalk.core.plugin.Plugin):
if result.valid:
# ASCII CPIO archives consist of multiple entries, ending with an entry named 'TRAILER!!!'.
# Displaying each entry is useful, as it shows what files are contained in the archive,
# but we only want to extract the archive when the first entry is found.
# but we only want to extract the archive when the first entry is
# found.
if result.description.startswith('ASCII cpio archive'):
# Validate the reported name length
file_name = self._get_file_name(result.description)
file_name_length = self._get_file_name_length(result.description)
file_name_length = self._get_file_name_length(
result.description)
if len(file_name) != file_name_length:
result.valid = False
return
......@@ -91,7 +97,8 @@ class CPIOPlugin(binwalk.core.plugin.Plugin):
self.consecutive_hits += 1
if not self.found_archive or self.found_archive_in_file != result.file.name:
# This is the first entry. Set found_archive and allow the scan to continue normally.
# This is the first entry. Set found_archive and allow the
# scan to continue normally.
self.found_archive_in_file = result.file.name
self.found_archive = True
result.extract = True
......@@ -113,5 +120,6 @@ class CPIOPlugin(binwalk.core.plugin.Plugin):
self.consecutive_hits = 0
elif self.consecutive_hits >= 4:
# Ignore other stuff until the end of CPIO is found
# TODO: It would be better to jump to the end of this CPIO entry rather than make this assumption...
# TODO: It would be better to jump to the end of this CPIO
# entry rather than make this assumption...
result.valid = False
......@@ -7,21 +7,23 @@ try:
except ImportError as e:
pass
class RomFSCommon(object):
def _read_next_word(self):
value = struct.unpack("%sL" % self.endianess, self.data[self.index:self.index+4])[0]
value = struct.unpack(
"%sL" % self.endianess, self.data[self.index:self.index + 4])[0]
self.index += 4
return value
def _read_next_uid(self):
uid = int(self.data[self.index:self.index+4])
uid = int(self.data[self.index:self.index + 4])
self.index += 4
return uid
def _read_next_block(self, size):
size = int(size)
data = self.data[self.index:self.index+size]
data = self.data[self.index:self.index + size]
self.index += size
return data
......@@ -41,6 +43,7 @@ class RomFSCommon(object):
self.index += 1
return data
class RomFSEntry(RomFSCommon):
DIR_STRUCT_MASK = 0x00000001
......@@ -61,6 +64,7 @@ class RomFSEntry(RomFSCommon):
self.unknown5 = self._read_next_word()
self.uid = self._read_next_uid()
class RomFSDirStruct(RomFSCommon):
SIZE = 0x20
......@@ -94,17 +98,20 @@ class RomFSDirStruct(RomFSCommon):
if count == 0:
mod = self.SIZE - total_size
else:
mod = self.SIZE - int(total_size - (count*self.SIZE))
mod = self.SIZE - int(total_size - (count * self.SIZE))
if mod > 0:
remainder = self._read_next_block(mod)
yield (uid, entry)
class FileContainer(object):
def __init__(self):
pass
class RomFS(object):
SUPERBLOCK_SIZE = 0x20
......@@ -145,7 +152,8 @@ class RomFS(object):
while True:
try:
entry = RomFSEntry(self.data[offset:offset+self.FILE_ENTRY_SIZE], endianess=self.endianess)
entry = RomFSEntry(
self.data[offset:offset + self.FILE_ENTRY_SIZE], endianess=self.endianess)
except ValueError as e:
break
......@@ -160,7 +168,8 @@ class RomFS(object):
if entry.type & entry.DIR_STRUCT_MASK:
entries[entry.uid].type = "directory"
ds = RomFSDirStruct(self.data[entry.offset:entry.offset+entry.size], endianess=self.endianess)
ds = RomFSDirStruct(
self.data[entry.offset:entry.offset + entry.size], endianess=self.endianess)
for (uid, name) in ds.ls:
if not uid in entries:
entries[uid] = FileContainer()
......@@ -184,7 +193,9 @@ if __name__ == '__main__':
print ("Usage: %s <input file> <output directory>" % sys.argv[0])
sys.exit(1)
class DlinkROMFSExtractPlugin(binwalk.core.plugin.Plugin):
'''
Gzip extractor plugin.
'''
......@@ -193,7 +204,8 @@ class DlinkROMFSExtractPlugin(binwalk.core.plugin.Plugin):
def init(self):
# If the extractor is enabled for the module we're currently loaded
# into, then register self.extractor as a D-Link ROMFS file system extraction rule.
# into, then register self.extractor as a D-Link ROMFS file system
# extraction rule.
if self.module.extractor.enabled:
self.module.extractor.add_rule(txtrule=None,
regex="^d-link romfs filesystem",
......
......@@ -2,7 +2,9 @@ import os
import gzip
import binwalk.core.plugin
class GzipExtractPlugin(binwalk.core.plugin.Plugin):
'''
Gzip extractor plugin.
'''
......
......@@ -3,7 +3,9 @@ import binwalk.core.compat
import binwalk.core.plugin
from binwalk.core.common import BlockFile
class GzipValidPlugin(binwalk.core.plugin.Plugin):
'''
Validates gzip compressed data. Almost identical to zlibvalid.py.
'''
......@@ -15,7 +17,8 @@ class GzipValidPlugin(binwalk.core.plugin.Plugin):
# If this result is a gzip signature match, try to decompress the data
if result.file and result.description.lower().startswith('gzip'):
# Seek to and read the suspected gzip data
fd = self.module.config.open_file(result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE)
fd = self.module.config.open_file(
result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE)
data = fd.read(self.MAX_DATA_SIZE)
fd.close()
......@@ -40,8 +43,7 @@ class GzipValidPlugin(binwalk.core.plugin.Plugin):
except zlib.error as e:
error = str(e)
# Truncated input data results in error -5.
# gzip uses different checksums than zlib, which results in error -3.
# gzip uses different checksums than zlib, which results in
# error -3.
if not error.startswith("Error -5") and not error.startswith("Error -3"):
result.valid = False
......@@ -13,6 +13,7 @@ except ImportError as e:
class HilinkDecryptor(binwalk.core.plugin.Plugin):
'''
Plugin to decrypt, validate, and extract Hilink encrypted firmware.
'''
......@@ -28,12 +29,13 @@ class HilinkDecryptor(binwalk.core.plugin.Plugin):
self.enabled = True
if self.enabled is True and self.module.extractor.enabled is True:
# Add an extraction rule for encrypted Hilink firmware signature results
self.module.extractor.add_rule(regex="^%s" % self.SIGNATURE_DESCRIPTION,
# Add an extraction rule for encrypted Hilink firmware signature
# results
self.module.extractor.add_rule(
regex="^%s" % self.SIGNATURE_DESCRIPTION,
extension="enc",
cmd=self._decrypt_and_extract)
def _decrypt_and_extract(self, fname):
'''
This does the extraction (e.g., it decrypts the image and writes it to a new file on disk).
......@@ -68,25 +70,31 @@ class HilinkDecryptor(binwalk.core.plugin.Plugin):
if self.enabled is True:
if result.valid is True:
if result.description.lower().startswith(self.SIGNATURE_DESCRIPTION) is True:
# Read in the first 64 bytes of the suspected encrypted uImage header
fd = self.module.config.open_file(result.file.name, offset=result.offset)
encrypted_header_data = binwalk.core.compat.str2bytes(fd.read(64))
# Read in the first 64 bytes of the suspected encrypted
# uImage header
fd = self.module.config.open_file(
result.file.name, offset=result.offset)
encrypted_header_data = binwalk.core.compat.str2bytes(
fd.read(64))
fd.close()
# Decrypt the header
decrypted_header_data = self._hilink_decrypt(encrypted_header_data)
decrypted_header_data = self._hilink_decrypt(
encrypted_header_data)
# Pull out the image size and image name fields from the decrypted uImage header
# and add them to the printed description.
result.size = struct.unpack(b">L", decrypted_header_data[12:16])[0]
result.size = struct.unpack(
b">L", decrypted_header_data[12:16])[0]
result.description += ", size: %d" % (result.size)
# NOTE: The description field should be 32 bytes? Hilink seems to use only 24 bytes for this field,
# even though the header size is still 64 bytes?
result.description += ', image name: "%s"' % binwalk.core.compat.bytes2str(decrypted_header_data[32:56]).strip("\x00")
result.description += ', image name: "%s"' % binwalk.core.compat.bytes2str(
decrypted_header_data[32:56]).strip("\x00")
# Do some basic validation on the decrypted size and image name fields
# Do some basic validation on the decrypted size and image
# name fields
if result.size > (result.file.size - result.offset):
result.valid = False
if not all(c in string.printable for c in result.description):
result.valid = False
......@@ -2,7 +2,9 @@ import struct
import binascii
import binwalk.core.plugin
class JFFS2ValidPlugin(binwalk.core.plugin.Plugin):
'''
Helps validate JFFS2 signature results.
......@@ -23,7 +25,8 @@ class JFFS2ValidPlugin(binwalk.core.plugin.Plugin):
header_crc = struct.unpack("<I", node_header[8:12])[0]
# Calculate the actual CRC
calculated_header_crc = (binascii.crc32(node_header[0:8], -1) ^ -1) & 0xffffffff
calculated_header_crc = (
binascii.crc32(node_header[0:8], -1) ^ -1) & 0xffffffff
# Make sure they match
return (header_crc == calculated_header_crc)
......@@ -32,7 +35,8 @@ class JFFS2ValidPlugin(binwalk.core.plugin.Plugin):
if result.file and result.description.lower().startswith('jffs2 filesystem'):
# Seek to and read the suspected JFFS2 node header
fd = self.module.config.open_file(result.file.name, offset=result.offset)
fd = self.module.config.open_file(
result.file.name, offset=result.offset)
# JFFS2 headers are only 12 bytes in size, but reading larger amounts of
# data from disk speeds up repeated disk access and decreases performance
# hits (disk caching?).
......@@ -43,5 +47,3 @@ class JFFS2ValidPlugin(binwalk.core.plugin.Plugin):
fd.close()
result.valid = self._check_crc(node_header[0:12])
import os
import binwalk.core.plugin
class LZMAExtractPlugin(binwalk.core.plugin.Plugin):
'''
LZMA extractor plugin.
'''
......@@ -11,7 +13,8 @@ class LZMAExtractPlugin(binwalk.core.plugin.Plugin):
try:
# lzma package in Python 2.0 decompress() does not handle multiple
# compressed streams, only first stream is extracted.
# backports.lzma package could be used to keep consistent behaviour.
# backports.lzma package could be used to keep consistent
# behaviour.
try:
import lzma
except ImportError:
......
......@@ -4,7 +4,9 @@ import binwalk.core.plugin
from binwalk.core.compat import *
from binwalk.core.common import BlockFile
class LZMAModPlugin(binwalk.core.plugin.Plugin):
'''
Finds and extracts modified LZMA files commonly found in cable modems.
Based on Bernardo Rodrigues' work: http://w00tsec.blogspot.com/2013/11/unpacking-firmware-images-from-cable.html
......@@ -27,11 +29,14 @@ class LZMAModPlugin(binwalk.core.plugin.Plugin):
# Try extracting the LZMA file without modification first
result = self.module.extractor.execute(self.original_cmd, fname)
# If the external extractor was successul (True) or didn't exist (None), don't do anything.
# If the external extractor was successul (True) or didn't exist
# (None), don't do anything.
if result not in [True, None]:
out_name = os.path.splitext(fname)[0] + '-patched' + os.path.splitext(fname)[1]
out_name = os.path.splitext(fname)[
0] + '-patched' + os.path.splitext(fname)[1]
fp_out = BlockFile(out_name, 'w')
# Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored
# Use self.module.config.open_file here to ensure that other config
# settings (such as byte-swapping) are honored
fp_in = self.module.config.open_file(fname, offset=0, length=0)
fp_in.set_block_size(peek=0)
i = 0
......@@ -51,16 +56,18 @@ class LZMAModPlugin(binwalk.core.plugin.Plugin):
fp_in.close()
fp_out.close()
# Overwrite the original file so that it can be cleaned up if -r was specified
# Overwrite the original file so that it can be cleaned up if -r
# was specified
shutil.move(out_name, fname)
result = self.module.extractor.execute(self.original_cmd, fname)
return result
def scan(self, result):
# The modified cable modem LZMA headers all have valid dictionary sizes and a properties byte of 0x5D.
# The modified cable modem LZMA headers all have valid dictionary sizes
# and a properties byte of 0x5D.
if result.description.lower().startswith(self.SIGNATURE) and "invalid uncompressed size" in result.description:
if "properties: 0x5D" in result.description and "invalid dictionary size" not in result.description:
result.valid = True
result.description = result.description.split("invalid uncompressed size")[0] + "missing uncompressed size"
result.description = result.description.split(
"invalid uncompressed size")[0] + "missing uncompressed size"
......@@ -2,7 +2,9 @@ import binwalk.core.plugin
import binwalk.core.compat
from binwalk.core.common import BlockFile
class LZMAPlugin(binwalk.core.plugin.Plugin):
'''
Validates lzma signature results.
'''
......@@ -29,7 +31,8 @@ class LZMAPlugin(binwalk.core.plugin.Plugin):
valid = True
if self.decompressor is not None:
# The only acceptable exceptions are those indicating that the input data was truncated.
# The only acceptable exceptions are those indicating that the
# input data was truncated.
try:
self.decompressor(binwalk.core.compat.str2bytes(data))
except IOError as e:
......@@ -49,7 +52,8 @@ class LZMAPlugin(binwalk.core.plugin.Plugin):
if result.valid and result.file and result.description.lower().startswith('lzma compressed data'):
# Seek to and read the suspected lzma data
fd = self.module.config.open_file(result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE)
fd = self.module.config.open_file(
result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE)
data = fd.read(self.MAX_DATA_SIZE)
fd.close()
......@@ -59,4 +63,3 @@ class LZMAPlugin(binwalk.core.plugin.Plugin):
data = data[:5] + self.FAKE_LZMA_SIZE + data[5:]
if not self.is_valid_lzma(data):
result.valid = False
......@@ -2,6 +2,7 @@ import time
import math
import binwalk.core.plugin
class TarPlugin(binwalk.core.plugin.Plugin):
MODULES = ['Signature']
......@@ -41,7 +42,8 @@ class TarPlugin(binwalk.core.plugin.Plugin):
if result.description.lower().startswith('posix tar archive'):
is_tar = True
file_offset = result.offset
fd = self.module.config.open_file(result.file.name, offset=result.offset)
fd = self.module.config.open_file(
result.file.name, offset=result.offset)
while is_tar:
# read in the tar header struct
......@@ -49,16 +51,18 @@ class TarPlugin(binwalk.core.plugin.Plugin):
# check to see if we are still in a tarball
if buf[257:262] == 'ustar':
# get size of tarred file convert to blocks (plus 1 to include header)
# get size of tarred file convert to blocks (plus 1 to
# include header)
try:
size = self.nti(buf[124:136])
blocks = math.ceil(size/float(self.TAR_BLOCKSIZE)) + 1
blocks = math.ceil(
size / float(self.TAR_BLOCKSIZE)) + 1
except ValueError as e:
is_tar = False
break
# update file offset for next file in tarball
file_offset += int(self.TAR_BLOCKSIZE*blocks)
file_offset += int(self.TAR_BLOCKSIZE * blocks)
if file_offset >= result.file.size:
# we hit the end of the file
......
......@@ -3,14 +3,16 @@ import binascii
import binwalk.core.plugin
import binwalk.core.compat
class UBIValidPlugin(binwalk.core.plugin.Plugin):
'''
Helps validate UBI erase count signature results.
Checks header CRC and calculates jump value
'''
MODULES = ['Signature']
current_file=None
current_file = None
last_ec_hdr_offset = None
peb_size = None
......@@ -26,15 +28,15 @@ class UBIValidPlugin(binwalk.core.plugin.Plugin):
def _process_result(self, result):
if self.current_file == result.file.name:
result.display=False
result.display = False
else:
# Reset everything in case new file is encountered
self.peb_size=None
self.last_ec_hdr_offset=None
self.peb_size=None
self.peb_size = None
self.last_ec_hdr_offset = None
self.peb_size = None
# Display result and trigger extraction
result.display=True
result.display = True
self.current_file = result.file.name
......@@ -54,7 +56,8 @@ class UBIValidPlugin(binwalk.core.plugin.Plugin):
def scan(self, result):
if result.file and result.description.lower().startswith('ubi erase count header'):
# Seek to and read the suspected UBI erase count header
fd = self.module.config.open_file(result.file.name, offset=result.offset)
fd = self.module.config.open_file(
result.file.name, offset=result.offset)
ec_header = binwalk.core.compat.str2bytes(fd.read(1024))
fd.close()
......
......@@ -4,5 +4,6 @@
import binwalk.core.plugin
class Unjffs2DepreciatedPlugin(binwalk.core.plugin.Plugin):
pass
import binwalk.core.plugin
class ZipHelperPlugin(binwalk.core.plugin.Plugin):
'''
A helper plugin for Zip files to ensure that the Zip archive
extraction rule is only executed once when the first Zip archive
......
......@@ -4,7 +4,9 @@ import binwalk.core.compat
import binwalk.core.common
import binwalk.core.plugin
class ZLIBExtractPlugin(binwalk.core.plugin.Plugin):
'''
Zlib extractor plugin.
'''
......@@ -26,7 +28,8 @@ class ZLIBExtractPlugin(binwalk.core.plugin.Plugin):
fpin = binwalk.core.common.BlockFile(fname)
fpout = binwalk.core.common.BlockFile(outfile, 'w')
plaintext = zlib.decompress(binwalk.core.compat.str2bytes(fpin.read()))
plaintext = zlib.decompress(
binwalk.core.compat.str2bytes(fpin.read()))
fpout.write(plaintext)
fpin.close()
......@@ -37,4 +40,3 @@ class ZLIBExtractPlugin(binwalk.core.plugin.Plugin):
return False
return True
......@@ -3,7 +3,9 @@ import binwalk.core.compat
import binwalk.core.plugin
from binwalk.core.common import BlockFile
class ZlibValidPlugin(binwalk.core.plugin.Plugin):
'''
Validates zlib compressed data.
'''
......@@ -40,4 +42,3 @@ class ZlibValidPlugin(binwalk.core.plugin.Plugin):
# Error -5, incomplete or truncated data input
if not str(e).startswith("Error -5"):
result.valid = False
......@@ -2,6 +2,7 @@ import idc
import idaapi
import binwalk
class binwalk_t(idaapi.plugin_t):
flags = 0
comment = "Scan the current IDB for file signatures"
......@@ -10,8 +11,10 @@ class binwalk_t(idaapi.plugin_t):
wanted_hotkey = ""
def init(self):
self.menu_context_1 = idaapi.add_menu_item("Search/", "binwalk opcodes", "", 0, self.opcode_scan, (None,))
self.menu_context_2 = idaapi.add_menu_item("Search/", "binwalk signatures", "", 0, self.signature_scan, (None,))
self.menu_context_1 = idaapi.add_menu_item(
"Search/", "binwalk opcodes", "", 0, self.opcode_scan, (None,))
self.menu_context_2 = idaapi.add_menu_item(
"Search/", "binwalk signatures", "", 0, self.signature_scan, (None,))
return idaapi.PLUGIN_KEEP
def term(self):
......@@ -28,6 +31,6 @@ class binwalk_t(idaapi.plugin_t):
def opcode_scan(self, arg):
binwalk.scan(idc.GetIdbPath(), opcode=True)
def PLUGIN_ENTRY():
return binwalk_t()
......@@ -9,7 +9,8 @@ for module in binwalk.scan(*sys.argv[1:], signature=True, quiet=True, extract=Tr
for result in module.results:
if module.extractor.output.has_key(result.file.path):
if module.extractor.output[result.file.path].extracted.has_key(result.offset):
print ("Extracted '%s' at offset 0x%X from '%s' to '%s'" % (result.description.split(',')[0],
print (
"Extracted '%s' at offset 0x%X from '%s' to '%s'" % (result.description.split(',')[0],
result.offset,
result.file.path,
str(module.extractor.output[result.file.path].extracted[result.offset])))
......@@ -4,10 +4,12 @@ import sys
import binwalk
try:
# Perform a signature scan against the files specified on the command line and suppress the usual binwalk output.
# Perform a signature scan against the files specified on the command line
# and suppress the usual binwalk output.
for module in binwalk.scan(*sys.argv[1:], signature=True, quiet=True):
print ("%s Results:" % module.name)
for result in module.results:
print ("\t%s 0x%.8X %s [%s]" % (result.file.name, result.offset, result.description, str(result.valid)))
print ("\t%s 0x%.8X %s [%s]" % (
result.file.name, result.offset, result.description, str(result.valid)))
except binwalk.ModuleException as e:
pass
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment