Commit 402b1332 by heffnercj

Initial check-in to git.

parent 4001b759
DESCRIPTION
The binwalk python module can be used by any python script to programmatically perform binwalk scans and
obtain the results of those scans.
The classes, methods and objects in the binwalk modules are documented via pydoc, including examples,
so those interested in using the binwalk module are encouraged to look there. However, several common usage
examples are provided here to help jump-start development efforts.
BASIC SCAN
The following is an example of the simplest scan, and is equivalent to running binwalk on the command line
with no additional arguments:
import pprint
from binwalk import Binwalk
with Binwalk() as bw:
pprint.PrettyPrinter().pprint(bw.scan('firmware.bin'))
The scan() method will return a dictionary of results, and may also be passed a list of files:
from binwalk import Binwalk
with Binwalk() as bw:
for (filename, file_results) in bw.scan(['firmware1.bin', 'firmware2.bin']).iteritems():
print "Results for %s:" % filename
for (offset, results) in file_results:
for result in results:
print offset, result['description']
Alternatively, a callback function may be specified. The callback function is called as soon as a match is found.
It is passed two arguments: the offset at which the match was found, and a list of results dictionaries (one dictionary
per result found at that offset):
from binwalk import Binwalk
def my_callback(offset, results):
print "Found %d results at offset %d:" % (len(results), offset)
for result in results:
print " %s" % result['description']
with Binwalk() as bw:
bw.scan('firmware.bin', callback=my_callback)
ADDING FILTERS
Include and exclude filters may be specified which operate identically to the --include, and --exclude binwalk
command line options:
from binwalk import Binwalk
binwalk = Binwalk()
# Exclusively filters out all signatures except those containing the string 'filesystem' (same as --include)
binwalk.filter.include('filesystem')
# Excludes all results that contain the string 'jffs2' (same as --exclude)
binwalk.filter.exclude('jffs2')
binwalk.scan('firmware')
binwalk.cleanup()
EXTRACTING FILES
Extract rules may be specified which operate identically to the --dd and --extract binwalk command line options.
Extraction is automatically enabled when one or more extraction rules are specified.
To add a custom extract rule, or a list of extract rules (such as with the --dd option):
from binwalk import Binwalk
binwalk = Binwalk()
# Extract results containing the string 'gzip' with a file extension of 'gz' and run the gunzip command
binwalk.extractor.add_rule('gzip:gz:gunzip %e')
# Extract 'gzip' and 'filesystem' results
binwalk.extractor.add_rule(['gzip:gz', 'filesystem:fs'])
binwalk.scan('firmware')
binwalk.cleanup()
To load the default extraction rules from the extract.conf file (such as with the --extract option):
from binwalk import Binwalk
binwalk = Binwalk()
binwalk.extractor.load_defaults()
binwalk.scan('firmware.bin')
binwalk.cleanup()
To enable delayed file extraction (such as with the --delay option):
from binwalk import Binwalk
binwalk = Binwalk()
binwalk.extractor.enable_delayed_extract(True)
binwalk.scan('firmware.bin')
binwalk.cleanup()
To enable file cleanup after extraction (such as with the --rm option):
from binwalk import Binwalk
binwalk = Binwalk()
binwalk.extractor.cleanup_extracted_files(True)
binwalk.scan('firmware.bin')
binwalk.cleanup()
# Top-level build makefile for the bundled C helper libraries (miniz and
# compress). CC/CFLAGS/SONAME/libdir are placeholders substituted in by the
# configure script. NOTE: recipe lines must be indented with a tab character.
export CC=@CC@
export CFLAGS=@CFLAGS@
export SONAME=@SONAME@
export LIBDIR=@libdir@

# Build both sub-libraries from a clean tree.
all: clean
	make -C miniz
	make -C compress

install:
	make -C miniz install
	make -C compress install

.PHONY: clean distclean

clean:
	make -C miniz clean
	make -C compress clean

# distclean also removes the configure-generated cache and Makefile.
distclean:
	make -C miniz distclean
	make -C compress distclean
	rm -rf *.cache config.* Makefile
# Builds libcompress42.so, the Unix compress (LZW) validation helper library.
# CC/CFLAGS/SONAME/LIBDIR are exported by the parent Makefile.
LIBNAME=libcompress42.so

all: clean $(LIBNAME)

# Link the shared library. $(SONAME) is -soname (ELF) or -install_name
# (Darwin), chosen by the configure script.
$(LIBNAME): compress42.o
	$(CC) $(CFLAGS) -shared -Wl,$(SONAME),$(LIBNAME) compress42.o -o $(LIBNAME) $(LDFLAGS)

compress42.o:
	$(CC) $(CFLAGS) compress42.c -c

install:
	install -D -m644 $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(LIBNAME)

.PHONY: clean distclean

clean:
	rm -f *.o

distclean: clean
	rm -f $(LIBNAME)
Unix compress implementation of LZW (from debian source repository).
Used by the compressd plugin to validate potential compress'd candidates.
#!/usr/bin/env python
# Test harness for the libcompress42 shared library: reads the first SIZE
# bytes of the file named on the command line and asks the library whether
# the data looks like Unix compress'd (LZW) data.
# NOTE(review): Python 2 code (print statements).
import sys
import ctypes
import ctypes.util

# Number of bytes passed to is_compressed(); a small prefix is enough to
# check for the compress'd magic/header.
SIZE = 64

try:
    data = open(sys.argv[1], "rb").read(SIZE)
except:
    # No argument given, or the file could not be read: print usage and exit.
    print "Usage: %s <input file>" % sys.argv[0]
    sys.exit(1)

# Locate and load libcompress42 via the standard library search path.
comp = ctypes.cdll.LoadLibrary(ctypes.util.find_library("compress42"))

if comp.is_compressed(data, len(data)):
    print "%s is compress'd." % (sys.argv[1])
else:
    print "%s is not compress'd." % sys.argv[1]
This source diff could not be displayed because it is too large. You can view the blob instead.
# Autoconf input for building the helper shared libraries.
AC_PREREQ([2.65])
AC_INIT()
AC_PROG_CC
AC_LANG(C)
AC_TYPE_SIZE_T
AC_FUNC_MALLOC

# Objects go into shared libraries, so build position-independent code.
CFLAGS="-Wall -fPIC $CFLAGS"

# The linker flag that sets the library's install name differs between
# Mac OS X and ELF platforms. Use the POSIX-portable '=' string comparison;
# '==' is a bash extension and fails under strict POSIX shells (dash, sh).
if test "$(uname)" = "Darwin"
then
	SONAME="-install_name"
else
	SONAME="-soname"
fi
AC_SUBST(SONAME, $SONAME)

AC_CONFIG_FILES([Makefile])
AC_OUTPUT
# Builds libtinfl.so, the miniz-based deflate/inflate validation helper
# library. CC/CFLAGS/SONAME/LIBDIR are exported by the parent Makefile.
LIBNAME=libtinfl.so

all: clean $(LIBNAME)

# Link the shared library. $(SONAME) is -soname (ELF) or -install_name
# (Darwin), chosen by the configure script.
$(LIBNAME): tinfl.o
	$(CC) $(CFLAGS) -shared -Wl,$(SONAME),$(LIBNAME) tinfl.o -o $(LIBNAME) $(LDFLAGS)

tinfl.o:
	$(CC) $(CFLAGS) -c tinfl.c

install:
	install -D -m644 $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(LIBNAME)

.PHONY: clean distclean

clean:
	rm -f *.o

distclean: clean
	rm -f $(LIBNAME)
deflate/inflate implementation library from http://code.google.com/p/miniz.
Used by the zlib plugin to validate potential zlib candidates.
#!/usr/bin/env python
# Test harness for the miniz (tinfl) shared library: inflates a deflated
# file block-by-block and prints the collected data plus its length.
# NOTE(review): Python 2 code; also depends on the project-local
# binwalk.common.BlockFile class.
import sys
import ctypes
import ctypes.util
from binwalk.common import BlockFile

class Foo:
    # Block size handed to inflate_block() per call.
    SIZE = 33*1024

    def __init__(self):
        # Locate and load libtinfl via the standard library search path.
        self.tinfl = ctypes.cdll.LoadLibrary(ctypes.util.find_library("tinfl"))

    def _extractor(self, file_name):
        processed = 0
        inflated_data = ''

        # Read the target file in SIZE-byte blocks.
        fd = BlockFile(file_name, 'rb')
        fd.READ_BLOCK_SIZE = self.SIZE

        while processed < fd.length:
            (data, dlen) = fd.read_block()
            inflated_block = self.tinfl.inflate_block(data, dlen)
            if inflated_block:
                # NOTE(review): only the first 4 bytes of each inflated block
                # are kept, so the "inflated to %d bytes" count below is not
                # the true inflated size -- presumably a quick smoke test;
                # confirm against the tinfl plugin's real usage.
                inflated_data += ctypes.c_char_p(inflated_block).value[0:4]
            else:
                # Library reported no inflatable data; stop.
                break
            processed += dlen

        fd.close()

        print inflated_data
        print "%s inflated to %d bytes" % (file_name, len(inflated_data))

Foo()._extractor(sys.argv[1])
#!/usr/bin/env python
# Test harness for the miniz (tinfl) shared library: reads the first SIZE
# bytes of the file named on the command line and asks the library whether
# the data looks like zlib/deflate compressed data.
# NOTE(review): Python 2 code (print statements).
import sys
import ctypes
import ctypes.util

# Number of bytes passed to is_deflated().
SIZE = 33*1024

try:
    data = open(sys.argv[1], "rb").read(SIZE)
except:
    # No argument given, or the file could not be read: print usage and exit.
    print "Usage: %s <input file>" % sys.argv[0]
    sys.exit(1)

# Locate and load libtinfl via the standard library search path.
tinfl = ctypes.cdll.LoadLibrary(ctypes.util.find_library("tinfl"))

# Third argument: presumably a "check for zlib header" flag -- confirm
# against the tinfl library source.
if tinfl.is_deflated(data, len(data), 1):
    print "%s is zlib compressed." % (sys.argv[1])
else:
    print "%s is not zlib compressed." % sys.argv[1]
This diff is collapsed. Click to expand it.
# Common functions.
import os
import re
def file_size(filename):
    '''
    Obtains the size of a given file.

    @filename - Path to the file.

    Returns the size of the file.

    Raises an exception (with the file name included) if the size cannot
    be determined.
    '''
    # Using open/lseek works on both regular files and block devices
    # (os.stat reports 0 for block devices).
    fd = os.open(filename, os.O_RDONLY)
    try:
        # Seeking to the end returns the absolute end offset, i.e. the size.
        return os.lseek(fd, 0, os.SEEK_END)
    except Exception, e:
        # Re-raise with context so callers know which file failed.
        raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e)))
    finally:
        # Always release the file descriptor, even on failure.
        os.close(fd)
def str2int(string):
    '''
    Attempts to convert string to a base 10 integer; if that fails, then base 16.

    @string - String to convert to an integer.

    Returns the integer value on success.

    Throws an exception if the string cannot be converted into either a base 10 or base 16 integer value.
    '''
    try:
        return int(string)
    except ValueError:
        # Not a valid base 10 value; try base 16 (e.g. "0x100" or "ff").
        # Catch only ValueError so that unrelated errors (e.g. TypeError
        # from a non-string argument) propagate instead of being masked
        # by the previous bare except.
        return int(string, 16)
def strip_quoted_strings(string):
    '''
    Strips out data in between double quotes.

    @string - String to strip.

    Returns a sanitized string.
    '''
    # Remove everything between the first and the last double quote in the
    # string. The greedy match is deliberate: quoted strings printed from a
    # target file may themselves contain double quotes, which should be
    # ignored. The trade-off is that any text sitting between two separate
    # quoted strings (ex: '"quote 1" you won't see me "quote 2"') is
    # stripped as well.
    quoted_span = re.compile(r'\"(.*)\"')
    return quoted_span.sub("", string)
def get_quoted_strings(string):
    '''
    Returns a string comprised of all data in between double quotes.

    @string - String to get quoted data from.

    Returns a string of quoted data on success.
    Returns a blank string if no quoted data is present.
    '''
    # This regex grabs everything in between the first and last double quote.
    # This is intentional, as printed (and quoted) strings from a target file
    # may contain double quotes, and this function should ignore those.
    # However, it also means that any data between two quoted strings
    # (ex: '"quote 1" non-quoted data "quote 2"') will also be included.
    try:
        return re.findall(r'\"(.*)\"', string)[0]
    except IndexError:
        # No quoted data found. Catch only IndexError (empty findall result)
        # rather than a bare except, so genuine errors are not swallowed.
        return ''
def unique_file_name(base_name, extension=''):
    '''
    Creates a unique file name based on the specified base name.

    @base_name - The base name to use for the unique file name.
    @extension - The file extension to use for the unique file name.

    Returns a unique file string.
    '''
    # Normalize the extension so it always begins with a dot.
    if extension and not extension.startswith('.'):
        extension = '.%s' % extension

    # Try the plain name first, then append an incrementing numeric
    # suffix until a name is found that does not exist on disk.
    candidate = base_name + extension
    suffix = 0
    while os.path.exists(candidate):
        candidate = "%s-%d%s" % (base_name, suffix, extension)
        suffix += 1

    return candidate
class BlockFile(file):
    '''
    Abstraction class to handle reading data from files in blocks.
    Necessary for large files.

    NOTE(review): subclasses the Python 2 built-in 'file' type, so this
    class is Python 2 only.
    '''

    # The MAX_TRAILING_SIZE limits the amount of data available to a signature.
    # While most headers/signatures are far less than this value, some may reference
    # pointers in the header structure which may point well beyond the header itself.
    # Passing the entire remaining buffer to libmagic is resource intensive and will
    # significantly slow the scan; this value represents a reasonable buffer size to
    # pass to libmagic which will not drastically affect scan time.
    MAX_TRAILING_SIZE = 8 * 1024

    # Max number of bytes to process at one time. This needs to be large enough to
    # limit disk I/O, but small enough to limit the size of processed data blocks.
    READ_BLOCK_SIZE = 1 * 1024 * 1024

    def __init__(self, fname, mode='rb', length=0, offset=0):
        '''
        Class constructor.

        @fname  - Path to the file to be opened.
        @mode   - Mode to open the file in.
        @length - Maximum number of bytes to read from the file via self.block_read().
        @offset - Offset to seek to after opening the file.

        Returns None.
        '''
        # Running total of bytes handed out for processing via read_block()
        self.total_read = 0
        self.offset = offset

        if length:
            self.length = length
        else:
            # No explicit length: scan the whole file. Fall back to 0 if the
            # size cannot be determined.
            try:
                self.length = file_size(fname)
            except:
                self.length = 0

        file.__init__(self, fname, mode)
        # Position the file at the requested starting offset.
        self.seek(self.offset)

    def read_block(self):
        '''
        Reads in a block of data from the target file.

        Returns a tuple of (file block data, block data length).
        '''
        dlen = 0
        data = None

        if self.total_read < self.length:
            # Read in READ_BLOCK_SIZE plus MAX_TRAILING_SIZE bytes, but return a max dlen value
            # of READ_BLOCK_SIZE. This ensures that there is a MAX_TRAILING_SIZE buffer at the
            # end of the returned data in case a signature is found at or near data[dlen].
            data = self.read(self.READ_BLOCK_SIZE + self.MAX_TRAILING_SIZE)

            if data and data is not None:
                # Get the actual length of the read in data
                dlen = len(data)
                # Amount the file position has advanced past the processed
                # portion; rewound below so the next read continues there.
                seek_offset = dlen - self.READ_BLOCK_SIZE

                # If we've read in more data than the scan length, truncate the dlen value
                if (self.total_read + self.READ_BLOCK_SIZE) > self.length:
                    dlen = self.length - self.total_read
                # If dlen is the expected rlen size, it should be set to READ_BLOCK_SIZE
                elif dlen == (self.READ_BLOCK_SIZE + self.MAX_TRAILING_SIZE):
                    dlen = self.READ_BLOCK_SIZE

                # Increment self.total_read to reflect the amount of data that has been read
                # for processing (actual read size is larger of course, due to the MAX_TRAILING_SIZE
                # buffer of data at the end of each block).
                self.total_read += dlen

                # Seek to the self.total_read offset so the next read can pick up where this one left off.
                if seek_offset > 0:
                    self.seek(self.tell() - seek_offset)

        return (data, dlen)
#!/usr/bin/env python
# Routines to perform Monte Carlo Pi approximation and Chi Squared tests.
# Used for fingerprinting unknown areas of high entropy (e.g., is this block of high entropy data compressed or encrypted?).
# Inspired by people who actually know what they're doing: http://www.fourmilab.ch/random/
import math
import common
class MonteCarloPi(object):
    '''
    Performs a Monte Carlo Pi approximation.
    Currently unused.
    '''

    def __init__(self):
        '''
        Class constructor.

        Returns None.
        '''
        self.reset()

    def reset(self):
        '''
        Reset state to the beginning.
        '''
        # Current approximation of pi and its percent deviation from math.pi
        self.pi = 0
        self.error = 0
        # m = samples that fell inside the circle, n = total samples
        self.m = 0
        self.n = 0

    def update(self, data):
        '''
        Update the pi approximation with new data.

        @data - A string of bytes to update (length must be >= 6).

        Returns None.
        '''
        c = 0
        dlen = len(data)

        # Consume the data six bytes at a time: 3 bytes form an x coordinate,
        # the next 3 bytes a y coordinate. The '<=' bound (previously '<')
        # ensures the final complete 6-byte sample is not dropped; with '<',
        # an input of exactly 6 bytes produced no samples at all.
        while (c + 6) <= dlen:
            # Our box is 1x1, so divide by 2^24 to put the x y values inside the box.
            x = ((ord(data[c]) << 16) + (ord(data[c+1]) << 8) + ord(data[c+2])) / 16777216.0
            c += 3
            y = ((ord(data[c]) << 16) + (ord(data[c+1]) << 8) + ord(data[c+2])) / 16777216.0
            c += 3

            # Does the x,y point lie inside the circle inscribed within our box, with diameter == 1?
            if ((x**2) + (y**2)) <= 1:
                self.m += 1
            self.n += 1

    def montecarlo(self):
        '''
        Approximates the value of Pi based on the provided data.

        Returns a tuple of (approximated value of pi, percent deviation).
        '''
        # pi ~= 4 * (points inside circle) / (total points)
        if self.n:
            self.pi = (float(self.m) / float(self.n) * 4.0)

        if self.pi:
            self.error = math.fabs(1.0 - (math.pi / self.pi)) * 100.0
            return (self.pi, self.error)
        else:
            return (0.0, 0.0)
class ChiSquare(object):
    '''
    Performs a Chi Squared test against the provided data.
    '''

    # Number of distinct byte values (and bins) in the distribution.
    IDEAL = 256.0

    def __init__(self):
        '''
        Class constructor.

        Returns None.
        '''
        self.bytes = {}
        # Degrees of freedom for a 256-bin distribution
        self.freedom = self.IDEAL - 1

        # Initialize the self.bytes dictionary with keys for all possible byte values (0 - 255)
        for i in range(0, int(self.IDEAL)):
            self.bytes[chr(i)] = 0

        self.reset()

    def reset(self):
        '''
        Clears the accumulated byte counts and the last computed critical value.
        '''
        self.xc2 = 0.0
        self.byte_count = 0

        for key in self.bytes.keys():
            self.bytes[key] = 0

    def update(self, data):
        '''
        Updates the current byte counts with new data.

        @data - String of bytes to update.

        Returns None.
        '''
        # Count the number of occurrences of each byte value
        for i in data:
            self.bytes[i] += 1

        self.byte_count += len(data)

    def chisq(self):
        '''
        Calculate the Chi Square critical value.

        Returns the critical value.
        '''
        # Recompute the critical value from scratch each call so chisq() is
        # idempotent. Previously the sum was accumulated onto self.xc2
        # across calls, so calling chisq() twice without an intervening
        # reset() silently doubled the result.
        expected = self.byte_count / self.IDEAL
        xc2 = 0.0

        if expected:
            for byte in self.bytes.values():
                xc2 += ((byte - expected) ** 2) / expected

        self.xc2 = xc2
        return self.xc2
class CompressionEntropyAnalyzer(object):
    '''
    Class wrapper around ChiSquare.
    Performs analysis and attempts to interpret the results.
    '''

    # Number of bytes in each chi-square analysis slice
    BLOCK_SIZE = 32
    # Chi-square critical values at or above this count as a deviation
    CHI_CUTOFF = 512

    DESCRIPTION = "Statistical Compression Analysis"

    def __init__(self, fname, start, length, binwalk=None):
        '''
        Class constructor.

        @fname   - The file to scan.
        @start   - The start offset to begin analysis at.
        @length  - The number of bytes to analyze.
        @binwalk - Binwalk class object.

        Returns None.
        '''
        self.fp = common.BlockFile(fname, 'rb', offset=start, length=length)

        # Read block size must be at least as large as our analysis block size
        if self.fp.READ_BLOCK_SIZE < self.BLOCK_SIZE:
            self.fp.READ_BLOCK_SIZE = self.BLOCK_SIZE

        self.start = start
        self.length = length
        self.binwalk = binwalk

    def __del__(self):
        # Best-effort close; the file may already be closed or may never
        # have been opened if __init__ raised.
        try:
            self.fp.close()
        except:
            pass

    def analyze(self):
        '''
        Perform analysis and interpretation.

        Returns a descriptive string containing the results and attempted interpretation.
        '''
        i = 0
        num_error = 0
        analyzer_results = []

        if self.binwalk:
            self.binwalk.display.header(file_name=self.fp.name, description=self.DESCRIPTION)

        chi = ChiSquare()

        while i < self.length:
            j = 0
            (d, dlen) = self.fp.read_block()

            # Chi-square each BLOCK_SIZE-byte slice of the block independently.
            while j < dlen:
                chi.reset()

                data = d[j:j+self.BLOCK_SIZE]
                if len(data) < self.BLOCK_SIZE:
                    # Ignore the trailing partial slice.
                    break

                chi.update(data)

                # Count slices whose distribution deviates strongly from uniform.
                if chi.chisq() >= self.CHI_CUTOFF:
                    num_error += 1

                j += self.BLOCK_SIZE

            i += dlen

        # Heuristic interpretation: compressed data still shows occasional
        # statistical deviation; encrypted data should show essentially none.
        if num_error > 0:
            verdict = 'Moderate entropy data, best guess: compressed'
        else:
            verdict = 'High entropy data, best guess: encrypted'

        result = [{'offset' : self.start, 'description' : '%s, size: %d, %d low entropy blocks' % (verdict, self.length, num_error)}]

        if self.binwalk:
            self.binwalk.display.results(self.start, result)
            self.binwalk.display.footer()

        return result
import os
import common
class Config:
    '''
    Binwalk configuration class, used for accessing user and system file paths.

    After instantiating the class, file paths can be accessed via the self.paths dictionary.
    System file paths are listed under the 'system' key, user file paths under the 'user' key.

    For example, to get the path to both the user and system binwalk magic files:

        from binwalk import Config

        conf = Config()
        user_binwalk_file = conf.paths['user'][conf.BINWALK_MAGIC_FILE]
        system_binwalk_file = conf.paths['system'][conf.BINWALK_MAGIC_FILE]

    There is also an instance of this class available via the Binwalk.config object:

        import binwalk

        bw = binwalk.Binwalk()
        user_binwalk_file = bw.config.paths['user'][conf.BINWALK_MAGIC_FILE]
        system_binwalk_file = bw.config.paths['system'][conf.BINWALK_MAGIC_FILE]

    Valid file names under both the 'user' and 'system' keys are as follows:

        o BINWALK_MAGIC_FILE - Path to the default binwalk magic file.
        o BINCAST_MAGIC_FILE - Path to the bincast magic file (used when -C is specified with the command line binwalk script).
        o BINARCH_MAGIC_FILE - Path to the binarch magic file (used when -A is specified with the command line binwalk script).
        o EXTRACT_FILE       - Path to the extract configuration file (used when -e is specified with the command line binwalk script).
        o PLUGINS            - Path to the plugins directory.
    '''
    # Release version
    VERSION = "1.2.3"

    # Sub directories
    BINWALK_USER_DIR = ".binwalk"
    BINWALK_MAGIC_DIR = "magic"
    BINWALK_CONFIG_DIR = "config"
    BINWALK_PLUGINS_DIR = "plugins"

    # File names
    PLUGINS = "plugins"
    EXTRACT_FILE = "extract.conf"
    BINWALK_MAGIC_FILE = "binwalk"
    BINCAST_MAGIC_FILE = "bincast"
    BINARCH_MAGIC_FILE = "binarch"

    def __init__(self):
        '''
        Class constructor. Enumerates file paths and populates self.paths.
        '''
        # Path to the user binwalk directory
        self.user_dir = self._get_user_dir()
        # Path to the system wide binwalk directory
        self.system_dir = self._get_system_dir()

        # Dictionary of all absolute user/system file paths
        self.paths = {
            'user'   : {},
            'system' : {},
        }

        # Build the paths to all user-specific files
        self.paths['user'][self.BINWALK_MAGIC_FILE] = self._user_path(self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE)
        self.paths['user'][self.BINCAST_MAGIC_FILE] = self._user_path(self.BINWALK_MAGIC_DIR, self.BINCAST_MAGIC_FILE)
        self.paths['user'][self.BINARCH_MAGIC_FILE] = self._user_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
        self.paths['user'][self.EXTRACT_FILE] = self._user_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE)
        self.paths['user'][self.PLUGINS] = self._user_path(self.BINWALK_PLUGINS_DIR)

        # Build the paths to all system-wide files
        self.paths['system'][self.BINWALK_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE)
        self.paths['system'][self.BINCAST_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINCAST_MAGIC_FILE)
        self.paths['system'][self.BINARCH_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
        self.paths['system'][self.EXTRACT_FILE] = self._system_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE)
        self.paths['system'][self.PLUGINS] = self._system_path(self.BINWALK_PLUGINS_DIR)

    def find_magic_file(self, fname, system_only=False, user_only=False):
        '''
        Finds the specified magic file name in the system / user magic file directories.

        @fname       - The name of the magic file.
        @system_only - If True, only the system magic file directory will be searched.
        @user_only   - If True, only the user magic file directory will be searched.

        If system_only and user_only are not set, the user directory is always searched first.

        Returns the path to the file on success; returns None on failure.
        '''
        loc = None

        if not system_only:
            fpath = self._user_path(self.BINWALK_MAGIC_DIR, fname)
            if os.path.exists(fpath) and common.file_size(fpath) > 0:
                loc = fpath

        if loc is None and not user_only:
            fpath = self._system_path(self.BINWALK_MAGIC_DIR, fname)
            if os.path.exists(fpath) and common.file_size(fpath) > 0:
                loc = fpath

        # BUGFIX: return the located path (or None), not the last candidate
        # examined. The previous 'return fpath' returned a non-existent
        # system path when no magic file was found, contradicting the
        # documented "returns None on failure" contract.
        return loc

    def _get_system_dir(self):
        '''
        Find the directory where the binwalk module is installed on the system.
        '''
        try:
            # Resolve symlinks so the real install directory is returned.
            root = __file__
            if os.path.islink(root):
                root = os.path.realpath(root)
            return os.path.dirname(os.path.abspath(root))
        except:
            return ''

    def _get_user_dir(self):
        '''
        Get the user's home directory.
        '''
        try:
            # This should work in both Windows and Unix environments
            return os.getenv('USERPROFILE') or os.getenv('HOME')
        except:
            return ''

    def _file_path(self, dirname, filename):
        '''
        Builds an absolute path and creates the directory and file if they don't already exist.

        @dirname  - Directory path.
        @filename - File name.

        Returns a full path of 'dirname/filename'.
        '''
        # Best-effort creation; failures (e.g. read-only locations) are
        # ignored and the path is returned regardless.
        if not os.path.exists(dirname):
            try:
                os.makedirs(dirname)
            except:
                pass

        fpath = os.path.join(dirname, filename)

        if not os.path.exists(fpath):
            try:
                open(fpath, "w").close()
            except:
                pass

        return fpath

    def _user_path(self, subdir, basename=''):
        '''
        Gets the full path to the 'subdir/basename' file in the user binwalk directory.

        @subdir   - Subdirectory inside the user binwalk directory.
        @basename - File name inside the subdirectory.

        Returns the full path to the 'subdir/basename' file.
        '''
        return self._file_path(os.path.join(self.user_dir, self.BINWALK_USER_DIR, subdir), basename)

    def _system_path(self, subdir, basename=''):
        '''
        Gets the full path to the 'subdir/basename' file in the system binwalk directory.

        @subdir   - Subdirectory inside the system binwalk directory.
        @basename - File name inside the subdirectory.

        Returns the full path to the 'subdir/basename' file.
        '''
        return self._file_path(os.path.join(self.system_dir, subdir), basename)
#################################################################################################################
# Default extract rules loaded when --extract is specified.
#
# <case-insensitive unique string from binwalk output text>:<desired file extension>:<command to execute>
#
# Note that %e is a place holder for the extracted file name.
#################################################################################################################
# Assumes these utilities are installed in $PATH.
^gzip compressed data:gz:gzip -d -f '%e'
^lzma compressed data:7z:7zr e -y '%e'
^bzip2 compressed data:bz2:bzip2 -d -f '%e'
^compress'd data:Z:compress -d '%e'
^zip archive data:zip:jar xf '%e' # jar does a better job of unzipping than unzip does...
^posix tar archive:tar:tar xvf '%e'
^rar archive data:rar:unrar e '%e'
^arj archive data.*comment header:arj:arj e '%e'
^iso 9660:iso:7z x '%e' -oiso-root
# These assume the firmware-mod-kit is installed to /opt/firmware-mod-kit.
# If not, change the file paths appropriately.
^squashfs filesystem:squashfs:/opt/firmware-mod-kit/unsquashfs_all.sh '%e'
^jffs2 filesystem:jffs2:/opt/firmware-mod-kit/src/jffs2/unjffs2 '%e'
^ascii cpio archive:cpio:/opt/firmware-mod-kit/uncpio.sh '%e'
^cramfs filesystem:cramfs:/opt/firmware-mod-kit/uncramfs_all.sh '%e'
^bff volume entry:bff:/opt/firmware-mod-kit/src/bff/bffxtractor.py '%e'
^wdk file system:wdk:/opt/firmware-mod-kit/src/firmware-tools/unwdk.py '%e'
^zlib header:zlib:/opt/firmware-mod-kit/src/firmware-tools/unzlib.py '%e'
^ext2 filesystem:ext2:/opt/firmware-mod-kit/src/mountcp/mountcp '%e' ext2-root
^romfs filesystem:romfs:/opt/firmware-mod-kit/src/mountcp/mountcp '%e' romfs-root
# These paths are for the deprecated firmware-mod-kit file paths, which included the 'trunk' directory.
# These will only be run if the above file paths don't exist.
^squashfs filesystem:squashfs:/opt/firmware-mod-kit/trunk/unsquashfs_all.sh '%e'
^jffs2 filesystem:jffs2:/opt/firmware-mod-kit/trunk/src/jffs2/unjffs2 '%e' # requires root
^ascii cpio archive:cpio:/opt/firmware-mod-kit/trunk/uncpio.sh '%e'
^cramfs filesystem:cramfs:/opt/firmware-mod-kit/trunk/uncramfs_all.sh '%e'
^bff volume entry:bff:/opt/firmware-mod-kit/trunk/src/bff/bffxtractor.py '%e'
# If FMK isn't installed, try the system's unsquashfs for SquashFS files
^squashfs filesystem:squashfs:unsquashfs '%e'
# Extract, but don't run anything
private key:key
certificate:crt
html document header:html
xml document:xml
import re
import common
from smartsignature import SmartSignature
class MagicFilter:
    '''
    Class to filter libmagic results based on include/exclude rules and false positive detection.
    An instance of this class is available via the Binwalk.filter object.
    Note that all filter strings should be in lower case.

    Example code which creates include, exclude, and grep filters before running a binwalk scan:

        import binwalk

        bw = binwalk.Binwalk()

        # Include all signatures whose descriptions contain the string 'filesystem' in the first line of the signature, even if those signatures are normally excluded.
        # Note that if exclusive=False was specified, this would merely add these signatures to the default signatures.
        # Since exclusive=True (the default) has been specified, ONLY those matching signatures will be loaded; all others will be ignored.
        bw.filter.include('filesystem')

        # Exclude all signatures whose descriptions contain the string 'jffs2', even if those signatures are normally included.
        # In this case, we are now searching for all filesystem signatures, except JFFS2.
        bw.filter.exclude('jffs2')

        # Add a grep filter. Unlike the include and exclude filters, it does not affect which results are returned by Binwalk.scan(), but it does affect which results
        # are printed by Binwalk.display.results(). This is particularly useful for cases like the bincast scan, where multiple lines of results are returned per offset,
        # but you only want certain ones displayed. In this case, only file systems whose description contain the string '2012' will be displayed.
        bw.filter.grep(filters=['2012'])

        bw.scan('firmware.bin')
    '''

    # Results equal to "data", or containing the text 'invalid' or a
    # backslash, are known to be invalid/false positives.
    DATA_RESULT = "data"
    INVALID_RESULTS = ["invalid", "\\"]
    INVALID_RESULT = "invalid"
    NON_PRINTABLE_RESULT = "\\"

    # Filter type identifiers stored under each filter dict's 'type' key
    FILTER_INCLUDE = 0
    FILTER_EXCLUDE = 1

    def __init__(self, show_invalid_results=False):
        '''
        Class constructor.

        @show_invalid_results - Set to True to display results marked as invalid.

        Returns None.
        '''
        self.filters = []
        self.grep_filters = []
        self.show_invalid_results = show_invalid_results
        self.exclusive_filter = False
        self.smart = SmartSignature(self)

    def include(self, match, exclusive=True):
        '''
        Adds a new filter which explicitly includes results that contain
        the specified matching text.

        @match     - Regex, or list of regexs, to match.
        @exclusive - If True, then results that do not explicitly contain
                     a FILTER_INCLUDE match will be excluded. If False,
                     signatures that contain the FILTER_INCLUDE match will
                     be included in the scan, but will not cause non-matching
                     results to be excluded.

        Returns None.
        '''
        if not isinstance(match, list):
            matches = [match]
        else:
            matches = match

        for m in matches:
            if m:
                # The first exclusive include filter switches the whole
                # filter set into exclusive mode (see self.filter()).
                if exclusive and not self.exclusive_filter:
                    self.exclusive_filter = True

                self.filters.append({
                    'type'   : self.FILTER_INCLUDE,
                    'filter' : m,
                    'regex'  : re.compile(m),
                })

    def exclude(self, match):
        '''
        Adds a new filter which explicitly excludes results that contain
        the specified matching text.

        @match - Regex, or list of regexs, to match.

        Returns None.
        '''
        if not isinstance(match, list):
            matches = [match]
        else:
            matches = match

        for m in matches:
            if m:
                self.filters.append({
                    'type'   : self.FILTER_EXCLUDE,
                    'filter' : m,
                    'regex'  : re.compile(m),
                })

    def filter(self, data):
        '''
        Checks to see if a given string should be excluded from or included in the results.
        Called internally by Binwalk.scan().

        @data - String to check.

        Returns FILTER_INCLUDE if the string should be included.
        Returns FILTER_EXCLUDE if the string should be excluded.
        '''
        data = data.lower()

        # Loop through the filters to see if any of them are a match.
        # If so, return the registered type for the matching filter (FILTER_INCLUDE | FILTER_EXCLUDE).
        for f in self.filters:
            if f['regex'].search(data):
                return f['type']

        # If there was no explicit match and exclusive filtering is enabled, return FILTER_EXCLUDE.
        if self.exclusive_filter:
            return self.FILTER_EXCLUDE

        return self.FILTER_INCLUDE

    def invalid(self, data):
        '''
        Checks if the given string contains invalid data.
        Called internally by Binwalk.scan().

        @data - String to validate.

        Returns True if data is invalid, False if valid.
        '''
        # A result of 'data' is never ever valid.
        if data == self.DATA_RESULT:
            return True

        # If showing invalid results, just return False.
        if self.show_invalid_results:
            return False

        # Don't include quoted strings or keyword arguments in this search, as
        # strings from the target file may legitimately contain the INVALID_RESULT text.
        if self.INVALID_RESULT in common.strip_quoted_strings(self.smart._strip_tags(data)):
            return True

        # There should be no non-printable characters in any of the data
        if self.NON_PRINTABLE_RESULT in data:
            return True

        return False

    def grep(self, data=None, filters=None):
        '''
        Add or check case-insensitive grep filters against the supplied data string.

        @data    - Data string to check grep filters against. Not required if filters is specified.
        @filters - Regex, or list of regexs, to add to the grep filters list. Not required if data is specified.

        Returns None if data is not specified.
        If data is specified, returns True if the data contains a grep filter, or if no grep filters exist.
        If data is specified, returns False if the data does not contain any grep filters.
        '''
        # Add any specified filters to self.grep_filters.
        # BUGFIX: filters now defaults to None instead of a mutable []
        # default, which would have been shared across calls and instances.
        if filters:
            if not isinstance(filters, list):
                gfilters = [filters]
            else:
                gfilters = filters

            for gfilter in gfilters:
                # Filters are matched against lower-cased data below.
                self.grep_filters.append(re.compile(gfilter))

        # Check the data against all grep filters until one is found
        if data is not None:
            # If no grep filters have been created, always return True
            if not self.grep_filters:
                return True

            # Filters are case insensitive
            data = data.lower()

            # If a filter exists in data, return True
            for gfilter in self.grep_filters:
                if gfilter.search(data):
                    return True

            # Else, return False
            return False

        return None

    def clear(self):
        '''
        Clears all include, exclude and grep filters.

        Returns None.
        '''
        self.filters = []
        self.grep_filters = []
#!/usr/bin/env python
import os
import sys
import string
import curses
import platform
import common
class HexDiff(object):
    # Classification of a byte column across the compared files
    ALL_SAME = 0
    ALL_DIFF = 1
    SOME_DIFF = 2

    # Default number of bytes to diff and bytes per display row
    DEFAULT_DIFF_SIZE = 0x100
    DEFAULT_BLOCK_SIZE = 16

    # ANSI color codes used to highlight the diff classification
    COLORS = {
        'red'   : '31',
        'green' : '32',
        'blue'  : '34',
    }

    def __init__(self, binwalk=None):
        # Accumulates one block's worth of formatted output before printing
        self.block_hex = ""
        # True while the "*" (filtered-run) marker is the last thing printed
        self.printed_alt_text = False

        if binwalk:
            # Route all output through the binwalk display/filter objects
            self._pprint = binwalk.display._pprint
            self._show_header = binwalk.display.header
            self._footer = binwalk.display.footer
            self._display_result = binwalk.display.results
            self._grep = binwalk.filter.grep
        else:
            # Standalone mode: write directly to stdout, no grep filtering.
            # NOTE(review): self._print is not defined in the visible portion
            # of this class -- confirm it exists further down the file.
            self._pprint = sys.stdout.write
            self._show_header = self._print
            self._footer = self._simple_footer
            self._display_result = self._print
            self._grep = None

        # Only emit ANSI colors on an interactive non-Windows terminal.
        # NOTE(review): checks sys.stderr rather than sys.stdout -- confirm intended.
        if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty() and platform.system() != 'Windows':
            curses.setupterm()
            self.colorize = self._colorize
        else:
            self.colorize = self._no_colorize

    def _no_colorize(self, c, color="red", bold=True):
        # Colorless fallback: return the text unmodified.
        return c

    def _colorize(self, c, color="red", bold=True):
        # Wrap c in the ANSI escape sequence for the requested color/weight.
        attr = []

        attr.append(self.COLORS[color])
        if bold:
            attr.append('1')

        return "\x1b[%sm%s\x1b[0m" % (';'.join(attr), c)

    def _print_block_hex(self, alt_text="*"):
        # Flush the accumulated block if it passes the grep filter; otherwise
        # print alt_text once per run of filtered blocks.
        printed = False

        if self._grep is None or self._grep(self.block_hex):
            self._pprint(self.block_hex)
            self.printed_alt_text = False
            printed = True
        elif not self.printed_alt_text:
            self._pprint("%s\n" % alt_text)
            self.printed_alt_text = True
            printed = True

        self.block_hex = ""
        return printed

    def _build_block(self, c, highlight=None):
        # Append c to the pending output, colored by its diff classification.
        if highlight == self.ALL_DIFF:
            self.block_hex += self.colorize(c, color="red")
        elif highlight == self.ALL_SAME:
            self.block_hex += self.colorize(c, color="green")
        elif highlight == self.SOME_DIFF:
            self.block_hex += self.colorize(c, color="blue")
        else:
            self.block_hex += c

    def _simple_footer(self):
        # Standalone-mode footer: just a trailing newline (Python 2 print).
        print ""

    def _header(self, files, block):
        # Build a column header with one file name per compared file, padded
        # to line up with each file's hex+ASCII column width.
        header = "OFFSET "

        for i in range(0, len(files)):
            f = files[i]
            header += "%s" % os.path.basename(f)
            if i != len(files)-1:
                header += " " * ((block*4) + 10 - len(os.path.basename(f)))

        self._show_header(header=header)
def display(self, files, offset=0, size=DEFAULT_DIFF_SIZE, block=DEFAULT_BLOCK_SIZE, show_first_only=False):
i = 0
total = 0
fps = []
data = {}
delim = '/'
if show_first_only:
self._header([files[0]], block)
else:
self._header(files, block)
if common.BlockFile.READ_BLOCK_SIZE < block:
read_block_size = block
else:
read_block_size = common.BlockFile.READ_BLOCK_SIZE
for f in files:
fp = common.BlockFile(f, 'rb', length=size, offset=offset)
fp.READ_BLOCK_SIZE = read_block_size
fp.MAX_TRAILING_SIZE = 0
fps.append(fp)
while total < size:
i = 0
for fp in fps:
(ddata, dlen) = fp.read_block()
data[fp.name] = ddata
while i < read_block_size and (total+i) < size:
diff_same = {}
alt_text = "*" + " " * 6
self._build_block("%.08X " % (total + i + offset))
# For each byte in this block, is the byte the same in all files, the same in some files, or different in all files?
for j in range(0, block):
byte_list = []
try:
c = data[files[0]][j+i]
except:
c = None
for f in files:
try:
c = data[f][j+i]
except Exception, e:
c = None
if c not in byte_list:
byte_list.append(c)
if len(byte_list) == 1:
diff_same[j] = self.ALL_SAME
elif len(byte_list) == len(files):
diff_same[j] = self.ALL_DIFF
else:
diff_same[j] = self.SOME_DIFF
for index in range(0, len(files)):
if show_first_only and index > 0:
break
f = files[index]
alt_text += " " * (3 + (3 * block) + 3 + block + 3)
alt_text += delim
for j in range(0, block):
try:
#print "%s[%d]" % (f, j+i)
self._build_block("%.2X " % ord(data[f][j+i]), highlight=diff_same[j])
except Exception, e:
#print str(e)
self._build_block(" ")
if (j+1) == block:
self._build_block(" |")
for k in range(0, block):
try:
if data[f][k+i] in string.printable and data[f][k+i] not in string.whitespace:
self._build_block(data[f][k+i], highlight=diff_same[k])
else:
self._build_block('.', highlight=diff_same[k])
except:
self._build_block(' ')
if index == len(files)-1 or (show_first_only and index == 0):
self._build_block("|\n")
else:
self._build_block('| %s ' % delim)
if self._print_block_hex(alt_text=alt_text[:-1].strip()):
if delim == '\\':
delim = '/'
else:
delim = '\\'
i += block
total += read_block_size
for fp in fps:
fp.close()
self._footer()
if __name__ == "__main__":
    # Stand-alone usage: hexdiff FILE1 [FILE2 ...]
    # Diffs the given files with all default options.
    HexDiff().display(sys.argv[1:])
# MIPS prologue
# addiu $sp, -XX
# 27 BD FF XX
0 string \377\275\47 MIPSEL instructions, function prologue{offset-adjust:-1}
0 string \47\275\377 MIPS instructions, function prologue
# MIPS epilogue
# jr $ra
0 belong 0x03e00008 MIPS instructions, function epilogue
0 lelong 0x03e00008 MIPSEL instructions, function epilogue
# PowerPC prologue
# mflr r0
0 belong 0x7C0802A6 PowerPC big endian instructions, function prologue
0 lelong 0x7C0802A6 PowerPC little endian instructions, function prologue
# PowerPC epilogue
# blr
0 belong 0x4E800020 PowerPC big endian instructions, function epilogue
0 lelong 0x4E800020 PowerPC little endian instructions, function epilogue
# ARM prologue
# STMFD SP!, {XX}
0 beshort 0xE92D ARMEB instructions, function prologue
0 leshort 0xE92D ARM instructions, function prologue{offset-adjust:-2}
# ARM epilogue
# LDMFD SP!, {XX}
0 beshort 0xE8BD ARMEB instructions, function epilogue
0 leshort 0xE8BD ARM instructions, function epilogue{offset-adjust:-2}
# Ubicom32 prologue
# move.4 -4($sp)++, $ra
0 belong 0x02FF6125 Ubicom32 instructions, function prologue
# Ubicom32 epilogues
# calli $ra, 0($ra)
# ret ($sp)4++
0 belong 0xF0A000A0 Ubicom32 instructions, function epilogue
0 belong 0x000022E1 Ubicom32 instructions, function epilogue
# AVR8 prologue
# push r28
# push r29
0 belong 0x93CF93DF AVR8 instructions, function prologue
0 belong 0x93DF93CF AVR8 instructions, function prologue
# AVR32 prologue
# pushm r7,lr
# mov r7,sp
0 string \xEB\xCD\x40\x80\x1A\x97 AVR32 instructions, function prologue
# SPARC epilogue
# ret
# restore XX
0 string \x81\xC7\xE0\x08\x81\xE8 SPARC instructions, function epilogue
# x86 prologue
# push ebp
# mov ebp, esp
0 string \x55\x89\xE5 Intel x86 instructions, function prologue
0 belong x Hex: 0x%.8X
#0 string x String: %s
0 lequad x Little Endian Quad: %lld
0 bequad x Big Endian Quad: %lld
0 lelong x Little Endian Long: %d
0 belong x Big Endian Long: %d
0 leshort x Little Endian Short: %d
0 beshort x Big Endian Short: %d
0 ledate x Little Endian Date: %s
0 bedate x Big Endian Date: %s
This source diff could not be displayed because it is too large. You can view the blob instead.
0 string \x1f\x9d\x90 compress'd data, 16 bits
#0 beshort 0x7801 Zlib header, no compression
0 beshort 0x789c Zlib header, default compression
0 beshort 0x78da Zlib header, best compression
0 beshort 0x785e Zlib header, compressed
#!/usr/bin/env python
# Routines to perform Monte Carlo Pi approximation and Chi Squared tests.
# Used for fingerprinting unknown areas of high entropy (e.g., is this block of high entropy data compressed or encrypted?).
# Inspired by people who actually know what they're doing: http://www.fourmilab.ch/random/
import math
class MonteCarloPi(object):
    '''
    Performs a Monte Carlo Pi approximation.
    '''

    def __init__(self):
        '''
        Class constructor.

        Returns None.
        '''
        self.reset()

    def reset(self):
        '''
        Reset state to the beginning.
        '''
        self.pi = 0
        self.error = 0
        self.m = 0   # Points that fell inside the inscribed circle.
        self.n = 0   # Total points sampled.

    def update(self, data):
        '''
        Update the pi approximation with new data.

        @data - A string of bytes to update (length must be >= 6).

        Returns None.
        '''
        c = 0
        dlen = len(data)

        # Each sample consumes 6 bytes (3 for x, 3 for y). BUGFIX: use '<='
        # rather than '<' so the final complete 6-byte sample in the buffer
        # is not skipped (the original dropped the last sample of every
        # buffer, and processed nothing at all for a 6-byte input).
        while (c+6) <= dlen:
            # Treat 3 bytes as an x coordinate, the next 3 bytes as a y coordinate.
            # Our box is 1x1, so divide by 2^24 to put the x y values inside the box.
            x = ((ord(data[c]) << 16) + (ord(data[c+1]) << 8) + ord(data[c+2])) / 16777216.0
            c += 3
            y = ((ord(data[c]) << 16) + (ord(data[c+1]) << 8) + ord(data[c+2])) / 16777216.0
            c += 3

            # Does the x,y point lie inside the circle inscribed within our box, with diameter == 1?
            if ((x**2) + (y**2)) <= 1:
                self.m += 1
            self.n += 1

    def montecarlo(self):
        '''
        Approximates the value of Pi based on the provided data.

        Returns a tuple of (approximated value of pi, percent deviation).
        '''
        # Ratio of points inside the quarter circle to total points
        # approximates pi/4.
        if self.n:
            self.pi = (float(self.m) / float(self.n) * 4.0)

        if self.pi:
            self.error = math.fabs(1.0 - (math.pi / self.pi)) * 100.0
            return (self.pi, self.error)
        else:
            return (0.0, 0.0)
class ChiSquare(object):
    '''
    Performs a Chi Squared test against the provided data.
    '''

    # Number of possible byte values; also the denominator for the
    # expected-count calculation.
    IDEAL = 256.0

    def __init__(self):
        '''
        Class constructor.

        Returns None.
        '''
        self.freedom = self.IDEAL - 1
        # One occurrence counter for every possible byte value (0 - 255),
        # keyed by the raw byte character.
        self.bytes = dict((chr(value), 0) for value in range(0, int(self.IDEAL)))
        self.reset()

    def reset(self):
        # Zero the running statistic and every byte counter.
        self.xc2 = 0.0
        self.byte_count = 0

        for key in self.bytes:
            self.bytes[key] = 0

    def update(self, data):
        '''
        Updates the current byte counts with new data.

        @data - String of bytes to update.

        Returns None.
        '''
        # Tally the number of occurrences of each byte value.
        for byte in data:
            self.bytes[byte] += 1

        self.byte_count += len(data)

    def chisq(self):
        '''
        Calculate the Chi Square critical value.

        Returns the critical value.
        '''
        expected = self.byte_count / self.IDEAL

        if expected:
            # Accumulate sum((observed - expected)^2 / expected) over all
            # 256 possible byte values.
            for observed in self.bytes.values():
                self.xc2 += ((observed - expected) ** 2) / expected

        return self.xc2
class MathAnalyzer(object):
    '''
    Class wrapper around ChiSquare and MonteCarloPi.
    Performs analysis and attempts to interpret the results.
    '''

    # Data blocks must be in multiples of 6 for the monte carlo pi approximation
    BLOCK_SIZE = 32
    CHI_CUTOFF = 512

    def __init__(self, fp, start, length):
        '''
        Class constructor.

        @fp     - A seekable, readable, file object that will be the data source.
        @start  - The start offset to begin analysis at.
        @length - The number of bytes to analyze.

        Returns None.
        '''
        self.fp = fp
        self.start = start
        self.length = length

    def analyze(self):
        '''
        Perform analysis and interpretation.

        Returns a descriptive string containing the results and attempted interpretation.
        '''
        processed = 0
        deviant_blocks = 0
        chi = ChiSquare()

        self.fp.seek(self.start)

        # Walk the region in BLOCK_SIZE chunks, running an independent
        # chi-square test against each chunk.
        while processed < self.length:
            chunk_size = min(self.length - processed, self.BLOCK_SIZE)

            chi.reset()
            chi.update(self.fp.read(chunk_size))

            # A large critical value means the chunk deviates heavily from
            # an ideal random distribution.
            if chi.chisq() >= self.CHI_CUTOFF:
                deviant_blocks += 1

            processed += chunk_size

        if deviant_blocks > 0:
            verdict = 'Low/medium entropy data block'
        else:
            verdict = 'High entropy data block'

        return '%s, %d low entropy blocks' % (verdict, deviant_blocks)
if __name__ == "__main__":
import sys
rsize = 0
largest = (0, 0)
num_error = 0
data = open(sys.argv[1], 'rb').read()
try:
block_size = int(sys.argv[2], 0)
except:
block_size = 32
chi = ChiSquare()
while rsize < len(data):
chi.reset()
d = data[rsize:rsize+block_size]
if d < block_size:
break
chi.update(d)
if chi.chisq() >= 512:
sys.stderr.write("0x%X -> %d\n" % (rsize, chi.xc2))
num_error += 1
if chi.xc2 >= largest[1]:
largest = (rsize, chi.xc2)
rsize += block_size
sys.stderr.write("Number of deviations: %d\n" % num_error)
sys.stderr.write("Largest deviation: %d at offset 0x%X\n" % (largest[1], largest[0]))
print "Data:",
if num_error != 0:
print "Compressed"
else:
print "Encrypted"
print "Confidence:",
if num_error >= 5 or num_error == 0:
print "High"
elif num_error in [3,4]:
print "Medium"
else:
print "Low"
import re
import sys
import os.path
import tempfile

from common import str2int
class MagicParser:
    '''
    Class for loading, parsing and creating libmagic-compatible magic files.
    
    This class is primarily used internally by the Binwalk class, and a class instance of it is available via the Binwalk.parser object.

    One useful method however, is file_from_string(), which will generate a temporary magic file from a given signature string:

        import binwalk

        bw = binwalk.Binwalk()

        # Create a temporary magic file that contains a single entry with a signature of '\\x00FOOBAR\\xFF', and append the resulting
        # temporary file name to the list of magic files in the Binwalk class instance.
        bw.magic_files.append(bw.parser.file_from_string('\\x00FOOBAR\\xFF', display_name='My custom signature'))

        bw.scan('firmware.bin')

    All magic files generated by this class will be deleted when the class deconstructor is called.
    '''

    BIG_ENDIAN = 'big'
    LITTLE_ENDIAN = 'little'

    # Template for generating a single-signature magic file entry.
    MAGIC_STRING_FORMAT = "%d\tstring\t%s\t%s\n"
    DEFAULT_DISPLAY_NAME = "Raw string signature"

    # Magic condition value that matches any byte.
    WILDCARD = 'x'

    # If libmagic returns multiple results, they are delimited with this string.
    RESULT_SEPERATOR = "\\012- "

    def __init__(self, filter=None, smart=None):
        '''
        Class constructor.

        @filter - Instance of the MagicFilter class. May be None if the parse/parse_file methods are not used.
        @smart  - Instance of the SmartSignature class. May be None if the parse/parse_file methods are not used.

        Returns None.
        '''
        self.matches = set([])
        self.signatures = {}
        self.filter = filter
        self.smart = smart
        self.raw_fd = None
        self.signature_count = 0
        # Temporary file that accumulates all accepted signature entries.
        self.fd = tempfile.NamedTemporaryFile()

    def __del__(self):
        try:
            self.cleanup()
        except:
            pass

    def rm_magic_file(self):
        '''
        Cleans up the temporary magic file generated by self.parse.

        Returns None.
        '''
        try:
            self.fd.close()
        except:
            pass

    def cleanup(self):
        '''
        Cleans up any tempfiles created by the class instance.

        Returns None.
        '''
        self.rm_magic_file()

        try:
            self.raw_fd.close()
        except:
            pass

    def file_from_string(self, signature_string, offset=0, display_name=DEFAULT_DISPLAY_NAME):
        '''
        Generates a magic file from a signature string.
        This method is intended to be used once per instance.
        If invoked multiple times, any previously created magic files will be closed and deleted.

        @signature_string - The string signature to search for.
        @offset           - The offset at which the signature should occur.
        @display_name     - The text to display when the signature is found.

        Returns the name of the generated temporary magic file.
        '''
        self.raw_fd = tempfile.NamedTemporaryFile()
        self.raw_fd.write(self.MAGIC_STRING_FORMAT % (offset, signature_string, display_name))
        self.raw_fd.seek(0)
        return self.raw_fd.name

    def parse(self, file_name):
        '''
        Parses magic file(s) and concatenates them into a single temporary magic file
        while simultaneously removing filtered signatures.

        @file_name - Magic file, or list of magic files, to parse.

        Returns the name of the generated temporary magic file, which will be automatically
        deleted when the class deconstructor is called.
        '''
        if isinstance(file_name, type([])):
            files = file_name
        else:
            files = [file_name]

        for fname in files:
            if os.path.exists(fname):
                self.parse_file(fname)
            else:
                # Non-fatal: warn and keep parsing the remaining files.
                sys.stdout.write("WARNING: Magic file '%s' does not exist!\n" % fname)

        self.fd.seek(0)
        return self.fd.name

    def parse_file(self, file_name):
        '''
        Parses a magic file and appends valid signatures to the temporary magic file, as allowed
        by the existing filter rules.

        @file_name - Magic file to parse.

        Returns None.
        '''
        # Default to not including signature entries until we've
        # found what looks like a valid entry.
        include = False
        line_count = 0

        try:
            for line in open(file_name).readlines():
                line_count += 1

                # Check if this is the first line of a signature entry
                entry = self._parse_line(line)

                if entry is not None:
                    # If this signature is marked for inclusion, include it.
                    if self.filter.filter(entry['description']) == self.filter.FILTER_INCLUDE:
                        include = True
                        self.signature_count += 1

                        # Group conditions by offset for candidate lookups.
                        if entry['offset'] not in self.signatures:
                            self.signatures[entry['offset']] = []

                        if entry['condition'] not in self.signatures[entry['offset']]:
                            self.signatures[entry['offset']].append(entry['condition'])
                    else:
                        include = False

                # Keep writing lines of the signature to the temporary magic file until
                # we detect a signature that should not be included.
                if include:
                    self.fd.write(line)

            self.build_signature_set()
        except Exception as e:
            raise Exception("Error parsing magic file '%s' on line %d: %s" % (file_name, line_count, str(e)))

    def _parse_line(self, line):
        '''
        Parses a signature line into its four parts (offset, type, condition and description),
        looking for the first line of a given signature.

        @line - The signature line to parse.

        Returns a dictionary with the respective line parts populated if the line is the first of a signature.
        Returns None if the line is not the first of a signature.
        '''
        entry = {
            'offset' : '',
            'type' : '',
            'condition' : '',
            'description' : '',
            'length' : 0
        }

        # Quick and dirty pre-filter. We are only concerned with the first line of a
        # signature, which will always start with a number. Make sure the first byte of
        # the line is a number; if not, don't process.
        if line[:1] < '0' or line[:1] > '9':
            return None

        try:
            # Split the line into white-space separated parts.
            # For this to work properly, replace escaped spaces ('\ ') with '\x20'.
            # This means the same thing, but doesn't confuse split().
            line_parts = line.replace('\\ ', '\\x20').split()
            entry['offset'] = line_parts[0]
            entry['type'] = line_parts[1]
            # The condition line may contain escaped sequences, so be sure to decode it properly.
            entry['condition'] = line_parts[2].decode('string_escape')
            entry['description'] = ' '.join(line_parts[3:])
        except Exception as e:
            # BUGFIX: the original passed the format string and its arguments
            # as two separate Exception args instead of %-formatting them.
            raise Exception("%s :: %s" % (str(e), line))

        # We've already verified that the first character in this line is a number, so this *shouldn't*
        # throw an exception, but let's catch it just in case...
        try:
            entry['offset'] = str2int(entry['offset'])
        except Exception as e:
            raise Exception("%s :: %s" % (str(e), line))

        # If this is a string, get the length of the string
        if 'string' in entry['type'] or entry['condition'] == self.WILDCARD:
            entry['length'] = len(entry['condition'])
        # Else, we need to jump through a few more hoops...
        else:
            # Default to little endian, unless the type field starts with 'be'.
            # This assumes that we're running on a little endian system...
            if entry['type'].startswith('be'):
                endianess = self.BIG_ENDIAN
            else:
                endianess = self.LITTLE_ENDIAN

            # Try to convert the condition to an integer. This does not allow
            # for more advanced conditions for the first line of a signature,
            # but needing that is rare.
            try:
                intval = str2int(entry['condition'].strip('L'))
            except Exception as e:
                raise Exception("Failed to evaluate condition for '%s' type: '%s', condition: '%s', error: %s" % (entry['description'], entry['type'], entry['condition'], str(e)))

            # How long is the field type?
            if entry['type'] == 'byte':
                entry['length'] = 1
            elif 'short' in entry['type']:
                entry['length'] = 2
            elif 'long' in entry['type']:
                entry['length'] = 4
            elif 'quad' in entry['type']:
                entry['length'] = 8

            # Convert the integer value to a string of the appropriate endianess
            entry['condition'] = self._to_string(intval, entry['length'], endianess)

        return entry

    def build_signature_set(self):
        '''
        Builds the set of compiled signature regexes used for candidate scanning.

        Returns a set of compiled regex objects, one per unique signature condition.
        '''
        signature_set = []

        for (offset, sigs) in self.signatures.items():
            for sig in sigs:
                # The wildcard condition matches any single byte.
                if sig == self.WILDCARD:
                    sig = re.compile('.')
                else:
                    sig = re.compile(re.escape(sig))

                signature_set.append(sig)

        self.signature_set = set(signature_set)

        return self.signature_set

    def find_signature_candidates(self, data, end):
        '''
        Finds candidate signatures inside of the data buffer.
        Called internally by Binwalk.single_scan.

        @data - Data to scan for candidate signatures.
        @end  - Don't look for signatures beyond this offset.

        Returns an ordered list of offsets inside of data at which candidate offsets were found.
        '''
        candidate_offsets = []

        for regex in self.signature_set:
            candidate_offsets += [match.start() for match in regex.finditer(data) if match.start() < end]

        # De-duplicate and sort the offsets before returning them.
        candidate_offsets = list(set(candidate_offsets))
        candidate_offsets.sort()

        return candidate_offsets

    def _to_string(self, value, size, endianess):
        '''
        Converts an integer value into a raw string.

        @value     - The integer value to convert.
        @size      - Size, in bytes, of the integer value.
        @endianess - One of self.LITTLE_ENDIAN | self.BIG_ENDIAN.

        Returns a raw string containing value.
        '''
        data = ""

        # Build least-significant byte first, then reverse for big endian.
        for i in range(0, size):
            data += chr((value >> (8*i)) & 0xFF)

        if endianess != self.LITTLE_ENDIAN:
            data = data[::-1]

        return data

    def split(self, data):
        '''
        Splits multiple libmagic results in the data string into a list of separate results.

        @data - Data string returned from libmagic.

        Returns a list of result strings.
        '''
        try:
            return data.split(self.RESULT_SEPERATOR)
        except:
            return []
import os
import sys
import imp
# Valid return values for plugins (bit flags; may be OR'd together).
PLUGIN_CONTINUE = 0x00      # Continue the scan normally.
PLUGIN_NO_EXTRACT = 0x01    # Do not extract data for this result.
PLUGIN_NO_DISPLAY = 0x02    # Do not display or further process this result.
PLUGIN_STOP_PLUGINS = 0x04  # Do not call any remaining plugins.
PLUGIN_TERMINATE = 0x08     # Terminate the scan.
class Plugins:
    '''
    Class to load and call plugin callback functions, handled automatically by Binwalk.scan / Binwalk.single_scan.
    An instance of this class is available during a scan via the Binwalk.plugins object.

    Each plugin must be placed in the user or system plugins directories, and must define a class named 'Plugin'.
    The Plugin class constructor (__init__) is passed one argument, which is the current instance of the Binwalk class.
    The Plugin class constructor is called once prior to scanning a file or set of files.
    The Plugin class destructor (__del__) is called once after scanning all files.

    The Plugin class can define one or all of the following callback methods:

        o pre_scan(self, fd)
          This method is called prior to running a scan against a file. It is passed the file object of
          the file about to be scanned.

        o pre_parser(self, result)
          This method is called every time any result - valid or invalid - is found in the file being scanned.
          It is passed a dictionary with one key ('description'), which contains the raw string returned by libmagic.
          The contents of this dictionary key may be modified as necessary by the plugin.

        o callback(self, results)
          This method is called every time a valid result is found in the file being scanned. It is passed a
          dictionary of results. This dictionary is identical to that passed to Binwalk.single_scan's callback
          function, and its contents may be modified as necessary by the plugin.

        o post_scan(self, fd)
          This method is called after running a scan against a file, but before the file has been closed.
          It is passed the file object of the scanned file.

    Valid return values for all plugin callbacks are (PLUGIN_* values may be OR'd together):

        PLUGIN_CONTINUE     - Do nothing, continue the scan normally.
        PLUGIN_NO_EXTRACT   - Do not perform data extraction.
        PLUGIN_NO_DISPLAY   - Ignore the result(s); they will not be displayed or further processed.
        PLUGIN_STOP_PLUGINS - Do not call any other plugins.
        PLUGIN_TERMINATE    - Terminate the scan.
        None                - The same as PLUGIN_CONTINUE.

    Values returned by pre_scan affect all results during the scan of that particular file.
    Values returned by callback affect only that specific scan result.
    Values returned by post_scan are ignored since the scan of that file has already been completed.

    By default, all plugins are loaded during binwalk signature scans. Plugins that wish to be disabled by
    default may create a class variable named 'ENABLED' and set it to False. If ENABLED is set to False, the
    plugin will only be loaded if it is explicitly named in the plugins whitelist.

    Simple example plugin:

        from binwalk.plugins import *

        class Plugin:
            # Set to False to have this plugin disabled by default.
            ENABLED = True

            def __init__(self, binwalk):
                self.binwalk = binwalk
                print 'Scanning initialized!'

            def __del__(self):
                print 'Scanning complete!'

            def pre_scan(self, fd):
                print 'About to scan', fd.name
                return PLUGIN_CONTINUE

            def callback(self, results):
                print 'Got a result:', results['description']
                return PLUGIN_CONTINUE

            def post_scan(self, fd):
                print 'Done scanning', fd.name
                return PLUGIN_CONTINUE
    '''

    # Names of the optional callback methods looked up on each Plugin class.
    CALLBACK = 'callback'
    PRESCAN = 'pre_scan'
    POSTSCAN = 'post_scan'
    PREPARSER = 'pre_parser'
    # Name of the class every plugin module must define.
    PLUGIN = 'Plugin'
    MODULE_EXTENSION = '.py'

    def __init__(self, binwalk, whitelist=[], blacklist=[]):
        '''
        @binwalk   - Current Binwalk class instance.
        @whitelist - List of plugin module names allowed to run even when
                     their ENABLED attribute is False.
        @blacklist - List of plugin module names that are never loaded.

        Returns None.
        '''
        # NOTE(review): the mutable list defaults are shared across calls;
        # they are only read here, so this appears harmless.
        self.binwalk = binwalk
        self.callback = []
        self.pre_scan = []
        self.pre_parser = []
        self.post_scan = []
        self.whitelist = whitelist
        self.blacklist = blacklist

    def __del__(self):
        self._cleanup()

    def __exit__(self, t, v, traceback):
        self._cleanup()

    def _cleanup(self):
        # Drop the reference to the Binwalk instance so it can be collected.
        try:
            del self.binwalk
        except:
            pass

    def _call_plugins(self, callback_list, arg):
        # Invoke every callback with arg, OR-ing together the returned
        # PLUGIN_* flags. A plugin returning PLUGIN_STOP_PLUGINS prevents any
        # further plugins from running; a plugin that raises is reported to
        # stderr but does not abort the scan.
        retval = PLUGIN_CONTINUE

        for callback in callback_list:
            if (retval & PLUGIN_STOP_PLUGINS):
                break

            try:
                val = callback(arg)
                if val is not None:
                    retval |= val
            except Exception, e:
                sys.stderr.write("WARNING: %s.%s failed: %s\n" % (str(callback.im_class), callback.__name__, str(e)))

        return retval

    def list_plugins(self):
        '''
        Obtain a list of all user and system plugin modules.

        Returns a dictionary of:

            {
                'user'   : {
                    'modules'      : [list, of, module, names],
                    'descriptions' : {'module_name' : 'module pydoc string'},
                    'enabled'      : {'module_name' : True},
                    'path'         : "path/to/module/plugin/directory"
                },
                'system' : {
                    'modules'      : [list, of, module, names],
                    'descriptions' : {'module_name' : 'module pydoc string'},
                    'enabled'      : {'module_name' : True},
                    'path'         : "path/to/module/plugin/directory"
                }
            }
        '''
        plugins = {
            'user' : {
                'modules' : [],
                'descriptions' : {},
                'enabled' : {},
                'path' : None,
            },
            'system' : {
                'modules' : [],
                'descriptions' : {},
                'enabled' : {},
                'path' : None,
            }
        }

        for key in plugins.keys():
            # Resolve the on-disk plugin directory for this scope (user/system).
            plugins[key]['path'] = self.binwalk.config.paths[key][self.binwalk.config.PLUGINS]

            for file_name in os.listdir(plugins[key]['path']):
                if file_name.endswith(self.MODULE_EXTENSION):
                    module = file_name[:-len(self.MODULE_EXTENSION)]

                    if module in self.blacklist:
                        # Blacklisted plugins are not listed at all.
                        continue
                    else:
                        plugin = imp.load_source(module, os.path.join(plugins[key]['path'], file_name))
                        plugin_class = getattr(plugin, self.PLUGIN)

                        try:
                            enabled = plugin_class.ENABLED
                        except:
                            # Plugins without an ENABLED attribute default to enabled.
                            enabled = True

                        plugins[key]['enabled'][module] = enabled
                        plugins[key]['modules'].append(module)

                        try:
                            # First line of the plugin's docstring is its description.
                            plugins[key]['descriptions'][module] = plugin_class.__doc__.strip().split('\n')[0]
                        except:
                            plugins[key]['descriptions'][module] = 'No description'
        return plugins

    def _load_plugins(self):
        # Load user plugins before system plugins.
        plugins = self.list_plugins()
        self._load_plugin_modules(plugins['user'])
        self._load_plugin_modules(plugins['system'])

    def _load_plugin_modules(self, plugins):
        # Instantiate each plugin and register whichever of the four optional
        # callback methods it defines.
        for module in plugins['modules']:
            file_path = os.path.join(plugins['path'], module + self.MODULE_EXTENSION)

            try:
                plugin = imp.load_source(module, file_path)
                plugin_class = getattr(plugin, self.PLUGIN)

                try:
                    # If this plugin is disabled by default and has not been explicitly white listed, ignore it
                    if plugin_class.ENABLED == False and module not in self.whitelist:
                        continue
                except:
                    pass

                class_instance = plugin_class(self.binwalk)

                try:
                    self.callback.append(getattr(class_instance, self.CALLBACK))
                except:
                    pass

                try:
                    self.pre_scan.append(getattr(class_instance, self.PRESCAN))
                except:
                    pass

                try:
                    self.pre_parser.append(getattr(class_instance, self.PREPARSER))
                except:
                    pass

                try:
                    self.post_scan.append(getattr(class_instance, self.POSTSCAN))
                except:
                    pass

            except Exception, e:
                sys.stderr.write("WARNING: Failed to load plugin module '%s': %s\n" % (module, str(e)))

    # Thin wrappers used by Binwalk to fire each plugin phase.
    def _pre_scan_callbacks(self, fd):
        return self._call_plugins(self.pre_scan, fd)

    def _post_scan_callbacks(self, fd):
        return self._call_plugins(self.post_scan, fd)

    def _scan_callbacks(self, results):
        return self._call_plugins(self.callback, results)

    def _scan_pre_parser_callbacks(self, results):
        return self._call_plugins(self.pre_parser, results)
from binwalk.plugins import *
class Plugin:
    '''
    Validates ARM instructions during opcode scans.
    '''

    # Mask of register-list bits considered plausible for an STMFD/LDMFD
    # prologue/epilogue match; any bits set outside this mask mark the
    # result as a false positive. NOTE(review): confirm intended register
    # coverage of 0x83FF.
    BITMASK = 0x83FF
    BITMASK_SIZE = 2  # Register list is two bytes wide.

    def __init__(self, binwalk):
        self.fd = None

        # Only participate in opcode (binarch) scans.
        if binwalk.scan_type == binwalk.BINARCH:
            self.enabled = True
        else:
            self.enabled = False

    def pre_scan(self, fd):
        # Open a private handle on the target file so the register bytes
        # around each match can be re-read.
        if self.enabled:
            self.fd = open(fd.name, 'rb')

    def callback(self, results):
        if self.fd:
            data = ''

            try:
                if results['description'].startswith('ARM instruction'):
                    # Little endian: read the two register-list bytes at the
                    # match offset and swap them into big endian order.
                    self.fd.seek(results['offset'])
                    data = self.fd.read(self.BITMASK_SIZE)
                    data = data[1] + data[0]
                elif results['description'].startswith('ARMEB instruction'):
                    # Big endian: the register-list bytes follow the match.
                    self.fd.seek(results['offset']+self.BITMASK_SIZE)
                    data = self.fd.read(self.BITMASK_SIZE)

                if data:
                    registers = int(data.encode('hex'), 16)
                    # Bits outside the allowed mask => implausible register
                    # list; suppress this result.
                    if (registers & self.BITMASK) != registers:
                        return PLUGIN_NO_DISPLAY
            except:
                pass

    def post_scan(self, fd):
        # Close our private handle (self.fd may be None if never enabled).
        try:
            self.fd.close()
        except:
            pass
import ctypes
import ctypes.util
from binwalk.plugins import *
class Plugin:
    '''
    Searches for and validates compress'd data.
    '''

    ENABLED = True
    READ_SIZE = 64  # Number of bytes handed to the native validator.

    def __init__(self, binwalk):
        self.fd = None
        self.comp = None
        self.binwalk = binwalk

        # Only active during normal signature scans.
        if binwalk.scan_type == binwalk.BINWALK:
            # Native library used to validate candidate compress'd data.
            self.comp = ctypes.cdll.LoadLibrary(ctypes.util.find_library("compress42"))
            # Register the compress'd magic signatures with this scan.
            binwalk.magic_files.append(binwalk.config.find_magic_file('compressd'))

    def __del__(self):
        try:
            self.fd.close()
        except:
            pass

    def pre_scan(self, fd):
        # Open a private handle on the file being scanned.
        try:
            if self.comp:
                self.fd = open(fd.name, 'rb')
        except:
            pass

    def callback(self, results):
        # On a compress'd signature hit, ask the native library whether the
        # data actually looks compressed; suppress false positives entirely.
        if self.fd and results['description'].lower().startswith("compress'd data"):
            self.fd.seek(results['offset'])
            compressed_data = self.fd.read(self.READ_SIZE)

            if not self.comp.is_compressed(compressed_data, len(compressed_data)):
                return (PLUGIN_NO_DISPLAY | PLUGIN_NO_EXTRACT)
from binwalk.plugins import *
class Plugin:
    '''
    Ensures that ASCII CPIO archive entries only get extracted once.
    '''

    def __init__(self, binwalk):
        self.binwalk = binwalk
        self.found_archive = False

    def pre_scan(self, fd):
        # Reset per-scan state so every file starts a fresh archive.
        self.found_archive = False

    def callback(self, results):
        extraction_active = (self.binwalk.extractor.enabled and
                             self.binwalk.scan_type == self.binwalk.BINWALK)

        if extraction_active and results['description'].startswith('ASCII cpio archive'):
            # ASCII CPIO archives consist of multiple entries, ending with an
            # entry named 'TRAILER!!!'. Displaying each entry is useful, but
            # the archive should only be extracted at its first entry.
            if not self.found_archive:
                # First entry: remember it and let extraction proceed.
                self.found_archive = True
                return PLUGIN_CONTINUE

            if 'TRAILER!!!' in results['description']:
                # Final entry: clear the flag for any subsequent archive.
                self.found_archive = False

            # Every entry after the first is displayed but not re-extracted.
            return PLUGIN_NO_EXTRACT

        # All other results continue normally.
        return PLUGIN_CONTINUE
#!/usr/bin/env python
import ctypes
import ctypes.util
from binwalk.plugins import *
from binwalk.common import BlockFile
class Plugin:
    '''
    Searches for raw deflate compression streams.
    '''

    ENABLED = False  # Expensive brute-force scan; off unless whitelisted.
    SIZE = 64*1024   # Read-block / trailing-window size in bytes.
    DESCRIPTION = "Deflate compressed data stream"

    def __init__(self, binwalk):
        self.binwalk = binwalk

        # The tinfl library is built and installed with binwalk
        self.tinfl = ctypes.cdll.LoadLibrary(ctypes.util.find_library("tinfl"))

        if self.binwalk.extractor.enabled:
            # TODO: Add python extractor rule
            pass

    def pre_scan(self, fp):
        # Run the brute-force deflate scan here, then terminate the normal
        # signature scan (this plugin replaces it entirely).
        self._deflate_scan(fp)
        return PLUGIN_TERMINATE

    def _extractor(self, file_name):
        # Inflate the raw stream block by block until decompression fails.
        processed = 0
        inflated_data = ''
        fd = BlockFile(file_name, 'rb')
        fd.READ_BLOCK_SIZE = self.SIZE

        while processed < fd.length:
            (data, dlen) = fd.read_block()

            inflated_block = self.tinfl.inflate_block(data, dlen)
            if inflated_block:
                inflated_data += inflated_block
            else:
                break

            processed += dlen

        fd.close()
        print "%s inflated to %d bytes" % (file_name, len(inflated_data))

    def _deflate_scan(self, fp):
        fp.MAX_TRAILING_SIZE = self.SIZE

        # Set these so that the progress report reflects the current scan status
        self.binwalk.scan_length = fp.length
        self.binwalk.total_scanned = 0

        while self.binwalk.total_scanned < self.binwalk.scan_length:
            current_total = self.binwalk.total_scanned

            (data, dlen) = fp.read_block()
            if not data or dlen == 0:
                break

            # Try every byte offset in this block as a potential deflate
            # stream start.
            for i in range(0, dlen):
                if self.tinfl.is_deflated(data[i:], dlen-i, 0):
                    loc = fp.offset + current_total + i
                    # Update total_scanned here for immediate progress feedback
                    self.binwalk.total_scanned = current_total + i
                    self.binwalk.display.easy_results(loc, self.DESCRIPTION)

                if (current_total + i) > self.binwalk.scan_length:
                    break

            # Set total_scanned here in case no data streams were identified
            self.binwalk.total_scanned = current_total + dlen
import os
import shutil
from binwalk.common import BlockFile
class Plugin:
    '''
    Finds and extracts modified LZMA files commonly found in cable modems.
    Based on Bernardo Rodrigues' work: http://w00tsec.blogspot.com/2013/11/unpacking-firmware-images-from-cable.html
    '''

    ENABLED = True
    # Eight-byte uncompressed-size field spliced into headers that lack one.
    FAKE_LZMA_SIZE = "\x00\x00\x00\x10\x00\x00\x00\x00"
    SIGNATURE = "lzma compressed data"

    def __init__(self, binwalk):
        self.binwalk = binwalk
        self.original_cmd = ''

        if self.binwalk.extractor.enabled:
            # Replace the existing LZMA extraction command with our own
            rules = self.binwalk.extractor.get_rules()
            for i in range(0, len(rules)):
                if rules[i]['regex'].match(self.SIGNATURE):
                    self.original_cmd = rules[i]['cmd']
                    rules[i]['cmd'] = self.lzma_cable_extractor
                    break

    def lzma_cable_extractor(self, fname):
        # Try extracting the LZMA file without modification first
        if not self.binwalk.extractor.execute(self.original_cmd, fname):
            # Normal extraction failed; write a patched copy with a fake
            # uncompressed-size field inserted after the 5 header bytes,
            # then retry extraction on the patched file.
            out_name = os.path.splitext(fname)[0] + '-patched' + os.path.splitext(fname)[1]
            fp_out = open(out_name, 'wb')
            fp_in = BlockFile(fname)
            fp_in.MAX_TRAILING_SIZE = 0
            i = 0

            while i < fp_in.length:
                (data, dlen) = fp_in.read_block()

                if i == 0:
                    # First block: splice the fake size field into the header.
                    fp_out.write(data[0:5] + self.FAKE_LZMA_SIZE + data[5:])
                else:
                    fp_out.write(data)

                i += dlen

            fp_in.close()
            fp_out.close()

            # Overwrite the original file so that it can be cleaned up if -r was specified
            shutil.move(out_name, fname)
            self.binwalk.extractor.execute(self.original_cmd, fname)

    def pre_parser(self, result):
        # The modified cable modem LZMA headers all have valid dictionary sizes and a properties byte of 0x5D.
        if result['description'].lower().startswith(self.SIGNATURE) and "invalid uncompressed size" in result['description']:
            if "properties: 0x5D" in result['description'] and "invalid dictionary size" not in result['description']:
                # Re-validate the result and reword its description so the
                # header quirk is reported rather than treated as invalid.
                result['invalid'] = False
                result['description'] = result['description'].split("invalid uncompressed size")[0] + "missing uncompressed size"
class Plugin:
    '''
    Modifies string analysis output to mimic that of the Unix strings utility.
    '''
    ENABLED = False

    def __init__(self, binwalk):
        '''
        Suppress normal display output for strings scans; results are printed
        raw by the callback instead.

        @binwalk - Binwalk class instance.
        '''
        self.modify_output = False

        if binwalk.scan_type == binwalk.STRINGS:
            binwalk.display.quiet = True
            self.modify_output = True

    def callback(self, results):
        '''
        Print just the description of each result, one per line, like strings(1).

        @results - Result dictionary.
        '''
        if self.modify_output:
            try:
                # print() form is valid in both Python 2 and Python 3
                # (original used the py2-only statement form).
                print(results['description'])
            except Exception:
                # Deliberate best-effort: ignore undecodable output / broken pipes
                pass
import ctypes
import ctypes.util
from binwalk.plugins import *
class Plugin:
    '''
    Searches for and validates zlib compressed data.
    '''

    # Maximum number of bytes read when validating a suspected zlib stream.
    MAX_DATA_SIZE = 33 * 1024

    def __init__(self, binwalk):
        '''
        Register the zlib magic file and load the tinfl validation library,
        but only for normal binwalk signature scans.

        @binwalk - Binwalk class instance.
        '''
        self.fd = None
        self.tinfl = None

        if binwalk.scan_type != binwalk.BINWALK:
            return

        # Add the zlib file to the list of magic files
        binwalk.magic_files.append(binwalk.config.find_magic_file('zlib'))
        # Load libtinfl.so
        self.tinfl = ctypes.cdll.LoadLibrary(ctypes.util.find_library('tinfl'))

    def pre_scan(self, fd):
        '''
        Open our own handle to the target file for validation reads.
        '''
        if self.tinfl:
            self.fd = open(fd.name, 'rb')

    def callback(self, result):
        '''
        If this result is a zlib signature match, attempt decompression;
        suppress display/extraction for data that fails to inflate.
        '''
        is_zlib_hit = bool(self.fd) and result['description'].lower().startswith('zlib')

        if is_zlib_hit:
            # Read the suspected zlib stream from the match offset
            self.fd.seek(result['offset'])
            candidate = self.fd.read(self.MAX_DATA_SIZE)

            # Invalid zlib data: hide the result and skip extraction
            if not self.tinfl.is_deflated(candidate, len(candidate), 1):
                return (PLUGIN_NO_DISPLAY | PLUGIN_NO_EXTRACT)

        return PLUGIN_CONTINUE

    def post_scan(self, fd):
        '''
        Close the validation file handle, if one was opened.
        '''
        if self.fd is not None:
            self.fd.close()
import sys
import hashlib
import csv as pycsv
from datetime import datetime
class PrettyPrint:
    '''
    Class for printing binwalk results to screen/log files.

    An instance of PrettyPrint is available via the Binwalk.display object.
    The PrettyPrint.results() method is of particular interest, as it is suitable for use
    as a Binwalk.scan() callback function, and can be used to print Binwalk.scan() results
    to stdout, a log file, or both.

    Useful class objects:

        self.fp               - The log file's file object.
        self.quiet            - If set to True, all output to stdout is suppressed.
        self.verbose          - If set to True, verbose output is enabled.
        self.csv              - If set to True, data will be saved to the log file in CSV format.
        self.format_to_screen - If set to True, output data will be formatted to fit into the
                                current screen width.

    Example usage:

        import binwalk

        bw = binwalk.Binwalk()

        bw.display.header()
        bw.single_scan('firmware.bin', callback=bw.display.results)
        bw.display.footer()
    '''
    HEADER_WIDTH = 115
    BUFFER_WIDTH = 32
    MAX_LINE_LEN = 0
    DEFAULT_DESCRIPTION_HEADER = "DESCRIPTION"

    def __init__(self, binwalk, log=None, csv=False, quiet=False, verbose=0, format_to_screen=False):
        '''
        Class constructor.

        @binwalk          - An instance of the Binwalk class.
        @log              - Output log file.
        @csv              - If True, save data to log file in CSV format.
        @quiet            - If True, results will not be displayed to screen.
        @verbose          - If set to True, target file information will be displayed when file_info() is called.
        @format_to_screen - If set to True, format the output data to fit into the current screen width.

        Returns None.
        '''
        self.binwalk = binwalk
        self.fp = None
        self.log = log
        self.csv = None
        self.log_csv = csv
        self.quiet = quiet
        self.verbose = verbose
        self.format_to_screen = format_to_screen

        if self.format_to_screen:
            self.enable_formatting(True)

        if self.log is not None:
            self.fp = open(log, "w")

            # enable_csv requires self.fp, so only enable it once the log is open
            if self.log_csv:
                self.enable_csv()

    def __del__(self):
        '''
        Class deconstructor.
        '''
        self.cleanup()

    def __exit__(self, t, v, traceback):
        self.cleanup()

    def cleanup(self):
        '''
        Clean up any open file descriptors.
        '''
        try:
            self.fp.close()
        except Exception:
            # self.fp may be None or already closed; either way there is
            # nothing useful to do here.
            pass

        self.fp = None

    def _log(self, data):
        '''
        Log data to the log file, either raw or as a CSV row.
        '''
        if self.fp is not None:
            if self.log_csv and self.csv:
                # Flatten the (possibly screen-formatted) text back onto one
                # line and squeeze runs of spaces down to single spaces.
                data = data.replace('\n', ' ')
                while '  ' in data:
                    data = data.replace('  ', ' ')

                # Expect exactly three columns: offset, hex offset, description
                data_parts = data.split(None, 2)

                if len(data_parts) == 3:
                    for i in range(0, len(data_parts)):
                        data_parts[i] = data_parts[i].strip()

                    self.csv.writerow(data_parts)
            else:
                self.fp.write(data)

    def _pprint(self, data):
        '''
        Print data to stdout and the log file.
        '''
        if not self.quiet:
            sys.stdout.write(data)

        self._log(data)

    def _file_md5(self, file_name):
        '''
        Generate an MD5 hash of the specified file, reading it in chunks.
        '''
        md5 = hashlib.md5()

        with open(file_name, 'rb') as f:
            for chunk in iter(lambda: f.read(128 * md5.block_size), b''):
                md5.update(chunk)

        return md5.hexdigest()

    def _append_to_data_parts(self, data, start, end):
        '''
        Intelligently appends data to self.string_parts.
        For use by self._format.
        '''
        try:
            # Skip leading spaces at the start of this segment
            while data[start] == ' ':
                start += 1

            if start == end:
                end = len(data[start:])

            self.string_parts.append(data[start:end])
        except Exception:
            # Out-of-range indices: fall back to taking the remainder, if any
            try:
                self.string_parts.append(data[start:])
            except Exception:
                pass

        return start

    def _format(self, data):
        '''
        Formats a line of text to fit in the terminal window.
        For Tim.
        '''
        offset = 0
        space_offset = 0
        self.string_parts = []
        delim = '\n' + ' ' * self.BUFFER_WIDTH

        if self.format_to_screen:
            # Break the line at the last space that still fits on screen;
            # if there is no usable space, break mid-word at MAX_LINE_LEN.
            while len(data[offset:]) > self.MAX_LINE_LEN:
                space_offset = data[offset:offset + self.MAX_LINE_LEN].rfind(' ')
                if space_offset == -1 or space_offset == 0:
                    space_offset = self.MAX_LINE_LEN

                self._append_to_data_parts(data, offset, offset + space_offset)

                offset += space_offset

        self._append_to_data_parts(data, offset, offset + len(data[offset:]))

        return delim.join(self.string_parts)

    def enable_csv(self):
        '''
        Enables CSV formatting to log file.
        '''
        self.log_csv = True
        self.csv = pycsv.writer(self.fp)

    def enable_formatting(self, tf):
        '''
        Enables output formatting, which fits output to the current terminal width.

        @tf - If True, enable formatting. If False, disable formatting.

        Returns None.
        '''
        self.format_to_screen = tf

        if self.format_to_screen:
            try:
                import fcntl
                import struct
                import termios

                # Get the terminal window width
                hw = struct.unpack('hh', fcntl.ioctl(1, termios.TIOCGWINSZ, '1234'))
                self.HEADER_WIDTH = hw[1]
            except Exception:
                # Not a tty, or ioctl unsupported: keep the default width
                pass

            self.MAX_LINE_LEN = self.HEADER_WIDTH - self.BUFFER_WIDTH

    def file_info(self, file_name):
        '''
        Prints detailed info about the specified file, including file name, scan time
        and the file's MD5 sum. Called internally by self.header if self.verbose is not 0.

        @file_name - The path to the target file.

        Returns None.
        '''
        self._pprint("\n")
        self._pprint("Scan Time:    %s\n" % datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        self._pprint("Signatures:   %d\n" % self.binwalk.parser.signature_count)
        self._pprint("Target File:  %s\n" % file_name)
        self._pprint("MD5 Checksum: %s\n" % self._file_md5(file_name))

    def header(self, file_name=None, header=None, description=DEFAULT_DESCRIPTION_HEADER):
        '''
        Prints the binwalk header, typically used just before starting a scan.

        @file_name   - If specified, and if self.verbose > 0, then detailed file info
                       will be included in the header.
        @header      - If specified, this is a custom header to display at the top of the output.
        @description - The description header text to display (default: "DESCRIPTION")

        Returns None.
        '''
        if self.verbose and file_name is not None:
            self.file_info(file_name)

        self._pprint("\n")

        if not header:
            self._pprint("DECIMAL   \tHEX       \t%s\n" % description)
        else:
            self._pprint(header + "\n")

        self._pprint("-" * self.HEADER_WIDTH + "\n")

    def footer(self, bwalk=None, file_name=None):
        '''
        Prints the binwalk footer, typically used just after completing a scan.

        Returns None.
        '''
        self._pprint("\n")

    def results(self, offset, results, formatted=False):
        '''
        Prints the results of a scan. Suitable for use as a callback function for Binwalk.scan().

        @offset    - The offset at which the results were found.
        @results   - A list of libmagic result strings.
        @formatted - Set to True if the result description has already been formatted properly.

        Returns None.
        '''
        offset_printed = False

        for info in results:
            # Check for any grep filters before printing
            if self.binwalk.filter.grep(info['description']):
                if not formatted:
                    # Only display the offset once per list of results
                    if not offset_printed:
                        self._pprint("%-10d\t0x%-8X\t%s\n" % (offset, offset, self._format(info['description'])))
                        offset_printed = True
                    else:
                        self._pprint("%s\t %s\t%s\n" % (' ' * 10, ' ' * 8, self._format(info['description'])))
                else:
                    self._pprint(info['description'])

    def easy_results(self, offset, description):
        '''
        Simpler wrapper around PrettyPrint.results.

        @offset      - The offset at which the result was found.
        @description - Description string to display.

        Returns None.
        '''
        results = {
            'offset'      : offset,
            'description' : description,
        }

        return self.results(offset, [results])
import re
from common import str2int, get_quoted_strings
class SmartSignature:
    '''
    Class for parsing smart signature tags in libmagic result strings.

    This class is intended for internal use only, but a list of supported 'smart keywords'
    that may be used in magic files is available via the SmartSignature.KEYWORDS dictionary:

        from binwalk import SmartSignature

        for (i, keyword) in SmartSignature().KEYWORDS.iteritems():
            print keyword
    '''
    KEYWORD_DELIM_START = "{"
    KEYWORD_DELIM_END = "}"
    KEYWORDS = {
        'jump'        : '%sjump-to-offset:' % KEYWORD_DELIM_START,
        'filename'    : '%sfile-name:' % KEYWORD_DELIM_START,
        'filesize'    : '%sfile-size:' % KEYWORD_DELIM_START,
        'raw-string'  : '%sraw-string:' % KEYWORD_DELIM_START,  # This one is special and must come last in a signature block
        'raw-size'    : '%sraw-string-length:' % KEYWORD_DELIM_START,
        'adjust'      : '%soffset-adjust:' % KEYWORD_DELIM_START,
        'delay'       : '%sextract-delay:' % KEYWORD_DELIM_START,
        'year'        : '%syear:' % KEYWORD_DELIM_START,
        'epoch'       : '%sepoch:' % KEYWORD_DELIM_START,

        'raw-replace' : '%sraw-replace%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
        'one-of-many' : '%sone-of-many%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
    }

    def __init__(self, filter, ignore_smart_signatures=False):
        '''
        Class constructor.

        @filter                  - Instance of the MagicFilter class.
        @ignore_smart_signatures - Set to True to ignore smart signature keywords.

        Returns None.
        '''
        self.filter = filter
        self.invalid = False
        self.last_one_of_many = None
        self.ignore_smart_signatures = ignore_smart_signatures

    def parse(self, data):
        '''
        Parse a given data string for smart signature keywords. If any are found,
        interpret them and strip them.

        @data - String to parse, as returned by libmagic.

        Returns a dictionary of parsed values.
        '''
        results = {
            'offset'      : '',     # Offset where the match was found, filled in by Binwalk.single_scan.
            'description' : '',     # The libmagic data string, stripped of all keywords
            'name'        : '',     # The original name of the file, if known
            'delay'       : '',     # Extract delay description
            'extract'     : '',     # Name of the extracted file, filled in by Binwalk.single_scan.
            'jump'        : 0,      # The relative offset to resume the scan from
            'size'        : 0,      # The size of the file, if known
            'adjust'      : 0,      # The relative offset to add to the reported offset
            'year'        : 0,      # The file's creation/modification year, if reported in the signature
            'epoch'       : 0,      # The file's creation/modification epoch time, if reported in the signature
            'invalid'     : False,  # Set to True if parsed numerical values appear invalid
        }

        self.invalid = False

        # If smart signatures are disabled, or the result data is not valid (i.e., potentially malicious),
        # don't parse anything, just return the raw data as the description.
        if self.ignore_smart_signatures or not self._is_valid(data):
            results['description'] = data
        else:
            # Parse the offset-adjust value. This is used to adjust the reported offset at which
            # a signature was located due to the fact that MagicParser.match expects all signatures
            # to be located at offset 0, which some will not be.
            results['adjust'] = self._get_math_arg(data, 'adjust')

            # Parse the file-size value. This is used to determine how many bytes should be extracted
            # when extraction is enabled. If not specified, everything to the end of the file will be
            # extracted (see Binwalk.scan).
            try:
                results['size'] = str2int(self._get_keyword_arg(data, 'filesize'))
            except Exception:
                pass

            try:
                results['year'] = str2int(self._get_keyword_arg(data, 'year'))
            except Exception:
                pass

            try:
                results['epoch'] = str2int(self._get_keyword_arg(data, 'epoch'))
            except Exception:
                pass

            results['delay'] = self._get_keyword_arg(data, 'delay')

            # Parse the string for the jump-to-offset keyword.
            # This keyword is honored, even if this string result is one of many.
            results['jump'] = self._get_math_arg(data, 'jump')

            # If this is one of many, don't do anything and leave description as a blank string.
            # Else, strip all keyword tags from the string and process additional keywords as necessary.
            if not self._one_of_many(data):
                results['name'] = self._get_keyword_arg(data, 'filename').strip('"')
                results['description'] = self._strip_tags(data)

        results['invalid'] = self.invalid

        return results

    def _is_valid(self, data):
        '''
        Validates that result data does not contain smart keywords in file-supplied strings.

        @data - Data string to validate.

        Returns True if data is OK.
        Returns False if data is not OK.
        '''
        # All strings printed from the target file should be placed in strings, else there is
        # no way to distinguish between intended keywords and unintended keywords. Get all the
        # quoted strings.
        quoted_data = get_quoted_strings(data)

        # Check to see if there was any quoted data, and if so, if it contained the keyword starting delimiter
        if quoted_data and self.KEYWORD_DELIM_START in quoted_data:
            # If so, check to see if the quoted data contains any of our keywords.
            # If any keywords are found inside of quoted data, consider the keywords invalid.
            # (items() iterates identically to the deprecated iteritems() here.)
            for (name, keyword) in self.KEYWORDS.items():
                if keyword in quoted_data:
                    return False

        return True

    def _one_of_many(self, data):
        '''
        Determines if a given data string is one result of many.

        @data - String result data.

        Returns True if the string result is one of many.
        Returns False if the string result is not one of many.
        '''
        if not self.filter.invalid(data):
            if self.last_one_of_many is not None and data.startswith(self.last_one_of_many):
                return True

            if self.KEYWORDS['one-of-many'] in data:
                # Only match on the data before the first comma, as that is typically unique and static
                self.last_one_of_many = data.split(',')[0]
            else:
                self.last_one_of_many = None

        return False

    def _get_keyword_arg(self, data, keyword):
        '''
        Retrieves the argument for keywords that specify arguments.

        @data    - String result data, as returned by libmagic.
        @keyword - Keyword index in KEYWORDS.

        Returns the argument string value on success.
        Returns a blank string on failure.
        '''
        arg = ''

        # 'in' replaces the deprecated, Python-2-only dict.has_key()
        if keyword in self.KEYWORDS and self.KEYWORDS[keyword] in data:
            arg = data.split(self.KEYWORDS[keyword])[1].split(self.KEYWORD_DELIM_END)[0]

        return arg

    def _get_math_arg(self, data, keyword):
        '''
        Retrieves the argument for keywords that specify mathematical expressions as arguments.

        @data    - String result data, as returned by libmagic.
        @keyword - Keyword index in KEYWORDS.

        Returns the resulting calculated value.
        '''
        value = 0

        arg = self._get_keyword_arg(data, keyword)
        if arg:
            # Arguments may be sums of values, e.g. '0x10+5'
            for string_int in arg.split('+'):
                try:
                    value += str2int(string_int)
                except Exception:
                    self.invalid = True

        return value

    def _jump(self, data):
        '''
        Obtains the jump-to-offset value of a signature, if any.

        @data - String result data.

        Returns the offset to jump to.
        '''
        offset = 0

        offset_str = self._get_keyword_arg(data, 'jump')
        if offset_str:
            try:
                offset = str2int(offset_str)
            except Exception:
                pass

        return offset

    def _parse_raw_strings(self, data):
        '''
        Process strings that aren't NULL byte terminated, but for which we know the string length.
        This should be called prior to any other smart parsing functions.

        @data - String to parse.

        Returns a parsed string.
        '''
        if not self.ignore_smart_signatures and self._is_valid(data):
            # Get the raw string keyword arg
            raw_string = self._get_keyword_arg(data, 'raw-string')

            # Was a raw string keyword specified?
            if raw_string:
                # Get the raw string length arg
                raw_size = self._get_keyword_arg(data, 'raw-size')

                # Is the raw string length arg a numeric value?
                if re.match('^-?[0-9]+$', raw_size):
                    # Replace all instances of raw-replace in data with raw_string[:raw_size]
                    # Also strip out everything after the raw-string keyword, including the keyword itself.
                    # Failure to do so may (will) result in non-printable characters and this string will be
                    # marked as invalid when it shouldn't be.
                    data = data[:data.find(self.KEYWORDS['raw-string'])].replace(self.KEYWORDS['raw-replace'], '"' + raw_string[:str2int(raw_size)] + '"')

        return data

    def _strip_tags(self, data):
        '''
        Strips the smart tags from a result string.

        @data - String result data.

        Returns a sanitized string.
        '''
        if not self.ignore_smart_signatures:
            for (name, keyword) in self.KEYWORDS.items():
                start = data.find(keyword)
                if start != -1:
                    end = data[start:].find(self.KEYWORD_DELIM_END)
                    if end != -1:
                        data = data.replace(data[start:start + end + 1], "")

        return data
import os
import urllib2
from config import *
class Update:
    '''
    Class for updating binwalk configuration and signatures files from the subversion trunk.

    Example usage:

        from binwalk import Update

        Update().update()
    '''
    BASE_URL = "http://binwalk.googlecode.com/svn/trunk/src/binwalk/"
    MAGIC_PREFIX = "magic/"
    CONFIG_PREFIX = "config/"

    def __init__(self):
        '''
        Class constructor.
        '''
        self.config = Config()

    def update(self):
        '''
        Updates all system wide signatures and config files.

        Returns None.
        '''
        self.update_binwalk()
        self.update_bincast()
        self.update_binarch()
        self.update_extract()
        self.update_zlib()

    def _do_update_from_svn(self, prefix, fname):
        '''
        Updates the specified file to the latest version of that file in SVN.

        @prefix - The URL subdirectory where the file is located.
        @fname  - The name of the file to update.

        Returns None.
        '''
        # Get the local http proxy, if any
        # csoban.kesmarki
        proxy_url = os.getenv('HTTP_PROXY')
        if proxy_url:
            proxy_support = urllib2.ProxyHandler({'http' : proxy_url})
            opener = urllib2.build_opener(proxy_support)
            urllib2.install_opener(opener)

        url = self.BASE_URL + prefix + fname

        try:
            data = urllib2.urlopen(url).read()
            # Ensure the destination file is always closed, even if the
            # write fails (the original leaked the handle).
            fp = open(self.config.paths['system'][fname], "wb")
            try:
                fp.write(data)
            finally:
                fp.close()
        except Exception as e:
            raise Exception("Update._do_update_from_svn failed to update file '%s': %s" % (url, str(e)))

    def update_binwalk(self):
        '''
        Updates the binwalk signature file.

        Returns None.
        '''
        self._do_update_from_svn(self.MAGIC_PREFIX, self.config.BINWALK_MAGIC_FILE)

    def update_bincast(self):
        '''
        Updates the bincast signature file.

        Returns None.
        '''
        self._do_update_from_svn(self.MAGIC_PREFIX, self.config.BINCAST_MAGIC_FILE)

    def update_binarch(self):
        '''
        Updates the binarch signature file.

        Returns None.
        '''
        self._do_update_from_svn(self.MAGIC_PREFIX, self.config.BINARCH_MAGIC_FILE)

    def update_zlib(self):
        '''
        Updates the zlib signature file.

        Returns None.
        '''
        self._do_update_from_svn(self.MAGIC_PREFIX, self.config.ZLIB_MAGIC_FILE)

    def update_extract(self):
        '''
        Updates the extract.conf file.

        Returns None.
        '''
        self._do_update_from_svn(self.CONFIG_PREFIX, self.config.EXTRACT_FILE)
#!/bin/bash
# Easy installer script for Debian/RedHat/OSX systems.
# Install build/extraction dependencies on Debian-derived systems via apt-get.
function debian
{
	# The appropriate unrar package goes under different names in Debian vs Ubuntu
	sudo apt-get -y install unrar-nonfree
	# Fall back to the 'unrar' package if 'unrar-nonfree' isn't in this release's repos
	if [ "$?" != "0" ]
	then
		echo "WARNING: Failed to install 'unrar-nonfree' package, trying 'unrar' instead..."
		sudo apt-get -y install unrar
	fi

	# Install binwalk/fmk pre-requisites and extraction tools
	sudo apt-get -y install git build-essential mtd-utils zlib1g-dev liblzma-dev ncompress gzip bzip2 tar arj p7zip p7zip-full openjdk-6-jdk python-magic python-matplotlib
}
# Install build/extraction dependencies on RedHat-derived systems via yum.
function redhat
{
	sudo yum groupinstall -y "Development Tools"
	# NOTE(review): several of these package names look Debian-specific
	# (zlib1g-dev, p7zip-full, openjdk-6-jdk); the usual RedHat equivalents
	# are zlib-devel, p7zip-plugins and java-1.6.0-openjdk -- verify that
	# this install actually succeeds on a RedHat/CentOS/Fedora system.
	sudo yum install -y git mtd-utils unrar zlib1g-dev liblzma-dev xz-devel compress gzip bzip2 tar arj p7zip p7zip-full openjdk-6-jdk python-magic python-matplotlib
}
# Install dependencies on OSX via MacPorts ('port' must already be installed).
function darwin
{
	sudo port install git-core arj p7zip py-magic py-matplotlib
}
# Determine the target platform/distro: auto-detect via Python when no distro
# argument was given (or only --sumount), otherwise trust the user's argument.
if [ "$1" == "" ] || [ "$1" == "--sumount" ]
then
	# print(...) is valid under both Python 2 and Python 3 (the original
	# used the py2-only print statement and failed under python3).
	# NOTE(review): platform.linux_distribution() was removed in Python 3.8;
	# on such systems DISTRO will be empty and fall back to PLATFORM.
	PLATFORM=$(python -c 'import platform; print(platform.system().lower())')
	DISTRO=$(python -c 'import platform; print(platform.linux_distribution()[0].lower())')
else
	DISTRO="$1"
fi

if [ "$DISTRO" == "" ]
then
	DISTRO="$PLATFORM"
fi

echo "Detected $DISTRO $PLATFORM"

# Dispatch to the matching installer function. The ';&' fall-throughs
# (bash 4+) let all Debian-like and RedHat-like distros share one handler.
case $DISTRO in
	debian)
		;&
	ubuntu)
		;&
	linuxmint)
		;&
	knoppix)
		;&
	aptosid)
		debian
		;;

	redhat)
		;&
	rhel)
		;&
	fedora)
		;&
	centos)
		redhat
		;;

	darwin)
		darwin
		;;
	*)
		echo ""
		echo "This system is not supported by easy install! You may need to install dependent packages manually."
		echo ""
		echo "If your system is a derivative of Debian, RedHat or OSX, you can try manually specifying your system type on the command line:"
		echo ""
		echo -e "\t$0 [debian | redhat | darwin] [--sumount]"
		echo ""
		exit 1
esac

if [ "$DISTRO" != "darwin" ]
then
	# Get and build the firmware mod kit
	sudo rm -rf /opt/firmware-mod-kit/
	sudo mkdir -p /opt/firmware-mod-kit
	sudo chmod a+rwx /opt/firmware-mod-kit
	git clone https://code.google.com/p/firmware-mod-kit /opt/firmware-mod-kit/

	cd /opt/firmware-mod-kit/src
	./configure && sudo make
	if [ "$1" == "--sumount" ] || [ "$2" == "--sumount" ]
	then
		# The following will allow you - and others - to mount/unmount file systems without root permissions.
		# This may be problematic, especially on a multi-user system, so think about it first.
		sudo chown root ./mountcp/mountsu
		sudo chmod u+s ./mountcp/mountsu
		sudo chmod o-w ./mountcp/mountsu

		sudo chown root ./mountcp/umountsu
		sudo chmod u+s ./mountcp/umountsu
		sudo chmod o-w ./mountcp/umountsu

		sudo chown root ./jffs2/sunjffs2
		sudo chmod u+s ./jffs2/sunjffs2
		sudo chmod o-w ./jffs2/sunjffs2
	fi
	cd -
fi

# Install binwalk
sudo python setup.py install
#---------------------------Bootloaders--------------------------------
# CFE bootloader
0 string CFE1CFE1 CFE boot loader
>40 string CFE1CFE1 invalid
# U-Boot boot loader
0 string U-Boot U-Boot boot loader reference{one-of-many}
0 string U-BOOT U-Boot boot loader reference{one-of-many}
0 string u-boot U-Boot boot loader reference{one-of-many}
#------------------Compression Formats-----------------------------
# AFX compressed files (Wolfram Kleff)
0 string -afx- AFX compressed file data{offset-adjust:-2}
# bzip2
0 string BZh91AY&SY bzip2 compressed data, block size = 900k
0 string BZh81AY&SY bzip2 compressed data, block size = 800k
0 string BZh71AY&SY bzip2 compressed data, block size = 700k
0 string BZh61AY&SY bzip2 compressed data, block size = 600k
0 string BZh51AY&SY bzip2 compressed data, block size = 500k
0 string BZh41AY&SY bzip2 compressed data, block size = 400k
0 string BZh31AY&SY bzip2 compressed data, block size = 300k
0 string BZh21AY&SY bzip2 compressed data, block size = 200k
0 string BZh11AY&SY bzip2 compressed data, block size = 100k
# lzop from <markus.oberhumer@jk.uni-linz.ac.at>
0 string \x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a lzop compressed data
>9 beshort <0x0940
>>9 byte&0xf0 =0x00 - version 0.
>>9 beshort&0x0fff x \b%03x,
>>13 byte 1 LZO1X-1,
>>13 byte 2 LZO1X-1(15),
>>13 byte 3 LZO1X-999,
## >>22 bedate >0 last modified: %s,
>>14 byte =0x00 os: MS-DOS
>>14 byte =0x01 os: Amiga
>>14 byte =0x02 os: VMS
>>14 byte =0x03 os: Unix
>>14 byte =0x05 os: Atari
>>14 byte =0x06 os: OS/2
>>14 byte =0x07 os: MacOS
>>14 byte =0x0A os: Tops/20
>>14 byte =0x0B os: WinNT
>>14 byte =0x0E os: Win32
>9 beshort >0x0939
>>9 byte&0xf0 =0x00 - version 0.
>>9 byte&0xf0 =0x10 - version 1.
>>9 byte&0xf0 =0x20 - version 2.
>>9 beshort&0x0fff x \b%03x,
>>15 byte 1 LZO1X-1,
>>15 byte 2 LZO1X-1(15),
>>15 byte 3 LZO1X-999,
## >>25 bedate >0 last modified: %s,
>>17 byte =0x00 os: MS-DOS
>>17 byte =0x01 os: Amiga
>>17 byte =0x02 os: VMS
>>17 byte =0x03 os: Unix
>>17 byte =0x05 os: Atari
>>17 byte =0x06 os: OS/2
>>17 byte =0x07 os: MacOS
>>17 byte =0x0A os: Tops/20
>>17 byte =0x0B os: WinNT
>>17 byte =0x0E os: Win32
# lzip
0 string LZIP lzip compressed data
>4 byte x \b, version: %d
# LZO
0 string \211LZO\000\015\012\032\012 LZO compressed data
# 7-zip archiver, from Thomas Klausner (wiz@danbala.tuwien.ac.at)
# http://www.7-zip.org or DOC/7zFormat.txt
#
0 string 7z\274\257\047\034 7-zip archive data,
>6 byte <0 invalid
>6 byte 0 invalid
>6 byte >20 invalid
>6 byte x version %d
>7 byte x \b.%d
# standard unix compress
# Implemented in the compress binwalk plugin.
#0 string \x1f\x9d\x90 compress'd data, 16 bits
# http://tukaani.org/xz/xz-file-format.txt
0 string \xFD\x37\x7a\x58\x5a\x00 xz compressed data
# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
# Edited by Chris Chittleborough <cchittleborough@yahoo.com.au>, March 2002
# * Original filename is only at offset 10 if "extra field" absent
# * Produce shorter output - notably, only report compression methods
# other than 8 ("deflate", the only method defined in RFC 1952).
#0 string \037\213\x08 gzip compressed data
0 string \x1f\x8b\x08 gzip compressed data
>3 byte &0x01 \b, ASCII
>3 byte&0xE0 !0x00 \b, invalid reserved flag bits
>8 byte 2 \b, maximum compression
>8 byte 4 \b, fastest compression
>8 byte 1 \b, invalid extra flags
>8 byte 3 \b, invalid extra flags
>8 byte >4 \b, invalid extra flags
>3 byte &0x02 \b, has header CRC
>3 byte&0x04 0x04
>>10 leshort x \b, has %d bytes of extra data
>3 byte&0xC =0x08 \b, has original file name
>>10 string x \b{file-name:%s}
>>10 string x \b: "%s"
>3 byte &0x10 \b, has comment
>>3 byte&0xC 0
>>>10 string x \b: "%s"
>9 byte =0x00 \b, from FAT filesystem (MS-DOS, OS/2, NT)
>9 byte =0x01 \b, from Amiga
>9 byte =0x02 \b, from VMS
>9 byte =0x03 \b, from Unix
>9 byte =0x04 \b, from VM/CMS
>9 byte =0x05 \b, from Atari
>9 byte =0x06 \b, from HPFS filesystem (OS/2, NT)
>9 byte =0x07 \b, from MacOS
>9 byte =0x08 \b, from Z-System
>9 byte =0x09 \b, from CP/M
>9 byte =0x0A \b, from TOPS/20
>9 byte =0x0B \b, from NTFS filesystem (NT)
>9 byte =0x0C \b, from QDOS
>9 byte =0x0D \b, from Acorn RISCOS
#>9 byte =0xFF \b, from ZyNOS
#>9 byte >0x0D \b, invalid
#>>9 byte x source: 0x%.2X
#>9 byte <0 \b, invalid
#>>9 byte x source: 0x%.2X
>3 byte &0x20 \b, encrypted (invalid)
# Dates before 1992 are invalid, unless of course you're DD-WRT in which
# case you don't know how to set a date in your gzip files. Brilliant.
>4 lelong =0 \b, NULL date:
>4 lelong <0 \b, invalid date:
>4 lelong >0
>>4 lelong <694224000 \b, invalid date:
>>4 lelong =694224000 \b, invalid date:
>>4 lelong >694224000 \b, last modified:
>4 ledate x %s
>4 lelong x \b{epoch:%d}
# Zlib signatures
# Too short to be useful on their own; see:
#
# o src/binwalk/magic/zlib
# o src/binwalk/plugins/zlib.py
#
#0 beshort 0x789C zlib compressed data
#0 beshort 0x78DA zlib compressed data
#0 beshort 0x7801 zlib compressed data
# Supplementary magic data for the file(1) command to support
# rzip(1). The format is described in magic(5).
#
# Copyright (C) 2003 by Andrew Tridgell. You may do whatever you want with
# this file.
#
0 string RZIP rzip compressed data
>4 byte x - version %d
>5 byte x \b.%d
>6 belong x (%d bytes)
# JAR
0 belong 0xcafed00d JAR compressed with pack200,
>5 byte x version %d.
>4 byte x \b%d
# New LZMA format signature
0 string \xFFLZMA\x00 LZMA compressed data (new),
>6 byte&0x10 0 single-block stream
>6 byte&0x10 0x10 multi-block stream
# See lzma file for LZMA signatures
# Type: OpenSSL certificates/key files
# From: Nicolas Collignon <tsointsoin@gmail.com>
0 string -----BEGIN\x20CERTIFICATE----- PEM certificate
0 string -----BEGIN\x20CERTIFICATE\x20REQ PEM certificate request
0 string -----BEGIN\x20RSA\x20PRIVATE PEM RSA private key
0 string -----BEGIN\x20DSA\x20PRIVATE PEM DSA private key
# Type: OpenSSH key files
# From: Nicolas Collignon <tsointsoin@gmail.com>
0 string SSH\x20PRIVATE\x20KEY OpenSSH RSA1 private key,
>28 string >\0 version "%s"
0 string ssh-dss\x20 OpenSSH DSA public key
0 string ssh-rsa\x20 OpenSSH RSA public key
# Type: Certificates/key files in DER format
# From: Gert Hulselmans <hulselmansgert@gmail.com>
0 string \x30\x82 Private key in DER format (PKCS#8),
>4 string !\x02\x01\x00 invalid,
>>2 beshort x header length: 4, sequence length: %d
0 string \x30\x82 Certificate in DER format (x509 v3),
>4 string !\x30\x82 invalid,
>>2 beshort x header length: 4, sequence length: %d
# GnuPG
# The format is very similar to pgp
0 string \001gpg GPG key trust database
>4 byte x version %d
# Not a very useful signature
#0 beshort 0x9901 GPG key public ring
# This magic is not particularly good, as the keyrings don't have true
# magic. Nevertheless, it covers many keyrings.
#------------------------------------------------------------------------------
# Mavroyanopoulos Nikos <nmav@hellug.gr>
# mcrypt: file(1) magic for mcrypt 2.2.x;
0 string \0m\3 mcrypt 2.5 encrypted data,
>4 byte 0 invalid
>4 string >\0 algorithm: "%s",
>>&1 leshort <1 invalid
>>&1 leshort >0 keysize: %d bytes,
>>>&0 byte 0 invalid
>>>&0 string >\0 mode: "%s",
0 string \0m\2 mcrypt 2.2 encrypted data,
>3 byte 0 algorithm: blowfish-448,
>3 byte 1 algorithm: DES,
>3 byte 2 algorithm: 3DES,
>3 byte 3 algorithm: 3-WAY,
>3 byte 4 algorithm: GOST,
>3 byte 6 algorithm: SAFER-SK64,
>3 byte 7 algorithm: SAFER-SK128,
>3 byte 8 algorithm: CAST-128,
>3 byte 9 algorithm: xTEA,
>3 byte 10 algorithm: TWOFISH-128,
>3 byte 11 algorithm: RC2,
>3 byte 12 algorithm: TWOFISH-192,
>3 byte 13 algorithm: TWOFISH-256,
>3 byte 14 algorithm: blowfish-128,
>3 byte 15 algorithm: blowfish-192,
>3 byte 16 algorithm: blowfish-256,
>3 byte 100 algorithm: RC6,
>3 byte 101 algorithm: IDEA,
>3 byte <0 invalid algorithm
>3 byte >101 invalid algorithm,
>3 byte >16
>>3 byte <100 invalid algorithm,
>4 byte 0 mode: CBC,
>4 byte 1 mode: ECB,
>4 byte 2 mode: CFB,
>4 byte 3 mode: OFB,
>4 byte 4 mode: nOFB,
>4 byte <0 invalid mode,
>4 byte >4 invalid mode,
>5 byte 0 keymode: 8bit
>5 byte 1 keymode: 4bit
>5 byte 2 keymode: SHA-1 hash
>5 byte 3 keymode: MD5 hash
>5 byte <0 invalid keymode
>5 byte >3 invalid keymode
#------------------------------------------------------------------------------
# pgp: file(1) magic for Pretty Good Privacy
#
#0 beshort 0x9900 PGP key public ring
#0 beshort 0x9501 PGP key security ring
#0 beshort 0x9500 PGP key security ring
#0 beshort 0xa600 PGP encrypted data
0 string -----BEGIN\040PGP PGP armored data,
>15 string PUBLIC\040KEY\040BLOCK- public key block
>15 string MESSAGE- message
>15 string SIGNED\040MESSAGE- signed message
>15 string PGP\040SIGNATURE- signature
0 string Salted__ OpenSSL encryption, salted,
>8 belong x salt: 0x%X
>12 belong x \b%X
#-------------------------Kernels-------------------------------------
# Linux kernel boot images, from Albert Cahalan <acahalan@cs.uml.edu>
# and others such as Axel Kohlmeyer <akohlmey@rincewind.chemie.uni-ulm.de>
# and Nicolás Lichtmaier <nick@debian.org>
# All known start with: b8 c0 07 8e d8 b8 00 90 8e c0 b9 00 01 29 f6 29
0 string \xb8\xc0\x07\x8e\xd8\xb8\x00\x90\x8e\xc0\xb9\x00\x01\x29\xf6\x29 Linux kernel boot image
>514 string !HdrS (invalid)
# Finds and prints Linux kernel strings in raw Linux kernels (output like uname -a).
# Commonly found in decompressed embedded kernel binaries.
0 string Linux\ version\ Linux kernel version
>14 byte 0 invalid
>14 byte !0
>>14 string x "%s
>>45 string x \b%s"
# (NOTE: additional magic signature sections were collapsed/omitted at this point in the captured diff view)
#------------------------------------------------------------------------------
# $File: pdf,v 1.6 2009/09/19 16:28:11 christos Exp $
# pdf: file(1) magic for Portable Document Format
#
0 string %PDF- PDF document,
>6 byte !0x2e invalid
>5 string x version: "%3s"
#------------------------------------------------------------------------------
# $File: zyxel,v 1.6 2009/09/19 16:28:13 christos Exp $
# zyxel: file(1) magic for ZyXEL modems
#
# From <rob@pe1chl.ampr.org>
# These are the /etc/magic entries to decode datafiles as used for the
# ZyXEL U-1496E DATA/FAX/VOICE modems. (This header conforms to a
# ZyXEL-defined standard)
0 string ZyXEL\002 ZyXEL voice data
>10 byte 0 \b, CELP encoding
>10 byte&0x0B 1 \b, ADPCM2 encoding
>10 byte&0x0B 2 \b, ADPCM3 encoding
>10 byte&0x0B 3 \b, ADPCM4 encoding
>10 byte&0x0B 8 \b, New ADPCM3 encoding
>10 byte&0x04 4 \b,with resync
0 string LinuxGuestRecord Xen saved domain file
0 string \x3chtml HTML document header{extract-delay:HTML document footer}
>5 byte !0x20
>>5 byte !0x3e \b, invalid
0 string \x3cHTML HTML document header{extract-delay:HTML document footer}
>5 byte !0x20
>>5 byte !0x3e \b, invalid
0 string \x3c/html\x3e HTML document footer{offset-adjust:7}
0 string \x3c/HTML\x3e HTML document footer{offset-adjust:7}
0 string \x3c?xml\x20version XML document,
>15 string x version: "%.3s"
#------------------------------------------------------------------------------
# $File: sql,v 1.6 2009/09/19 16:28:12 christos Exp $
# sql: file(1) magic for SQL files
#
# From: "Marty Leisner" <mleisner@eng.mc.xerox.com>
# Recognize some MySQL files.
#
0 beshort 0xfe01 MySQL table definition file
>2 string <1 invalid
>2 string >\11 invalid
>2 byte x Version %d
0 string \xfe\xfe\x03 MySQL MISAM index file
>3 string <1 invalid
>3 string >\11 invalid
>3 byte x Version %d
0 string \xfe\xfe\x07 MySQL MISAM compressed data file
>3 string <1 invalid
>3 string >\11 invalid
>3 byte x Version %d
0 string \xfe\xfe\x05 MySQL ISAM index file
>3 string <1 invalid
>3 string >\11 invalid
>3 byte x Version %d
0 string \xfe\xfe\x06 MySQL ISAM compressed data file
>3 string <1 invalid
>3 string >\11 invalid
>3 byte x Version %d
0 string \376bin MySQL replication log
#------------------------------------------------------------------------------
# iRiver H Series database file
# From Ken Guest <ken@linux.ie>
# As observed from iRivNavi.iDB and unencoded firmware
#
0 string iRivDB iRiver Database file
>11 string >\0 Version "%s"
>39 string iHP-100 [H Series]
#------------------------------------------------------------------------------
# SQLite database files
# Ken Guest <ken@linux.ie>, Ty Sarna, Zack Weinberg
#
# Version 1 used GDBM internally; its files cannot be distinguished
# from other GDBM files.
#
# Version 2 used this format:
0 string **\x20This\x20file\x20contains\x20an\x20SQLite SQLite 2.x database
# Version 3 of SQLite allows applications to embed their own "user version"
# number in the database. Detect this and distinguish those files.
0 string SQLite\x20format\x203
>60 string _MTN Monotone source repository
>60 belong !0 SQLite 3.x database, user version %u
>60 belong 0 SQLite 3.x database
#!/usr/bin/env python
import os
import sys
from os import listdir, path
from distutils.core import setup
WIDTH = 115
# Check for pre-requisite modules only if --no-prereq-checks was not specified
if "--no-prereq-checks" not in sys.argv:
print "checking pre-requisites"
try:
import magic
try:
magic.MAGIC_NO_CHECK_TEXT
except Exception, e:
print "\n", "*" * WIDTH
print "Pre-requisite failure:", str(e)
print "It looks like you have an old or incompatible magic module installed."
print "Please install the official python-magic module, or download and install it from source: ftp://ftp.astron.com/pub/file/"
print "*" * WIDTH, "\n"
sys.exit(1)
except Exception, e:
print "\n", "*" * WIDTH
print "Pre-requisite failure:", str(e)
print "Please install the python-magic module, or download and install it from source: ftp://ftp.astron.com/pub/file/"
print "*" * WIDTH, "\n"
sys.exit(1)
try:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot
import numpy
except Exception, e:
print "\n", "*" * WIDTH
print "Pre-requisite check warning:", str(e)
print "To take advantage of this tool's entropy plotting capabilities, please install the python-matplotlib module."
print "*" * WIDTH, "\n"
if raw_input('Continue installation without this module (Y/n)? ').lower().startswith('n'):
print 'Quitting...\n'
sys.exit(1)
else:
# This is super hacky.
sys.argv.pop(sys.argv.index("--no-prereq-checks"))
# Build / install C compression libraries
c_lib_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "C")
c_lib_makefile = os.path.join(c_lib_dir, "Makefile")
working_directory = os.getcwd()
os.chdir(c_lib_dir)
status = 0
if not os.path.exists(c_lib_makefile):
status |= os.system("./configure")
status |= os.system("make")
if status != 0:
print "ERROR: Failed to build libtinfl.so! Do you have gcc installed?"
sys.exit(1)
if "install" in sys.argv:
os.system("make install")
os.chdir(working_directory)
# Generate a new magic file from the files in the magic directory
print "generating binwalk magic file"
magic_files = listdir("magic")
magic_files.sort()
fd = open("binwalk/magic/binwalk", "wb")
for magic in magic_files:
fpath = path.join("magic", magic)
if path.isfile(fpath):
fd.write(open(fpath).read())
fd.close()
# The data files to install along with the binwalk module
install_data_files = ["magic/*", "config/*", "plugins/*"]
# Install the binwalk module, script and support files
setup( name = "binwalk",
version = "1.2.3",
description = "Firmware analysis tool",
author = "Craig Heffner",
url = "http://binwalk.googlecode.com",
requires = ["magic", "matplotlib.pyplot"],
packages = ["binwalk"],
package_data = {"binwalk" : install_data_files},
scripts = ["bin/binwalk"],
)
# (removed: GitLab web-UI footer text accidentally captured with this file)