Commit 458df42e by devttys0

Added ability to pass raw strings to be scanned via API

parent 11ad600c
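The change is easiest to understand from the API side: the General module gains a hidden, API-only `string` option that swaps the file-backed I/O class for the new StringFile, so raw data can be handed straight to binwalk.scan(). A minimal sketch of the intended usage (the `firmware.bin` path and the result-printing loop are illustrative, not part of this commit):

    import binwalk

    # Any source of raw data works (socket, memory dump, etc.); a file is just a
    # convenient way to get some bytes for this example.
    with open("firmware.bin", "rb") as f:
        buf = f.read()

    # string=True maps to the hidden 'string' option added below, which tells the
    # scanner to treat each positional argument as the data itself, not a path.
    for module in binwalk.scan(buf, signature=True, string=True):
        for result in module.results:
            print("0x%.8X    %s" % (result.offset, result.description))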
@@ -7,19 +7,8 @@ import sys
import ast
import hashlib
import operator as op
import binwalk.core.idb
from binwalk.core.compat import *
# This allows other modules/scripts to subclass BlockFile from a custom class. Defaults to io.FileIO.
if has_key(__builtins__, 'BLOCK_FILE_PARENT_CLASS'):
BLOCK_FILE_PARENT_CLASS = __builtins__['BLOCK_FILE_PARENT_CLASS']
else:
BLOCK_FILE_PARENT_CLASS = io.FileIO
# Special override for when we're running in IDA
if binwalk.core.idb.LOADED_IN_IDA:
BLOCK_FILE_PARENT_CLASS = binwalk.core.idb.IDBFileIO
# The __debug__ value is a bit backwards; by default it is set to True, but
# then set to False if the Python interpreter is run with the -O option.
if not __debug__:
@@ -222,246 +211,265 @@ class MathExpression(object):
else:
raise TypeError(node)
class BlockFile(BLOCK_FILE_PARENT_CLASS):
'''
Abstraction class for accessing binary files.
This class overrides io.FileIO's read and write methods. This guarantees two things:
1. All requested data will be read/written via the read and write methods.
2. All reads return a str object and all writes can accept either a str or a
bytes object, regardless of the Python interpreter version.
However, the downside is that other io.FileIO methods won't work properly in Python 3,
namely things that are wrappers around self.read (e.g., readline, readlines, etc).
This class also provides a read_block method, which is used by binwalk to read in a
block of data, plus some additional data (DEFAULT_BLOCK_PEEK_SIZE), but on the next block read
pick up at the end of the previous data block (not the end of the additional data). This
is necessary for scans where a signature may span a block boundary.
The decision to force read to return a str object instead of a bytes object is questionable
for Python 3, but it seemed the best way to abstract differences in Python 2/3 from the rest
of the code (especially for people writing plugins) and to add Python 3 support with
minimal code change.
'''
# The DEFAULT_BLOCK_PEEK_SIZE limits the amount of data available to a signature.
# While most headers/signatures are far less than this value, some may reference
# pointers in the header structure which may point well beyond the header itself.
# Passing the entire remaining buffer to libmagic is resource intensive and will
# significantly slow the scan; this value represents a reasonable buffer size to
# pass to libmagic which will not drastically affect scan time.
DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024
# Max number of bytes to process at one time. This needs to be large enough to
# limit disk I/O, but small enough to limit the size of processed data blocks.
DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
'''
Class constructor.
@fname - Path to the file to be opened.
@mode - Mode to open the file in (default: 'r').
@length - Maximum number of bytes to read from the file via self.block_read().
@offset - Offset at which to start reading from the file.
@block - Size of data block to read (excluding any trailing size),
@peek - Size of trailing data to append to the end of each block.
@swap - Swap every n bytes of data.
Returns None.
'''
self.total_read = 0
self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE
# This is so that custom parent classes can access/modify arguments as necessary
self.args = GenericContainer(fname=fname,
mode=mode,
length=length,
offset=offset,
block=block,
peek=peek,
swap=swap,
size=0)
# Python 2.6 doesn't like modes like 'rb' or 'wb'
mode = self.args.mode.replace('b', '')
super(self.__class__, self).__init__(fname, mode)
self.swap_size = self.args.swap
if self.args.size:
self.size = self.args.size
else:
try:
self.size = file_size(self.args.fname)
except KeyboardInterrupt as e:
raise e
except Exception:
self.size = 0
if self.args.offset < 0:
self.offset = self.size + self.args.offset
else:
self.offset = self.args.offset
if self.offset < 0:
self.offset = 0
elif self.offset > self.size:
self.offset = self.size
if self.args.offset < 0:
self.length = self.args.offset * -1
elif self.args.length:
self.length = self.args.length
else:
self.length = self.size - self.args.offset
if self.length < 0:
self.length = 0
elif self.length > self.size:
self.length = self.size
if self.args.block is not None:
self.block_read_size = self.args.block
self.base_block_size = self.block_read_size
if self.args.peek is not None:
self.block_peek_size = self.args.peek
self.base_peek_size = self.block_peek_size
# Work around for python 2.6 where FileIO._name is not defined
try:
self.name
except AttributeError:
self._name = fname
self.seek(self.offset)
def _swap_data_block(self, block):
'''
Reverses every self.swap_size bytes inside the specified data block.
Size of data block must be a multiple of self.swap_size.
@block - The data block to swap.
Returns a swapped string.
'''
i = 0
data = ""
if self.swap_size > 0:
while i < len(block):
data += block[i:i+self.swap_size][::-1]
i += self.swap_size
else:
data = block
return data
def reset(self):
self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
self.seek(self.offset)
def set_block_size(self, block=None, peek=None):
if block is not None:
self.block_read_size = block
if peek is not None:
self.block_peek_size = peek
def write(self, data):
'''
Writes data to the opened file.
io.FileIO.write does not guarantee that all data will be written;
this method overrides io.FileIO.write and does guarantee that all data will be written.
Returns the number of bytes written.
'''
n = 0
l = len(data)
data = str2bytes(data)
while n < l:
n += super(self.__class__, self).write(data[n:])
return n
def read(self, n=-1):
'''
Reads up to n bytes of data (or to EOF if n is not specified).
Will not read more than self.length bytes.
io.FileIO.read does not guarantee that all requested data will be read;
this method overrides io.FileIO.read and does guarantee that all data will be read.
Returns a str object containing the read data.
'''
l = 0
data = b''
if self.total_read < self.length:
# Don't read more than self.length bytes from the file
if (self.total_read + n) > self.length:
n = self.length - self.total_read
while n < 0 or l < n:
tmp = super(self.__class__, self).read(n-l)
if tmp:
data += tmp
l += len(tmp)
else:
break
self.total_read += len(data)
return self._swap_data_block(bytes2str(data))
def peek(self, n=-1):
'''
Peeks at data in file.
'''
pos = self.tell()
data = self.read(n)
self.seek(pos)
return data
def seek(self, n, whence=os.SEEK_SET):
if whence == os.SEEK_SET:
self.total_read = n - self.offset
elif whence == os.SEEK_CUR:
self.total_read += n
elif whence == os.SEEK_END:
self.total_read = self.size + n
super(self.__class__, self).seek(n, whence)
def read_block(self):
'''
Reads in a block of data from the target file.
Returns a tuple of (str(file block data), block data length).
'''
data = self.read(self.block_read_size)
dlen = len(data)
data += self.peek(self.block_peek_size)
return (data, dlen)
def dup(self):
'''
Creates a new BlockFile instance with all the same initialization settings as this one.
Returns new BlockFile object.
'''
return BlockFile(self.name,
length=self.length,
offset=self.offset,
block=self.base_block_read_size,
peek=self.base_peek_size,
swap=self.swap)
class StringFile(object):
'''
A class to allow access to strings as if they were read from a file.
Used internally as a conditional superclass to InternalBlockFile.
'''
def __init__(self, fname, mode='r'):
self.string = fname
self.name = "String"
self.args.size = len(self.string)
def read(self, n=-1):
if n == -1:
data = self.string[self.total_read:]
else:
data = self.string[self.total_read:self.total_read+n]
return data
def tell(self):
return self.total_read
def write(self, *args, **kwargs):
pass
def seek(self, *args, **kwargs):
pass
def close(self):
pass
def BlockFile(fname, mode='r', subclass=io.FileIO, **kwargs):
# Defining a class inside a function allows it to be dynamically subclassed
class InternalBlockFile(subclass):
'''
Abstraction class for accessing binary files.
This class overrides io.FileIO's read and write methods. This guarantees two things:
1. All requested data will be read/written via the read and write methods.
2. All reads return a str object and all writes can accept either a str or a
bytes object, regardless of the Python interpreter version.
However, the downside is that other io.FileIO methods won't work properly in Python 3,
namely things that are wrappers around self.read (e.g., readline, readlines, etc).
This class also provides a read_block method, which is used by binwalk to read in a
block of data, plus some additional data (DEFAULT_BLOCK_PEEK_SIZE), but on the next block read
pick up at the end of the previous data block (not the end of the additional data). This
is necessary for scans where a signature may span a block boundary.
The decision to force read to return a str object instead of a bytes object is questionable
for Python 3, but it seemed the best way to abstract differences in Python 2/3 from the rest
of the code (especially for people writing plugins) and to add Python 3 support with
minimal code change.
'''
# The DEFAULT_BLOCK_PEEK_SIZE limits the amount of data available to a signature.
# While most headers/signatures are far less than this value, some may reference
# pointers in the header structure which may point well beyond the header itself.
# Passing the entire remaining buffer to libmagic is resource intensive and will
# significantly slow the scan; this value represents a reasonable buffer size to
# pass to libmagic which will not drastically affect scan time.
DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024
# Max number of bytes to process at one time. This needs to be large enough to
# limit disk I/O, but small enough to limit the size of processed data blocks.
DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
'''
Class constructor.
@fname - Path to the file to be opened.
@mode - Mode to open the file in (default: 'r').
@length - Maximum number of bytes to read from the file via self.block_read().
@offset - Offset at which to start reading from the file.
@block - Size of data block to read (excluding any trailing size),
@peek - Size of trailing data to append to the end of each block.
@swap - Swap every n bytes of data.
Returns None.
'''
self.total_read = 0
self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE
# This is so that custom parent classes can access/modify arguments as necessary
self.args = GenericContainer(fname=fname,
mode=mode,
length=length,
offset=offset,
block=block,
peek=peek,
swap=swap,
size=0)
# Python 2.6 doesn't like modes like 'rb' or 'wb'
mode = self.args.mode.replace('b', '')
super(self.__class__, self).__init__(fname, mode)
self.swap_size = self.args.swap
if self.args.size:
self.size = self.args.size
else:
try:
self.size = file_size(self.args.fname)
except KeyboardInterrupt as e:
raise e
except Exception:
self.size = 0
if self.args.offset < 0:
self.offset = self.size + self.args.offset
else:
self.offset = self.args.offset
if self.offset < 0:
self.offset = 0
elif self.offset > self.size:
self.offset = self.size
if self.args.offset < 0:
self.length = self.args.offset * -1
elif self.args.length:
self.length = self.args.length
else:
self.length = self.size - self.args.offset
if self.length < 0:
self.length = 0
elif self.length > self.size:
self.length = self.size
if self.args.block is not None:
self.block_read_size = self.args.block
self.base_block_size = self.block_read_size
if self.args.peek is not None:
self.block_peek_size = self.args.peek
self.base_peek_size = self.block_peek_size
# Work around for python 2.6 where FileIO._name is not defined
try:
self.name
except AttributeError:
self._name = fname
self.seek(self.offset)
def _swap_data_block(self, block):
'''
Reverses every self.swap_size bytes inside the specified data block.
Size of data block must be a multiple of self.swap_size.
@block - The data block to swap.
Returns a swapped string.
'''
i = 0
data = ""
if self.swap_size > 0:
while i < len(block):
data += block[i:i+self.swap_size][::-1]
i += self.swap_size
else:
data = block
return data
def reset(self):
self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
self.seek(self.offset)
def set_block_size(self, block=None, peek=None):
if block is not None:
self.block_read_size = block
if peek is not None:
self.block_peek_size = peek
def write(self, data):
'''
Writes data to the opened file.
io.FileIO.write does not guarantee that all data will be written;
this method overrides io.FileIO.write and does guarantee that all data will be written.
Returns the number of bytes written.
'''
n = 0
l = len(data)
data = str2bytes(data)
while n < l:
n += super(self.__class__, self).write(data[n:])
return n
def read(self, n=-1):
'''
Reads up to n bytes of data (or to EOF if n is not specified).
Will not read more than self.length bytes.
io.FileIO.read does not guarantee that all requested data will be read;
this method overrides io.FileIO.read and does guarantee that all data will be read.
Returns a str object containing the read data.
'''
l = 0
data = b''
if self.total_read < self.length:
# Don't read more than self.length bytes from the file
if (self.total_read + n) > self.length:
n = self.length - self.total_read
while n < 0 or l < n:
tmp = super(self.__class__, self).read(n-l)
if tmp:
data += tmp
l += len(tmp)
else:
break
self.total_read += len(data)
return self._swap_data_block(bytes2str(data))
def peek(self, n=-1):
'''
Peeks at data in file.
'''
pos = self.tell()
data = self.read(n)
self.seek(pos)
return data
def seek(self, n, whence=os.SEEK_SET):
if whence == os.SEEK_SET:
self.total_read = n - self.offset
elif whence == os.SEEK_CUR:
self.total_read += n
elif whence == os.SEEK_END:
self.total_read = self.size + n
super(self.__class__, self).seek(n, whence)
def read_block(self):
'''
Reads in a block of data from the target file.
Returns a tuple of (str(file block data), block data length).
'''
data = self.read(self.block_read_size)
dlen = len(data)
data += self.peek(self.block_peek_size)
return (data, dlen)
return InternalBlockFile(fname, mode=mode, **kwargs)
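With BlockFile now a factory function instead of a class, the parent I/O class is chosen per call rather than through the removed module-level BLOCK_FILE_PARENT_CLASS hook. A rough sketch of driving the factory directly (the path and data values are made up for illustration):

    import io
    from binwalk.core.common import BlockFile, StringFile

    # Default behaviour: InternalBlockFile subclasses io.FileIO and reads from disk.
    fp = BlockFile("/tmp/example.bin", subclass=io.FileIO, block=1024, peek=128)
    (data, dlen) = fp.read_block()  # dlen counts the block only, not the trailing peek data
    fp.close()

    # Same interface, backed by an in-memory string via the new StringFile class;
    # the 'fname' argument is the data itself.
    sfp = BlockFile("\x1f\x8b\x08" + "A" * 64, subclass=StringFile)
    (data, dlen) = sfp.read_block()
    sfp.close()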
@@ -354,10 +354,10 @@ class Module(object):
# Values in self.target_file_list are either already open files (BlockFile instances), or paths
# to files that need to be opened for scanning.
if isinstance(next_target_file, binwalk.core.common.BlockFile):
fp = next_target_file
else:
fp = self.config.open_file(next_target_file)
if isinstance(next_target_file, str):
fp = self.config.open_file(next_target_file)
else:
fp = next_target_file
self.status.clear()
self.status.total = fp.length
...
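The inverted check above falls out of BlockFile becoming a function: isinstance(next_target_file, binwalk.core.common.BlockFile) is no longer a valid type test, so the module now tests for plain strings and treats anything else as an already-open file object. Roughly (a simplified sketch, not the literal module code):

    def resolve_target(next_target_file, config):
        if isinstance(next_target_file, str):
            # A path, or raw data when the 'string' option is in effect; let the
            # General module open it with whatever I/O subclass is configured.
            return config.open_file(next_target_file)
        # Anything else is assumed to already be an open BlockFile-like object.
        return next_target_file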
# Module to process general user input options (scan length, starting offset, etc).
import io
import os
import sys
import argparse
@@ -67,6 +68,10 @@ class General(Module):
short=None,
type=binwalk.core.common.BlockFile,
kwargs={'files' : []}),
# Hidden, API-only arguments
Option(long="string",
kwargs={'subclass' : binwalk.core.common.StringFile}),
]
KWARGS = [
@@ -82,6 +87,7 @@ class General(Module):
Kwarg(name='files', default=[]),
Kwarg(name='show_help', default=False),
Kwarg(name='keep_going', default=False),
Kwarg(name='subclass', default=io.FileIO),
]
PRIMARY = False
@@ -89,6 +95,10 @@ class General(Module):
def load(self):
self.target_files = []
# A special case for when we're loaded into IDA
if self.subclass == io.FileIO and binwalk.core.idb.LOADED_IN_IDA:
self.subclass = binwalk.core.idb.IDBFileIO
# Order is important with these two methods
self._open_target_files()
self._set_verbosity()
@@ -141,7 +151,7 @@ class General(Module):
if swap is None:
swap = self.swap_size
return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek)
return binwalk.core.common.BlockFile(fname, subclass=self.subclass, length=length, offset=offset, swap=swap, block=block, peek=peek)
def _open_target_files(self):
'''
@@ -151,7 +161,7 @@ class General(Module):
# Validate the target files listed in target_files
for tfile in self.files:
# Ignore directories.
if not os.path.isdir(tfile):
if not self.subclass == io.FileIO or not os.path.isdir(tfile):
# Make sure we can open the target files
try:
self.target_files.append(self.open_file(tfile))
...
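Taken together, the general.py changes route everything through a per-scan subclass: the hidden `string` option selects StringFile, running inside IDA selects IDBFileIO, and otherwise it stays io.FileIO. A condensed, illustrative sketch of that flow (the choose_subclass helper is invented for clarity and is not part of the module):

    import io
    import binwalk.core.common
    import binwalk.core.idb

    def choose_subclass(scan_strings=False):
        # Hypothetical helper mirroring General.load(): the 'string' option wins,
        # and the IDA override replaces the removed BLOCK_FILE_PARENT_CLASS hook.
        if scan_strings:
            return binwalk.core.common.StringFile
        if binwalk.core.idb.LOADED_IN_IDA:
            return binwalk.core.idb.IDBFileIO
        return io.FileIO

    # With StringFile selected, the "file name" handed to BlockFile is the data to
    # scan, so directory checks and disk I/O are skipped entirely.
    fp = binwalk.core.common.BlockFile("raw data to scan", subclass=choose_subclass(scan_strings=True))
    print(fp.read_block())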