Fixed display and filtering bugs related to the --continue option

8db7bd2b · devttys0 · 96c660a8 · 8db7bd2b · 8db7bd2b · 8db7bd2b
Commit 8db7bd2b authored Sep 08, 2014 by devttys0
6 changed files
--- a/src/binwalk/core/display.py
+++ b/src/binwalk/core/display.py
@@ -37,7 +37,7 @@ class Display(object):
    def format_strings(self, header, result):
        self.result_format = result
        self.header_format = header
-        
+
        if self.num_columns == 0:
            self.num_columns = len(header.split())

@@ -98,8 +98,12 @@ class Display(object):

    def _fprint(self, fmt, columns, csv=True, stdout=True, filter=True):
        line = fmt % tuple(columns)
-        
-        if not filter or self.filter.valid_result(line):
+
+        # TODO: Additional filtering was originally done here to support the --grep option,
+        #       which is now deprecated. Seems redundant now, as the result won't get passed
+        #       to the display class unless it has already passed the filter.valid_result check.
+        #if not filter or self.filter.valid_result(line):
+        if True:
            if not self.quiet and stdout:
                sys.stdout.write(self._format_line(line.strip()) + "\n")
                sys.stdout.flush()
@@ -129,7 +133,7 @@ class Display(object):
                raise e
            except Exception:
                pass
-        
+
        return start

    def _format_line(self, line):
@@ -140,18 +144,23 @@ class Display(object):
        delim = '\n'
        offset = 0
        self.string_parts = []
+        libmagic_newline_delim = "\\012- "

-        if self.fit_to_screen and len(line) > self.SCREEN_WIDTH:
-            # Split the line into an array of columns, e.g., ['0', '0x00000000', 'Some description here']
-            line_columns = line.split(None, self.num_columns-1)
+        # Split the line into an array of columns, e.g., ['0', '0x00000000', 'Some description here']
+        line_columns = line.split(None, self.num_columns-1)
+        if line_columns:
            # Find where the start of the last column (description) starts in the line of text.
            # All line wraps need to be aligned to this offset.
            offset = line.rfind(line_columns[-1])
            # The delimiter will be a newline followed by spaces padding out the line wrap to the alignment offset.
            delim += ' ' * offset
+
+            if libmagic_newline_delim in line:
+                line = line.replace(libmagic_newline_delim, delim)
+
+        if line_columns and self.fit_to_screen and len(line) > self.SCREEN_WIDTH:
            # Calculate the maximum length that each wrapped line can be
            max_line_wrap_length = self.SCREEN_WIDTH - offset
-
            # Append all but the last column to formatted_line
            formatted_line = line[:offset]

@@ -172,7 +181,6 @@ class Display(object):
            # Append self.string_parts to formatted_line; each part seperated by delim
            formatted_line += delim.join(self.string_parts)
        else:
-            # Line fits on screen as-is, no need to format it
            formatted_line = line

        return formatted_line

--- a/src/binwalk/core/filter.py
+++ b/src/binwalk/core/filter.py
@@ -22,13 +22,13 @@ class FilterType(object):
            self.regex = re.compile(self.filter)

 class FilterInclude(FilterType):
-    
+
    def __init__(self, **kwargs):
        super(FilterInclude, self).__init__(**kwargs)
        self.type = self.FILTER_INCLUDE

 class FilterExclude(FilterType):
-    
+
    def __init__(self, **kwargs):
        super(FilterExclude, self).__init__(**kwargs)
        self.type = self.FILTER_EXCLUDE
@@ -42,7 +42,6 @@ class Filter(object):
    # If the result returned by libmagic is "data" or contains the text
    # 'invalid' or a backslash are known to be invalid/false positives.
    UNKNOWN_RESULTS = ["data", "very short file (no magic)"]
-    INVALID_RESULTS = ["invalid", "\\"]
    INVALID_RESULT = "invalid"
    NON_PRINTABLE_RESULT = "\\"

@@ -71,7 +70,7 @@ class Filter(object):
                 signatures that contain the FILTER_INCLUDE match will
                 be included in the scan, but will not cause non-matching
                 results to be excluded.
-        
+
        Returns None.
        '''
        if not isinstance(match, type([])):
@@ -92,7 +91,7 @@ class Filter(object):
        the specified matching text.

        @match - Regex, or list of regexs, to match.
-        
+
        Returns None.
        '''
        if not isinstance(match, type([])):
@@ -116,8 +115,8 @@ class Filter(object):
        '''
        data = data.lower()

-        # Loop through the filters to see if any of them are a match. 
-        # If so, return the registered type for the matching filter (FILTER_INCLUDE || FILTER_EXCLUDE). 
+        # Loop through the filters to see if any of them are a match.
+        # If so, return the registered type for the matching filter (FILTER_INCLUDE || FILTER_EXCLUDE).
        for f in self.filters:
            if f.regex.search(data):
                return f.type
@@ -148,13 +147,18 @@ class Filter(object):
        if self.show_invalid_results:
            return True

-        # Don't include quoted strings or keyword arguments in this search, as 
+        # Sanitized data contains only the non-quoted portion of the data
+        sanitized_data = common.strip_quoted_strings(self.smart.strip_tags(data))
+
+        # Don't include quoted strings or keyword arguments in this search, as
        # strings from the target file may legitimately contain the INVALID_RESULT text.
-        if self.INVALID_RESULT in common.strip_quoted_strings(self.smart.strip_tags(data)):
+        if self.INVALID_RESULT in sanitized_data:
            return False

-        # There should be no non-printable characters in any of the data
-        if self.NON_PRINTABLE_RESULT in data:
+        # There should be no non-printable characters in any of the quoted string data
+        non_printables_raw = set(re.findall("\\\\\d{3}", data))
+        non_printables_sanitized = set(re.findall("\\\\d{3}", sanitized_data))
+        if len(non_printables_raw) and non_printables_raw != non_printables_sanitized:
            return False

        return True
@@ -197,13 +201,13 @@ class Filter(object):

            # Else, return False
            return False
-    
+
        return None

    def clear(self):
        '''
        Clears all include, exclude and grep filters.
-        
+
        Retruns None.
        '''
        self.filters = []

--- a/src/binwalk/core/magic.py
+++ b/src/binwalk/core/magic.py
@@ -29,17 +29,20 @@ class Magic(object):
    MAGIC_NO_CHECK_APPTYPE  = 0x008000
    MAGIC_NO_CHECK_TOKENS   = 0x100000
    MAGIC_NO_CHECK_ENCODING = 0x200000
-    
+
    MAGIC_FLAGS = MAGIC_NO_CHECK_TEXT | MAGIC_NO_CHECK_ENCODING | MAGIC_NO_CHECK_APPTYPE | MAGIC_NO_CHECK_TOKENS

    LIBRARY = "magic"

-    def __init__(self, magic_file=None, flags=0):
+    def __init__(self, magic_file=None, flags=0, keep_going=False):
        if magic_file:
            self.magic_file = str2bytes(magic_file)
        else:
            self.magic_file = None

+        if keep_going:
+            flags = flags | self.MAGIC_CONTINUE
+
        self.libmagic = binwalk.core.C.Library(self.LIBRARY, self.LIBMAGIC_FUNCTIONS)

        binwalk.core.common.debug("libmagic.magic_open(0x%X)" % (self.MAGIC_FLAGS | flags))

--- a/src/binwalk/modules/__init__.py
+++ b/src/binwalk/modules/__init__.py
 from binwalk.modules.signature import Signature
-from binwalk.modules.binvis import Plotter
+#from binwalk.modules.binvis import Plotter
 from binwalk.modules.hexdiff import HexDiff
-from binwalk.modules.hashmatch import HashMatch
+#from binwalk.modules.hashmatch import HashMatch
 from binwalk.modules.general import General
 from binwalk.modules.extractor import Extractor
 from binwalk.modules.entropy import Entropy
 from binwalk.modules.heuristics import HeuristicCompressionAnalyzer
 from binwalk.modules.compression import RawCompression
-from binwalk.modules.codeid import CodeID
+#from binwalk.modules.codeid import CodeID
--- a/src/binwalk/modules/general.py
+++ b/src/binwalk/modules/general.py
@@ -17,7 +17,7 @@ class General(Module):
    ORDER = 0

    DEFAULT_DEPENDS = []
-        
+
    CLI = [
        Option(long='length',
               short='l',
@@ -34,6 +34,10 @@ class General(Module):
               type=int,
               kwargs={'block' : 0},
               description='Set file block size'),
+        Option(long='continue',
+               short='k',
+               kwargs={'keep_going' : True},
+               description='Show all matches for every offset, not just the first'),
        Option(long='swap',
               short='g',
               type=int,
@@ -101,6 +105,7 @@ class General(Module):
        Kwarg(name='verbose', default=False),
        Kwarg(name='files', default=[]),
        Kwarg(name='show_help', default=False),
+        Kwarg(name='keep_going', default=False),
    ]

    PRIMARY = False
@@ -108,7 +113,7 @@ class General(Module):
    def load(self):
        self.target_files = []

-        # Order is important with these two methods        
+        # Order is important with these two methods
        self._open_target_files()
        self._set_verbosity()

@@ -128,7 +133,7 @@ class General(Module):
                                                    verbose=self.verbose,
                                                    filter=self.filter,
                                                    fit_to_screen=self.format_to_terminal)
-        
+
        if self.show_help:
            show_help()
            if not binwalk.core.idb.LOADED_IN_IDA:
@@ -155,7 +160,7 @@ class General(Module):
        Must be called after self._test_target_files so that self.target_files is properly set.
        '''
        # If more than one target file was specified, enable verbose mode; else, there is
-        # nothing in some outputs to indicate which scan corresponds to which file. 
+        # nothing in some outputs to indicate which scan corresponds to which file.
        if len(self.target_files) > 1 and not self.verbose:
            self.verbose = True

@@ -188,4 +193,4 @@ class General(Module):
                    raise e
                except Exception as e:
                    self.error(description="Cannot open file : %s" % str(e))
-        
+
--- a/src/binwalk/modules/signature.py
+++ b/src/binwalk/modules/signature.py
@@ -53,6 +53,8 @@ class Signature(Module):
    VERBOSE_FORMAT = "%s    %d"

    def init(self):
+        self.keep_going = self.config.keep_going
+
        # Create Signature and MagicParser class instances. These are mostly for internal use.
        self.smart = binwalk.core.smart.Signature(self.config.filter, ignore_smart_signatures=self.dumb_scan)
        self.parser = binwalk.core.parser.MagicParser(self.config.filter, self.smart)
@@ -69,6 +71,7 @@ class Signature(Module):
            ]

        elif self.cast_data_types:
+            self.keep_going = True
            self.magic_files = [
                    self.config.settings.user.bincast,
                    self.config.settings.system.bincast,
@@ -82,10 +85,11 @@ class Signature(Module):
        # Parse the magic file(s) and initialize libmagic
        binwalk.core.common.debug("Loading magic files: %s" % str(self.magic_files))
        self.mfile = self.parser.parse(self.magic_files)
-        self.magic = binwalk.core.magic.Magic(self.mfile)
+        self.magic = binwalk.core.magic.Magic(self.mfile, keep_going=self.keep_going)

        # Once the temporary magic files are loaded into libmagic, we don't need them anymore; delete the temp files
-        self.parser.rm_magic_files()
+        if not binwalk.core.common.DEBUG:
+            self.parser.rm_magic_files()

        self.VERBOSE = ["Signatures:", self.parser.signature_count]