Added extractor conf option to *not* recurse into directories (e.g., for…

Added extractor conf option to *not* recurse into directories (e.g., for extracted file systems) when --matryoshka is specified

Added extractor conf option to not recurse into directories (e.g., for…
Added extractor conf option to *not* recurse into directories (e.g., for extracted file systems) when --matryoshka is specified
0b023660 · devttys0 · 22e6286e · 0b023660 · 0b023660
Commit 0b023660 authored Mar 19, 2015 by devttys0
Hide whitespace changes
Inline Side-by-side

Showing with 40 additions and 21 deletions

extract.conf src/binwalk/config/extract.conf +10 -10

extractor.py src/binwalk/modules/extractor.py +30 -11

No files found.
--- a/src/binwalk/config/extract.conf
+++ b/src/binwalk/config/extract.conf
 #######################################################################################################################################
 # Default extraction rules, loaded when --extract is specified.
 #
-# <lower-case unique string from binwalk output text>:<desired file extension>:<command to execute>:<successful command return codes>
+# <lower-case unique string from binwalk output text>:<desired file extension>:<command to execute>:<successful command return codes>:<False if directories should not be recursively scanned>
 #
 # Note that %e is a place holder for the extracted file name.
 #
@@ -40,21 +40,21 @@
 ^stuffit:sit:unstuff '%e'
 # Try unsquashfs first, or if not installed, sasquatch
-^squashfs filesystem:squashfs:unsquashfs -d '%%squashfs-root%%' '%e'
+^squashfs filesystem:squashfs:unsquashfs -d '%%squashfs-root%%' '%e':0:False
-^squashfs filesystem:squashfs:sasquatch -d '%%squashfs-root%%' '%e'
+^squashfs filesystem:squashfs:sasquatch -d '%%squashfs-root%%' '%e':0:False
 # Try cramfsck first; if that fails, swap the file system and try again
-^cramfs filesystem:cramfs:cramfsck -x '%%cramfs-root%%' '%e'
+^cramfs filesystem:cramfs:cramfsck -x '%%cramfs-root%%' '%e':0:False
-^cramfs filesystem:cramfs:cramfsswap '%e' '%e.swap' && cramfsck -x '%%cramfs-root%%' '%e.swap'
+^cramfs filesystem:cramfs:cramfsswap '%e' '%e.swap' && cramfsck -x '%%cramfs-root%%' '%e.swap':0:False
 # Try mounting the file system (this requires root privileges)
-^squashfs filesystem:squashfs:mkdir squashfs-root && mount -t squashfs '%e' squashfs-root
+^squashfs filesystem:squashfs:mkdir squashfs-root && mount -t squashfs '%e' squashfs-root:0:False
-^cramfs filesystem:cramfs:mkdir cramfs-root && mount -t cramfs '%e' cramfs-root
+^cramfs filesystem:cramfs:mkdir cramfs-root && mount -t cramfs '%e' cramfs-root:0:False
-^ext2 filesystem:ext2:mkdir ext2-root && mount -t ext2 '%e' ext2-root
+^ext2 filesystem:ext2:mkdir ext2-root && mount -t ext2 '%e' ext2-root:0:False
-^romfs filesystem:romfs:mkdir romfs-root && mount -t romfs '%e' romfs-root
+^romfs filesystem:romfs:mkdir romfs-root && mount -t romfs '%e' romfs-root:0:False
 # Use sviehb's jefferson.py tool for JFFS2 extraction
-^jffs2 filesystem:jffs2:jefferson.py -d '%%jffs2-root%%' '%e'
+^jffs2 filesystem:jffs2:jefferson.py -d '%%jffs2-root%%' '%e':0:False
 # These were extractors used from FMK that still need suitable replacements.
 #^bff volume entry:bff:/opt/firmware-mod-kit/src/bff/bffxtractor.py '%e'

--- a/src/binwalk/modules/extractor.py
+++ b/src/binwalk/modules/extractor.py
@@ -127,7 +127,7 @@ class Extractor(Module):
            # Attempt extraction
            binwalk.core.common.debug("Extractor callback for %s @%d [%s]" % (r.file.name, r.offset, r.description))
-            (extraction_directory, dd_file) = self.extract(r.offset, r.description, r.file.path, size, r.name)
+            (extraction_directory, dd_file, recurse_into_directories) = self.extract(r.offset, r.description, r.file.path, size, r.name)
            # If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file
            if extraction_directory and dd_file:
@@ -150,11 +150,21 @@ class Extractor(Module):
                    self.result(description=file_path, display=False)
                    # If recursion was specified, and the file is not the same one we just dd'd, and if it is not a directory
-                    if self.matryoshka and file_path != dd_file_path and not os.path.isdir(file_path):
+                    if self.matryoshka and file_path != dd_file_path:
                        # If the recursion level of this file is less than or equal to our desired recursion level
                        if len(real_file_path.split(self.base_recursion_dir)[1].split(os.path.sep)) <= self.matryoshka:
-                            # Add the file to our list of pending files
+                            # If this is a directory and we are supposed to process directories for this extractor,
-                            self.pending.append(file_path)
+                            # then add all files under that directory to the list of pending files.
+                            if os.path.isdir(file_path):
+                                if recurse_into_directories:
+                                    for root, dirs, files in os.walk(file_path):
+                                        for f in files:
+                                            full_path = os.path.join(root, f)
+                                            print ("Adding '%s' to the list of pending shit." % full_path)
+                                            self.pending.append(full_path)
+                            # If it's just a file, it to eh list of pending files
+                            else:
+                                self.pending.append(file_path)
                # Update the last directory listing for the next time we extract a file to this same output directory
                self.last_directory_listing[extraction_directory] = directory_listing
@@ -162,7 +172,7 @@ class Extractor(Module):
    def append_rule(self, r):
        self.extract_rules.append(r.copy())
-    def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None, codes=[0, None]):
+    def add_rule(self, txtrule=None, regex=None, extension=None, cmd=None, codes=[0, None], recurse=True):
        '''
        Adds a set of rules to the extraction rule list.
@@ -172,6 +182,7 @@ class Extractor(Module):
        @cmd       - If rule string is not specified, this is the command to run.
                     Alternatively a callable object may be specified, which will be passed one argument: the path to the file to extract.
        @codes     - A list of valid return codes for the extractor.
+        @recurse   - If False, extracted directories will not be recursed into when the matryoshka option is enabled.
        Returns None.
        '''
@@ -181,7 +192,8 @@ class Extractor(Module):
            'extension'     : '',
            'cmd'           : '',
            'regex'         : None,
-            'codes'         : codes
+            'codes'         : codes,
+            'recurse'       : recurse,
        }
        # Process single explicitly specified rule
@@ -211,6 +223,7 @@ class Extractor(Module):
                r['extension'] = values[1]
                r['cmd'] = values[2]
                r['codes'] = values[3]
+                r['recurse'] = values[4]
            except KeyboardInterrupt as e:
                raise e
            except Exception:
@@ -357,10 +370,11 @@ class Extractor(Module):
        original_dir = os.getcwd()
        rules = self.match(description)
        file_path = os.path.realpath(file_name)
+        recurse_into_directories = False
        # No extraction rules for this file
        if not rules:
-            return (None, None)
+            return (None, None, False)
        else:
            binwalk.core.common.debug("Found %d matching extraction rules" % len(rules))
@@ -426,6 +440,8 @@ class Extractor(Module):
                    # If the command executed OK, don't try any more rules
                    if extract_ok == True:
+                        # Make sure we recurse into any extracted directories if instructed to
+                        recurse_into_directories = rule['recurse']
                        break
                    # Else, remove the extracted file if this isn't the last rule in the list.
                    # If it is the last rule, leave the file on disk for the user to examine.
@@ -443,7 +459,7 @@ class Extractor(Module):
            os.chdir(original_dir)
-        return (output_directory, fname)
+        return (output_directory, fname, recurse_into_directories)
    def _entry_offset(self, index, entries, description):
        '''
@@ -488,11 +504,11 @@ class Extractor(Module):
        @rule - Rule string.
-        Returns an array of ['<case insensitive matching string>', '<file extension>', '<command to run>', '<comma separated return codes>'].
+        Returns an array of ['<case insensitive matching string>', '<file extension>', '<command to run>', '<comma separated return codes>', <recurse into extracted directories: True|False>].
        '''
-        values = rule.strip().split(self.RULE_DELIM, 3)
+        values = rule.strip().split(self.RULE_DELIM, 4)
-        if len(values) == 4:
+        if len(values) >= 4:
            codes = values[3].split(',')
            for i in range(0, len(codes)):
                try:
@@ -501,6 +517,9 @@ class Extractor(Module):
                    binwalk.core.common.warning("The specified return code '%s' for extractor '%s' is not a valid number!" % (codes[i], values[0]))
            values[3] = codes
+        if len(values) >= 5:
+            values[4] = (values[4].lower() == 'true')
        return values
    def _dd(self, file_name, offset, size, extension, output_file_name=None):