Added binwalk.modules.extractor.output data to map scan results to carved/extracted files.

46f87000 · devttys0 · f53a618a · 46f87000 · 46f87000 · 46f87000
Commit 46f87000 authored Jul 16, 2015 by devttys0
Show whitespace changes
Inline Side-by-side

Showing with 38 additions and 7 deletions

API.md API.md +6 -5

extractor.py src/binwalk/modules/extractor.py +17 -2

extract_data.py src/scripts/examples/extract_data.py +15 -0

No files found.
--- a/API.md
+++ b/API.md
@@ -81,15 +81,16 @@ for module in binwalk.scan('firmware1.bin', 'firmware2.bin', signature=True, qui
 Note the above use of the `--quiet` option which prevents the binwalk module from printing its normal output to screen.
-Each module object will also have an additional `extractor` attribute, which is an instance of the `binwalk.modules.extractor` object used to extract files if `--extract` was specified. In particular, `binwalk.modules.extractor.output` is a dictionary containing the base extraction directory for each scanned file:
+Each module object will also have an additional `extractor` attribute, which is an instance of the `binwalk.modules.extractor` object used to extract files if `--extract` was specified. In particular, `binwalk.modules.extractor.output` is a dictionary containing information about carved/extracted data:
 ```python
 for module in binwalk.scan('firmware1.bin', 'firmware2.bin', signature=True, quiet=True, extract=True):
-    print ("%s Results:" % module.name)
    for result in module.results:
-        print ("\t%s    0x%.8X    %s" % (result.file.path, result.offset, result.description))
+        if module.extractor.output.has_key(result.file.path):
-    for (file_path, output_dir) in module.extractor.output:
+            if module.extractor.output[result.file.path].carved.has_key(result.offset):
-        print ("%s data was extracted to: %s" % (file_path, output_dir))
+                print "Carved data from offset 0x%X to %s" % (result.offset, module.extractor.output[result.file.path].carved[result.offset])
+            if module.extractor.output[result.file.path].extracted.has_key(result.offset):
+                print "Extracted data from offset 0x%X to %s" % (result.offset, module.extractor.output[result.file.path].extracted[result.offset][0])
 ```
 Module Exceptions

--- a/src/binwalk/modules/extractor.py
+++ b/src/binwalk/modules/extractor.py
@@ -14,6 +14,12 @@ from binwalk.core.compat import *
 from binwalk.core.module import Module, Option, Kwarg
 from binwalk.core.common import file_size, file_md5, unique_file_name, BlockFile
+class ExtractInfo(object):
+    # Record of extraction output for one scanned file; the Extractor stores one
+    # instance per file in its output dictionary, keyed by the scanned file's path.
+    def __init__(self):
+        # Maps a result offset to the full path of the file carved (dd'd) from that offset
+        self.carved = {}
+        # Maps a result offset to a list of real paths of files created by the extraction utility
+        self.extracted = {}
+        # Base extraction directory; None until the Extractor assigns it
+        self.directory = None
 class Extractor(Module):
    '''
    Extractor class, responsible for extracting files from the target file and executing external applications, if requested.
@@ -151,6 +157,9 @@ class Extractor(Module):
        # Note that r.display is still True even if --quiet has been specified; it is False if the result has been
        # explicitly excluded via the -y/-x options.
        if r.valid and r.extract and r.display:
+            # Create some extract output for this file, if it doesn't already exist
+            if not binwalk.core.common.has_key(self.output, r.file.path):
+                self.output[r.file.path] = ExtractInfo()
            # Attempt extraction
            binwalk.core.common.debug("Extractor callback for %s @%d [%s]" % (r.file.name, r.offset, r.description))
@@ -158,8 +167,10 @@ class Extractor(Module):
            # If the extraction was successful, self.extract will have returned the output directory and name of the dd'd file
            if extraction_directory and dd_file:
-                # Get the full path to the dd'd file
+                # Get the full path to the dd'd file and save it in the output info for this file
                dd_file_path = os.path.join(extraction_directory, dd_file)
+                self.output[r.file.path].carved[r.offset] = dd_file_path
+                self.output[r.file.path].extracted[r.offset] = []
                # Do a directory listing of the output directory
                directory_listing = set(os.listdir(extraction_directory))
@@ -176,6 +187,10 @@ class Extractor(Module):
                    real_file_path = os.path.realpath(file_path)
                    self.result(description=file_path, display=False)
+                    # Also keep a list of files created by the extraction utility
+                    if real_file_path != dd_file_path:
+                        self.output[r.file.path].extracted[r.offset].append(real_file_path)
                    # If recursion was specified, and the file is not the same one we just dd'd
                    if self.matryoshka and file_path != dd_file_path and scan_extracted_files:
                        # If the recursion level of this file is less than or equal to our desired recursion level
@@ -367,7 +382,7 @@ class Extractor(Module):
        # Set the initial base extraction directory for later determining the level of recursion
        if self.directory is None:
            self.directory = os.path.realpath(output_directory) + os.path.sep
-            self.output[path] = self.directory
+            self.output[path].directory = self.directory
        return output_directory

--- a/src/scripts/examples/extract_data.py
+++ b/src/scripts/examples/extract_data.py
+#!/usr/bin/env python
+# Example: run a signature scan with extraction enabled on every file named on
+# the command line, then report which scan results had data extracted for them.
+import sys
+import binwalk
+# Extracts and logs
+for module in binwalk.scan(*sys.argv[1:], signature=True, quiet=True, extract=True):
+    print ("%s Results:" % module.name)
+    for result in module.results:
+        # Use the 'in' operator rather than dict.has_key(): has_key() was removed
+        # in Python 3, while 'in' behaves the same on Python 2 and Python 3.
+        if result.file.path in module.extractor.output:
+            if result.offset in module.extractor.output[result.file.path].extracted:
+                print ("Extracted '%s' at offset 0x%X from '%s' to '%s'" % (result.description.split(',')[0],
+                                                                            result.offset,
+                                                                            result.file.path,
+                                                                            str(module.extractor.output[result.file.path].extracted[result.offset])))