Added --lzma and --stop options to raw compression module

0a988379 · devttys0 · 45474ebd · 0a988379 · 0a988379
Commit 0a988379 authored Oct 22, 2014 by devttys0
Show whitespace changes
Inline Side-by-side

Showing with 122 additions and 2 deletions

__init__.py src/binwalk/modules/__init__.py +6 -1

compression.py src/binwalk/modules/compression.py +116 -1

No files found.
--- a/src/binwalk/modules/__init__.py
+++ b/src/binwalk/modules/__init__.py
@@ -4,12 +4,17 @@ try:
 except ImportError:
    pass

+# Don't load the compression module if the lzma module can't be found
+try:
+    from binwalk.modules.compression import RawCompression
+except ImportError:
+    pass
+
 from binwalk.modules.signature import Signature
 from binwalk.modules.hexdiff import HexDiff
 from binwalk.modules.general import General
 from binwalk.modules.extractor import Extractor
 from binwalk.modules.entropy import Entropy
-from binwalk.modules.compression import RawCompression

 # These are deprecated.
 #from binwalk.modules.binvis import Plotter

--- a/src/binwalk/modules/compression.py
+++ b/src/binwalk/modules/compression.py
 # Performs raw decompression of various compression algorithms (currently, only deflate).

 import os
+import lzma
+import struct
 import binwalk.core.C
+import binwalk.core.compat
 from binwalk.core.module import Option, Kwarg, Module

+class LZMAHeader(object):
+    '''
+    Plain attribute container: each keyword argument given to the constructor
+    becomes an instance attribute of the same name.
+    '''
+    def __init__(self, **kwargs):
+        for (name, value) in binwalk.core.compat.iterator(kwargs):
+            setattr(self, name, value)
+
+class LZMA(object):
+    '''
+    Finds raw LZMA compression streams.
+
+    Every candidate header (properties byte + dictionary size + size field) is
+    prepended to the data in turn; the first header that lets the lzma module
+    decompress the data, or fail only because the data is truncated, is
+    reported as a match.
+    '''
+
+    DESCRIPTION = "Raw LZMA compression stream"
+    # Uncompressed-size field appended to every candidate header (eight 0xFF bytes;
+    # in the .lzma header format this value marks the size as unknown).
+    FAKE_SIZE = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
+    # Largest properties byte accepted: the encoding of pb=4, lp=4, lc=8.
+    MAX_PROP = ((4 * 5 + 4) * 9 + 8)
+    # Number of bytes handed to decompress() for each candidate offset.
+    BLOCK_SIZE = 32*1024
+    # Error messages that only mean "input was truncated", which is expected when
+    # decompressing a fixed-size block. The Python2 and Python3 lzma modules word
+    # this differently.
+    TRUNCATED_ERRORS = (
+        "unknown BUF error",
+        "Compressed data ended before the end-of-stream marker was reached",
+    )
+
+    def __init__(self, module):
+        '''
+        Stores the owning module and pre-computes all candidate headers.
+
+        @module - The module instance that created this decompressor.
+        '''
+        self.module = module
+
+        self.build_properties()
+        self.build_dictionaries()
+        self.build_headers()
+
+    def build_property(self, pb, lp, lc):
+        '''
+        Encodes pb/lp/lc into a single LZMA properties byte value.
+
+        Returns the encoded integer, or None if it exceeds MAX_PROP.
+        '''
+        prop = (((pb * 5) + lp) * 9) + lc
+        if prop > self.MAX_PROP:
+            prop = None
+        return prop
+
+    def parse_property(self, prop):
+        '''
+        Decodes a properties byte (a single character) back into its parts.
+
+        Returns a (pb, lp, lc) tuple of integers, or None if the value exceeds MAX_PROP.
+        '''
+        prop = int(ord(prop))
+
+        if prop > self.MAX_PROP:
+            return None
+
+        # Floor division is required: with "/" Python3 yields floats, which
+        # zeroes out lp and lc and reports wrong values.
+        pb = prop // (9 * 5)
+        prop -= pb * 9 * 5
+        lp = prop // 9
+        lc = prop - lp * 9
+
+        return (pb, lp, lc)
+
+    def parse_header(self, header):
+        '''
+        Parses a candidate header string built by build_headers().
+
+        Returns an LZMAHeader with pb, lp, lc and dictionary attributes.
+        '''
+        (pb, lp, lc) = self.parse_property(header[0])
+        # Bytes 1-4 hold the dictionary size as a little-endian 32-bit integer.
+        dictionary = struct.unpack("<I", binwalk.core.compat.str2bytes(header[1:5]))[0]
+        return LZMAHeader(pb=pb, lp=lp, lc=lc, dictionary=dictionary)
+
+    def build_properties(self):
+        '''
+        Builds the set of candidate properties bytes (as single characters).
+        '''
+        self.properties = set()
+
+        # NOTE(review): build_property() encodes ((pb * 5) + lp) * 9 + lc, so the
+        # pb and lc bounds below look swapped; pb > 4 is always rejected by the
+        # MAX_PROP check and lc values 5-8 are never generated. Confirm whether
+        # limiting lc is intentional before changing scan coverage.
+        for pb in range(0, 9):
+            for lp in range(0, 5):
+                for lc in range(0, 5):
+                    prop = self.build_property(pb, lp, lc)
+                    if prop is not None:
+                        self.properties.add(chr(prop))
+
+    def build_dictionaries(self):
+        '''
+        Builds the set of candidate dictionary sizes (2^16 through 2^25),
+        each packed as a little-endian 32-bit value.
+        '''
+        self.dictionaries = set()
+
+        for n in range(16, 26):
+            self.dictionaries.add(binwalk.core.compat.bytes2str(struct.pack("<I", 2**n)))
+
+    def build_headers(self):
+        '''
+        Builds every properties/dictionary combination into a complete candidate header.
+        '''
+        self.headers = set()
+
+        for prop in self.properties:
+            for dictionary in self.dictionaries:
+                self.headers.add(prop + dictionary + self.FAKE_SIZE)
+
+    def decompress(self, data):
+        '''
+        Attempts to decompress data with each candidate header.
+
+        @data - The raw data to test.
+
+        Returns a description string for the first header that works, or None
+        if no candidate header produces a valid stream.
+        '''
+        result = None
+        description = None
+
+        for header in self.headers:
+            try:
+                lzma.decompress(binwalk.core.compat.str2bytes(header + data))
+            except Exception as e:
+                # The only acceptable exceptions are those indicating that the
+                # input data was truncated; anything else rejects this header.
+                if str(e) not in self.TRUNCATED_ERRORS:
+                    continue
+
+            result = self.parse_header(header)
+            break
+
+        if result is not None:
+            description = "%s, pb: %d, lp: %d, lc: %d, dictionary size: %d" % (self.DESCRIPTION,
+                                                                               result.pb,
+                                                                               result.lp,
+                                                                               result.lc,
+                                                                               result.dictionary)
+
+        return description
+
 class Deflate(object):
    '''
    Finds and extracts raw deflate compression streams.
@@ -49,6 +149,7 @@ class RawCompression(Module):

    DECOMPRESSORS = {
            'deflate'   : Deflate,
+            'lzma'      : LZMA,
    }

    TITLE = 'Raw Compression'
@@ -58,10 +159,19 @@ class RawCompression(Module):
                   long='deflate',
                   kwargs={'enabled' : True, 'decompressor_class' : 'deflate'},
                   description='Scan for raw deflate compression streams'),
+            Option(short='Z',
+                   long='lzma',
+                   kwargs={'enabled' : True, 'decompressor_class' : 'lzma'},
+                   description='Scan for raw LZMA compression streams'),
+            Option(short='S',
+                   long='stop',
+                   kwargs={'stop_on_first_hit' : True},
+                   description='Stop after the first result'),
    ]

    KWARGS = [
            Kwarg(name='enabled', default=False),
+            Kwarg(name='stop_on_first_hit', default=False),
            Kwarg(name='decompressor_class', default=None),
    ]

@@ -71,11 +181,12 @@ class RawCompression(Module):
    def run(self):
        for fp in iter(self.next_file, None):

+            file_done = False
            fp.set_block_size(peek=self.decompressor.BLOCK_SIZE)

            self.header()

-            while True:
+            while not file_done:
                (data, dlen) = fp.read_block()
                if not data:
                    break
@@ -84,6 +195,10 @@ class RawCompression(Module):
                    description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE])
                    if description:
                        self.result(description=description, file=fp, offset=fp.tell()-dlen+i)
+                        if self.stop_on_first_hit:
+                            file_done = True
+                            break
+                    self.status.completed += 1

                self.status.completed = fp.tell() - fp.offset