Commit d06af2df by devttys0

Added compression.py module; working out bugs in BlockFile offset/length values.

parent b22d5899
@@ -276,6 +276,7 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
        self._name = fname
        self.seek(self.offset)
+        print self.name, self.offset, self.length, self.total_read

    def _swap_data_block(self, block):
        '''
@@ -302,7 +303,7 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
        self.set_block_size(block=self.base_trail_size, trail=self.base_trail_size)
        self.seek(self.offset)

-    def set_block_size(self, block=0, trail=0):
+    def set_block_size(self, block=None, trail=None):
        if block is not None:
            self.READ_BLOCK_SIZE = block
        if trail is not None:
...
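The set_block_size() change above swaps the 0 defaults for None so that a caller can adjust one size without clobbering the other: with the old defaults, the `is not None` checks meant that a call such as fp.set_block_size(trail=N) (which the new RawCompression module below makes) would also have reset the read block size to 0. A minimal sketch of the pattern, not the actual BlockFile code, assuming trail maps to the MAX_TRAILING_SIZE attribute referenced elsewhere in this commit:

class ExampleFile(object):
    # Stand-in defaults; the real values live on BlockFile.
    READ_BLOCK_SIZE = 4096
    MAX_TRAILING_SIZE = 1024

    def set_block_size(self, block=None, trail=None):
        # None means "leave this setting untouched"; a default of 0 would pass
        # the 'is not None' test and silently zero out whichever size was omitted.
        if block is not None:
            self.READ_BLOCK_SIZE = block
        if trail is not None:
            self.MAX_TRAILING_SIZE = trail

f = ExampleFile()
f.set_block_size(trail=33 * 1024)   # trailing size grows, READ_BLOCK_SIZE stays 4096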
@@ -81,6 +81,7 @@ class Result(object):
        Class constructor.

        @offset - The file offset of the result.
+        @size - Size of the result, if known.
        @description - The result description, as displayed to the user.
        @module - Name of the module that generated the result.
        @file - The file object of the scanned file.
@@ -93,6 +94,7 @@ class Result(object):
        Returns None.
        '''
        self.offset = 0
+        self.size = 0
        self.description = ''
        self.module = ''
        self.file = None
@@ -100,6 +102,7 @@ class Result(object):
        self.display = True
        self.extract = True
        self.plot = True
+        self.name = None

        for (k, v) in iterator(kwargs):
            setattr(self, k, v)
@@ -172,9 +175,13 @@ class Module(object):
    # Modules with a higher order are displayed first in help output
    ORDER = 5

+    # Set to False if this is not a primary module
+    PRIMARY = True

    def __init__(self, **kwargs):
        self.errors = []
        self.results = []
+        self.target_file_list = []

        self.status = None
        self.enabled = False
        self.name = self.__class__.__name__
@@ -191,6 +198,11 @@ class Module(object):
        except Exception as e:
            self.error(exception=e)

+        try:
+            self.target_file_list = list(self.config.target_files)
+        except AttributeError as e:
+            pass

    def __del__(self):
        return None
@@ -276,6 +288,24 @@ class Module(object):
        return args

+    def next_file(self):
+        '''
+        Gets the next file to be scanned (including pending extracted files, if applicable).
+        Also re/initializes self.status.
+        '''
+        fp = None
+
+        # Add any pending extracted files to the target_files list and reset the extractor's pending file list
+        self.target_file_list += [self.config.open_file(f) for f in self.extractor.pending]
+        self.extractor.pending = []
+
+        if self.target_file_list:
+            fp = self.target_file_list.pop(0)
+            self.status.clear()
+            self.status.total = fp.length
+
+        return fp

    def clear(self, results=True, errors=True):
        '''
        Clears results and errors lists.
@@ -526,7 +556,8 @@ class Modules(object):
        # Add all loaded modules that marked themselves as enabled to the run_modules list
        for (module, obj) in iterator(self.loaded_modules):
-            if obj.enabled:
+            # Report the results if the module is enabled and if it is a primary module or if it reported any results/errors
+            if obj.enabled and (obj.PRIMARY or obj.results or obj.errors):
                run_modules.append(obj)

        self.arguments = orig_arguments
...
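The new Module.next_file() above centralizes the file-iteration loop that signature.py previously carried itself (see the signature.py hunk at the end of this commit): it folds any files the extractor has queued into target_file_list, resets the status counters, and returns None once everything has been scanned, which is what lets callers drive it with iter(self.next_file, None). A toy stand-in, not binwalk code, showing why the None return value works as the iter() sentinel:

class ToyScanner(object):
    def __init__(self, targets):
        self.targets = list(targets)   # plays the role of Module.target_file_list
        self.pending = []              # plays the role of extractor.pending

    def next_file(self):
        # Newly "extracted" files are appended before the next target is popped
        self.targets += self.pending
        self.pending = []
        return self.targets.pop(0) if self.targets else None

scanner = ToyScanner(['firmware.bin'])          # placeholder file name
for target in iter(scanner.next_file, None):    # stops when next_file() returns None
    print(target)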
@@ -6,3 +6,4 @@ from binwalk.modules.configuration import Configuration
from binwalk.modules.extractor import Extractor
from binwalk.modules.entropy import Entropy
from binwalk.modules.heuristics import HeuristicCompressionAnalyzer
+from binwalk.modules.compression import RawCompression
new file: compression.py

#!/usr/bin/env python

import os
import ctypes
import ctypes.util
from binwalk.core.module import Option, Kwarg, Module

class Deflate(object):
    '''
    Finds and extracts raw deflate compression streams.
    '''

    ENABLED = False
    BLOCK_SIZE = 33*1024
    # To prevent many false positives, only show data that decompressed to a reasonable size and didn't just result in a bunch of NULL bytes
    MIN_DECOMP_SIZE = 32*1024
    DESCRIPTION = "Raw deflate compression stream"

    def __init__(self, module):
        self.module = module

        # The tinfl library is built and installed with binwalk
        self.tinfl = ctypes.cdll.LoadLibrary(ctypes.util.find_library("tinfl"))
        if not self.tinfl:
            raise Exception("Failed to load the tinfl library")

        # Add an extraction rule
        if self.module.extractor.enabled:
            self.module.extractor.add_rule(regex='^%s' % self.DESCRIPTION.lower(), extension="deflate", cmd=self._extractor)

    def pre_scan(self, fp):
        if self.tinfl:
            # Make sure we'll be getting enough data for a good decompression test
            if fp.MAX_TRAILING_SIZE < self.SIZE:
                fp.MAX_TRAILING_SIZE = self.SIZE

            self._deflate_scan(fp)

        return PLUGIN_TERMINATE

    def _extractor(self, file_name):
        if self.tinfl:
            out_file = os.path.splitext(file_name)[0]
            self.tinfl.inflate_raw_file(file_name, out_file)

    def decompress(self, data):
        description = None

        decomp_size = self.tinfl.is_deflated(data, len(data), 0)
        if decomp_size >= self.MIN_DECOMP_SIZE:
            description = self.DESCRIPTION + ', uncompressed size >= %d' % decomp_size

        return description

class RawCompression(Module):

    DECOMPRESSORS = {
        'deflate' : Deflate,
    }

    TITLE = 'Raw Compression'

    CLI = [
        Option(short='T',
               long='deflate',
               kwargs={'enabled' : True, 'decompressor_class' : 'deflate'},
               description='Scan for raw deflate compression streams'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='decompressor_class', default=None),
    ]

    def init(self):
        self.decompressor = self.DECOMPRESSORS[self.decompressor_class](self)

    def run(self):
        for fp in iter(self.next_file, None):
            fp.set_block_size(trail=self.decompressor.BLOCK_SIZE)
            self.header()

            while True:
                (data, dlen) = fp.read_block()
                if not data:
                    break

                for i in range(0, dlen):
                    description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE])
                    if description:
                        self.result(description=description, file=fp, offset=fp.offset+fp.tell()-dlen+i)

                self.status.completed = fp.tell()

            self.footer()
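A note on the offset reported by RawCompression.run() above: after read_block() returns, fp.tell() points just past the dlen bytes that make up the block, so fp.tell() - dlen is the file position where the block began, i is the candidate's position inside that block, and fp.offset re-adds the base offset the BlockFile tracks (the exact offset/length semantics are what the debugging print in this commit is probing). A small worked example with made-up numbers:

# Hypothetical values, only to illustrate the arithmetic used in self.result(...) above.
fp_offset = 0             # fp.offset: base offset the file was opened at
tell_after_read = 2048    # fp.tell(): position after the second read_block()
dlen = 1024               # length of the block just returned
i = 100                   # index of the deflate candidate inside that block

absolute_offset = fp_offset + tell_after_read - dlen + i
assert absolute_offset == 1124   # byte 100 of the block that starts at offset 1024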
@@ -100,6 +100,8 @@ class Configuration(Module):
        Kwarg(name='show_help', default=False),
    ]

+    PRIMARY = False

    def load(self):
        self.target_files = []
...
@@ -22,11 +22,9 @@ class Extractor(Module):
    # Place holder for the extracted file name in the command
    FILE_NAME_PLACEHOLDER = '%e'

-    # Max size of data to read/write at one time when extracting data
-    MAX_READ_SIZE = 10 * 1024 * 1024

    TITLE = 'Extraction'
    ORDER = 9
+    PRIMARY = False

    CLI = [
        Option(short='e',
@@ -95,7 +93,8 @@ class Extractor(Module):
            r.file.size
        except KeyboardInterrupt as e:
            pass
-        except Exception:
+        except Exception as e:
+            print e
            return

        if not r.size:
...
@@ -142,21 +142,7 @@ class Signature(Module):
                break

    def run(self):
-        target_files = self.config.target_files
-
-        while target_files:
-            for fp in target_files:
-                self.header()
-
-                self.status.clear()
-                self.status.total = fp.length
-                self.status.completed = 0
-
-                self.scan_file(fp)
-
-                self.footer()
-
-            # Add any pending extracted files to the target_files list and reset the extractor's pending file list
-            target_files = [self.config.open_file(f) for f in self.extractor.pending]
-            self.extractor.pending = []
+        for fp in iter(self.next_file, None):
+            self.header()
+            self.scan_file(fp)
+            self.footer()