Commit 6161bcf6 by devttys0

Fixed signature scan offset bug, simplified BlockFile code.

parent d06af2df
......@@ -204,13 +204,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
# Passing the entire remaining buffer to libmagic is resource intensive and will
# significantly slow the scan; this value represents a reasonable buffer size to
# pass to libmagic which will not drastically affect scan time.
MAX_TRAILING_SIZE = 8 * 1024
DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024
# Max number of bytes to process at one time. This needs to be large enough to
# limit disk I/O, but small enough to limit the size of processed data blocks.
READ_BLOCK_SIZE = 1 * 1024 * 1024
DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=READ_BLOCK_SIZE, trail=MAX_TRAILING_SIZE, swap=0):
def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
'''
Class constructor.
......@@ -219,17 +219,19 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
@length - Maximum number of bytes to read from the file via self.block_read().
@offset - Offset at which to start reading from the file.
@block - Size of data block to read (excluding any trailing size),
@trail - Size of trailing data to append to the end of each block.
@peek - Size of trailing data to append to the end of each block.
@swap - Swap every n bytes of data.
Returns None.
'''
self.total_read = 0
self.swap_size = swap
self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE
# Python 2.6 doesn't like modes like 'rb' or 'wb'
mode = mode.replace('b', '')
try:
self.size = file_size(fname)
except KeyboardInterrupt as e:
......@@ -259,13 +261,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
elif self.length > self.size:
self.length = self.size
if block > 0:
self.READ_BLOCK_SIZE = block
self.base_block_size = self.READ_BLOCK_SIZE
if block is not None:
self.block_read_size = block
self.base_block_size = self.block_read_size
if trail > 0:
self.MAX_TRAILING_SIZE = trail
self.base_trail_size = self.MAX_TRAILING_SIZE
if peek is not None:
self.block_peek_size = peek
self.base_peek_size = self.block_peek_size
super(self.__class__, self).__init__(fname, mode)
......@@ -276,7 +278,6 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
self._name = fname
self.seek(self.offset)
print self.name, self.offset, self.length, self.total_read
def _swap_data_block(self, block):
'''
......@@ -300,14 +301,14 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
return data
def reset(self):
self.set_block_size(block=self.base_trail_size, trail=self.base_trail_size)
self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
self.seek(self.offset)
def set_block_size(self, block=None, trail=None):
def set_block_size(self, block=None, peek=None):
if block is not None:
self.READ_BLOCK_SIZE = block
if trail is not None:
self.MAX_TRAILING_SIZE = trail
self.block_read_size = block
if peek is not None:
self.block_peek_size = peek
def write(self, data):
'''
......@@ -357,13 +358,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
return self._swap_data_block(bytes2str(data))
def _internal_read(self, n=-1):
def peek(self, n=-1):
'''
Same as self.read, but doesn't increment self.total_read.
For use by self.read_block.
Peeks at data in file.
'''
pos = self.tell()
data = self.read(n)
self.total_read -= len(data)
self.seek(pos)
return data
def seek(self, n, whence=os.SEEK_SET):
......@@ -382,32 +383,9 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
Returns a tuple of (str(file block data), block data length).
'''
dlen = 0
data = None
rsize = self.READ_BLOCK_SIZE + self.MAX_TRAILING_SIZE
# Do the read. Must use self._internal_read so that the total_read value is untouched (we update this ourselves later)
data = self._internal_read(rsize)
if data:
# Get the actual length of the read in data
dlen = len(data)
# Calculate how far back we need to seek to pick up at the self.READ_BLOCK_SIZE offset
seek_offset = dlen - self.READ_BLOCK_SIZE
# If the actual read size was less than self.READ_BLOCK_SIZE seek backwards zero bytes
if seek_offset < 0:
seek_offset = 0
# Read in READ_BLOCK_SIZE plus MAX_TRAILING_SIZE bytes, but return a max dlen value
# Return a max dlen value of READ_BLOCK_SIZE. This ensures that there is a MAX_TRAILING_SIZE
# buffer at the end of the returned data in case a signature is found at or near data[READ_BLOCK_SIZE].
if dlen == rsize:
dlen = self.READ_BLOCK_SIZE
# Seek to the self.total_read offset so the next read can pick up where this one left off.
self.seek(self.tell() - seek_offset)
data = self.read(self.block_read_size)
dlen = len(data)
data += self.peek(self.block_peek_size)
return (data, dlen)
......@@ -165,7 +165,7 @@ class Plotter(Module):
self._print("Generating data points for %s" % fp.name)
# We don't need any extra data from BlockFile
fp.MAX_TRAILING_SIZE = 0
fp.set_block_size(peek=0)
while True:
(data, dlen) = fp.read_block()
......@@ -247,7 +247,7 @@ class Plotter(Module):
ygrid.scale(12.8, 12.8, 12.8)
zgrid.scale(12.8, 12.8, 12.8)
for fd in self.config.target_files:
for fd in iter(self.next_file, None):
data_points = self._generate_data_points(fd)
self._print("Generating plot points from %d data points" % len(data_points))
......
......@@ -31,8 +31,8 @@ class Deflate(object):
def pre_scan(self, fp):
if self.tinfl:
# Make sure we'll be getting enough data for a good decompression test
if fp.MAX_TRAILING_SIZE < self.SIZE:
fp.MAX_TRAILING_SIZE = self.SIZE
if fp.block_read_size < self.SIZE:
fp.set_block_size(peek=self.SIZE)
self._deflate_scan(fp)
......@@ -61,7 +61,7 @@ class RawCompression(Module):
TITLE = 'Raw Compression'
CLI = [
Option(short='T',
Option(short='X',
long='deflate',
kwargs={'enabled' : True, 'decompressor_class' : 'deflate'},
description='Scan for raw deflate compression streams'),
......@@ -78,7 +78,7 @@ class RawCompression(Module):
def run(self):
for fp in iter(self.next_file, None):
fp.set_block_size(trail=self.decompressor.BLOCK_SIZE)
fp.set_block_size(peek=self.decompressor.BLOCK_SIZE)
self.header()
......@@ -90,9 +90,9 @@ class RawCompression(Module):
for i in range(0, dlen):
description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE])
if description:
self.result(description=description, file=fp, offset=fp.offset+fp.tell()-dlen+i)
self.result(description=description, file=fp, offset=fp.tell()-dlen+i)
self.status.completed = fp.tell()
self.status.completed = fp.tell() - fp.offset
self.footer()
......@@ -154,7 +154,7 @@ class Configuration(Module):
if len(self.target_files) > 1 and not self.verbose:
self.verbose = True
def open_file(self, fname, length=None, offset=None, swap=None, block=0, trail=0):
def open_file(self, fname, length=None, offset=None, swap=None, block=None, peek=None):
'''
Opens the specified file with all pertinent configuration settings.
'''
......@@ -165,7 +165,7 @@ class Configuration(Module):
if swap is None:
swap = self.swap_size
return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, trail=trail)
return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek)
def _open_target_files(self):
'''
......
......@@ -86,7 +86,7 @@ class Entropy(Module):
def run(self):
from pyqtgraph.Qt import QtGui
for fp in self.config.target_files:
for fp in iter(self.next_file, None):
if self.display_results:
self.header()
......
......@@ -485,7 +485,7 @@ class Extractor(Module):
try:
# Open the target file and seek to the offset
fdin = BlockFile(file_name, 'r', length=size, offset=offset)
fdin = self.config.open_file(file_name, length=size, offset=offset)
# Open the output file
try:
......
......@@ -332,12 +332,12 @@ class HashMatch(Module):
'''
Main module method.
'''
needle = self.config.target_files[0].name
needle = self.next_file().name
haystack = []
self.header()
for fp in self.config.target_files[1:]:
for fp in iter(self.next_file, None):
haystack.append(fp.name)
if os.path.isfile(needle):
......
......@@ -132,7 +132,7 @@ class HeuristicCompressionAnalyzer(Module):
self.blocks[result.file.name][-1].end = result.offset - self.BLOCK_OFFSET
def run(self):
for fp in self.config.target_files:
for fp in iter(self.next_file, None):
if has_key(self.blocks, fp.name):
......
......@@ -98,7 +98,7 @@ class Signature(Module):
break
current_block_offset = 0
block_start = fp.offset + fp.tell() - dlen
block_start = fp.tell() - dlen
self.status.completed = block_start - fp.offset
for candidate_offset in self.parser.find_signature_candidates(data, dlen):
......@@ -111,7 +111,7 @@ class Signature(Module):
continue
# In python3 we need a bytes object to pass to magic.buffer
candidate_data = str2bytes(data[candidate_offset:candidate_offset+fp.MAX_TRAILING_SIZE])
candidate_data = str2bytes(data[candidate_offset:candidate_offset+fp.block_peek_size])
# Pass the data to libmagic, and split out multiple results into a list
magic_result = self.magic.buffer(candidate_data)
......
......@@ -33,7 +33,7 @@ class Plugin(object):
fp_out = BlockFile(out_name, 'w')
# Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored
fp_in = self.module.config.open_file(fname, offset=0, length=0)
fp_in.MAX_TRAILING_SIZE = 0
fp_in.set_block_size(peek=0)
i = 0
while i < fp_in.length:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment