Commit 6161bcf6 by devttys0

Fixed signature scan offset bug, simplified BlockFile code.

parent d06af2df
......@@ -204,13 +204,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
# Passing the entire remaining buffer to libmagic is resource intensive and will
# significantly slow the scan; this value represents a reasonable buffer size to
# pass to libmagic which will not drastically affect scan time.
MAX_TRAILING_SIZE = 8 * 1024
DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024
# Max number of bytes to process at one time. This needs to be large enough to
# limit disk I/O, but small enough to limit the size of processed data blocks.
READ_BLOCK_SIZE = 1 * 1024 * 1024
DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=READ_BLOCK_SIZE, trail=MAX_TRAILING_SIZE, swap=0):
def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
'''
Class constructor.
......@@ -219,17 +219,19 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
@length - Maximum number of bytes to read from the file via self.block_read().
@offset - Offset at which to start reading from the file.
@block - Size of data block to read (excluding any trailing size),
@trail - Size of trailing data to append to the end of each block.
@peek - Size of trailing data to append to the end of each block.
@swap - Swap every n bytes of data.
Returns None.
'''
self.total_read = 0
self.swap_size = swap
self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE
# Python 2.6 doesn't like modes like 'rb' or 'wb'
mode = mode.replace('b', '')
try:
self.size = file_size(fname)
except KeyboardInterrupt as e:
......@@ -259,13 +261,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
elif self.length > self.size:
self.length = self.size
if block > 0:
self.READ_BLOCK_SIZE = block
self.base_block_size = self.READ_BLOCK_SIZE
if block is not None:
self.block_read_size = block
self.base_block_size = self.block_read_size
if trail > 0:
self.MAX_TRAILING_SIZE = trail
self.base_trail_size = self.MAX_TRAILING_SIZE
if peek is not None:
self.block_peek_size = peek
self.base_peek_size = self.block_peek_size
super(self.__class__, self).__init__(fname, mode)
......@@ -276,7 +278,6 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
self._name = fname
self.seek(self.offset)
print self.name, self.offset, self.length, self.total_read
def _swap_data_block(self, block):
'''
......@@ -300,14 +301,14 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
return data
def reset(self):
self.set_block_size(block=self.base_trail_size, trail=self.base_trail_size)
self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
self.seek(self.offset)
def set_block_size(self, block=None, trail=None):
def set_block_size(self, block=None, peek=None):
if block is not None:
self.READ_BLOCK_SIZE = block
if trail is not None:
self.MAX_TRAILING_SIZE = trail
self.block_read_size = block
if peek is not None:
self.block_peek_size = peek
def write(self, data):
'''
......@@ -357,13 +358,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
return self._swap_data_block(bytes2str(data))
def _internal_read(self, n=-1):
def peek(self, n=-1):
'''
Same as self.read, but doesn't increment self.total_read.
For use by self.read_block.
Peeks at data in file.
'''
pos = self.tell()
data = self.read(n)
self.total_read -= len(data)
self.seek(pos)
return data
def seek(self, n, whence=os.SEEK_SET):
......@@ -382,32 +383,9 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
Returns a tuple of (str(file block data), block data length).
'''
dlen = 0
data = None
rsize = self.READ_BLOCK_SIZE + self.MAX_TRAILING_SIZE
# Do the read. Must use self._internal_read so that the total_read value is untouched (we update this ourselves later)
data = self._internal_read(rsize)
if data:
# Get the actual length of the read in data
dlen = len(data)
# Calculate how far back we need to seek to pick up at the self.READ_BLOCK_SIZE offset
seek_offset = dlen - self.READ_BLOCK_SIZE
# If the actual read size was less than self.READ_BLOCK_SIZE seek backwards zero bytes
if seek_offset < 0:
seek_offset = 0
# Read in READ_BLOCK_SIZE plus MAX_TRAILING_SIZE bytes, but return a max dlen value
# Return a max dlen value of READ_BLOCK_SIZE. This ensures that there is a MAX_TRAILING_SIZE
# buffer at the end of the returned data in case a signature is found at or near data[READ_BLOCK_SIZE].
if dlen == rsize:
dlen = self.READ_BLOCK_SIZE
# Seek to the self.total_read offset so the next read can pick up where this one left off.
self.seek(self.tell() - seek_offset)
data = self.read(self.block_read_size)
dlen = len(data)
data += self.peek(self.block_peek_size)
return (data, dlen)
......@@ -165,7 +165,7 @@ class Plotter(Module):
self._print("Generating data points for %s" % fp.name)
# We don't need any extra data from BlockFile
fp.MAX_TRAILING_SIZE = 0
fp.set_block_size(peek=0)
while True:
(data, dlen) = fp.read_block()
......@@ -247,7 +247,7 @@ class Plotter(Module):
ygrid.scale(12.8, 12.8, 12.8)
zgrid.scale(12.8, 12.8, 12.8)
for fd in self.config.target_files:
for fd in iter(self.next_file, None):
data_points = self._generate_data_points(fd)
self._print("Generating plot points from %d data points" % len(data_points))
......
......@@ -31,8 +31,8 @@ class Deflate(object):
def pre_scan(self, fp):
if self.tinfl:
# Make sure we'll be getting enough data for a good decompression test
if fp.MAX_TRAILING_SIZE < self.SIZE:
fp.MAX_TRAILING_SIZE = self.SIZE
if fp.block_read_size < self.SIZE:
fp.set_block_size(peek=self.SIZE)
self._deflate_scan(fp)
......@@ -61,7 +61,7 @@ class RawCompression(Module):
TITLE = 'Raw Compression'
CLI = [
Option(short='T',
Option(short='X',
long='deflate',
kwargs={'enabled' : True, 'decompressor_class' : 'deflate'},
description='Scan for raw deflate compression streams'),
......@@ -78,7 +78,7 @@ class RawCompression(Module):
def run(self):
for fp in iter(self.next_file, None):
fp.set_block_size(trail=self.decompressor.BLOCK_SIZE)
fp.set_block_size(peek=self.decompressor.BLOCK_SIZE)
self.header()
......@@ -90,9 +90,9 @@ class RawCompression(Module):
for i in range(0, dlen):
description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE])
if description:
self.result(description=description, file=fp, offset=fp.offset+fp.tell()-dlen+i)
self.result(description=description, file=fp, offset=fp.tell()-dlen+i)
self.status.completed = fp.tell()
self.status.completed = fp.tell() - fp.offset
self.footer()
......@@ -154,7 +154,7 @@ class Configuration(Module):
if len(self.target_files) > 1 and not self.verbose:
self.verbose = True
def open_file(self, fname, length=None, offset=None, swap=None, block=0, trail=0):
def open_file(self, fname, length=None, offset=None, swap=None, block=None, peek=None):
'''
Opens the specified file with all pertinent configuration settings.
'''
......@@ -165,7 +165,7 @@ class Configuration(Module):
if swap is None:
swap = self.swap_size
return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, trail=trail)
return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek)
def _open_target_files(self):
'''
......
......@@ -86,7 +86,7 @@ class Entropy(Module):
def run(self):
from pyqtgraph.Qt import QtGui
for fp in self.config.target_files:
for fp in iter(self.next_file, None):
if self.display_results:
self.header()
......
......@@ -485,7 +485,7 @@ class Extractor(Module):
try:
# Open the target file and seek to the offset
fdin = BlockFile(file_name, 'r', length=size, offset=offset)
fdin = self.config.open_file(file_name, length=size, offset=offset)
# Open the output file
try:
......
......@@ -332,12 +332,12 @@ class HashMatch(Module):
'''
Main module method.
'''
needle = self.config.target_files[0].name
needle = self.next_file().name
haystack = []
self.header()
for fp in self.config.target_files[1:]:
for fp in iter(self.next_file, None):
haystack.append(fp.name)
if os.path.isfile(needle):
......
......@@ -132,7 +132,7 @@ class HeuristicCompressionAnalyzer(Module):
self.blocks[result.file.name][-1].end = result.offset - self.BLOCK_OFFSET
def run(self):
for fp in self.config.target_files:
for fp in iter(self.next_file, None):
if has_key(self.blocks, fp.name):
......
......@@ -98,7 +98,7 @@ class Signature(Module):
break
current_block_offset = 0
block_start = fp.offset + fp.tell() - dlen
block_start = fp.tell() - dlen
self.status.completed = block_start - fp.offset
for candidate_offset in self.parser.find_signature_candidates(data, dlen):
......@@ -111,7 +111,7 @@ class Signature(Module):
continue
# In python3 we need a bytes object to pass to magic.buffer
candidate_data = str2bytes(data[candidate_offset:candidate_offset+fp.MAX_TRAILING_SIZE])
candidate_data = str2bytes(data[candidate_offset:candidate_offset+fp.block_peek_size])
# Pass the data to libmagic, and split out multiple results into a list
magic_result = self.magic.buffer(candidate_data)
......
......@@ -33,7 +33,7 @@ class Plugin(object):
fp_out = BlockFile(out_name, 'w')
# Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored
fp_in = self.module.config.open_file(fname, offset=0, length=0)
fp_in.MAX_TRAILING_SIZE = 0
fp_in.set_block_size(peek=0)
i = 0
while i < fp_in.length:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment