Commit 6161bcf6 by devttys0

Fixed signature scan offset bug, simplified BlockFile code.

parent d06af2df
...@@ -204,13 +204,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS): ...@@ -204,13 +204,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
# Passing the entire remaining buffer to libmagic is resource intensive and will # Passing the entire remaining buffer to libmagic is resource intensive and will
# significantly slow the scan; this value represents a reasonable buffer size to # significantly slow the scan; this value represents a reasonable buffer size to
# pass to libmagic which will not drastically affect scan time. # pass to libmagic which will not drastically affect scan time.
MAX_TRAILING_SIZE = 8 * 1024 DEFAULT_BLOCK_PEEK_SIZE = 8 * 1024
# Max number of bytes to process at one time. This needs to be large enough to # Max number of bytes to process at one time. This needs to be large enough to
# limit disk I/O, but small enough to limit the size of processed data blocks. # limit disk I/O, but small enough to limit the size of processed data blocks.
READ_BLOCK_SIZE = 1 * 1024 * 1024 DEFAULT_BLOCK_READ_SIZE = 1 * 1024 * 1024
def __init__(self, fname, mode='r', length=0, offset=0, block=READ_BLOCK_SIZE, trail=MAX_TRAILING_SIZE, swap=0): def __init__(self, fname, mode='r', length=0, offset=0, block=DEFAULT_BLOCK_READ_SIZE, peek=DEFAULT_BLOCK_PEEK_SIZE, swap=0):
''' '''
Class constructor. Class constructor.
...@@ -219,13 +219,15 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS): ...@@ -219,13 +219,15 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
@length - Maximum number of bytes to read from the file via self.block_read(). @length - Maximum number of bytes to read from the file via self.block_read().
@offset - Offset at which to start reading from the file. @offset - Offset at which to start reading from the file.
@block - Size of data block to read (excluding any trailing size), @block - Size of data block to read (excluding any trailing size),
@trail - Size of trailing data to append to the end of each block. @peek - Size of trailing data to append to the end of each block.
@swap - Swap every n bytes of data. @swap - Swap every n bytes of data.
Returns None. Returns None.
''' '''
self.total_read = 0 self.total_read = 0
self.swap_size = swap self.swap_size = swap
self.block_read_size = self.DEFAULT_BLOCK_READ_SIZE
self.block_peek_size = self.DEFAULT_BLOCK_PEEK_SIZE
# Python 2.6 doesn't like modes like 'rb' or 'wb' # Python 2.6 doesn't like modes like 'rb' or 'wb'
mode = mode.replace('b', '') mode = mode.replace('b', '')
...@@ -259,13 +261,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS): ...@@ -259,13 +261,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
elif self.length > self.size: elif self.length > self.size:
self.length = self.size self.length = self.size
if block > 0: if block is not None:
self.READ_BLOCK_SIZE = block self.block_read_size = block
self.base_block_size = self.READ_BLOCK_SIZE self.base_block_size = self.block_read_size
if trail > 0: if peek is not None:
self.MAX_TRAILING_SIZE = trail self.block_peek_size = peek
self.base_trail_size = self.MAX_TRAILING_SIZE self.base_peek_size = self.block_peek_size
super(self.__class__, self).__init__(fname, mode) super(self.__class__, self).__init__(fname, mode)
...@@ -276,7 +278,6 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS): ...@@ -276,7 +278,6 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
self._name = fname self._name = fname
self.seek(self.offset) self.seek(self.offset)
print self.name, self.offset, self.length, self.total_read
def _swap_data_block(self, block): def _swap_data_block(self, block):
''' '''
...@@ -300,14 +301,14 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS): ...@@ -300,14 +301,14 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
return data return data
def reset(self): def reset(self):
self.set_block_size(block=self.base_trail_size, trail=self.base_trail_size) self.set_block_size(block=self.base_block_size, peek=self.base_peek_size)
self.seek(self.offset) self.seek(self.offset)
def set_block_size(self, block=None, trail=None): def set_block_size(self, block=None, peek=None):
if block is not None: if block is not None:
self.READ_BLOCK_SIZE = block self.block_read_size = block
if trail is not None: if peek is not None:
self.MAX_TRAILING_SIZE = trail self.block_peek_size = peek
def write(self, data): def write(self, data):
''' '''
...@@ -357,13 +358,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS): ...@@ -357,13 +358,13 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
return self._swap_data_block(bytes2str(data)) return self._swap_data_block(bytes2str(data))
def _internal_read(self, n=-1): def peek(self, n=-1):
''' '''
Same as self.read, but doesn't increment self.total_read. Peeks at data in file.
For use by self.read_block.
''' '''
pos = self.tell()
data = self.read(n) data = self.read(n)
self.total_read -= len(data) self.seek(pos)
return data return data
def seek(self, n, whence=os.SEEK_SET): def seek(self, n, whence=os.SEEK_SET):
...@@ -382,32 +383,9 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS): ...@@ -382,32 +383,9 @@ class BlockFile(BLOCK_FILE_PARENT_CLASS):
Returns a tuple of (str(file block data), block data length). Returns a tuple of (str(file block data), block data length).
''' '''
dlen = 0 data = self.read(self.block_read_size)
data = None
rsize = self.READ_BLOCK_SIZE + self.MAX_TRAILING_SIZE
# Do the read. Must use self._internal_read so that the total_read value is untouched (we update this ourselves later)
data = self._internal_read(rsize)
if data:
# Get the actual length of the read in data
dlen = len(data) dlen = len(data)
data += self.peek(self.block_peek_size)
# Calculate how far back we need to seek to pick up at the self.READ_BLOCK_SIZE offset
seek_offset = dlen - self.READ_BLOCK_SIZE
# If the actual read size was less than self.READ_BLOCK_SIZE seek backwards zero bytes
if seek_offset < 0:
seek_offset = 0
# Read in READ_BLOCK_SIZE plus MAX_TRAILING_SIZE bytes, but return a max dlen value
# Return a max dlen value of READ_BLOCK_SIZE. This ensures that there is a MAX_TRAILING_SIZE
# buffer at the end of the returned data in case a signature is found at or near data[READ_BLOCK_SIZE].
if dlen == rsize:
dlen = self.READ_BLOCK_SIZE
# Seek to the self.total_read offset so the next read can pick up where this one left off.
self.seek(self.tell() - seek_offset)
return (data, dlen) return (data, dlen)
...@@ -165,7 +165,7 @@ class Plotter(Module): ...@@ -165,7 +165,7 @@ class Plotter(Module):
self._print("Generating data points for %s" % fp.name) self._print("Generating data points for %s" % fp.name)
# We don't need any extra data from BlockFile # We don't need any extra data from BlockFile
fp.MAX_TRAILING_SIZE = 0 fp.set_block_size(peek=0)
while True: while True:
(data, dlen) = fp.read_block() (data, dlen) = fp.read_block()
...@@ -247,7 +247,7 @@ class Plotter(Module): ...@@ -247,7 +247,7 @@ class Plotter(Module):
ygrid.scale(12.8, 12.8, 12.8) ygrid.scale(12.8, 12.8, 12.8)
zgrid.scale(12.8, 12.8, 12.8) zgrid.scale(12.8, 12.8, 12.8)
for fd in self.config.target_files: for fd in iter(self.next_file, None):
data_points = self._generate_data_points(fd) data_points = self._generate_data_points(fd)
self._print("Generating plot points from %d data points" % len(data_points)) self._print("Generating plot points from %d data points" % len(data_points))
......
...@@ -31,8 +31,8 @@ class Deflate(object): ...@@ -31,8 +31,8 @@ class Deflate(object):
def pre_scan(self, fp): def pre_scan(self, fp):
if self.tinfl: if self.tinfl:
# Make sure we'll be getting enough data for a good decompression test # Make sure we'll be getting enough data for a good decompression test
if fp.MAX_TRAILING_SIZE < self.SIZE: if fp.block_read_size < self.SIZE:
fp.MAX_TRAILING_SIZE = self.SIZE fp.set_block_size(peek=self.SIZE)
self._deflate_scan(fp) self._deflate_scan(fp)
...@@ -61,7 +61,7 @@ class RawCompression(Module): ...@@ -61,7 +61,7 @@ class RawCompression(Module):
TITLE = 'Raw Compression' TITLE = 'Raw Compression'
CLI = [ CLI = [
Option(short='T', Option(short='X',
long='deflate', long='deflate',
kwargs={'enabled' : True, 'decompressor_class' : 'deflate'}, kwargs={'enabled' : True, 'decompressor_class' : 'deflate'},
description='Scan for raw deflate compression streams'), description='Scan for raw deflate compression streams'),
...@@ -78,7 +78,7 @@ class RawCompression(Module): ...@@ -78,7 +78,7 @@ class RawCompression(Module):
def run(self): def run(self):
for fp in iter(self.next_file, None): for fp in iter(self.next_file, None):
fp.set_block_size(trail=self.decompressor.BLOCK_SIZE) fp.set_block_size(peek=self.decompressor.BLOCK_SIZE)
self.header() self.header()
...@@ -90,9 +90,9 @@ class RawCompression(Module): ...@@ -90,9 +90,9 @@ class RawCompression(Module):
for i in range(0, dlen): for i in range(0, dlen):
description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE]) description = self.decompressor.decompress(data[i:i+self.decompressor.BLOCK_SIZE])
if description: if description:
self.result(description=description, file=fp, offset=fp.offset+fp.tell()-dlen+i) self.result(description=description, file=fp, offset=fp.tell()-dlen+i)
self.status.completed = fp.tell() self.status.completed = fp.tell() - fp.offset
self.footer() self.footer()
...@@ -154,7 +154,7 @@ class Configuration(Module): ...@@ -154,7 +154,7 @@ class Configuration(Module):
if len(self.target_files) > 1 and not self.verbose: if len(self.target_files) > 1 and not self.verbose:
self.verbose = True self.verbose = True
def open_file(self, fname, length=None, offset=None, swap=None, block=0, trail=0): def open_file(self, fname, length=None, offset=None, swap=None, block=None, peek=None):
''' '''
Opens the specified file with all pertinent configuration settings. Opens the specified file with all pertinent configuration settings.
''' '''
...@@ -165,7 +165,7 @@ class Configuration(Module): ...@@ -165,7 +165,7 @@ class Configuration(Module):
if swap is None: if swap is None:
swap = self.swap_size swap = self.swap_size
return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, trail=trail) return binwalk.core.common.BlockFile(fname, length=length, offset=offset, swap=swap, block=block, peek=peek)
def _open_target_files(self): def _open_target_files(self):
''' '''
......
...@@ -86,7 +86,7 @@ class Entropy(Module): ...@@ -86,7 +86,7 @@ class Entropy(Module):
def run(self): def run(self):
from pyqtgraph.Qt import QtGui from pyqtgraph.Qt import QtGui
for fp in self.config.target_files: for fp in iter(self.next_file, None):
if self.display_results: if self.display_results:
self.header() self.header()
......
...@@ -485,7 +485,7 @@ class Extractor(Module): ...@@ -485,7 +485,7 @@ class Extractor(Module):
try: try:
# Open the target file and seek to the offset # Open the target file and seek to the offset
fdin = BlockFile(file_name, 'r', length=size, offset=offset) fdin = self.config.open_file(file_name, length=size, offset=offset)
# Open the output file # Open the output file
try: try:
......
...@@ -332,12 +332,12 @@ class HashMatch(Module): ...@@ -332,12 +332,12 @@ class HashMatch(Module):
''' '''
Main module method. Main module method.
''' '''
needle = self.config.target_files[0].name needle = self.next_file().name
haystack = [] haystack = []
self.header() self.header()
for fp in self.config.target_files[1:]: for fp in iter(self.next_file, None):
haystack.append(fp.name) haystack.append(fp.name)
if os.path.isfile(needle): if os.path.isfile(needle):
......
...@@ -132,7 +132,7 @@ class HeuristicCompressionAnalyzer(Module): ...@@ -132,7 +132,7 @@ class HeuristicCompressionAnalyzer(Module):
self.blocks[result.file.name][-1].end = result.offset - self.BLOCK_OFFSET self.blocks[result.file.name][-1].end = result.offset - self.BLOCK_OFFSET
def run(self): def run(self):
for fp in self.config.target_files: for fp in iter(self.next_file, None):
if has_key(self.blocks, fp.name): if has_key(self.blocks, fp.name):
......
...@@ -98,7 +98,7 @@ class Signature(Module): ...@@ -98,7 +98,7 @@ class Signature(Module):
break break
current_block_offset = 0 current_block_offset = 0
block_start = fp.offset + fp.tell() - dlen block_start = fp.tell() - dlen
self.status.completed = block_start - fp.offset self.status.completed = block_start - fp.offset
for candidate_offset in self.parser.find_signature_candidates(data, dlen): for candidate_offset in self.parser.find_signature_candidates(data, dlen):
...@@ -111,7 +111,7 @@ class Signature(Module): ...@@ -111,7 +111,7 @@ class Signature(Module):
continue continue
# In python3 we need a bytes object to pass to magic.buffer # In python3 we need a bytes object to pass to magic.buffer
candidate_data = str2bytes(data[candidate_offset:candidate_offset+fp.MAX_TRAILING_SIZE]) candidate_data = str2bytes(data[candidate_offset:candidate_offset+fp.block_peek_size])
# Pass the data to libmagic, and split out multiple results into a list # Pass the data to libmagic, and split out multiple results into a list
magic_result = self.magic.buffer(candidate_data) magic_result = self.magic.buffer(candidate_data)
......
...@@ -33,7 +33,7 @@ class Plugin(object): ...@@ -33,7 +33,7 @@ class Plugin(object):
fp_out = BlockFile(out_name, 'w') fp_out = BlockFile(out_name, 'w')
# Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored # Use self.module.config.open_file here to ensure that other config settings (such as byte-swapping) are honored
fp_in = self.module.config.open_file(fname, offset=0, length=0) fp_in = self.module.config.open_file(fname, offset=0, length=0)
fp_in.MAX_TRAILING_SIZE = 0 fp_in.set_block_size(peek=0)
i = 0 i = 0
while i < fp_in.length: while i < fp_in.length:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment