From ae122ee86b7d4870fe0a59d8734a9d8a133697ad Mon Sep 17 00:00:00 2001 From: devttys0 <heffnercj@gmail.com> Date: Fri, 13 Dec 2013 08:11:15 -0500 Subject: [PATCH] Initial move of hexdiff.py/plotter.py to modules. --- src/binwalk/hexdiff.py | 218 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- src/binwalk/modules/binvis.py | 310 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/binwalk/modules/hexdiff.py | 250 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 560 insertions(+), 218 deletions(-) delete mode 100644 src/binwalk/hexdiff.py create mode 100644 src/binwalk/modules/binvis.py create mode 100644 src/binwalk/modules/hexdiff.py diff --git a/src/binwalk/hexdiff.py b/src/binwalk/hexdiff.py deleted file mode 100644 index 705d5dd..0000000 --- a/src/binwalk/hexdiff.py +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import curses -import platform -import binwalk.common as common -from binwalk.compat import * - -class HexDiff(object): - - ALL_SAME = 0 - ALL_DIFF = 1 - SOME_DIFF = 2 - - DEFAULT_DIFF_SIZE = 0x100 - DEFAULT_BLOCK_SIZE = 16 - - COLORS = { - 'red' : '31', - 'green' : '32', - 'blue' : '34', - } - - def __init__(self, binwalk=None): - self.block_hex = "" - self.printed_alt_text = False - - if binwalk: - self._pprint = binwalk.display._pprint - self._show_header = binwalk.display.header - self._footer = binwalk.display.footer - self._display_result = binwalk.display.results - self._grep = binwalk.filter.grep - else: - self._pprint = sys.stdout.write - self._show_header = self._print - self._footer = self._simple_footer - self._display_result = self._print - self._grep = None - - if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty() and platform.system() != 'Windows': - curses.setupterm() - self.colorize = self._colorize - else: - self.colorize = self._no_colorize - - def _no_colorize(self, c, color="red", bold=True): - return c - - def _colorize(self, c, color="red", bold=True): - attr = [] - - attr.append(self.COLORS[color]) - if bold: - attr.append('1') - - return "\x1b[%sm%s\x1b[0m" % (';'.join(attr), c) - - def _print_block_hex(self, alt_text="*"): - printed = False - - if self._grep is None or self._grep(self.block_hex): - self._pprint(self.block_hex) - self.printed_alt_text = False - printed = True - elif not self.printed_alt_text: - self._pprint("%s\n" % alt_text) - self.printed_alt_text = True - printed = True - - self.block_hex = "" - return printed - - def _build_block(self, c, highlight=None): - if highlight == self.ALL_DIFF: - self.block_hex += self.colorize(c, color="red") - elif highlight == self.ALL_SAME: - self.block_hex += self.colorize(c, color="green") - elif highlight == self.SOME_DIFF: - self.block_hex += self.colorize(c, color="blue") - else: - self.block_hex += c - - def _simple_footer(self): - print("") - - def _header(self, files, block): - header = "OFFSET " - for i in range(0, len(files)): - f = files[i] - header += "%s" % os.path.basename(f) - if i != len(files)-1: - header += " " * ((block*4) + 10 - len(os.path.basename(f))) - self._show_header(header=header) - - def display(self, files, offset=0, size=DEFAULT_DIFF_SIZE, block=DEFAULT_BLOCK_SIZE, show_first_only=False): - i = 0 - total = 0 - fps = [] - data = {} - delim = '/' - - # If negative offset, then we're going that far back from the end of the file - if offset < 0: - size = offset * -1 - - if show_first_only: - self._header([files[0]], block) - else: - self._header(files, block) - - if common.BlockFile.READ_BLOCK_SIZE < block: - read_block_size = block - else: - read_block_size = common.BlockFile.READ_BLOCK_SIZE - - for f in files: - fp = common.BlockFile(f, 'r', length=size, offset=offset) - fp.READ_BLOCK_SIZE = read_block_size - fp.MAX_TRAILING_SIZE = 0 - fps.append(fp) - - # BlockFile handles calculation of negative offsets, if one was specified - offset = fps[0].offset - - while total < size: - i = 0 - files_finished = 0 - - for fp in fps: - (ddata, dlen) = fp.read_block() - data[fp.name] = ddata - if not ddata or dlen == 0: - files_finished += 1 - - if files_finished == len(fps): - break - - while i < read_block_size and (total+i) < size: - diff_same = {} - alt_text = "*" + " " * 6 - - self._build_block("%.08X " % (total + i + offset)) - - # For each byte in this block, is the byte the same in all files, the same in some files, or different in all files? - for j in range(0, block): - byte_list = [] - - try: - c = data[files[0]][j+i] - except: - c = None - - for f in files: - try: - c = data[f][j+i] - except Exception as e: - c = None - - if c not in byte_list: - byte_list.append(c) - - if len(byte_list) == 1: - diff_same[j] = self.ALL_SAME - elif len(byte_list) == len(files): - diff_same[j] = self.ALL_DIFF - else: - diff_same[j] = self.SOME_DIFF - - for index in range(0, len(files)): - if show_first_only and index > 0: - break - - f = files[index] - - alt_text += " " * (3 + (3 * block) + 3 + block + 3) - alt_text += delim - - for j in range(0, block): - try: - self._build_block("%.2X " % ord(data[f][j+i]), highlight=diff_same[j]) - except Exception as e: - self._build_block(" ") - - if (j+1) == block: - self._build_block(" |") - for k in range(0, block): - try: - if data[f][k+i] in string.printable and data[f][k+i] not in string.whitespace: - self._build_block(data[f][k+i], highlight=diff_same[k]) - else: - self._build_block('.', highlight=diff_same[k]) - except: - self._build_block(' ') - - if index == len(files)-1 or (show_first_only and index == 0): - self._build_block("|\n") - else: - self._build_block('| %s ' % delim) - - if self._print_block_hex(alt_text=alt_text[:-1].strip()): - if delim == '\\': - delim = '/' - else: - delim = '\\' - - i += block - total += read_block_size - - for fp in fps: - fp.close() - - self._footer() - -if __name__ == "__main__": - HexDiff().display(sys.argv[1:]) - diff --git a/src/binwalk/modules/binvis.py b/src/binwalk/modules/binvis.py new file mode 100644 index 0000000..7dbb3eb --- /dev/null +++ b/src/binwalk/modules/binvis.py @@ -0,0 +1,310 @@ +import os +import binwalk.module +from binwalk.compat import * +from binwalk.common import BlockFile + +class Plotter(object): + ''' + Base class for plotting binaries in Qt. + Other plotter classes are derived from this. + ''' + VIEW_DISTANCE = 1024 + MAX_2D_PLOT_POINTS = 12500 + MAX_3D_PLOT_POINTS = 25000 + + NAME = "Binary Visualization" + + CLI = [ + binwalk.module.ModuleOption(short='3', + long='3D', + kwargs={'axis' : 3, 'enabled' : True}, + description='Generate a 3D binary visualization'), + binwalk.module.ModuleOption(short='2', + long='2D', + kwargs={'axis' : 2, 'enabled' : True}, + description='Project data points onto 3D cube walls only'), + binwalk.module.ModuleOption(short='Z', + long='max-points', + type=int, + kwargs={'max_points' : 0}, + nargs=1, + description='Set the maximum number of plotted data points'), + binwalk.module.ModuleOption(short='V', + long='show-grids', + kwargs={'show_grids' : True}, + description='Display the x-y-z grids in the resulting plot'), + ] + + KWARGS = [ + binwalk.module.ModuleKwarg(name='axis', default=3), + binwalk.module.ModuleKwarg(name='max_points', default=0), + binwalk.module.ModuleKwarg(name='show_grids', default=False), + ] + + def __init__(self, **kwargs): + ''' + Class constructor. + + @axis - Set to 2 for 2D plotting, 3 for 3D plotting. + @max_points - The maximum number of data points to display. + @show_grids - Set to True to display x-y-z grids. + + Returns None. + ''' + import pyqtgraph.opengl as gl + from pyqtgraph.Qt import QtGui + + binwalk.module.process_kwargs(self, kwargs) + + self.verbose = self.config.verbose + self.files = self.config.target_files + self.offset = self.config.offset + self.length = self.config.length + self.plane_count = -1 + self.plot_points = None + + if self.axis == 2: + self.MAX_PLOT_POINTS = self.MAX_2D_PLOT_POINTS + self._generate_data_point = self._generate_2d_data_point + elif self.axis == 3: + self.MAX_PLOT_POINTS = self.MAX_3D_PLOT_POINTS + self._generate_data_point = self._generate_3d_data_point + else: + raise Exception("Invalid Plotter axis specified: %d. Must be one of: [2, 3]." % self.axis) + + if not self.max_points: + self.max_points = self.MAX_PLOT_POINTS + + self.app = QtGui.QApplication([]) + self.window = gl.GLViewWidget() + self.window.opts['distance'] = self.VIEW_DISTANCE + + if len(self.files) == 1: + self.window.setWindowTitle(self.files[0]) + + def _print(self, message): + ''' + Print console messages. For internal use only. + ''' + if self.verbose: + print (message) + + def _generate_plot_points(self, data_points): + ''' + Generates plot points from a list of data points. + + @data_points - A dictionary containing each unique point and its frequency of occurance. + + Returns a set of plot points. + ''' + total = 0 + min_weight = 0 + weightings = {} + plot_points = {} + + # If the number of data points exceeds the maximum number of allowed data points, use a + # weighting system to eliminate data points that occur less freqently. + if sum(data_points.itervalues()) > self.max_points: + + # First, generate a set of weight values 1 - 10 + for i in range(1, 11): + weightings[i] = 0 + + # Go through every data point and how many times that point occurs + for (point, count) in iterator(data_points): + # For each data point, compare it to each remaining weight value + for w in get_keys(weightings): + + # If the number of times this data point occurred is >= the weight value, + # then increment the weight value. Since weight values are ordered lowest + # to highest, this means that more frequent data points also increment lower + # weight values. Thus, the more high-frequency data points there are, the + # more lower-frequency data points are eliminated. + if count >= w: + weightings[w] += 1 + else: + break + + # Throw out weight values that exceed the maximum number of data points + if weightings[w] > self.max_points: + del weightings[w] + + # If there's only one weight value left, no sense in continuing the loop... + if len(weightings) == 1: + break + + # The least weighted value is our minimum weight + min_weight = min(weightings) + + # Get rid of all data points that occur less frequently than our minimum weight + for point in get_keys(data_points): + if data_points[point] < min_weight: + del data_points[point] + + for point in sorted(data_points, key=data_points.get, reverse=True): + plot_points[point] = data_points[point] + total += 1 + if total >= self.max_points: + break + + return plot_points + + def _generate_data_point(self, data): + ''' + Subclasses must override this to return the appropriate data point. + + @data - A string of data self.axis in length. + + Returns a data point tuple. + ''' + return (0,0,0) + + def _generate_data_points(self, file_name): + ''' + Generates a dictionary of data points and their frequency of occurrance. + + @file_name - The file to generate data points from. + + Returns a dictionary. + ''' + i = 0 + data_points = {} + + self._print("Generating data points for %s" % file_name) + + with BlockFile(file_name, 'r', offset=self.offset, length=self.length) as fp: + fp.MAX_TRAILING_SIZE = 0 + + while True: + (data, dlen) = fp.read_block() + if not data or not dlen: + break + + i = 0 + while (i+(self.axis-1)) < dlen: + point = self._generate_data_point(data[i:i+self.axis]) + if has_key(data_points, point): + data_points[point] += 1 + else: + data_points[point] = 1 + i += 3 + + return data_points + + def _generate_plot(self, plot_points): + import numpy as np + import pyqtgraph.opengl as gl + + nitems = float(len(plot_points)) + + pos = np.empty((nitems, 3)) + size = np.empty((nitems)) + color = np.empty((nitems, 4)) + + i = 0 + for (point, weight) in iterator(plot_points): + r = 0.0 + g = 0.0 + b = 0.0 + + pos[i] = point + frequency_percentage = (weight / nitems) + + # Give points that occur more frequently a brighter color and larger point size. + # Frequency is determined as a percentage of total unique data points. + if frequency_percentage > .005: + size[i] = .20 + r = 1.0 + elif frequency_percentage > .002: + size[i] = .10 + g = 1.0 + r = 1.0 + else: + size[i] = .05 + g = 1.0 + + color[i] = (r, g, b, 1.0) + + i += 1 + + scatter_plot = gl.GLScatterPlotItem(pos=pos, size=size, color=color, pxMode=False) + scatter_plot.translate(-127.5, -127.5, -127.5) + + return scatter_plot + + def plot(self, wait=True): + import pyqtgraph.opengl as gl + + self.window.show() + + if self.show_grids: + xgrid = gl.GLGridItem() + ygrid = gl.GLGridItem() + zgrid = gl.GLGridItem() + + self.window.addItem(xgrid) + self.window.addItem(ygrid) + self.window.addItem(zgrid) + + # Rotate x and y grids to face the correct direction + xgrid.rotate(90, 0, 1, 0) + ygrid.rotate(90, 1, 0, 0) + + # Scale grids to the appropriate dimensions + xgrid.scale(12.8, 12.8, 12.8) + ygrid.scale(12.8, 12.8, 12.8) + zgrid.scale(12.8, 12.8, 12.8) + + for file_name in self.files: + data_points = self._generate_data_points(file_name) + + self._print("Generating plot points from %d data points" % len(data_points)) + + self.plot_points = self._generate_plot_points(data_points) + del data_points + + self._print("Generating graph from %d plot points" % len(self.plot_points)) + + self.window.addItem(self._generate_plot(self.plot_points)) + + if wait: + self.wait() + + def wait(self): + from pyqtgraph.Qt import QtCore, QtGui + + t = QtCore.QTimer() + t.start(50) + QtGui.QApplication.instance().exec_() + + def _generate_3d_data_point(self, data): + ''' + Plot data points within a 3D cube. + ''' + return (ord(data[0]), ord(data[1]), ord(data[2])) + + def _generate_2d_data_point(self, data): + ''' + Plot data points projected on each cube face. + ''' + self.plane_count += 1 + if self.plane_count > 5: + self.plane_count = 0 + + if self.plane_count == 0: + return (0, ord(data[0]), ord(data[1])) + elif self.plane_count == 1: + return (ord(data[0]), 0, ord(data[1])) + elif self.plane_count == 2: + return (ord(data[0]), ord(data[1]), 0) + elif self.plane_count == 3: + return (255, ord(data[0]), ord(data[1])) + elif self.plane_count == 4: + return (ord(data[0]), 255, ord(data[1])) + elif self.plane_count == 5: + return (ord(data[0]), ord(data[1]), 255) + + def run(self): + self.plot() + return self.plot_points + diff --git a/src/binwalk/modules/hexdiff.py b/src/binwalk/modules/hexdiff.py new file mode 100644 index 0000000..0badb29 --- /dev/null +++ b/src/binwalk/modules/hexdiff.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python + +# TODO: Use sane defaults for block size and file size, if not specified. +# Handle header output for multiple files. + +import os +import sys +import curses +import platform +import binwalk.module +import binwalk.common as common +from binwalk.compat import * + +class HexDiff(object): + + ALL_SAME = 0 + ALL_DIFF = 1 + SOME_DIFF = 2 + + DEFAULT_DIFF_SIZE = 0x100 + DEFAULT_BLOCK_SIZE = 16 + + COLORS = { + 'red' : '31', + 'green' : '32', + 'blue' : '34', + } + + NAME = "Binary Diffing" + CLI = [ + binwalk.module.ModuleOption(short='W', + long='hexdump', + kwargs={'enabled' : True}, + description='Perform a hexdump / diff of a file or files'), + binwalk.module.ModuleOption(short='G', + long='green', + kwargs={'show_green' : True, 'show_blue' : False, 'show_green' : False}, + description='Only show lines containing bytes that are the same among all files'), + binwalk.module.ModuleOption(short='i', + long='red', + kwargs={'show_red' : True, 'show_blue' : False, 'show_green' : False}, + description='Only show lines containing bytes that are different among all files'), + binwalk.module.ModuleOption(short='U', + long='blue', + kwargs={'show_blue' : True, 'show_red' : False, 'show_green' : False}, + description='Only show lines containing bytes that are different among some files'), + binwalk.module.ModuleOption(short='w', + long='terse', + kwargs={'terse' : True}, + description='Diff all files, but only display a hex dump of the first file'), + ] + + KWARGS = [ + binwalk.module.ModuleKwarg(name='show_red', default=True), + binwalk.module.ModuleKwarg(name='show_blue', default=True), + binwalk.module.ModuleKwarg(name='show_green', default=True), + binwalk.module.ModuleKwarg(name='terse', default=False), + ] + + def __init__(self, **kwargs): + binwalk.module.process_kwargs(self, kwargs) + + self.block_hex = "" + self.printed_alt_text = False + + if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty() and platform.system() != 'Windows': + curses.setupterm() + self.colorize = self._colorize + else: + self.colorize = self._no_colorize + + def _no_colorize(self, c, color="red", bold=True): + return c + + def _colorize(self, c, color="red", bold=True): + attr = [] + + attr.append(self.COLORS[color]) + if bold: + attr.append('1') + + return "\x1b[%sm%s\x1b[0m" % (';'.join(attr), c) + + def _color_filter(self, data): + red = '\x1b[' + self.COLORS['red'] + ';' + green = '\x1b[' + self.COLORS['green'] + ';' + blue = '\x1b[' + self.COLORS['blue'] + ';' + + if self.show_blue and blue in data: + return True + if self.show_green and green in data: + return True + if self.show_red and red in data: + return True + return False + + def _print_block_hex(self, alt_text="*"): + printed = False + + if self._color_filter(self.block_hex): + self.config.display.result(self.block_hex) + self.printed_alt_text = False + printed = True + elif not self.printed_alt_text: + self.config.display.result("%s\n" % alt_text) + self.printed_alt_text = True + printed = True + + self.block_hex = "" + return printed + + def _build_block(self, c, highlight=None): + if highlight == self.ALL_DIFF: + self.block_hex += self.colorize(c, color="red") + elif highlight == self.ALL_SAME: + self.block_hex += self.colorize(c, color="green") + elif highlight == self.SOME_DIFF: + self.block_hex += self.colorize(c, color="blue") + else: + self.block_hex += c + + def run(self): + i = 0 + total = 0 + fps = [] + data = {} + delim = '/' + + offset = self.config.offset + size = self.config.length + block = self.config.block + files = self.config.target_files + + self.config.display.format_strings("\n%s\n", "%s\n") + + # If negative offset, then we're going that far back from the end of the file + if offset < 0: + size = offset * -1 + + # TODO: Display all file names in hexdump + if self.terse: + self.config.display.header(files[0]) + else: + self.config.display.header(files[0]) + + if common.BlockFile.READ_BLOCK_SIZE < block: + read_block_size = block + else: + read_block_size = common.BlockFile.READ_BLOCK_SIZE + + for f in files: + fp = common.BlockFile(f, 'r', length=size, offset=offset) + fp.READ_BLOCK_SIZE = read_block_size + fp.MAX_TRAILING_SIZE = 0 + fps.append(fp) + + # BlockFile handles calculation of negative offsets, if one was specified + offset = fps[0].offset + + while total < size: + i = 0 + files_finished = 0 + + for fp in fps: + (ddata, dlen) = fp.read_block() + data[fp.name] = ddata + if not ddata or dlen == 0: + files_finished += 1 + + if files_finished == len(fps): + break + + while i < read_block_size and (total+i) < size: + diff_same = {} + alt_text = "*" + " " * 6 + + self._build_block("%.08X " % (total + i + offset)) + + # For each byte in this block, is the byte the same in all files, the same in some files, or different in all files? + for j in range(0, block): + byte_list = [] + + try: + c = data[files[0]][j+i] + except: + c = None + + for f in files: + try: + c = data[f][j+i] + except Exception as e: + c = None + + if c not in byte_list: + byte_list.append(c) + + if len(byte_list) == 1: + diff_same[j] = self.ALL_SAME + elif len(byte_list) == len(files): + diff_same[j] = self.ALL_DIFF + else: + diff_same[j] = self.SOME_DIFF + + for index in range(0, len(files)): + if self.terse and index > 0: + break + + f = files[index] + + alt_text += " " * (3 + (3 * block) + 3 + block + 3) + alt_text += delim + + for j in range(0, block): + try: + self._build_block("%.2X " % ord(data[f][j+i]), highlight=diff_same[j]) + except Exception as e: + self._build_block(" ") + + if (j+1) == block: + self._build_block(" |") + for k in range(0, block): + try: + if data[f][k+i] in string.printable and data[f][k+i] not in string.whitespace: + self._build_block(data[f][k+i], highlight=diff_same[k]) + else: + self._build_block('.', highlight=diff_same[k]) + except: + self._build_block(' ') + + if index == len(files)-1 or (self.terse and index == 0): + self._build_block("|\n") + else: + self._build_block('| %s ' % delim) + + if self._print_block_hex(alt_text=alt_text[:-1].strip()): + if delim == '\\': + delim = '/' + else: + delim = '\\' + + i += block + total += read_block_size + + for fp in fps: + fp.close() + + self.config.display.footer() + + return True + -- libgit2 0.26.0