Added --rehash option to binwalk.

64bf06ac · devttys0 · 4bb153c2 · 64bf06ac · 64bf06ac · 64bf06ac
Commit 64bf06ac authored Dec 07, 2013 by devttys0
Showing with 52 additions and 11 deletions

binwalk src/bin/binwalk +29 -2

__init__.py src/binwalk/__init__.py +4 -2

cmdopts.py src/binwalk/cmdopts.py +2 -0

hashmatch.py src/binwalk/hashmatch.py +6 -2

prettyprint.py src/binwalk/prettyprint.py +11 -5

No files found.
--- a/src/bin/binwalk
+++ b/src/bin/binwalk
@@ -4,6 +4,7 @@ import os.path
 import binwalk
 import binwalk.cmdopts
 import binwalk.plotter
+from binwalk.hashmatch import HashMatch
 from binwalk.compat import *
 from threading import Thread
 from getopt import GetoptError, gnu_getopt as GetOpt
@@ -84,6 +85,7 @@ def main():
 	do_files = False
 	log_file = None
 	do_csv = False
+	do_rehash = False
 	save_plot = False
 	show_plot = True
 	show_grids = False
@@ -159,6 +161,8 @@ def main():
 			requested_scans.append(binwalk.Binwalk.ENTROPY)
 		elif opt in ("-W", "--diff"):
 			requested_scans.append(binwalk.Binwalk.HEXDIFF)
+		elif opt in ("-P", "--rehash"):
+			do_rehash = True
 		elif opt in ("-w", "--terse"):
 			show_single_hex_dump = True
 		elif opt in ("-a", "--gzip"):
@@ -384,7 +388,13 @@ def main():
 	if not requested_scans:
 		requested_scans.append(binwalk.Binwalk.BINWALK)

-	# Sort the scan types to ensure the entropy scan is performed last
+	# If rehash was requested, add it to the list of requested scans here.
+	# It is not added while parsing command line options, because a non-empty scan list skips the default binwalk scan above and would therefore require an explicit --binwalk scan request.
+	# Since rehash can only be run if a binwalk scan was already run, forcing the user to specify --binwalk as well would be redundant.
+	if do_rehash:
+		requested_scans.append(binwalk.Binwalk.REHASH)
+
+	# Sort the scan types to ensure that scans are executed in the proper order (some scans rely on others being run first)
 	requested_scans.sort()

 	# Everything is set up, let's do a scan
@@ -438,6 +448,23 @@ def main():
 				r = bwalk.analyze_compression(target_files, offset=offset, length=length)
 				bwalk.concatenate_results(results, r)

+			elif scan_type == binwalk.Binwalk.REHASH:
+		
+				diff_dirs = []
+
+				for target_file in target_files:
+					if has_key(results, target_file):
+						for (offset, offset_results) in results[target_file]:
+							for result in offset_results:
+								if has_key(result, "extract") and result["extract"]:
+									base_dir = result["extract"].split(os.path.sep)[0]
+									if base_dir and base_dir not in diff_dirs:
+										diff_dirs.append(base_dir)
+
+				if len(diff_dirs) > 1:
+					HashMatch(display=bwalk.display, cutoff=50).directories(diff_dirs[0], diff_dirs[1:])
+					
+
 			elif scan_type == binwalk.Binwalk.BINVIS:
 			
 				# Always enable verbose mode; generating the plot can take some time for large files,
@@ -484,7 +511,7 @@ def main():

 try:
 	# Special options for profiling the code. For debug use only.
-	if '--profile' in sys.argv or '-P' in sys.argv:
+	if '--profile' in sys.argv:
 		import cProfile
 		cProfile.run('main()')
 	else:

--- a/src/binwalk/__init__.py
+++ b/src/binwalk/__init__.py
@@ -60,6 +60,7 @@ class Binwalk(object):

 	# Valid scan_type values.
 	# ENTROPY must be the largest value to ensure it is performed last if multiple scans are performed.
+	# REHASH must also be larger than any scans that would generate extracted files.
 	BINWALK = 0x01
 	BINARCH = 0x02
 	BINCAST = 0x03
@@ -67,8 +68,9 @@ class Binwalk(object):
 	COMPRESSION = 0x05
 	HEXDIFF = 0x06
 	CUSTOM = 0x07
-	BINVIS = 0x08
-	ENTROPY = 0x09
+	REHASH = 0x08
+	BINVIS = 0x09
+	ENTROPY = 0x0A

 	def __init__(self, magic_files=[], flags=magic.MAGIC_NONE, log=None, quiet=False, verbose=0, ignore_smart_keywords=False, ignore_time_skews=False, load_extractor=False, load_plugins=True, exec_commands=True, max_extract_size=None):
 		'''

--- a/src/binwalk/cmdopts.py
+++ b/src/binwalk/cmdopts.py
@@ -14,6 +14,7 @@ long_options = [
 		"green",
 		"red",
 		"blue",
+		"rehash",
 		"examples",
 		"quiet", 
 		"csv",
@@ -130,6 +131,7 @@ def usage(fd):
 	fd.write("\t-r, --rm                      Cleanup extracted files and zero-size files\n")
 	fd.write("\t-d, --honor-footers           Only extract files up to their corresponding footer signatures\n")
 	fd.write("\t-z, --carve                   Carve data from files, but don't execute extraction utilities (implies -d)\n")
+	fd.write("\t-P, --rehash                  Recursively diff data extracted from FILE1 with the data extracted from all other files.\n")
 	fd.write("\n")

 	fd.write("Plugin Options:\n")

--- a/src/binwalk/hashmatch.py
+++ b/src/binwalk/hashmatch.py
@@ -38,7 +38,7 @@ class HashMatch(object):
 		@symlinks         - Set to True to include symbolic link files.
 		@name             - Set to True to only compare files whose base names match.
 		@max_results      - Stop searching after x number of matches.
-		@display          - Set to True to display results to stdout.
+		@display          - Set to True to display results to stdout, or pass an instance of binwalk.prettyprint.PrettyPrint.
 		@log              - Specify a log file to log results to.
 		@csv              - Set to True to log data in CSV format.
 		@quiet            - Set to True to suppress output to stdout.
@@ -60,8 +60,12 @@ class HashMatch(object):
 		self.max_results = max_results

 		if display:
+			if isinstance(display, PrettyPrint):
+				self.pretty_print = display
+			else:
 				self.pretty_print = PrettyPrint(log=log, csv=csv, format_to_screen=format_to_screen, quiet=quiet)
-			self.pretty_print.header(header="PERCENTAGE\t\t\tFILE NAME")
+
+			self.pretty_print.header(header="PERCENTAGE\t\t\tFILE", csv=True)
 		else:
 			self.pretty_print = None


--- a/src/binwalk/prettyprint.py
+++ b/src/binwalk/prettyprint.py
@@ -228,13 +228,14 @@ class PrettyPrint:
 		self._pprint("Target File:   %s\n" % file_name, nolog=nolog)
 		self._pprint("MD5 Checksum:  %s\n" % md5sum, nolog=nolog)

-	def header(self, file_name=None, header=None, description=DEFAULT_DESCRIPTION_HEADER):
+	def header(self, file_name=None, header=None, description=DEFAULT_DESCRIPTION_HEADER, csv=True):
 		'''
 		Prints the binwalk header, typically used just before starting a scan.

 		@file_name   - If specified, and if self.verbose > 0, then detailed file info will be included in the header.
 		@header      - If specified, this is a custom header to display at the top of the output.
 		@description - The description header text to display (default: "DESCRIPTION")
+		@csv         - Set to True to print the header verbatim to the CSV file.

 		Returns None.
 		'''
@@ -244,16 +245,21 @@ class PrettyPrint:
 			self.file_info(file_name)

 		if self.log_csv:
-			nolog = True
+			if csv:
+				nolog1 = False
+			else:
+				nolog1 = True
+
+			nolog2 = True

 		self._pprint("\n")

 		if not header:
-			self._pprint("DECIMAL   \tHEX       \t%s\n" % description, nolog=nolog)
+			self._pprint("DECIMAL   \tHEX       \t%s\n" % description, nolog=nolog1)
 		else:
-			self._pprint(header + "\n", nolog=nolog)
+			self._pprint(header + "\n", nolog=nolog1)
 		
-		self._pprint("-" * self.HEADER_WIDTH + "\n", nolog=nolog)
+		self._pprint("-" * self.HEADER_WIDTH + "\n", nolog=nolog2)

 	def footer(self, bwalk=None, file_name=None):
 		'''