Commit 48a1a48b by devttys0

Updated hashmatch.py to perform strings-based hashing; added snappy stream compression signature.

parent a30e51e1
import os import os
import re import re
import magic
import fnmatch import fnmatch
import ctypes import ctypes
import ctypes.util import ctypes.util
import binwalk.smartstrings
import magic
from binwalk.compat import * from binwalk.compat import *
from binwalk.common import file_md5 from binwalk.common import file_md5
...@@ -20,7 +20,7 @@ class HashMatch(object): ...@@ -20,7 +20,7 @@ class HashMatch(object):
FUZZY_DEFAULT_CUTOFF = 50 FUZZY_DEFAULT_CUTOFF = 50
def __init__(self, cutoff=None, fuzzy=True, strings=False, same=False, missing=False, symlinks=False, matches={}, types={}): def __init__(self, cutoff=None, fuzzy=True, strings=False, same=False, missing=False, symlinks=False, name=False, matches={}, types={}):
''' '''
Class constructor. Class constructor.
...@@ -30,6 +30,7 @@ class HashMatch(object): ...@@ -30,6 +30,7 @@ class HashMatch(object):
@same - Set to True to show files that are the same, False to show files that are different. @same - Set to True to show files that are the same, False to show files that are different.
@missing - Set to True to show missing files. @missing - Set to True to show missing files.
@symlinks - Set to True to include symbolic link files. @symlinks - Set to True to include symbolic link files.
@name - Set to True to only compare files whose base names match.
@matches - A dictionary of file names to diff. @matches - A dictionary of file names to diff.
@types - A dictionary of file types to diff. @types - A dictionary of file types to diff.
...@@ -42,6 +43,7 @@ class HashMatch(object): ...@@ -42,6 +43,7 @@ class HashMatch(object):
self.show_missing = missing self.show_missing = missing
self.symlinks = symlinks self.symlinks = symlinks
self.matches = matches self.matches = matches
self.name = name
self.types = types self.types = types
self.magic = magic.open(0) self.magic = magic.open(0)
...@@ -55,6 +57,9 @@ class HashMatch(object): ...@@ -55,6 +57,9 @@ class HashMatch(object):
for k in get_keys(self.types): for k in get_keys(self.types):
self.types[k] = re.compile(self.types[k]) self.types[k] = re.compile(self.types[k])
def _get_strings(self, fname):
    '''
    Builds a single string out of the strings found in a file.

    @fname - Path of the file to pull strings from.

    Returns the concatenation, with no separator, of the string element of
    every (offset, string) pair produced by FileStrings(fname, n=10).strings().
    '''
    # NOTE(review): n=10 is presumably the minimum string length - confirm against FileStrings.
    found = binwalk.smartstrings.FileStrings(fname, n=10).strings()

    # The offsets are not needed; only the string text is used.
    pieces = []
    for (_, text) in found:
        pieces.append(text)
    return ''.join(pieces)
def files(self, file1, file2): def files(self, file1, file2):
''' '''
Fuzzy diff two files. Fuzzy diff two files.
...@@ -65,21 +70,37 @@ class HashMatch(object): ...@@ -65,21 +70,37 @@ class HashMatch(object):
Returns the match percentage. Returns the match percentage.
Returns None on error. Returns None on error.
''' '''
status = 0
if not self.name or os.path.basename(file1) == os.path.basename(file2):
if self.fuzzy: if self.fuzzy:
hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT) hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT) hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
# TODO: Implement strings hashing
try: try:
if self.lib.fuzzy_hash_filename(str2bytes(file1), hash1) == 0 and self.lib.fuzzy_hash_filename(str2bytes(file2), hash2) == 0: if self.strings:
file1_strings = self._get_strings(file1)
file2_strings = self._get_strings(file2)
if file1_strings == file2_strings:
return 100
else:
status |= self.lib.fuzzy_hash_buf(str2bytes(file1_strings), len(file1_strings), hash1)
status |= self.lib.fuzzy_hash_buf(str2bytes(file2_strings), len(file2_strings), hash2)
else:
status |= self.lib.fuzzy_hash_filename(str2bytes(file1), hash1)
status |= self.lib.fuzzy_hash_filename(str2bytes(file2), hash2)
if status == 0:
if hash1.raw == hash2.raw: if hash1.raw == hash2.raw:
return 100 return 100
else: else:
return self.lib.fuzzy_compare(hash1, hash2) return self.lib.fuzzy_compare(hash1, hash2)
except Exception as e: except Exception as e:
print "WARNING: Exception while performing fuzzy comparison:", e print "WARNING: Exception while performing fuzzy comparison:", e
else:
elif not self.strings:
if file_md5(file1) == file_md5(file2): if file_md5(file1) == file_md5(file2):
return 100 return 100
...@@ -181,6 +202,7 @@ class HashMatch(object): ...@@ -181,6 +202,7 @@ class HashMatch(object):
file2 = os.path.join(dir2, f) file2 = os.path.join(dir2, f)
m = self.files(file1, file2) m = self.files(file1, file2)
if m is not None:
matches = self.is_match(m) matches = self.is_match(m)
if (matches and self.show_same) or (not matches and not self.show_same): if (matches and self.show_same) or (not matches and not self.show_same):
...@@ -196,8 +218,9 @@ class HashMatch(object): ...@@ -196,8 +218,9 @@ class HashMatch(object):
if __name__ == '__main__': if __name__ == '__main__':
import sys import sys
hmatch = HashMatch(missing=True) hmatch = HashMatch(strings=True, name=True)
print hmatch.file(sys.argv[1], sys.argv[2:])
#for (match, fname) in hmatch.directories(sys.argv[1], sys.argv[2]): #for (match, fname) in hmatch.directories(sys.argv[1], sys.argv[2]):
for (match, fname) in hmatch.find_file(sys.argv[1], sys.argv[2:]): #for (match, fname) in hmatch.find_file(sys.argv[1], sys.argv[2:]):
print match, fname # print match, fname
...@@ -15,6 +15,7 @@ class FileStrings(object): ...@@ -15,6 +15,7 @@ class FileStrings(object):
MAX_STRING_LENGTH = 20 MAX_STRING_LENGTH = 20
MAX_SPECIAL_CHARS_RATIO = .4 MAX_SPECIAL_CHARS_RATIO = .4
MAX_ENTROPY = 0.9 MAX_ENTROPY = 0.9
DEFAULT_ENTROPY_BLOCK = 1024
LETTERS = set(string.letters) LETTERS = set(string.letters)
NUMBERS = set(string.digits) NUMBERS = set(string.digits)
...@@ -31,7 +32,7 @@ class FileStrings(object): ...@@ -31,7 +32,7 @@ class FileStrings(object):
'(' : ')', '(' : ')',
} }
def __init__(self, file_name, binwalk, length=0, offset=0, n=MIN_STRING_LENGTH, block=0, algorithm=None, plugins=None): def __init__(self, file_name, binwalk=None, length=0, offset=0, n=MIN_STRING_LENGTH, block=DEFAULT_ENTROPY_BLOCK, algorithm='gzip', plugins=None):
''' '''
Class constructor. Preferred to be invoked from the Strings class instead of directly. Class constructor. Preferred to be invoked from the Strings class instead of directly.
...@@ -65,7 +66,7 @@ class FileStrings(object): ...@@ -65,7 +66,7 @@ class FileStrings(object):
# Perform an entropy analysis over the entire file (anything less may generate poor entropy data). # Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
# Give fake file results list to prevent FileEntropy from doing too much analysis. # Give fake file results list to prevent FileEntropy from doing too much analysis.
with entropy.FileEntropy(file_name, block=block, file_results=['foo']) as e: with entropy.FileEntropy(file_name, block=block, file_results=['foo']) as e:
(self.x, self.y, self.average_entropy) = e.analyze() (self.x, self.y, self.average_entropy) = e.analyze(algorithm=algorithm)
for i in range(0, len(self.x)): for i in range(0, len(self.x)):
self.entropy[self.x[i]] = self.y[i] self.entropy[self.x[i]] = self.y[i]
# Make sure our block size matches the entropy analysis's block size # Make sure our block size matches the entropy analysis's block size
...@@ -82,6 +83,7 @@ class FileStrings(object): ...@@ -82,6 +83,7 @@ class FileStrings(object):
self.start = self.fd.offset self.start = self.fd.offset
# Set the total_scanned and scan_length values for plugins and status display messages # Set the total_scanned and scan_length values for plugins and status display messages
if self.binwalk:
self.binwalk.total_scanned = 0 self.binwalk.total_scanned = 0
self.binwalk.scan_length = self.fd.length self.binwalk.scan_length = self.fd.length
...@@ -128,6 +130,7 @@ class FileStrings(object): ...@@ -128,6 +130,7 @@ class FileStrings(object):
(data, dlen) = self.fd.read_block() (data, dlen) = self.fd.read_block()
if self.binwalk:
self.binwalk.total_scanned = self.total_read self.binwalk.total_scanned = self.total_read
self.total_read += dlen self.total_read += dlen
...@@ -309,6 +312,7 @@ class FileStrings(object): ...@@ -309,6 +312,7 @@ class FileStrings(object):
string = results['description'] string = results['description']
if not ((plug_ret | plug_pre ) & plugins.PLUGIN_NO_DISPLAY): if not ((plug_ret | plug_pre ) & plugins.PLUGIN_NO_DISPLAY):
if self.binwalk:
self.binwalk.display.results(offset, [results]) self.binwalk.display.results(offset, [results])
self.valid_strings.append((offset, string)) self.valid_strings.append((offset, string))
return plug_ret return plug_ret
...@@ -354,7 +358,7 @@ class Strings(object): ...@@ -354,7 +358,7 @@ class Strings(object):
Class for performing a strings analysis against a list of files. Class for performing a strings analysis against a list of files.
''' '''
def __init__(self, file_names, binwalk, length=0, offset=0, n=0, block=0, algorithm=None, load_plugins=True, whitelist=[], blacklist=[]): def __init__(self, file_names, binwalk=None, length=0, offset=0, n=0, block=0, algorithm=None, load_plugins=True, whitelist=[], blacklist=[]):
''' '''
Class constructor. Class constructor.
...@@ -378,13 +382,14 @@ class Strings(object): ...@@ -378,13 +382,14 @@ class Strings(object):
self.n = n self.n = n
self.block = block self.block = block
self.algorithm = algorithm self.algorithm = algorithm
self.binwalk.scan_type = self.binwalk.STRINGS
self.file_strings = None self.file_strings = None
self.plugins = None
if self.binwalk:
self.binwalk.scan_type = self.binwalk.STRINGS
if load_plugins: if load_plugins:
self.plugins = plugins.Plugins(self.binwalk, whitelist=whitelist, blacklist=blacklist) self.plugins = plugins.Plugins(self.binwalk, whitelist=whitelist, blacklist=blacklist)
else:
self.plugins = None
def __enter__(self): def __enter__(self):
return self return self
...@@ -429,7 +434,9 @@ class Strings(object): ...@@ -429,7 +434,9 @@ class Strings(object):
self.plugins._load_plugins() self.plugins._load_plugins()
for file_name in self.file_names: for file_name in self.file_names:
if self.binwalk:
self.binwalk.display.header(file_name=file_name, description='Strings') self.binwalk.display.header(file_name=file_name, description='Strings')
results[file_name] = [] results[file_name] = []
self.file_strings = FileStrings(file_name, self.binwalk, self.length, self.offset, self.n, block=self.block, algorithm=self.algorithm, plugins=self.plugins) self.file_strings = FileStrings(file_name, self.binwalk, self.length, self.offset, self.n, block=self.block, algorithm=self.algorithm, plugins=self.plugins)
......
...@@ -165,3 +165,7 @@ ...@@ -165,3 +165,7 @@
>6 byte&0x10 0x10 multi-block stream >6 byte&0x10 0x10 multi-block stream
# See lzma file for LZMA signatures # See lzma file for LZMA signatures
0 string \xff\x06\x00\x00\x73\x4e\x61\x50\x70\x59 Snappy compression, stream identifier
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment