Commit 6d643073 by devttys0

Basic recursive fuzzy hashing working. Renamed fuzzyhash.py to hashmatch.py.

parent bf87972e
......@@ -3,9 +3,26 @@ import io
import os
import re
import ast
import hashlib
import operator as op
from binwalk.compat import *
def file_md5(file_name):
'''
Generate an MD5 hash of the specified file.
@file_name - The file to hash.
Returns an MD5 hex digest string.
'''
md5 = hashlib.md5()
with open(file_name, 'rb') as f:
for chunk in iter(lambda: f.read(128*md5.block_size), b''):
md5.update(chunk)
return md5.hexdigest()
def file_size(filename):
'''
Obtains the size of a given file.
......
import ctypes
import ctypes.util
from binwalk.compat import *
class FuzzyHash(object):
    '''
    Thin ctypes wrapper around the fuzzy hashing shared library (libfuzzy).
    '''

    # Requires libfuzzy.so
    LIBRARY_NAME = "fuzzy"

    # Max result is 148 (http://ssdeep.sourceforge.net/api/html/fuzzy_8h.html)
    FUZZY_MAX_RESULT = 150

    def __init__(self):
        '''
        Locates and loads the fuzzy hashing shared library.

        Raises an OSError if the library cannot be located or loaded.
        '''
        library_path = ctypes.util.find_library(self.LIBRARY_NAME)
        if library_path is None:
            # find_library() returns None when nothing matches. Passing None
            # on to LoadLibrary() would not load libfuzzy, and on some
            # platforms would not fail until a library function is first
            # used, so report the missing library here instead.
            raise OSError("Unable to locate the '%s' shared library" % self.LIBRARY_NAME)

        self.lib = ctypes.cdll.LoadLibrary(library_path)

    def compare_files(self, file1, file2):
        '''
        Fuzzy hashes two files and compares the resulting hashes.

        @file1 - Path to the first file.
        @file2 - Path to the second file.

        Returns the result of the library's fuzzy_compare call, or None if
        either file could not be hashed.
        NOTE(review): per the ssdeep API docs the result is presumably a
        0-100 match score - confirm against the installed library.
        '''
        hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
        hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)

        # A zero return value from fuzzy_hash_filename is treated as success.
        # The second file is only hashed if the first one succeeded.
        if self.lib.fuzzy_hash_filename(str2bytes(file1), hash1) != 0:
            return None
        if self.lib.fuzzy_hash_filename(str2bytes(file2), hash2) != 0:
            return None

        return self.lib.fuzzy_compare(hash1, hash2)
if __name__ == '__main__':
    # Command line usage: fuzzy-compare the two files named by the first
    # and second command line arguments and print the result.
    import sys

    matcher = FuzzyHash()
    result = matcher.compare_files(sys.argv[1], sys.argv[2])
    print (result)
import io
import sys
import hashlib
import csv as pycsv
from datetime import datetime
from binwalk.compat import *
from binwalk.common import file_md5
class PrettyPrint:
'''
......@@ -126,18 +126,6 @@ class PrettyPrint:
if not nolog:
self._log(data)
def _file_md5(self, file_name):
'''
Generate an MD5 hash of the specified file.
'''
md5 = hashlib.md5()
with open(file_name, 'rb') as f:
for chunk in iter(lambda: f.read(128*md5.block_size), b''):
md5.update(chunk)
return md5.hexdigest()
def _append_to_data_parts(self, data, start, end):
'''
Intelligently appends data to self.string_parts.
......@@ -225,7 +213,7 @@ class PrettyPrint:
Returns None.
'''
nolog = False
md5sum = self._file_md5(file_name)
md5sum = file_md5(file_name)
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
if self.csv:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment