#!/usr/bin/env python
"""Command line front end for binwalk's fuzzy hash matching.

Parses the command line, builds a ``hashmatch.HashMatch`` instance from the
selected options and diffs a NEEDLE (file or directory) against one or more
HAYSTACKs.
"""

import os
import re
import sys
import binwalk.hashmatch as hashmatch
from binwalk.compat import *
from getopt import GetoptError, gnu_getopt as GetOpt

DEFAULT_CUTOFF = 50


def usage(fd):
    """Write the help text to *fd* and terminate the process.

    Exits with status 0 when *fd* is ``sys.stdout`` (help was requested) and
    with status 1 for any other stream (help is shown because of an error).
    """
    fd.write("\n")
    fd.write('Diff files or directories using Context Triggered Piecewise Hashing ("fuzzy" hashing).\n')
    fd.write("Craig Heffner, http://www.devttys0.com\n")
    fd.write("\n")
    fd.write("Usage: %s [OPTIONS] [NEEDLE] [HAYSTACK] [HAYSTACK] [HAYSTACK] ...\n" % os.path.basename(sys.argv[0]))
    fd.write("\n")
    fd.write("NEEDLE may be a file or a directory.\n")
    fd.write("HAYSTACKs must be either all files or all directories.\n")
    fd.write("\n")
    fd.write("Diffing Options:\n")
    fd.write("\t-d, --diff Only show files that are different\n")
    fd.write("\t-s, --same Only show files that are the same\n")
    fd.write("\t-S, --strings Diff strings inside files instead of the entire file\n")
    # The short form of --cutoff is -o; -c is taken by --csv.
    fd.write("\t-o, --cutoff=<n> Set the cutoff percentage (default: %d)\n" % DEFAULT_CUTOFF)
    fd.write("\t-m, --max=<n> Quit after n number of matches\n")
    fd.write("\n")
    fd.write("Filtering Options:\n")
    fd.write("\t-n, --name Only diff files whose base names are the same\n")
    fd.write("\t-l, --symlinks Don't ignore symlinks\n")
    fd.write("\t-y, --include-file=<match> Only diff against a specific file name (e.g., *.py, *.bin, etc)\n")
    fd.write("\t-x, --exclude-file=<match> Do not diff against a specific file name (e.g., *.py, *.bin, etc)\n")
    fd.write("\t-Y, --include-type=<type> Only diff against a certain file type (e.g., elf, jpeg, etc)\n")
    fd.write("\t-X, --exclude-type=<type> Do not diff against a certain file type (e.g., elf, jpeg, etc)\n")
    fd.write("\n")
    fd.write("General Options:\n")
    fd.write("\t-f, --file=<file> Log results to file\n")
    fd.write("\t-a, --abspath Display the absolute path of each file\n")
    fd.write("\t-c, --csv Log results to file in csv format\n")
    fd.write("\t-q, --quiet Suppress output to stdout\n")
    fd.write("\t-t, --term Format output to fit the terminal window\n")
    fd.write("\t-h, --help Show help\n")
    fd.write("\n")

    if fd == sys.stdout:
        sys.exit(0)
    else:
        sys.exit(1)


def _parse_int(value, option):
    """Convert an option argument to int (base auto-detected), or exit via usage()."""
    try:
        return int(value, 0)
    except ValueError:
        sys.stderr.write("Invalid numeric value for %s: '%s'\n" % (option, value))
        usage(sys.stderr)


def main():
    """Parse the command line and run the requested fuzzy hash comparison.

    Exits through usage() on --help, on an unrecognized option, on a
    non-numeric --max/--cutoff value, or when fewer than two paths
    (NEEDLE plus at least one HAYSTACK) are supplied.
    """
    include_files = []
    exclude_files = []
    include_types = []
    exclude_types = []
    types = {}
    matches = {}

    abspath = False
    log_file = None
    log_csv = False
    fit_to_width = False
    quiet = False
    strings = False
    symlinks = False
    name = False
    same = None
    max_results = None
    cutoff = 0

    short_options = "acdf:hlm:no:qSstx:X:y:Y:"
    long_options = [
        "abspath",
        "help",
        "cutoff=",
        "strings",
        "same",
        "diff",
        "max=",
        "symlinks",
        "name",
        "file=",
        "csv",
        "term",
        "quiet",
        # Long forms of the filtering options advertised by usage().
        "include-file=",
        "exclude-file=",
        "include-type=",
        "exclude-type=",
    ]

    try:
        # gnu_getopt permits options and paths to be intermixed; everything
        # that is not an option (or an option's argument) is returned, in
        # order, as the list of target paths.
        opts, file_list = GetOpt(sys.argv[1:], short_options, long_options)
    except GetoptError as e:
        sys.stderr.write("%s\n" % str(e))
        usage(sys.stderr)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage(sys.stdout)
        elif opt in ("-S", "--strings"):
            strings = True
        elif opt in ("-l", "--symlinks"):
            symlinks = True
        elif opt in ("-n", "--name"):
            name = True
        elif opt in ("-s", "--same"):
            same = True
        elif opt in ("-d", "--diff"):
            same = False
        elif opt in ("-t", "--term"):
            fit_to_width = True
        elif opt in ("-c", "--csv"):
            log_csv = True
        elif opt in ("-q", "--quiet"):
            quiet = True
        elif opt in ("-f", "--file"):
            log_file = arg
        elif opt in ("-m", "--max"):
            max_results = _parse_int(arg, opt)
        elif opt in ("-o", "--cutoff"):
            cutoff = _parse_int(arg, opt)
        elif opt in ("-y", "--include-file"):
            include_files.append(arg)
        elif opt in ("-x", "--exclude-file"):
            exclude_files.append(arg)
        elif opt in ("-Y", "--include-type"):
            include_types.append(arg.lower())
        elif opt in ("-X", "--exclude-type"):
            exclude_types.append(arg.lower())
        elif opt in ("-a", "--abspath"):
            abspath = True

    # A NEEDLE and at least one HAYSTACK are required.
    if len(file_list) < 2:
        usage(sys.stderr)

    # NOTE(review): DEFAULT_CUTOFF is only applied when -s/-d was given
    # explicitly without a cutoff; with neither flag the cutoff stays 0.
    # Behavior preserved as found -- confirm this is intended.
    if same is None:
        same = True
    elif cutoff == 0:
        cutoff = DEFAULT_CUTOFF

    # HashMatch takes include/exclude filters as dicts keyed by True
    # (include) and False (exclude); only non-empty filters are passed.
    if include_files:
        matches[True] = include_files
    if exclude_files:
        matches[False] = exclude_files

    if include_types:
        types[True] = include_types
    if exclude_types:
        types[False] = exclude_types

    rehash = hashmatch.HashMatch(cutoff=cutoff,
                                 strings=strings,
                                 same=same,
                                 symlinks=symlinks,
                                 name=name,
                                 max_results=max_results,
                                 display=True,
                                 quiet=quiet,
                                 log=log_file,
                                 csv=log_csv,
                                 format_to_screen=fit_to_width,
                                 abspath=abspath,
                                 types=types,
                                 matches=matches)

    # Dispatch on what NEEDLE and the first HAYSTACK are on disk.
    if os.path.isfile(file_list[0]):
        if os.path.isfile(file_list[1]):
            rehash.files(file_list[0], file_list[1:])
        else:
            rehash.file(file_list[0], file_list[1:])
    else:
        rehash.directories(file_list[0], file_list[1:])


if __name__ == "__main__":
    main()