#!/usr/bin/env python

import os
import re
import sys
import binwalk.hashmatch as hashmatch
from binwalk.compat import *
from getopt import GetoptError, gnu_getopt as GetOpt

# Cutoff percentage shown in the help text; main() applies it when -s/-d is given without a non-zero -o.
DEFAULT_CUTOFF = 50

def usage(fd):
	"""
	Write the command line help text to fd, then terminate the process.

	fd is any object with a write() method (normally sys.stdout or sys.stderr).

	This function never returns: it exits with status 0 when fd is sys.stdout
	(help was requested explicitly) and with status 1 for any other fd
	(help is being shown because of a usage error).
	"""
	lines = [
		"",
		'Diff files or directories using Context Triggered Piecewise Hashing ("fuzzy" hashing).',
		"Craig Heffner, http://www.devttys0.com",
		"",
		"Usage: %s [OPTIONS] [NEEDLE] [HAYSTACK] [HAYSTACK] [HAYSTACK] ..." % os.path.basename(sys.argv[0]),
		"",
		"NEEDLE may be a file or a directory.",
		"HAYSTACKs must be either all files or all directories.",
		"",
		"Diffing Options:",
		"\t-d, --diff                    Only show files that are different",
		"\t-s, --same                    Only show files that are the same",
		"\t-S, --strings                 Diff strings inside files instead of the entire file",
		# The short option for the cutoff is -o; -c selects CSV logging (see General Options).
		"\t-o, --cutoff=<n>              Set the cutoff percentage (default: %d)" % DEFAULT_CUTOFF,
		"\t-m, --max=<n>                 Quit after n number of matches",
		"",
		"Filtering Options:",
		"\t-n, --name                    Only diff files whose base names are the same",
		"\t-l, --symlinks                Don't ignore symlinks",
		"\t-y, --include-file=<match>    Only diff against a specific file name (e.g., *.py, *.bin, etc)",
		"\t-x, --exclude-file=<match>    Do not diff against a specific file name (e.g., *.py, *.bin, etc)",
		"\t-Y, --include-type=<type>     Only diff against a certain file type (e.g., elf, jpeg, etc)",
		"\t-X, --exclude-type=<type>     Do not diff against a certain file type (e.g., elf, jpeg, etc)",
		"",
		"General Options:",
		"\t-f, --file=<file>             Log results to file",
		"\t-a, --abspath                 Display the absolute path of each file",
		"\t-c, --csv                     Log results to file in csv format",
		"\t-q, --quiet                   Suppress output to stdout",
		"\t-t, --term                    Format output to fit the terminal window",
		"\t-h, --help                    Show help",
		"",
	]

	fd.write("\n".join(lines) + "\n")

	# Help printed to stdout was asked for; anything else signals a usage error.
	sys.exit(0 if fd is sys.stdout else 1)

def _parse_int_option(opt, arg):
	"""Return arg converted to an int for option opt; on bad input report it, show usage and exit."""
	try:
		# Base 0 lets the user write decimal as well as 0x/0o/0b prefixed values.
		return int(arg, 0)
	except ValueError:
		sys.stderr.write("Invalid numeric value for %s: '%s'\n" % (opt, arg))
		usage(sys.stderr)

def main():
	"""
	Command line entry point.

	Parses sys.argv, builds a hashmatch.HashMatch object from the selected
	options and compares the first positional argument (the needle) against
	the remaining positional arguments (the haystacks).

	Exits through usage() with status 0 when help is requested, and with
	status 1 when the options are invalid or fewer than two paths are given.
	"""
	include_files = []
	exclude_files = []
	include_types = []
	exclude_types = []

	types = {}
	matches = {}

	abspath = False
	log_file = None
	log_csv = False
	fit_to_width = False
	quiet = False
	strings = False
	symlinks = False
	name = False
	same = None
	max_results = None
	cutoff = 0

	short_options = "acdf:hlm:no:qSstx:X:y:Y:"
	long_options = [
			"abspath",
			"help",
			"cutoff=",
			"strings",
			"same",
			"diff",
			"max=",
			"symlinks",
			"name",
			"file=",
			"csv",
			"term",
			"quiet",
			# Long forms of -y/-x/-Y/-X, as advertised by the help text.
			"include-file=",
			"exclude-file=",
			"include-type=",
			"exclude-type=",
	]

	try:
		# gnu_getopt allows options and paths to be intermixed; the paths come back in args.
		opts, args = GetOpt(sys.argv[1:], short_options, long_options)
	except GetoptError as e:
		sys.stderr.write("%s\n" % str(e))
		usage(sys.stderr)

	for opt, arg in opts:
		if opt in ("-h", "--help"):
			usage(sys.stdout)
		elif opt in ("-S", "--strings"):
			strings = True
		elif opt in ("-l", "--symlinks"):
			symlinks = True
		elif opt in ("-n", "--name"):
			name = True
		elif opt in ("-s", "--same"):
			same = True
		elif opt in ("-d", "--diff"):
			same = False
		elif opt in ("-t", "--term"):
			fit_to_width = True
		elif opt in ("-c", "--csv"):
			log_csv = True
		elif opt in ("-q", "--quiet"):
			quiet = True
		elif opt in ("-f", "--file"):
			log_file = arg
		elif opt in ("-m", "--max"):
			max_results = _parse_int_option(opt, arg)
		elif opt in ("-o", "--cutoff"):
			cutoff = _parse_int_option(opt, arg)
		elif opt in ("-y", "--include-file"):
			include_files.append(arg)
		elif opt in ("-x", "--exclude-file"):
			exclude_files.append(arg)
		elif opt in ("-Y", "--include-type"):
			include_types.append(arg.lower())
		elif opt in ("-X", "--exclude-type"):
			exclude_types.append(arg.lower())
		elif opt in ("-a", "--abspath"):
			abspath = True

	# Everything getopt did not consume as an option or option value is a target path.
	file_list = list(args)

	# A needle and at least one haystack are required.
	if len(file_list) < 2:
		usage(sys.stderr)

	# With neither -s nor -d, results default to "same" and the cutoff stays as given
	# (0 unless -o was used). DEFAULT_CUTOFF only applies when -s/-d was chosen and
	# the cutoff is still 0.
	if same is None:
		same = True
	elif cutoff == 0:
		cutoff = DEFAULT_CUTOFF

	# Filters are keyed by True (include) / False (exclude); unused filters are left out.
	if include_files:
		matches[True] = include_files
	if exclude_files:
		matches[False] = exclude_files

	if include_types:
		types[True] = include_types
	if exclude_types:
		types[False] = exclude_types

	rehash = hashmatch.HashMatch(cutoff=cutoff,
					strings=strings,
					same=same,
					symlinks=symlinks,
					name=name,
					max_results=max_results,
					display=True,
					quiet=quiet,
					log=log_file,
					csv=log_csv,
					format_to_screen=fit_to_width,
					abspath=abspath,
					types=types,
					matches=matches)

	needle = file_list[0]
	haystack = file_list[1:]

	# Pick the comparison by what the needle and the first haystack are on disk:
	# file vs. file(s), file vs. non-file haystacks, or non-file needle vs. haystacks.
	if os.path.isfile(needle):
		if os.path.isfile(haystack[0]):
			rehash.files(needle, haystack)
		else:
			rehash.file(needle, haystack)
	else:
		rehash.directories(needle, haystack)


# Run the command line interface only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
	main()