1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python
import os
import re
import sys
import binwalk.hashmatch as hashmatch
from binwalk.compat import *
from getopt import GetoptError, gnu_getopt as GetOpt
def usage(fd):
    """Write the command-line help text to ``fd`` and terminate the process.

    Args:
        fd: Writable text stream that receives the help text.

    Raises:
        SystemExit: Always. The exit status is 0 when ``fd`` is ``sys.stdout``
            (help was explicitly requested) and 1 for any other stream
            (the help is being shown because of a usage error).
    """
    lines = (
        "",
        'Diff files or directories using Context Triggered Piecewise Hashing ("fuzzy" hashing).',
        "Craig Heffner, http://www.devttys0.com",
        "",
        "Usage: %s [OPTIONS] [NEEDLE] [HAYSTACK] [HAYSTACK] [HAYSTACK] ..." % os.path.basename(sys.argv[0]),
        "",
        "NEEDLE may be a file or a directory.",
        "HAYSTACKs must be either all files or all directories.",
        "",
        "Diffing Options:",
        "\t-d, --diff Show files that are different (default)",
        "\t-s, --same Show files that are the same",
        "\t-S, --strings Diff strings inside files instead of the entire file",
        # The option parser binds -c to --csv; the short form of --cutoff is -o.
        "\t-o, --cutoff=<n> Set the cutoff percentage (default: 50%)",
        "\t-m, --max=<n> Quit after n number of matches",
        "",
        "Filtering Options:",
        "\t-n, --name Only diff files whose base names are the same",
        "\t-l, --symlinks Don't ignore symlinks",
        "\t-y, --include-file=<match> Only diff against a specific file name (e.g., *.py, *.bin, etc)",
        "\t-x, --exclude-file=<match> Do not diff against a specific file name (e.g., *.py, *.bin, etc)",
        "\t-Y, --include-type=<type> Only diff against a certain file type (e.g., elf, jpeg, etc)",
        "\t-X, --exclude-type=<type> Do not diff against a certain file type (e.g., elf, jpeg, etc)",
        "",
        "General Options:",
        "\t-f, --file=<file> Log results to file",
        "\t-c, --csv Log results to file in csv format",
        "\t-q, --quiet Suppress output to stdout",
        "\t-t, --term Format output to fit the terminal window",
        "\t-h, --help Show help",
        "",
    )
    fd.write("\n".join(lines) + "\n")

    # Help printed on request is a success; help printed anywhere else
    # (e.g. stderr after a bad option) signals a usage error.
    sys.exit(0 if fd == sys.stdout else 1)
def main():
    """Parse the command line and run the requested fuzzy-hash comparison.

    Options are read from ``sys.argv[1:]`` with GNU-style parsing, so options
    and paths may be interleaved. The first positional argument is the needle
    and the remaining ones are the haystacks.

    Exits through ``usage()`` (which raises ``SystemExit``) when help is
    requested, when an option is unknown or malformed, when a numeric option
    value cannot be parsed, or when fewer than two paths are supplied.
    """
    include_files = []
    exclude_files = []
    include_types = []
    exclude_types = []
    types = {}
    matches = {}
    log_file = None
    log_csv = False
    fit_to_width = False
    quiet = False
    strings = False
    symlinks = False
    name = False
    same = False
    cutoff = None
    max_results = None

    short_options = "cdf:hlm:no:qSstx:X:y:Y:"
    long_options = [
        "help",
        "cutoff=",
        "strings",
        "same",
        "diff",
        "max=",
        "symlinks",
        "name",
        "file=",
        "csv",
        "term",
        "quiet",
        # Long forms of the filtering options; without these entries getopt
        # rejects the documented --include-*/--exclude-* spellings.
        "include-file=",
        "exclude-file=",
        "include-type=",
        "exclude-type=",
    ]

    try:
        opts, args = GetOpt(sys.argv[1:], short_options, long_options)
    except GetoptError as e:
        sys.stderr.write("%s\n" % str(e))
        usage(sys.stderr)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage(sys.stdout)
        elif opt in ("-S", "--strings"):
            strings = True
        elif opt in ("-l", "--symlinks"):
            symlinks = True
        elif opt in ("-n", "--name"):
            name = True
        elif opt in ("-s", "--same"):
            same = True
        elif opt in ("-d", "--diff"):
            same = False
        elif opt in ("-t", "--term"):
            fit_to_width = True
        elif opt in ("-c", "--csv"):
            log_csv = True
        elif opt in ("-q", "--quiet"):
            quiet = True
        elif opt in ("-f", "--file"):
            log_file = arg
        elif opt in ("-m", "--max", "-o", "--cutoff"):
            # Base 0 lets the value be given as decimal, hex (0x..), octal
            # or binary. A malformed value is a usage error, not a traceback.
            try:
                number = int(arg, 0)
            except ValueError:
                sys.stderr.write("Invalid numeric value for %s: '%s'\n" % (opt, arg))
                usage(sys.stderr)
            else:
                if opt in ("-m", "--max"):
                    max_results = number
                else:
                    cutoff = number
        elif opt in ("-y", "--include-file"):
            include_files.append(arg)
        elif opt in ("-x", "--exclude-file"):
            exclude_files.append(arg)
        elif opt in ("-Y", "--include-type"):
            include_types.append(arg.lower())
        elif opt in ("-X", "--exclude-type"):
            exclude_types.append(arg.lower())

    # gnu_getopt already returns every non-option argument, in order. Using
    # that list directly avoids losing a path that happens to equal the
    # value of some option (e.g. "-f out.log out.log dir").
    file_list = list(args)

    # Include lists are keyed by True and exclude lists by False; a key is
    # only present when the corresponding option was actually given.
    if include_files:
        matches[True] = include_files
    if exclude_files:
        matches[False] = exclude_files
    if include_types:
        types[True] = include_types
    if exclude_types:
        types[False] = exclude_types

    if len(file_list) < 2:
        # A needle and at least one haystack are required.
        sys.stderr.write("At least one NEEDLE and one HAYSTACK must be specified\n")
        usage(sys.stderr)

    needle = file_list[0]
    haystacks = file_list[1:]

    rehash = hashmatch.HashMatch(cutoff=cutoff,
                                 strings=strings,
                                 same=same,
                                 symlinks=symlinks,
                                 name=name,
                                 max_results=max_results,
                                 display=True,
                                 quiet=quiet,
                                 log=log_file,
                                 csv=log_csv,
                                 format_to_screen=fit_to_width,
                                 types=types,
                                 matches=matches)

    # Pick the comparison mode from the needle and the first haystack only;
    # the remaining haystacks are expected to be of the same kind.
    if os.path.isfile(needle):
        if os.path.isfile(haystacks[0]):
            rehash.files(needle, haystacks)
        else:
            rehash.file(needle, haystacks)
    else:
        rehash.directories(needle, haystacks)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()