Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
B
binwalk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
fact-gitdep
binwalk
Commits
7eebc482
Commit
7eebc482
authored
Dec 03, 2013
by
devttys0
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added hashmatch.py.
parent
6d643073
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
174 additions
and
0 deletions
+174
-0
hashmatch.py
src/binwalk/hashmatch.py
+174
-0
No files found.
src/binwalk/hashmatch.py
0 → 100644
View file @
7eebc482
import
os
import
re
import
fnmatch
import
ctypes
import
ctypes.util
import
magic
from
binwalk.compat
import
*
from
binwalk.common
import
file_md5
class HashMatch(object):
    '''
    Compares individual files, or two directory trees, using either fuzzy
    hashes (computed by the external libfuzzy library) or MD5 hashes.
    '''

    # Requires libfuzzy.so
    LIBRARY_NAME = "fuzzy"

    # Max result is 148 (http://ssdeep.sourceforge.net/api/html/fuzzy_8h.html)
    FUZZY_MAX_RESULT = 150
    # Files smaller than this won't produce meaningful fuzzy results (from ssdeep.h)
    FUZZY_MIN_FILE_SIZE = 4096

    FUZZY_DEFAULT_CUTOFF = 50

    def __init__(self, cutoff=None, fuzzy=True, same=False, missing=False, symlinks=False, matches=None, types=None):
        '''
        Class constructor.

        @cutoff   - The fuzzy cutoff which determines if files are different or not.
                    Defaults to FUZZY_DEFAULT_CUTOFF when None.
        @fuzzy    - Set to True to do fuzzy hashing; set to False to do traditional hashing.
        @same     - Set to True to show files that are the same, False to show files that are different.
        @missing  - Set to True to show missing files.
        @symlinks - Set to True to include symbolic link files.
        @matches  - A dictionary of file names to diff (include flag -> fnmatch pattern).
        @types    - A dictionary of file types to diff (include flag -> regular expression).

        Returns None.
        '''
        self.cutoff = self.FUZZY_DEFAULT_CUTOFF if cutoff is None else cutoff
        self.fuzzy = fuzzy
        self.show_same = same
        self.show_missing = missing
        self.symlinks = symlinks

        # None (rather than {}) is the default so that no dictionary is shared between instances
        self.matches = {} if matches is None else matches

        # Compile the type filters into a private dictionary; the caller's dictionary is left untouched
        self.types = {}
        if types:
            for k in get_keys(types):
                self.types[k] = re.compile(types[k])

        self.magic = magic.open(0)
        self.magic.load()

        # NOTE: ctypes.util.find_library returns None if the library cannot be located
        self.lib = ctypes.cdll.LoadLibrary(ctypes.util.find_library(self.LIBRARY_NAME))

    def files(self, file1, file2):
        '''
        Fuzzy diff two files.

        @file1 - The first file to diff.
        @file2 - The second file to diff.

        Returns the match percentage. In non-fuzzy mode this is 100 if the MD5 hashes
        are identical and 0 if they are not.
        Returns None on error.
        '''
        if not self.fuzzy:
            # A hash mismatch is a valid result, not an error, so report it as a 0% match
            if file_md5(file1) == file_md5(file2):
                return 100
            return 0

        hash1 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)
        hash2 = ctypes.create_string_buffer(self.FUZZY_MAX_RESULT)

        try:
            # fuzzy_hash_filename is treated as successful only when it returns 0
            hashed = (self.lib.fuzzy_hash_filename(str2bytes(file1), hash1) == 0 and
                      self.lib.fuzzy_hash_filename(str2bytes(file2), hash2) == 0)
            if hashed:
                if hash1.raw == hash2.raw:
                    return 100
                return self.lib.fuzzy_compare(hash1, hash2)
        except Exception as e:
            # Best effort: report the problem and fall through to the error return value.
            # A single formatted string prints identically under Python 2 and Python 3.
            print("WARNING: Exception while performing fuzzy comparison: %s" % (e,))

        return None

    def is_match(self, match):
        '''
        Returns True if the match value is greater than or equal to the cutoff.
        Returns False if the match value is less than the cutoff.
        '''
        return (match >= self.cutoff)

    def _get_file_list(self, directory):
        '''
        Generates a directory tree, including/excluding files as specified in self.matches and self.types.

        @directory - The root directory to start from.

        Returns a set of file paths, excluding the root directory.
        '''
        file_list = []

        # Normalize directory path so that we can exclude it from each individual file path
        directory = os.path.abspath(directory) + os.path.sep

        for (root, dirs, files) in os.walk(directory):
            # Don't include the root directory in the file paths
            rel_root = root[len(directory):]

            # Get a list of files, with or without symlinks as specified during __init__.
            # The symlink test must use the file's real location, not just its name.
            files = [os.path.join(rel_root, f) for f in files
                     if self.symlinks or not os.path.islink(os.path.join(root, f))]

            # If no filters were specified, return all files
            if not self.types and not self.matches:
                file_list += files
                continue

            # Filter based on the file type, as reported by libmagic
            if self.types:
                for f in files:
                    # f is relative to the root directory; libmagic needs the real path.
                    # The result does not depend on the filter, so look it up once per file.
                    magic_result = self.magic.file(os.path.join(directory, f)).lower()

                    for (include, type_regex) in iterator(self.types):
                        match = type_regex.match(magic_result)

                        # If this matched an include filter, or didn't match an exclude filter
                        if (match and include) or (not match and not include):
                            file_list.append(f)

            # Filter based on file name
            if self.matches:
                for (include, file_filter) in iterator(self.matches):
                    matching_files = fnmatch.filter(files, file_filter)

                    # If this is an include filter, add all matching files to the list
                    if include:
                        file_list += matching_files
                    # Else, add all files except those that matched to the list
                    else:
                        file_list += list(set(files) - set(matching_files))

        return set(file_list)

    def directories(self, dir1, dir2):
        '''
        Diff the files of two directory trees.

        @dir1 - The first directory to diff.
        @dir2 - The second directory to diff.

        Returns a list of (status, relative file path) tuples. For files present in both
        directories the status is the match percentage formatted with "%3d"; if missing
        files are being shown, the status is '---' for files only found in dir1 and '+++'
        for files only found in dir2.
        '''
        results = []

        dir1_files = self._get_file_list(dir1)
        dir2_files = self._get_file_list(dir2)

        # Sorted so that the order of the results does not depend on set iteration order
        for f in sorted(dir1_files & dir2_files):
            m = self.files(os.path.join(dir1, f), os.path.join(dir2, f))

            # The files could not be compared; there is no percentage to report
            if m is None:
                continue

            if bool(self.is_match(m)) == bool(self.show_same):
                results.append(("%3d" % m, f))

        if self.show_missing:
            results += [('---', f) for f in sorted(dir1_files - dir2_files)]
            results += [('+++', f) for f in sorted(dir2_files - dir1_files)]

        return results

    def find_file(self, fname, directories):
        '''
        Placeholder; not implemented yet.

        @fname       - Currently unused.
        @directories - Currently unused.

        Returns None.
        '''
        pass
# Command line entry point: diff two directory trees and print one "<status> <file>" line per result
if __name__ == '__main__':
    import sys

    # Both directories are required; fail with a usage message rather than an IndexError
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: %s <directory 1> <directory 2>\n" % sys.argv[0])
        sys.exit(1)

    hmatch = HashMatch(missing=True)
    for (match, fname) in hmatch.directories(sys.argv[1], sys.argv[2]):
        # A single formatted string prints identically under Python 2 and Python 3
        print("%s %s" % (match, fname))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment