Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
B
binwalk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
fact-gitdep
binwalk
Commits
3992d1cd
Commit
3992d1cd
authored
Dec 04, 2013
by
devttys0
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Initial commit of rehash; some bug fixes / feature additions left to do in hashmatch.py
parent
48a1a48b
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
191 additions
and
32 deletions
+191
-32
rehash
src/bin/rehash
+121
-0
hashmatch.py
src/binwalk/hashmatch.py
+58
-29
binwalk
src/binwalk/magic/binwalk
+0
-0
smartstrings.py
src/binwalk/smartstrings.py
+12
-3
No files found.
src/bin/rehash
0 → 100755
View file @
3992d1cd
#!/usr/bin/env python
import
os
import
re
import
sys
import
magic
import
binwalk.hashmatch
as
hashmatch
from
binwalk.compat
import
*
from
getopt
import
GetoptError
,
gnu_getopt
as
GetOpt
def usage(fd):
    '''
    Write the one-line command synopsis to a stream.

    @fd - A writable file-like object, e.g. sys.stdout or sys.stderr.

    Returns None.
    '''
    synopsis = "Usage: %s [OPTIONS] [FILE | DIR] [FILE | DIR] ...\n"
    program = sys.argv[0]
    fd.write(synopsis % program)
def main():
    '''
    Command line entry point: parse the options, compare the given paths and print the results.

    The first positional path is the reference. If it is a file, it is compared
    against a second file, or searched for in the remaining directories. If it
    is a directory, every positional path must be a directory and the first one
    is compared against all the others.

    Each result is printed as "<match> <file name>".

    Returns None. Exits with status 0 after printing help, and with status 1
    on any usage error (bad option, bad numeric value, too few or invalid paths).
    '''
    results = []
    types = {}
    strings = False
    symlinks = False
    all_types = False
    name = False
    same = False
    missing = False
    cutoff = None
    max_results = None
    verbose = False

    short_options = "c:hlmnSsvx:"
    # NOTE(review): "file-type" and "file-name" are accepted by the parser but
    # no branch below handles them yet.
    long_options = [
        "help",
        "cutoff=",
        "strings",
        "show-same",
        "show-missing",
        "max=",
        "symlinks",
        "name",
        "file-type",
        "file-name",
        "verbose",
    ]

    try:
        opts, args = GetOpt(sys.argv[1:], short_options, long_options)
    except GetoptError as e:
        sys.stderr.write("%s\n" % str(e))
        usage(sys.stderr)
        # Without this exit, execution would continue with 'opts' unbound.
        sys.exit(1)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage(sys.stdout)
            sys.exit(0)
        elif opt in ("-S", "--strings"):
            strings = True
        elif opt in ("-l", "--symlinks"):
            symlinks = True
        elif opt in ("-n", "--name"):
            name = True
        elif opt in ("-s", "--show-same"):
            same = True
        elif opt in ("-m", "--show-missing"):
            missing = True
        elif opt in ("-x", "--max", "-c", "--cutoff"):
            # Base 0 accepts decimal, hex (0x..), octal (0o..) and binary (0b..) literals.
            try:
                value = int(arg, 0)
            except ValueError:
                sys.stderr.write("Invalid numeric value for %s: '%s'\n" % (opt, arg))
                usage(sys.stderr)
                sys.exit(1)

            if opt in ("-x", "--max"):
                max_results = value
            else:
                cutoff = value
        elif opt in ("-v", "--verbose"):
            verbose = True

    # gnu_getopt already separates the positional arguments from the options,
    # so there is no need to re-scan sys.argv (which would wrongly discard a
    # path that happens to equal an option's value).
    file_list = list(args)

    if len(file_list) < 2:
        # At least a reference path and one path to compare against are required.
        usage(sys.stderr)
        sys.exit(1)

    rehash = hashmatch.HashMatch(cutoff=cutoff,
                                 strings=strings,
                                 symlinks=symlinks,
                                 name=name,
                                 same=same,
                                 missing=missing,
                                 max_results=max_results,
                                 verbose=verbose)

    if os.path.isfile(file_list[0]):
        if not all_types and not types:
            m = magic.open(0)
            m.load()

            file_type = m.file(file_list[0])
            if file_type:
                # NOTE(review): 'types' is populated here but is never handed to
                # HashMatch (whose constructor accepts a 'types' argument) --
                # confirm whether it should be passed in before relying on it.
                types[True] = re.escape(file_type.lower())

        if os.path.isfile(file_list[1]):
            results = rehash.files(file_list[0], file_list[1])
        else:
            results = rehash.file(file_list[0], file_list[1:])
    else:
        for f in file_list:
            if not os.path.isdir(f):
                print("Invalid usage")
                usage(sys.stderr)
                sys.exit(1)

        # directories() iterates its second argument as a list of directories;
        # passing a single path string would iterate over its characters.
        results = rehash.directories(file_list[0], file_list[1:])

    for (match, fname) in results:
        print("%s %s" % (match, fname))
if __name__ == "__main__":
    # Only run the command line interface when executed as a script,
    # not when this file is imported as a module.
    main()
src/binwalk/hashmatch.py
View file @
3992d1cd
...
@@ -20,24 +20,24 @@ class HashMatch(object):
...
@@ -20,24 +20,24 @@ class HashMatch(object):
FUZZY_DEFAULT_CUTOFF
=
50
FUZZY_DEFAULT_CUTOFF
=
50
def
__init__
(
self
,
cutoff
=
None
,
fuzzy
=
True
,
strings
=
False
,
same
=
False
,
missing
=
False
,
symlinks
=
False
,
name
=
False
,
matches
=
{},
types
=
{}
):
def
__init__
(
self
,
cutoff
=
None
,
strings
=
False
,
same
=
False
,
missing
=
False
,
symlinks
=
False
,
name
=
False
,
max_results
=
None
,
matches
=
{},
types
=
{},
verbose
=
False
):
'''
'''
Class constructor.
Class constructor.
@cutoff - The fuzzy cutoff which determines if files are different or not.
@cutoff - The fuzzy cutoff which determines if files are different or not.
@fuzzy - Set to True to do fuzzy hashing; set to False to do traditional hashing.
@strings - Only hash strings inside of the file, not the entire file itself.
@strings - Only hash strings inside of the file, not the entire file itself.
@same - Set to True to show files that are the same, False to show files that are different.
@same - Set to True to show files that are the same, False to show files that are different.
@missing - Set to True to show missing files.
@missing - Set to True to show missing files.
@symlinks - Set to True to include symbolic link files.
@symlinks - Set to True to include symbolic link files.
@name - Set to True to only compare files whose base names match.
@name - Set to True to only compare files whose base names match.
@max_results - Stop searching after x number of matches.
@matches - A dictionary of file names to diff.
@matches - A dictionary of file names to diff.
@types - A dictionary of file types to diff.
@types - A dictionary of file types to diff.
@verbose - Enable verbose mode.
Returns None.
Returns None.
'''
'''
self
.
cutoff
=
cutoff
self
.
cutoff
=
cutoff
self
.
fuzzy
=
fuzzy
self
.
strings
=
strings
self
.
strings
=
strings
self
.
show_same
=
same
self
.
show_same
=
same
self
.
show_missing
=
missing
self
.
show_missing
=
missing
...
@@ -45,6 +45,10 @@ class HashMatch(object):
...
@@ -45,6 +45,10 @@ class HashMatch(object):
self
.
matches
=
matches
self
.
matches
=
matches
self
.
name
=
name
self
.
name
=
name
self
.
types
=
types
self
.
types
=
types
self
.
max_results
=
max_results
self
.
verbose
=
verbose
self
.
total
=
0
self
.
magic
=
magic
.
open
(
0
)
self
.
magic
=
magic
.
open
(
0
)
self
.
magic
.
load
()
self
.
magic
.
load
()
...
@@ -58,9 +62,13 @@ class HashMatch(object):
...
@@ -58,9 +62,13 @@ class HashMatch(object):
self
.
types
[
k
]
=
re
.
compile
(
self
.
types
[
k
])
self
.
types
[
k
]
=
re
.
compile
(
self
.
types
[
k
])
def
_get_strings
(
self
,
fname
):
def
_get_strings
(
self
,
fname
):
return
''
.
join
([
string
for
(
offset
,
string
)
in
binwalk
.
smartstrings
.
FileStrings
(
fname
,
n
=
10
)
.
strings
()])
return
''
.
join
([
string
for
(
offset
,
string
)
in
binwalk
.
smartstrings
.
FileStrings
(
fname
,
n
=
10
,
block
=
None
)
.
strings
()])
def
files
(
self
,
file1
,
file2
):
def
_print
(
self
,
message
):
if
self
.
verbose
:
print
(
message
)
def
_compare_files
(
self
,
file1
,
file2
):
'''
'''
Fuzzy diff two files.
Fuzzy diff two files.
...
@@ -73,7 +81,10 @@ class HashMatch(object):
...
@@ -73,7 +81,10 @@ class HashMatch(object):
status
=
0
status
=
0
if
not
self
.
name
or
os
.
path
.
basename
(
file1
)
==
os
.
path
.
basename
(
file2
):
if
not
self
.
name
or
os
.
path
.
basename
(
file1
)
==
os
.
path
.
basename
(
file2
):
if
self
.
fuzzy
:
if
os
.
path
.
exists
(
file1
)
and
os
.
path
.
exists
(
file2
):
self
.
_print
(
"Checking
%
s ->
%
s"
%
(
file1
,
file2
))
hash1
=
ctypes
.
create_string_buffer
(
self
.
FUZZY_MAX_RESULT
)
hash1
=
ctypes
.
create_string_buffer
(
self
.
FUZZY_MAX_RESULT
)
hash2
=
ctypes
.
create_string_buffer
(
self
.
FUZZY_MAX_RESULT
)
hash2
=
ctypes
.
create_string_buffer
(
self
.
FUZZY_MAX_RESULT
)
...
@@ -98,11 +109,7 @@ class HashMatch(object):
...
@@ -98,11 +109,7 @@ class HashMatch(object):
else
:
else
:
return
self
.
lib
.
fuzzy_compare
(
hash1
,
hash2
)
return
self
.
lib
.
fuzzy_compare
(
hash1
,
hash2
)
except
Exception
as
e
:
except
Exception
as
e
:
print
"WARNING: Exception while performing fuzzy comparison:"
,
e
print
"WARNING: Exception while doing fuzzy hash:"
,
e
elif
not
self
.
strings
:
if
file_md5
(
file1
)
==
file_md5
(
file2
):
return
100
return
None
return
None
...
@@ -111,7 +118,7 @@ class HashMatch(object):
...
@@ -111,7 +118,7 @@ class HashMatch(object):
Returns True if the match value is greater than or equal to the cutoff.
Returns True if the match value is greater than or equal to the cutoff.
Returns False if the match value is less than the cutoff.
Returns False if the match value is less than the cutoff.
'''
'''
return
(
match
>=
self
.
cutoff
)
return
(
match
is
not
None
and
match
>=
self
.
cutoff
)
def
_get_file_list
(
self
,
directory
):
def
_get_file_list
(
self
,
directory
):
'''
'''
...
@@ -141,7 +148,11 @@ class HashMatch(object):
...
@@ -141,7 +148,11 @@ class HashMatch(object):
if
self
.
types
:
if
self
.
types
:
for
f
in
files
:
for
f
in
files
:
for
(
include
,
type_regex
)
in
iterator
(
self
.
types
):
for
(
include
,
type_regex
)
in
iterator
(
self
.
types
):
magic_result
=
self
.
magic
.
file
(
f
)
.
lower
()
try
:
magic_result
=
self
.
magic
.
file
(
os
.
path
.
join
(
directory
,
f
))
.
lower
()
except
Exception
as
e
:
magic_result
=
''
match
=
type_regex
.
match
(
magic_result
)
match
=
type_regex
.
match
(
magic_result
)
# If this matched an include filter, or didn't match an exclude filter
# If this matched an include filter, or didn't match an exclude filter
...
@@ -162,6 +173,12 @@ class HashMatch(object):
...
@@ -162,6 +173,12 @@ class HashMatch(object):
return
set
(
file_list
)
return
set
(
file_list
)
def
files
(
self
,
file1
,
file2
):
m
=
self
.
_compare_files
(
file1
,
file2
)
if
m
is
None
:
m
=
0
return
[(
m
,
file2
)]
def
file
(
self
,
fname
,
directories
):
def
file
(
self
,
fname
,
directories
):
'''
'''
Search for a particular file in multiple directories.
Search for a particular file in multiple directories.
...
@@ -172,45 +189,57 @@ class HashMatch(object):
...
@@ -172,45 +189,57 @@ class HashMatch(object):
Returns a list of tuple results.
Returns a list of tuple results.
'''
'''
matching_files
=
[]
matching_files
=
[]
self
.
total
=
0
for
directory
in
directories
:
for
directory
in
directories
:
for
f
in
self
.
_get_file_list
(
directory
):
for
f
in
self
.
_get_file_list
(
directory
):
f
=
os
.
path
.
join
(
directory
,
f
)
f
=
os
.
path
.
join
(
directory
,
f
)
m
=
self
.
files
(
fname
,
f
)
m
=
self
.
_compare_
files
(
fname
,
f
)
if
self
.
is_match
(
m
):
if
m
is
not
None
and
self
.
is_match
(
m
):
matching_files
.
append
((
m
,
f
))
matching_files
.
append
((
m
,
f
))
self
.
total
+=
1
if
self
.
max_results
and
self
.
total
>=
self
.
max_results
:
return
matching_files
return
matching_files
return
matching_files
def
directories
(
self
,
dir1
,
dir2
):
def
directories
(
self
,
source
,
dir_list
):
'''
'''
Search two directories for matching files.
Search two directories for matching files.
@
dir1 - First directory
.
@
source - Source directory to compare everything to
.
@dir
2 - Second directory
.
@dir
_list - Compare files in source to files in these directories
.
Returns a list of tuple results.
Returns a list of tuple results.
'''
'''
results
=
[]
results
=
[]
self
.
total
=
0
source_files
=
self
.
_get_file_list
(
source
)
dir1_files
=
self
.
_get_file_list
(
dir1
)
for
directory
in
dir_list
:
dir2_files
=
self
.
_get_file_list
(
dir2
)
dir_files
=
self
.
_get_file_list
(
directory
)
for
f
in
dir1
_files
:
for
f
in
source
_files
:
if
f
in
dir2
_files
:
if
f
in
dir
_files
:
file1
=
os
.
path
.
join
(
dir1
,
f
)
file1
=
os
.
path
.
join
(
source
,
f
)
file2
=
os
.
path
.
join
(
dir2
,
f
)
file2
=
os
.
path
.
join
(
directory
,
f
)
m
=
self
.
files
(
file1
,
file2
)
m
=
self
.
_compare_
files
(
file1
,
file2
)
if
m
is
not
None
:
if
m
is
not
None
:
matches
=
self
.
is_match
(
m
)
matches
=
self
.
is_match
(
m
)
if
(
matches
and
self
.
show_same
)
or
(
not
matches
and
not
self
.
show_same
):
if
(
matches
and
self
.
show_same
)
or
(
not
matches
and
not
self
.
show_same
):
results
.
append
((
"
%3
d"
%
m
,
f
))
results
.
append
((
"
%3
d"
%
m
,
f
))
if
self
.
show_missing
:
self
.
total
+=
1
results
+=
[(
'---'
,
f
)
for
f
in
(
dir1_files
-
dir2_files
)]
if
self
.
max_results
and
self
.
total
>=
self
.
max_results
:
results
+=
[(
'+++'
,
f
)
for
f
in
(
dir2_files
-
dir1_files
)]
return
results
if
self
.
show_missing
and
len
(
dir_list
)
==
1
:
results
+=
[(
'---'
,
f
)
for
f
in
(
source_files
-
dir_files
)]
results
+=
[(
'+++'
,
f
)
for
f
in
(
dir_files
-
source_files
)]
return
results
return
results
...
@@ -218,7 +247,7 @@ class HashMatch(object):
...
@@ -218,7 +247,7 @@ class HashMatch(object):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
import
sys
import
sys
hmatch
=
HashMatch
(
strings
=
True
,
name
=
True
)
hmatch
=
HashMatch
(
strings
=
True
,
name
=
False
,
types
=
{
True
:
"^elf"
}
)
print
hmatch
.
file
(
sys
.
argv
[
1
],
sys
.
argv
[
2
:])
print
hmatch
.
file
(
sys
.
argv
[
1
],
sys
.
argv
[
2
:])
#for (match, fname) in hmatch.directories(sys.argv[1], sys.argv[2]):
#for (match, fname) in hmatch.directories(sys.argv[1], sys.argv[2]):
#for (match, fname) in hmatch.find_file(sys.argv[1], sys.argv[2:]):
#for (match, fname) in hmatch.find_file(sys.argv[1], sys.argv[2:]):
...
...
src/binwalk/magic/binwalk
View file @
3992d1cd
No preview for this file type
src/binwalk/smartstrings.py
View file @
3992d1cd
...
@@ -41,7 +41,7 @@ class FileStrings(object):
...
@@ -41,7 +41,7 @@ class FileStrings(object):
@length - The number of bytes in the file to analyze.
@length - The number of bytes in the file to analyze.
@offset - The starting offset into the file to begin analysis.
@offset - The starting offset into the file to begin analysis.
@n - The minimum valid string length.
@n - The minimum valid string length.
@block - The block size to use
when performing
entropy analysis.
@block - The block size to use
when performing entropy analysis. Set to None to skip
entropy analysis.
@algorithm - The entropy algorithm to use when performing entropy analysis.
@algorithm - The entropy algorithm to use when performing entropy analysis.
@plugins - An instance of the Plugins class.
@plugins - An instance of the Plugins class.
...
@@ -59,22 +59,31 @@ class FileStrings(object):
...
@@ -59,22 +59,31 @@ class FileStrings(object):
self
.
valid_strings
=
[]
self
.
valid_strings
=
[]
self
.
external_validators
=
[]
self
.
external_validators
=
[]
self
.
plugins
=
plugins
self
.
plugins
=
plugins
self
.
block
=
block
if
not
self
.
n
:
if
not
self
.
n
:
self
.
n
=
self
.
MIN_STRING_LENGTH
self
.
n
=
self
.
MIN_STRING_LENGTH
if
self
.
block
is
not
None
:
# Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
# Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
# Give fake file results list to prevent FileEntropy from doing too much analysis.
# Give fake file results list to prevent FileEntropy from doing too much analysis.
with
entropy
.
FileEntropy
(
file_name
,
block
=
block
,
file_results
=
[
'foo'
])
as
e
:
with
entropy
.
FileEntropy
(
file_name
,
block
=
self
.
block
,
file_results
=
[
'foo'
])
as
e
:
(
self
.
x
,
self
.
y
,
self
.
average_entropy
)
=
e
.
analyze
(
algorithm
=
algorithm
)
(
self
.
x
,
self
.
y
,
self
.
average_entropy
)
=
e
.
analyze
(
algorithm
=
algorithm
)
for
i
in
range
(
0
,
len
(
self
.
x
)):
for
i
in
range
(
0
,
len
(
self
.
x
)):
self
.
entropy
[
self
.
x
[
i
]]
=
self
.
y
[
i
]
self
.
entropy
[
self
.
x
[
i
]]
=
self
.
y
[
i
]
# Make sure our block size matches the entropy analysis's block size
# Make sure our block size matches the entropy analysis's block size
self
.
block
=
e
.
block
self
.
block
=
e
.
block
# Make sure the starting offset is a multiple of the block size; else, when later checking
# Make sure the starting offset is a multiple of the block size; else, when later checking
# the entropy analysis, block offsets won't line up.
# the entropy analysis, block offsets won't line up.
self
.
start
-=
(
self
.
start
%
self
.
block
)
self
.
start
-=
(
self
.
start
%
self
.
block
)
else
:
i
=
0
self
.
block
=
common
.
BlockFile
.
READ_BLOCK_SIZE
# Fake the entropy scan
while
i
<
common
.
file_size
(
file_name
):
self
.
entropy
[
i
]
=
1.0
i
+=
self
.
block
self
.
fd
=
common
.
BlockFile
(
file_name
,
'r'
,
length
=
length
,
offset
=
self
.
start
)
self
.
fd
=
common
.
BlockFile
(
file_name
,
'r'
,
length
=
length
,
offset
=
self
.
start
)
# TODO: This is not optimal. We should read in larger chunks and process it into self.block chunks.
# TODO: This is not optimal. We should read in larger chunks and process it into self.block chunks.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment