Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
B
binwalk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
fact-gitdep
binwalk
Commits
3992d1cd
Commit
3992d1cd
authored
Dec 04, 2013
by
devttys0
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Initial commit of rehash; some bug fixes / feature additions left to do in hashmatch.py
parent
48a1a48b
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
205 additions
and
46 deletions
+205
-46
rehash
src/bin/rehash
+121
-0
hashmatch.py
src/binwalk/hashmatch.py
+62
-33
binwalk
src/binwalk/magic/binwalk
+0
-0
smartstrings.py
src/binwalk/smartstrings.py
+22
-13
No files found.
src/bin/rehash
0 → 100755
View file @
3992d1cd
#!/usr/bin/env python
import
os
import
re
import
sys
import
magic
import
binwalk.hashmatch
as
hashmatch
from
binwalk.compat
import
*
from
getopt
import
GetoptError
,
gnu_getopt
as
GetOpt
def usage(fd):
    '''
    Write the one-line command synopsis to a stream.

    @fd - A writable file-like object (e.g., sys.stdout or sys.stderr).

    Returns None.
    '''
    # sys.argv[0] is substituted so the synopsis shows the name the script was invoked as.
    synopsis = "Usage: %s [OPTIONS] [FILE | DIR] [FILE | DIR] ...\n" % sys.argv[0]
    fd.write(synopsis)
def main():
    '''
    Command line entry point: parse the options, then compare the given files / directories.

    Recognized options:

        -h, --help          Print usage to stdout and exit.
        -S, --strings       Passed to HashMatch as strings=True.
        -l, --symlinks      Passed to HashMatch as symlinks=True.
        -n, --name          Passed to HashMatch as name=True.
        -s, --show-same     Passed to HashMatch as same=True.
        -m, --show-missing  Passed to HashMatch as missing=True.
        -x, --max=<int>     Passed to HashMatch as max_results.
        -c, --cutoff=<int>  Passed to HashMatch as cutoff.
        -v, --verbose       Passed to HashMatch as verbose=True.

    The first path is the reference; at least two paths are required:

        FILE FILE     - HashMatch.files() on the two files.
        FILE DIR ...  - HashMatch.file() on the file and every remaining path.
        DIR  DIR ...  - HashMatch.directories() on the first directory and every remaining directory.

    Each result tuple is printed to stdout as "<match> <file name>".

    Calls sys.exit() after printing usage, both for --help (status 0) and for any usage error (status 1).

    Returns None.
    '''
    results = []
    types = {}
    strings = False
    symlinks = False
    all_types = False
    name = False
    same = False
    missing = False
    cutoff = None
    max_results = None
    verbose = False

    short_options = "c:hlmnSsvx:"
    # NOTE(review): "file-type" and "file-name" are accepted by getopt but no branch below
    # acts on them yet.
    long_options = [
        "help",
        "cutoff=",
        "strings",
        "show-same",
        "show-missing",
        "max=",
        "symlinks",
        "name",
        "file-type",
        "file-name",
        "verbose",
    ]

    try:
        opts, args = GetOpt(sys.argv[1:], short_options, long_options)
    except GetoptError as e:
        sys.stderr.write("%s\n" % str(e))
        usage(sys.stderr)
        # opts/args are unbound at this point, so there is nothing left to run.
        sys.exit(1)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage(sys.stdout)
            sys.exit(0)
        elif opt in ("-S", "--strings"):
            strings = True
        elif opt in ("-l", "--symlinks"):
            symlinks = True
        elif opt in ("-n", "--name"):
            name = True
        elif opt in ("-s", "--show-same"):
            same = True
        elif opt in ("-m", "--show-missing"):
            missing = True
        elif opt in ("-x", "--max", "-c", "--cutoff"):
            # Base 0 lets the value be given as decimal, hex (0x..) or octal (0o..).
            try:
                value = int(arg, 0)
            except ValueError:
                sys.stderr.write("Invalid integer value for %s: '%s'\n" % (opt, arg))
                usage(sys.stderr)
                sys.exit(1)

            if opt in ("-x", "--max"):
                max_results = value
            else:
                cutoff = value
        elif opt in ("-v", "--verbose"):
            verbose = True

    # gnu_getopt already separates the non-option arguments from the options (wherever they
    # appear on the command line), so those are the target file paths.
    file_list = list(args)

    if len(file_list) < 2:
        # A comparison needs a reference path plus at least one path to compare it to.
        usage(sys.stderr)
        sys.exit(1)

    rehash = hashmatch.HashMatch(cutoff=cutoff,
                                 strings=strings,
                                 symlinks=symlinks,
                                 name=name,
                                 same=same,
                                 missing=missing,
                                 max_results=max_results,
                                 verbose=verbose)

    if os.path.isfile(file_list[0]):
        # NOTE(review): the detected type is stored in 'types', but 'types' is never handed to
        # HashMatch (which is already constructed above), so this currently has no effect on
        # the comparison. Left as-is pending the file type filtering feature.
        if not all_types and len(types) == 0:
            m = magic.open(0)
            m.load()
            file_type = m.file(file_list[0])
            if file_type:
                types[True] = re.escape(file_type.lower())

        if os.path.isfile(file_list[1]):
            results = rehash.files(file_list[0], file_list[1])
        else:
            results = rehash.file(file_list[0], file_list[1:])
    else:
        # The reference is not a regular file, so every path must be a directory.
        for f in file_list:
            if not os.path.isdir(f):
                print("Invalid usage")
                usage(sys.stderr)
                sys.exit(1)

        # directories() takes the source directory plus a *list* of directories to compare against.
        results = rehash.directories(file_list[0], file_list[1:])

    for (match, fname) in results:
        print("%s %s" % (match, fname))
# Run the command line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
src/binwalk/hashmatch.py
View file @
3992d1cd
...
...
@@ -20,24 +20,24 @@ class HashMatch(object):
FUZZY_DEFAULT_CUTOFF
=
50
def
__init__
(
self
,
cutoff
=
None
,
fuzzy
=
True
,
strings
=
False
,
same
=
False
,
missing
=
False
,
symlinks
=
False
,
name
=
False
,
matches
=
{},
types
=
{}
):
def
__init__
(
self
,
cutoff
=
None
,
strings
=
False
,
same
=
False
,
missing
=
False
,
symlinks
=
False
,
name
=
False
,
max_results
=
None
,
matches
=
{},
types
=
{},
verbose
=
False
):
'''
Class constructor.
@cutoff - The fuzzy cutoff which determines if files are different or not.
@fuzzy - Set to True to do fuzzy hashing; set to False to do traditional hashing.
@strings - Only hash strings inside of the file, not the entire file itself.
@same - Set to True to show files that are the same, False to show files that are different.
@missing - Set to True to show missing files.
@symlinks - Set to True to include symbolic link files.
@name - Set to True to only compare files whose base names match.
@max_results - Stop searching after x number of matches.
@matches - A dictionary of file names to diff.
@types - A dictionary of file types to diff.
@verbose - Enable verbose mode.
Returns None.
'''
self
.
cutoff
=
cutoff
self
.
fuzzy
=
fuzzy
self
.
strings
=
strings
self
.
show_same
=
same
self
.
show_missing
=
missing
...
...
@@ -45,6 +45,10 @@ class HashMatch(object):
self
.
matches
=
matches
self
.
name
=
name
self
.
types
=
types
self
.
max_results
=
max_results
self
.
verbose
=
verbose
self
.
total
=
0
self
.
magic
=
magic
.
open
(
0
)
self
.
magic
.
load
()
...
...
@@ -58,9 +62,13 @@ class HashMatch(object):
self
.
types
[
k
]
=
re
.
compile
(
self
.
types
[
k
])
def
_get_strings
(
self
,
fname
):
return
''
.
join
([
string
for
(
offset
,
string
)
in
binwalk
.
smartstrings
.
FileStrings
(
fname
,
n
=
10
)
.
strings
()])
return
''
.
join
([
string
for
(
offset
,
string
)
in
binwalk
.
smartstrings
.
FileStrings
(
fname
,
n
=
10
,
block
=
None
)
.
strings
()])
def
files
(
self
,
file1
,
file2
):
def _print(self, message):
    '''
    Print a message to stdout, but only when verbose mode is enabled.

    @message - The message to print.

    Returns None.
    '''
    if not self.verbose:
        return
    print(message)
def
_compare_files
(
self
,
file1
,
file2
):
'''
Fuzzy diff two files.
...
...
@@ -73,7 +81,10 @@ class HashMatch(object):
status
=
0
if
not
self
.
name
or
os
.
path
.
basename
(
file1
)
==
os
.
path
.
basename
(
file2
):
if
self
.
fuzzy
:
if
os
.
path
.
exists
(
file1
)
and
os
.
path
.
exists
(
file2
):
self
.
_print
(
"Checking
%
s ->
%
s"
%
(
file1
,
file2
))
hash1
=
ctypes
.
create_string_buffer
(
self
.
FUZZY_MAX_RESULT
)
hash2
=
ctypes
.
create_string_buffer
(
self
.
FUZZY_MAX_RESULT
)
...
...
@@ -98,11 +109,7 @@ class HashMatch(object):
else
:
return
self
.
lib
.
fuzzy_compare
(
hash1
,
hash2
)
except
Exception
as
e
:
print
"WARNING: Exception while performing fuzzy comparison:"
,
e
elif
not
self
.
strings
:
if
file_md5
(
file1
)
==
file_md5
(
file2
):
return
100
print
"WARNING: Exception while doing fuzzy hash:"
,
e
return
None
...
...
@@ -111,7 +118,7 @@ class HashMatch(object):
Returns True if the match value is greater than or equal to the cutoff.
Returns False if the match value is less than the cutoff.
'''
return
(
match
>=
self
.
cutoff
)
return
(
match
is
not
None
and
match
>=
self
.
cutoff
)
def
_get_file_list
(
self
,
directory
):
'''
...
...
@@ -141,7 +148,11 @@ class HashMatch(object):
if
self
.
types
:
for
f
in
files
:
for
(
include
,
type_regex
)
in
iterator
(
self
.
types
):
magic_result
=
self
.
magic
.
file
(
f
)
.
lower
()
try
:
magic_result
=
self
.
magic
.
file
(
os
.
path
.
join
(
directory
,
f
))
.
lower
()
except
Exception
as
e
:
magic_result
=
''
match
=
type_regex
.
match
(
magic_result
)
# If this matched an include filter, or didn't match an exclude filter
...
...
@@ -162,6 +173,12 @@ class HashMatch(object):
return
set
(
file_list
)
def files(self, file1, file2):
    '''
    Compare two individual files.

    @file1 - Path of the first file.
    @file2 - Path of the second file.

    Returns a single-entry list holding a (match, file2) tuple.
    The match value is 0 when _compare_files returns None.
    '''
    score = self._compare_files(file1, file2)
    return [(0 if score is None else score, file2)]
def
file
(
self
,
fname
,
directories
):
'''
Search for a particular file in multiple directories.
...
...
@@ -172,45 +189,57 @@ class HashMatch(object):
Returns a list of tuple results.
'''
matching_files
=
[]
self
.
total
=
0
for
directory
in
directories
:
for
f
in
self
.
_get_file_list
(
directory
):
f
=
os
.
path
.
join
(
directory
,
f
)
m
=
self
.
files
(
fname
,
f
)
if
self
.
is_match
(
m
):
m
=
self
.
_compare_
files
(
fname
,
f
)
if
m
is
not
None
and
self
.
is_match
(
m
):
matching_files
.
append
((
m
,
f
))
self
.
total
+=
1
if
self
.
max_results
and
self
.
total
>=
self
.
max_results
:
return
matching_files
return
matching_files
def
directories
(
self
,
dir1
,
dir2
):
def
directories
(
self
,
source
,
dir_list
):
'''
Search two directories for matching files.
@
dir1 - First directory
.
@dir
2 - Second directory
.
@
source - Source directory to compare everything to
.
@dir
_list - Compare files in source to files in these directories
.
Returns a list of tuple results.
'''
results
=
[]
self
.
total
=
0
source_files
=
self
.
_get_file_list
(
source
)
dir1_files
=
self
.
_get_file_list
(
dir1
)
dir2_files
=
self
.
_get_file_list
(
dir2
)
for
directory
in
dir_list
:
dir_files
=
self
.
_get_file_list
(
directory
)
for
f
in
source_files
:
if
f
in
dir_files
:
file1
=
os
.
path
.
join
(
source
,
f
)
file2
=
os
.
path
.
join
(
directory
,
f
)
for
f
in
dir1_files
:
if
f
in
dir2_files
:
file1
=
os
.
path
.
join
(
dir1
,
f
)
file2
=
os
.
path
.
join
(
dir2
,
f
)
m
=
self
.
_compare_files
(
file1
,
file2
)
if
m
is
not
None
:
matches
=
self
.
is_match
(
m
)
m
=
self
.
files
(
file1
,
file2
)
if
m
is
not
None
:
matches
=
self
.
is_match
(
m
)
if
(
matches
and
self
.
show_same
)
or
(
not
matches
and
not
self
.
show_same
):
results
.
append
((
"
%3
d"
%
m
,
f
))
if
(
matches
and
self
.
show_same
)
or
(
not
matches
and
not
self
.
show_same
):
results
.
append
((
"
%3
d"
%
m
,
f
))
self
.
total
+=
1
if
self
.
max_results
and
self
.
total
>=
self
.
max_results
:
return
results
if
self
.
show_missing
:
results
+=
[(
'---'
,
f
)
for
f
in
(
dir1_files
-
dir2
_files
)]
results
+=
[(
'+++'
,
f
)
for
f
in
(
dir
2_files
-
dir1
_files
)]
if
self
.
show_missing
and
len
(
dir_list
)
==
1
:
results
+=
[(
'---'
,
f
)
for
f
in
(
source_files
-
dir
_files
)]
results
+=
[(
'+++'
,
f
)
for
f
in
(
dir
_files
-
source
_files
)]
return
results
...
...
@@ -218,7 +247,7 @@ class HashMatch(object):
if
__name__
==
'__main__'
:
import
sys
hmatch
=
HashMatch
(
strings
=
True
,
name
=
True
)
hmatch
=
HashMatch
(
strings
=
True
,
name
=
False
,
types
=
{
True
:
"^elf"
}
)
print
hmatch
.
file
(
sys
.
argv
[
1
],
sys
.
argv
[
2
:])
#for (match, fname) in hmatch.directories(sys.argv[1], sys.argv[2]):
#for (match, fname) in hmatch.find_file(sys.argv[1], sys.argv[2:]):
...
...
src/binwalk/magic/binwalk
View file @
3992d1cd
No preview for this file type
src/binwalk/smartstrings.py
View file @
3992d1cd
...
...
@@ -41,7 +41,7 @@ class FileStrings(object):
@length - The number of bytes in the file to analyze.
@offset - The starting offset into the file to begin analysis.
@n - The minimum valid string length.
@block - The block size to use
when performing
entropy analysis.
@block - The block size to use
when performing entropy analysis. Set to None to skip
entropy analysis.
@algorithm - The entropy algorithm to use when performing entropy analysis.
@plugins - An instance of the Plugins class.
...
...
@@ -59,22 +59,31 @@ class FileStrings(object):
self
.
valid_strings
=
[]
self
.
external_validators
=
[]
self
.
plugins
=
plugins
self
.
block
=
block
if
not
self
.
n
:
self
.
n
=
self
.
MIN_STRING_LENGTH
# Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
# Give fake file results list to prevent FileEntropy from doing too much analysis.
with
entropy
.
FileEntropy
(
file_name
,
block
=
block
,
file_results
=
[
'foo'
])
as
e
:
(
self
.
x
,
self
.
y
,
self
.
average_entropy
)
=
e
.
analyze
(
algorithm
=
algorithm
)
for
i
in
range
(
0
,
len
(
self
.
x
)):
self
.
entropy
[
self
.
x
[
i
]]
=
self
.
y
[
i
]
# Make sure our block size matches the entropy analysis's block size
self
.
block
=
e
.
block
# Make sure the starting offset is a multiple of the block size; else, when later checking
# the entropy analysis, block offsets won't line up.
self
.
start
-=
(
self
.
start
%
self
.
block
)
if
self
.
block
is
not
None
:
# Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
# Give fake file results list to prevent FileEntropy from doing too much analysis.
with
entropy
.
FileEntropy
(
file_name
,
block
=
self
.
block
,
file_results
=
[
'foo'
])
as
e
:
(
self
.
x
,
self
.
y
,
self
.
average_entropy
)
=
e
.
analyze
(
algorithm
=
algorithm
)
for
i
in
range
(
0
,
len
(
self
.
x
)):
self
.
entropy
[
self
.
x
[
i
]]
=
self
.
y
[
i
]
# Make sure our block size matches the entropy analysis's block size
self
.
block
=
e
.
block
# Make sure the starting offset is a multiple of the block size; else, when later checking
# the entropy analysis, block offsets won't line up.
self
.
start
-=
(
self
.
start
%
self
.
block
)
else
:
i
=
0
self
.
block
=
common
.
BlockFile
.
READ_BLOCK_SIZE
# Fake the entropy scan
while
i
<
common
.
file_size
(
file_name
):
self
.
entropy
[
i
]
=
1.0
i
+=
self
.
block
self
.
fd
=
common
.
BlockFile
(
file_name
,
'r'
,
length
=
length
,
offset
=
self
.
start
)
# TODO: This is not optimal. We should read in larger chunks and process it into self.block chunks.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment