Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
B
binwalk
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
fact-gitdep
binwalk
Commits
1c03b051
Commit
1c03b051
authored
Nov 07, 2014
by
devttys0
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added code comments to the magic.Magic class.
parent
2b82b156
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
160 additions
and
5 deletions
+160
-5
magic.py
src/binwalk/core/magic.py
+160
-5
No files found.
src/binwalk/core/magic.py
View file @
1c03b051
...
...
@@ -42,13 +42,27 @@ class SignatureResult(object):
class
SignatureLine
(
object
):
def
__init__
(
self
,
line
):
'''
Class constructor. Responsible for parsing a line from a signature file.
@line - A line from the signature file.
Returns None.
'''
self
.
tags
=
[]
self
.
original_
text
=
line
self
.
text
=
line
# Split the line on any white space; for this to work, backslash-escaped
# spaces ('\ ') are replaced with their escaped hex value ('\x20').
parts
=
line
.
replace
(
'
\\
'
,
'
\\
x20'
)
.
split
(
None
,
3
)
# The indentation level is determined by the number of '>' characters at
# the beginning of the signature line.
self
.
level
=
parts
[
0
]
.
count
(
'>'
)
# Get rid of the indentation characters and try to convert the remaining
# characters to an integer offset. This will fail if the offset is a complex
# value (e.g., '(4.l+16)').
self
.
offset
=
parts
[
0
]
.
replace
(
'>'
,
''
)
try
:
self
.
offset
=
int
(
self
.
offset
,
0
)
...
...
@@ -205,27 +219,50 @@ class Signature(object):
self
.
lines
.
append
(
line
)
class
Magic
(
object
):
'''
Primary class for loading signature files and scanning
blocks of arbitrary data for matching signatures.
'''
def
__init__
(
self
,
exclude
=
[],
include
=
[],
invalid
=
False
):
'''
Class constructor.
@include - A list of regex strings describing which signatures should be included in the scan results.
@exclude - A list of regex strings describing which signatures should not be included in the scan results.
@invalid - If set to True, invalid results will not be ignored.
Returns None.
'''
# Used to save the block of data passed to self.scan (see additional comments in self.scan)
self
.
data
=
""
# A list of Signature class objects, populated by self.parse (see also: self.load)
self
.
signatures
=
[]
self
.
show_invalid
=
invalid
self
.
includes
=
[
re
.
compile
(
x
)
for
x
in
include
]
self
.
excludes
=
[
re
.
compile
(
x
)
for
x
in
exclude
]
# Regex rule to replace backspace characters (an the preceeding character)
# in formatted signature strings (see self._analyze).
self
.
bspace
=
re
.
compile
(
".
\\\\
b"
)
# Regex rule to match printable ASCII characters in formatted signature
# strings (see self._analyze).
self
.
printable
=
re
.
compile
(
"[ -~]*"
)
def
_filtered
(
self
,
text
):
'''
Tests if a string should be filtered out or not.
@text - The string to check against filter rules.
Returns True if the string should be filtered out, i.e., not displayed.
Returns False if the string should be displayed.
'''
filtered
=
None
# Text is converted to lower case first, partially for historical
# purposes, but also because it simplifies writing filter rules
# (e.g., don't have to worry about case sensitivity).
text
=
text
.
lower
()
for
include
in
self
.
includes
:
...
...
@@ -233,6 +270,8 @@ class Magic(object):
filtered
=
False
break
# If exclusive include filters have been specified and did
# not match the text, then the text should be filtered out.
if
self
.
includes
and
filtered
==
None
:
return
True
...
...
@@ -241,77 +280,123 @@ class Magic(object):
filtered
=
True
break
# If no explicit exclude filters were matched, then the
# text should *not* be filtered.
if
filtered
==
None
:
filtered
=
False
return
filtered
def _do_math(self, offset, expression):
    '''
    Parses and evaluates complex expressions, e.g., "(4.l+12)", "(6*32)", etc.

    @offset     - The offset inside self.data that the current signature starts at.
    @expression - The expression to evaluate.

    Returns an integer value that is the result of the evaluated expression.
    Raises ValueError if the expression uses an unsupported type character.
    '''
    # struct format string and size in bytes for each supported type character
    formats = {
        'b': ('b', 1),     # Byte
        'B': ('b', 1),     # Byte
        's': ('<h', 2),    # Little endian short
        'l': ('<i', 4),    # Little endian long
        'S': ('>h', 2),    # Big endian short
        'L': ('>i', 4),    # Big endian long
    }

    # Does the expression contain an offset (e.g., "(4.l+12)")?
    if '.' in expression:
        # Split the expression at the first '.' into the "(<offset>" prefix and the
        # "<type><rest of the expression>" suffix.
        (o, suffix) = expression.split('.', 1)
        o = offset + int(o.split('(', 1)[1], 0)
        t = suffix[0]
        # Everything that follows the type character. This is taken from the suffix rather
        # than by splitting the whole expression on the type character, because the offset
        # itself may contain that character (e.g., the 'b' in "(0xb.b+1)").
        remainder = suffix[1:]

        if t not in formats:
            raise ValueError("Unsupported type '%s' in expression '%s'" % (t, expression))
        (fmt, size) = formats[t]

        try:
            v = struct.unpack(fmt, binwalk.core.compat.str2bytes(self.data[o:o + size]))[0]
        # struct.error is thrown if there are not enough bytes in self.data for the specified format type
        except struct.error:
            v = 0

        # Once the value at the specified offset is read from self.data, re-build the expression
        # (e.g., "(4.l+12)" might be converted into "(256+12)").
        v = "(%d%s" % (v, remainder)
    # If no offset, then it's just an evaluatable math expression (e.g., "(32+0x20)")
    else:
        v = expression

    # Evaluate the final expression
    return binwalk.core.common.MathExpression(v).value
def
_analyze
(
self
,
signature
,
offset
):
'''
Analyzes self.data for the specified signature data at the specified offset.
@signature - The signature to apply to the data.
@offset - The offset in self.data to apply the signature to.
Returns a dictionary of tags parsed from the data.
'''
description
=
[]
tag_strlen
=
None
max_line_level
=
0
tags
=
{
'id'
:
signature
.
id
,
'offset'
:
offset
,
'invalid'
:
False
}
# Apply each line of the signature to self.data, starting at the specified offset
for
line
in
signature
.
lines
:
# Ignore indentation levels above the current max indent level
if
line
.
level
<=
max_line_level
:
# If the relative offset of this signature line is just an integer value, use it
if
isinstance
(
line
.
offset
,
int
):
line_offset
=
line
.
offset
# Else, evaluate the complex expression
else
:
line_offset
=
self
.
_do_math
(
offset
,
line
.
offset
)
# The start of the data needed by this line is at offset + line_offset.
# The end of the data will be line.size bytes later.
start
=
offset
+
line_offset
end
=
start
+
line
.
size
# If the line has a packed format string, unpack it
if
line
.
pkfmt
:
try
:
dvalue
=
struct
.
unpack
(
line
.
pkfmt
,
binwalk
.
core
.
compat
.
str2bytes
(
self
.
data
[
start
:
end
]))[
0
]
# Not enough bytes left in self.data for the specified format size
except
struct
.
error
as
e
:
dvalue
=
0
# Else, this is a string
else
:
# Wildcard strings have line.value == None
if
line
.
value
is
None
:
# Check to see if this is a string whose size is known and has been specified on a previous
# signature line.
if
[
x
for
x
in
line
.
tags
if
x
.
name
==
'string'
]
and
binwalk
.
core
.
compat
.
has_key
(
tags
,
'strlen'
):
dvalue
=
self
.
data
[
start
:(
start
+
tags
[
'strlen'
])]
# Else, just terminate the string at the first newline, carriage return, or NULL byte
else
:
dvalue
=
self
.
data
[
start
:
end
]
.
split
(
'
\x00
'
)[
0
]
.
split
(
'
\r
'
)[
0
]
.
split
(
'
\r
'
)[
0
]
# Non-wildcard strings have a known length, specified in the signature line
else
:
dvalue
=
self
.
data
[
start
:
end
]
# Some integer values have special operations that need to be performed on them
# before comparison (e.g., "belong&0x0000FFFF"). Complex math expressions are
# supported here as well.
if
isinstance
(
dvalue
,
int
)
and
line
.
operator
:
# If the operator value of this signature line is just an integer value, use it
if
isinstance
(
line
.
opvalue
,
int
):
opval
=
line
.
opvalue
# Else, evaluate the complex expression
else
:
opval
=
self
.
_do_math
(
offset
,
line
.
opvalue
)
# Perform the specified operation
if
line
.
operator
==
'&'
:
dvalue
&=
opval
elif
line
.
operator
==
'|'
:
...
...
@@ -325,6 +410,7 @@ class Magic(object):
elif
line
.
operator
==
'/'
:
dvalue
/=
opval
# Does the data (dvalue) match the specified comparison?
if
((
line
.
value
is
None
)
or
(
line
.
condition
==
'='
and
dvalue
==
line
.
value
)
or
(
line
.
condition
==
'>'
and
dvalue
>
line
.
value
)
or
...
...
@@ -333,21 +419,32 @@ class Magic(object):
(
line
.
condition
==
'&'
and
(
dvalue
&
line
.
value
))
or
(
line
.
condition
==
'|'
and
(
dvalue
|
line
.
value
))):
# Up until this point, date fields are treated as integer values,
# but we want to display them as nicely formatted strings.
if
line
.
type
==
'date'
:
ts
=
datetime
.
datetime
.
utcfromtimestamp
(
dvalue
)
dvalue
=
ts
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
# Format the description string
# TODO: This is too simplistic of a check. What if '%%' is in the format string?
if
'
%
'
in
line
.
format
:
desc
=
line
.
format
%
dvalue
else
:
desc
=
line
.
format
# If there was any description string, append it to the list of description string parts
if
desc
:
description
.
append
(
desc
)
# Process tag keywords specified in the signature line. These have already been parsed out of the
# original format string so that they can be processed separately from the printed description string.
for
tag
in
line
.
tags
:
# Format the tag string
# TODO: This is too simplistic of a check. What if '%%' is in the format string?
if
isinstance
(
tag
.
value
,
str
)
and
'
%
'
in
tag
.
value
:
tags
[
tag
.
name
]
=
tag
.
value
%
dvalue
# Some tag values are intended to be integer values, so try to convert them as such
try
:
tags
[
tag
.
name
]
=
int
(
tags
[
tag
.
name
],
0
)
except
KeyboardInterrupt
as
e
:
...
...
@@ -355,6 +452,7 @@ class Magic(object):
except
Exception
as
e
:
pass
else
:
# Some tag values are intended to be integer values, so try to convert them as such
try
:
tags
[
tag
.
name
]
=
int
(
tag
.
value
,
0
)
except
KeyboardInterrupt
as
e
:
...
...
@@ -362,18 +460,24 @@ class Magic(object):
except
Exception
as
e
:
tags
[
tag
.
name
]
=
tag
.
value
# Abort abort abort
# Abort processing as soon as this signature is marked invalid, unless invalid results
# were explicitly requested. This means that the sooner invalid checks are made in a
# given signature, the faster the scan can filter out false positives.
if
not
self
.
show_invalid
and
tags
[
'invalid'
]:
break
# If this line satisfied its comparison, +1 the max indentation level
max_line_level
=
line
.
level
+
1
else
:
# No match on the first line, abort
if
line
.
level
==
0
:
break
else
:
# If this line did not satisfy its comparison, then higher
# indentation levels will not be accepted.
max_line_level
=
line
.
level
# Join the formatted description strings and remove backspace characters (plus the preceding character as well)
tags
[
'description'
]
=
self
.
bspace
.
sub
(
''
,
" "
.
join
(
description
))
# This should never happen
...
...
@@ -381,29 +485,58 @@ class Magic(object):
tags
[
'display'
]
=
False
tags
[
'invalid'
]
=
True
# If the formatted string contains non-printable characters, consider it invalid
if
self
.
printable
.
match
(
tags
[
'description'
])
.
group
()
!=
tags
[
'description'
]:
tags
[
'invalid'
]
=
True
return
tags
def
scan
(
self
,
data
,
dlen
=
None
):
'''
Scan a data block for matching signatures.
@data - A string of data to scan.
@dlen - If specified, signatures at offsets larger than dlen will be ignored.
Returns a list of SignatureResult objects.
'''
results
=
[]
matched_offsets
=
set
()
# It's expensive in Python to pass large strings around to various functions.
# Since data can potentially be quite a large string, make it available to other
# methods via a class attribute so that it doesn't need to be passed around to
# different methods over and over again.
self
.
data
=
data
# If dlen wasn't specified, search all of self.data
if
dlen
is
None
:
dlen
=
len
(
self
.
data
)
# Loop through each loaded signature
for
signature
in
self
.
signatures
:
# Use regex to search the data block for potential signature matches (fast)
for
match
in
signature
.
regex
.
finditer
(
self
.
data
):
# Take the offset of the start of the signature into account
offset
=
match
.
start
()
-
signature
.
offset
# Signatures are orderd based on the length of their magic bytes (largest first).
# If this offset has already been matched to a previous signature, ignore it unless
# self.show_invalid has been specified. Also ignore obviously invalid offsets (<1)
# as well as those outside the specified self.data range (dlen).
if
(
offset
not
in
matched_offsets
or
self
.
show_invalid
)
and
offset
>=
0
and
offset
<=
dlen
:
# Analyze the data at this offset using the current signature rule
tags
=
self
.
_analyze
(
signature
,
offset
)
# Generate a SignatureResult object and append it to the results list if the
# signature is valid, or if invalid results were requested.
if
not
tags
[
'invalid'
]
or
self
.
show_invalid
:
results
.
append
(
SignatureResult
(
**
tags
))
# Add this offset to the matched_offsets set, so that it can be ignored by
# subsequent loops.
matched_offsets
.
add
(
offset
)
# Sort results by offset
results
.
sort
(
key
=
lambda
x
:
x
.
offset
,
reverse
=
False
)
return
results
def
load
(
self
,
fname
):
...
...
@@ -420,25 +553,47 @@ class Magic(object):
fp
.
close
()
def
parse
(
self
,
lines
):
'''
Parse signature file lines.
@lines - A list of lines from a signature file.
Returns None.
'''
signature
=
None
for
line
in
lines
:
# Split at the first comment delimiter (if any) and strip the result
line
=
line
.
split
(
'#'
)[
0
]
.
strip
()
# Ignore blank lines and lines that are nothing but comments
if
line
:
# Parse this signature line
sigline
=
SignatureLine
(
line
)
# Level 0 means the first line of a signature entry
if
sigline
.
level
==
0
:
# If there is an existing signature, append it to the signature list,
# unless the text in its title field has been filtered by user-defined
# filter rules.
if
signature
:
if
not
self
.
_filtered
(
signature
.
title
):
self
.
signatures
.
append
(
signature
)
# Create a new signature object; use the size of self.signatures to
# assign each signature a unique ID.
signature
=
Signature
(
len
(
self
.
signatures
),
sigline
)
# Else, just append this line to the existing signature
elif
signature
:
signature
.
append
(
sigline
)
# If this is not the first line of a signature entry and there is no other
# existing signature entry, something is very wrong with the signature file.
else
:
raise
ParserException
(
"Invalid signature line: '
%
s'"
%
line
)
# Add the final signature to the signature list
if
signature
:
if
not
self
.
_filtered
(
signature
.
lines
[
0
]
.
format
):
self
.
signatures
.
append
(
signature
)
# Sort signatures by confidence (aka, length of their magic bytes), largest first
self
.
signatures
.
sort
(
key
=
lambda
x
:
x
.
confidence
,
reverse
=
True
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment