Commit e3ebc95e by devttys0

Merged pull request #8, with some modifications.

parent 75bc2e85
......@@ -550,6 +550,9 @@ class Binwalk(object):
i_set_results_offset = False
# Some signatures need to take into account the length of a given string
# when specifying additional offsets. Parse the string-len keyword to adjust
# for this prior to calling self.smart.parse.
magic_result = self.smart._parse_string_len(magic_result)
# Some file names are not NULL byte terminated, but rather their length is
......
......@@ -2,6 +2,8 @@
import io
import os
import re
import ast
import operator as op
from binwalk.compat import *
def file_size(filename):
......@@ -91,6 +93,44 @@ def unique_file_name(base_name, extension=''):
return fname
class MathExpression(object):
    '''
    Class for safely evaluating mathematical expressions from a string.
    Stolen from: http://stackoverflow.com/questions/2371436/evaluating-a-mathematical-expression-in-a-string

    Only numeric literals combined with the binary operators listed in
    OPERATORS are accepted; names, calls, unary operators and anything else
    are rejected. After construction, self.value holds the computed result,
    or None if the expression could not be parsed or evaluated.
    '''

    # Maps the AST node type of each supported binary operator to the
    # function that implements it.
    # NOTE(review): ast.Pow is unbounded, so an expression such as 9**9**9
    # can be very expensive to compute - confirm this is acceptable for the
    # strings that reach this class.
    OPERATORS = {
        ast.Add: op.add,
        ast.Sub: op.sub,
        ast.Mult: op.mul,
        ast.Div: op.truediv,
        ast.Pow: op.pow,
        ast.BitXor: op.xor,
    }

    # Everything that parsing or evaluating a bad expression is known to
    # raise. All of them mean "not a valid expression", which __init__
    # reports as a value of None rather than letting the exception escape.
    EVALUATION_ERRORS = (
        TypeError,        # unsupported node, or operands of the wrong type
        SyntaxError,      # malformed expression, e.g. "1+"
        KeyError,         # binary operator that is not in OPERATORS
        ArithmeticError,  # division by zero, float overflow
        ValueError,       # e.g. NUL bytes in the source string
        RuntimeError,     # recursion limit hit on deeply nested input
        MemoryError,      # parser gave up on deeply nested input
    )

    def __init__(self, expression):
        '''
        Evaluates the expression and stores the result in self.value.

        @expression - String containing the mathematical expression.

        Returns None.
        '''
        self.expression = expression
        try:
            self.value = self.evaluate(self.expression)
        except self.EVALUATION_ERRORS:
            self.value = None

    def evaluate(self, expr):
        '''
        Parses and evaluates a single mathematical expression.

        @expr - String containing the mathematical expression.

        Returns the numeric result.
        Raises an exception (see EVALUATION_ERRORS) if expr is not exactly
        one supported expression.
        '''
        statements = ast.parse(expr).body
        # Reject empty input, multiple statements and non-expression
        # statements; an assignment such as "x = 1" also has a .value
        # attribute and must not be mistaken for an expression.
        if len(statements) != 1 or not isinstance(statements[0], ast.Expr):
            raise TypeError(expr)
        return self._eval(statements[0].value)

    def _eval(self, node):
        '''
        Recursively evaluates a single AST node.

        @node - The AST node to evaluate.

        Returns a number for literals and binary operations, or the operator
        function for operator nodes.
        Raises TypeError for unsupported nodes and KeyError for unsupported
        binary operators.
        '''
        if isinstance(node, ast.BinOp): # <left> <operator> <right>
            return self._eval(node.op)(self._eval(node.left), self._eval(node.right))
        elif isinstance(node, ast.operator): # <operator>
            return self.OPERATORS[type(node)]

        # <number>
        # Literal nodes are identified by class name rather than with
        # isinstance(): ast.Num is deprecated since Python 3.8 and absent
        # from recent releases, so merely referencing it may warn or fail.
        kind = type(node).__name__
        if kind == 'Num':
            return node.n
        elif kind == 'Constant':
            literal = node.value
            # Constant also carries strings, bytes, None and booleans; only
            # real numbers are acceptable here.
            if isinstance(literal, (int, float, complex)) and not isinstance(literal, bool):
                return literal

        raise TypeError(node)
class BlockFile(io.FileIO):
'''
Abstraction class for accessing binary files.
......
......@@ -118,8 +118,8 @@ class Extractor:
except:
pass
# Verify that the match string and file extension were retrieved.
if match and r['extension']:
# Verify that the match string was retrieved.
if match:
self.append_rule(r)
def remove_rule(self, text):
......
......@@ -123,6 +123,14 @@
# character-header formats and thus are strings, not numbers.
#0 string 070707 ASCII cpio archive (pre-SVR4 or odc)
# WARNING: The jump-to-offset value in the ASCII cpio signatures below is a terrible hack.
# This keyword is not intended to be passed a string (%s), and doing so can open
# up the possibility of keyword injection by a malicious file. This works here though, because:
#
# 1) It would result in an invalid CPIO file (invalid size)
# 2) All valid keywords are longer than 8 bytes, so a valid one can't be
# injected in the %.8s field.
0 string 070701 ASCII cpio archive (SVR4 with no CRC),
>110 byte 0 invalid
#>110 byte !0x2F
......@@ -421,7 +429,7 @@
>>4 lelong =694224000 \b, invalid date:
>>4 lelong >694224000 \b, last modified:
>4 ledate x %s
>4 lelong x \b{epoch:%d}
>4 lelong x \b{file-epoch:%d}
# Zlib signatures
# Too short to be useful on their own; see:
......
import re
from binwalk.compat import *
from binwalk.common import str2int, get_quoted_strings
from binwalk.common import str2int, get_quoted_strings, MathExpression
class SmartSignature:
'''
......@@ -22,12 +22,12 @@ class SmartSignature:
'filename' : '%sfile-name:' % KEYWORD_DELIM_START,
'filesize' : '%sfile-size:' % KEYWORD_DELIM_START,
'raw-string' : '%sraw-string:' % KEYWORD_DELIM_START, # This one is special and must come last in a signature block
'string-len' : '%sstring-len:' % KEYWORD_DELIM_START,
'string-len' : '%sstring-len:' % KEYWORD_DELIM_START, # This one is special and must come last in a signature block
'raw-size' : '%sraw-string-length:' % KEYWORD_DELIM_START,
'adjust' : '%soffset-adjust:' % KEYWORD_DELIM_START,
'delay' : '%sextract-delay:' % KEYWORD_DELIM_START,
'year' : '%syear:' % KEYWORD_DELIM_START,
'epoch' : '%sepoch:' % KEYWORD_DELIM_START,
'year' : '%sfile-year:' % KEYWORD_DELIM_START,
'epoch' : '%sfile-epoch:' % KEYWORD_DELIM_START,
'raw-replace' : '%sraw-replace%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
'one-of-many' : '%sone-of-many%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
......@@ -189,10 +189,10 @@ class SmartSignature:
arg = self._get_keyword_arg(data, keyword)
if arg:
if re.match("[0-9\+\-\*]*",arg):
value = eval(arg)
else:
self.invalid = True
value = MathExpression(arg).value
if value is None:
value = 0
self.invalid = True
return value
......@@ -248,19 +248,32 @@ class SmartSignature:
@data - String to parse.
Returns strings length.
Returns parsed data string.
'''
if not self.ignore_smart_signatures and self._is_valid(data):
# Get the raw string keyword arg
raw_string = self._get_keyword_arg(data, 'string-len')
# Was a string-len keyword specified?
if raw_string:
# Is the raw string length arg a numeric value?
# Replace all instances of string-len in data with supplied string length
# Also strip out everything after the string-len keyword, including the keyword itself.
data = re.sub(self.KEYWORDS['string-len']+".+?%s" % self.KEYWORD_DELIM_END, str(len(raw_string)),data)
if raw_string:
# Convert the string to an integer as a sanity check
try:
string_length = "%d" % str2int(raw_string)
except:
string_length = '0'
# If the keyword is nested (e.g., {file-offset:{string-len:%s}}), then the returned
# data string needs to end with KEYWORD_DELIM_END. Note that this only allows for
# one-level nesting.
if data.endswith(self.KEYWORD_DELIM_END*2):
end_char = self.KEYWORD_DELIM_END
else:
end_char = ''
# Strip out *everything* after the string-len keyword, including the keyword itself.
# Failure to do so can potentially allow keyword injection from a maliciously created file.
data = data.split(self.KEYWORDS['string-len'])[0] + string_length + end_char
return data
def _strip_tags(self, data):
......
......@@ -123,6 +123,14 @@
# character-header formats and thus are strings, not numbers.
#0 string 070707 ASCII cpio archive (pre-SVR4 or odc)
# WARNING: The jump-to-offset value in the ASCII cpio signatures below is a terrible hack.
# This keyword is not intended to be passed a string (%s), and doing so can open
# up the possibility of keyword injection by a malicious file. This works here though, because:
#
# 1) It would result in an invalid CPIO file (invalid size)
# 2) All valid keywords are longer than 8 bytes, so a valid one can't be
# injected in the %.8s field.
0 string 070701 ASCII cpio archive (SVR4 with no CRC),
>110 byte 0 invalid
#>110 byte !0x2F
......
......@@ -131,7 +131,7 @@
>>4 lelong =694224000 \b, invalid date:
>>4 lelong >694224000 \b, last modified:
>4 ledate x %s
>4 lelong x \b{epoch:%d}
>4 lelong x \b{file-epoch:%d}
# Zlib signatures
# Too short to be useful on their own; see:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment