From e3ebc95e38f9654fbb4f75e41acfb9d1cf32009f Mon Sep 17 00:00:00 2001 From: devttys0 <heffnercj@gmail.com> Date: Sun, 1 Dec 2013 11:14:56 -0500 Subject: [PATCH] Merged pull request #8, with some modifications. --- src/binwalk/__init__.py | 3 +++ src/binwalk/common.py | 40 ++++++++++++++++++++++++++++++++++++++++ src/binwalk/extractor.py | 4 ++-- src/binwalk/magic/binwalk | 10 +++++++++- src/binwalk/smartsignature.py | 43 ++++++++++++++++++++++++++++--------------- src/magic/archives | 8 ++++++++ src/magic/compressed | 2 +- 7 files changed, 91 insertions(+), 19 deletions(-) diff --git a/src/binwalk/__init__.py b/src/binwalk/__init__.py index 8d6b966..724b22f 100644 --- a/src/binwalk/__init__.py +++ b/src/binwalk/__init__.py @@ -550,6 +550,9 @@ class Binwalk(object): i_set_results_offset = False + # Some signatures need to take into account the length of a given string + # when specifying additional offsets. Parse the string-len keyword to adjust + # for this prior to calling self.smart.parse. magic_result = self.smart._parse_string_len(magic_result) # Some file names are not NULL byte terminated, but rather their length is diff --git a/src/binwalk/common.py b/src/binwalk/common.py index 54b7cf9..f5e5557 100644 --- a/src/binwalk/common.py +++ b/src/binwalk/common.py @@ -2,6 +2,8 @@ import io import os import re +import ast +import operator as op from binwalk.compat import * def file_size(filename): @@ -91,6 +93,44 @@ def unique_file_name(base_name, extension=''): return fname + +class MathExpression(object): + ''' + Class for safely evaluating mathematical expressions from a string. 
+ Stolen from: http://stackoverflow.com/questions/2371436/evaluating-a-mathematical-expression-in-a-string + ''' + + OPERATORS = { + ast.Add: op.add, + ast.Sub: op.sub, + ast.Mult: op.mul, + ast.Div: op.truediv, + ast.Pow: op.pow, + ast.BitXor: op.xor + } + + def __init__(self, expression): + self.expression = expression + + try: + self.value = self.evaluate(self.expression) + except TypeError: + self.value = None + + def evaluate(self, expr): + return self._eval(ast.parse(expr).body[0].value) + + def _eval(self, node): + if isinstance(node, ast.Num): # <number> + return node.n + elif isinstance(node, ast.operator): # <operator> + return self.OPERATORS[type(node)] + elif isinstance(node, ast.BinOp): # <left> <operator> <right> + return self._eval(node.op)(self._eval(node.left), self._eval(node.right)) + else: + raise TypeError(node) + + class BlockFile(io.FileIO): ''' Abstraction class for accessing binary files. diff --git a/src/binwalk/extractor.py b/src/binwalk/extractor.py index 656133a..ece7911 100644 --- a/src/binwalk/extractor.py +++ b/src/binwalk/extractor.py @@ -118,8 +118,8 @@ class Extractor: except: pass - # Verify that the match string and file extension were retrieved. - if match and r['extension']: + # Verify that the match string was retrieved. + if match: self.append_rule(r) def remove_rule(self, text): diff --git a/src/binwalk/magic/binwalk b/src/binwalk/magic/binwalk index 17cfbad..ea04ce3 100644 --- a/src/binwalk/magic/binwalk +++ b/src/binwalk/magic/binwalk @@ -123,6 +123,14 @@ # character-header formats and thus are strings, not numbers. #0 string 070707 ASCII cpio archive (pre-SVR4 or odc) +# WARNING: The jump-to-offset value in the ASCII cpio signatures below is a terrible hack. +# This keyword is not intended to be passed a string (%s), and doing so can open +# up the possibility of keyword injection by a malicious file. 
This works here though, because: +# +# 1) It would result in an invalid CPIO file (invalid size) +# 2) All valid keywords are longer than 8 bytes, so a valid one can't be +# injected in the %.8s field. + 0 string 070701 ASCII cpio archive (SVR4 with no CRC), >110 byte 0 invalid #>110 byte !0x2F @@ -421,7 +429,7 @@ >>4 lelong =694224000 \b, invalid date: >>4 lelong >694224000 \b, last modified: >4 ledate x %s ->4 lelong x \b{epoch:%d} +>4 lelong x \b{file-epoch:%d} # Zlib signatures # Too short to be useful on their own; see: diff --git a/src/binwalk/smartsignature.py b/src/binwalk/smartsignature.py index 6467e48..921c1f1 100644 --- a/src/binwalk/smartsignature.py +++ b/src/binwalk/smartsignature.py @@ -1,6 +1,6 @@ import re from binwalk.compat import * -from binwalk.common import str2int, get_quoted_strings +from binwalk.common import str2int, get_quoted_strings, MathExpression class SmartSignature: ''' @@ -22,12 +22,12 @@ class SmartSignature: 'filename' : '%sfile-name:' % KEYWORD_DELIM_START, 'filesize' : '%sfile-size:' % KEYWORD_DELIM_START, 'raw-string' : '%sraw-string:' % KEYWORD_DELIM_START, # This one is special and must come last in a signature block - 'string-len' : '%sstring-len:' % KEYWORD_DELIM_START, + 'string-len' : '%sstring-len:' % KEYWORD_DELIM_START, # This one is special and must come last in a signature block 'raw-size' : '%sraw-string-length:' % KEYWORD_DELIM_START, 'adjust' : '%soffset-adjust:' % KEYWORD_DELIM_START, 'delay' : '%sextract-delay:' % KEYWORD_DELIM_START, - 'year' : '%syear:' % KEYWORD_DELIM_START, - 'epoch' : '%sepoch:' % KEYWORD_DELIM_START, + 'year' : '%sfile-year:' % KEYWORD_DELIM_START, + 'epoch' : '%sfile-epoch:' % KEYWORD_DELIM_START, 'raw-replace' : '%sraw-replace%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END), 'one-of-many' : '%sone-of-many%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END), @@ -189,10 +189,10 @@ class SmartSignature: arg = self._get_keyword_arg(data, keyword) if arg: - if re.match("[0-9\+\-\*]*",arg): - value 
= eval(arg) - else: - self.invalid = True + value = MathExpression(arg).value + if value is None: + value = 0 + self.invalid = True return value @@ -248,19 +248,32 @@ class SmartSignature: @data - String to parse. - Returns strings length. + Returns parsed data string. ''' if not self.ignore_smart_signatures and self._is_valid(data): + # Get the raw string keyword arg raw_string = self._get_keyword_arg(data, 'string-len') # Was a string-len keyword specified? - if raw_string: - # Is the raw string length arg is a numeric value? - - # Replace all instances of string-len in data with supplied string lenth - # Also strip out everything after the string-len keyword, including the keyword itself. - data = re.sub(self.KEYWORDS['string-len']+".+?%s" % self.KEYWORD_DELIM_END, str(len(raw_string)),data) + if raw_string: + # Convert the string to an integer as a sanity check + try: + string_length = "%d" % str2int(raw_string) + except: + string_length = '0' + + # If the keyword is nested (e.g., {file-offset:{string-len:%s}}), then the returned + # data string needs to end with KEYWORD_DELIM_END. Note that this only allows for + # one-level nesting. + if data.endswith(self.KEYWORD_DELIM_END*2): + end_char = self.KEYWORD_DELIM_END + else: + end_char = '' + + # Strip out *everything* after the string-len keyword, including the keyword itself. + # Failure to do so can potentially allow keyword injection from a maliciously created file. + data = data.split(self.KEYWORDS['string-len'])[0] + string_length + end_char return data def _strip_tags(self, data): diff --git a/src/magic/archives b/src/magic/archives index e80a19d..4151e4f 100644 --- a/src/magic/archives +++ b/src/magic/archives @@ -123,6 +123,14 @@ # character-header formats and thus are strings, not numbers. #0 string 070707 ASCII cpio archive (pre-SVR4 or odc) +# WARNING: The jump-to-offset value in the ASCII cpio signatures below is a terrible hack. 
+# This keyword is not intended to be passed a string (%s), and doing so can open +# up the possibility of keyword injection by a malicious file. This works here though, because: +# +# 1) It would result in an invalid CPIO file (invalid size) +# 2) All valid keywords are longer than 8 bytes, so a valid one can't be +# injected in the %.8s field. + 0 string 070701 ASCII cpio archive (SVR4 with no CRC), >110 byte 0 invalid #>110 byte !0x2F diff --git a/src/magic/compressed b/src/magic/compressed index 9a07e95..955c64d 100644 --- a/src/magic/compressed +++ b/src/magic/compressed @@ -131,7 +131,7 @@ >>4 lelong =694224000 \b, invalid date: >>4 lelong >694224000 \b, last modified: >4 ledate x %s ->4 lelong x \b{epoch:%d} +>4 lelong x \b{file-epoch:%d} # Zlib signatures # Too short to be useful on their own; see: -- libgit2 0.26.0