From e3ebc95e38f9654fbb4f75e41acfb9d1cf32009f Mon Sep 17 00:00:00 2001
From: devttys0 <heffnercj@gmail.com>
Date: Sun, 1 Dec 2013 11:14:56 -0500
Subject: [PATCH] Merged pull request #8, with some modifications.

---
 src/binwalk/__init__.py       |  3 +++
 src/binwalk/common.py         | 40 ++++++++++++++++++++++++++++++++++++++++
 src/binwalk/extractor.py      |  4 ++--
 src/binwalk/magic/binwalk     | 10 +++++++++-
 src/binwalk/smartsignature.py | 43 ++++++++++++++++++++++++++++---------------
 src/magic/archives            |  8 ++++++++
 src/magic/compressed          |  2 +-
 7 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/src/binwalk/__init__.py b/src/binwalk/__init__.py
index 8d6b966..724b22f 100644
--- a/src/binwalk/__init__.py
+++ b/src/binwalk/__init__.py
@@ -550,6 +550,9 @@ class Binwalk(object):
 
 						i_set_results_offset = False
 
+						# Some signatures need to take into account the length of a given string
+						# when specifying additional offsets. Parse the string-len keyword to adjust
+						# for this prior to calling self.smart.parse.
 						magic_result = self.smart._parse_string_len(magic_result)
 
 						# Some file names are not NULL byte terminated, but rather their length is
diff --git a/src/binwalk/common.py b/src/binwalk/common.py
index 54b7cf9..f5e5557 100644
--- a/src/binwalk/common.py
+++ b/src/binwalk/common.py
@@ -2,6 +2,8 @@
 import io
 import os
 import re
+import ast
+import operator as op
 from binwalk.compat import *
 
 def file_size(filename):
@@ -91,6 +93,44 @@ def unique_file_name(base_name, extension=''):
 
 	return fname
 
+
+class MathExpression(object):
+	'''
+	Class for safely evaluating mathematical expressions from a string; self.value is None if evaluation fails.
+	Stolen from: http://stackoverflow.com/questions/2371436/evaluating-a-mathematical-expression-in-a-string
+	'''
+
+	OPERATORS = {
+		ast.Add: op.add,
+		ast.Sub: op.sub,
+		ast.Mult: op.mul,
+		ast.Div: op.truediv,
+		ast.Pow: op.pow,
+		ast.BitXor: op.xor
+	}
+
+	def __init__(self, expression):
+		self.expression = expression
+		# Expressions originate from scanned files; any parse or arithmetic failure must yield None, not a crash.
+		try:
+			self.value = self.evaluate(self.expression)
+		except (SyntaxError, TypeError, ValueError, LookupError, AttributeError, ArithmeticError):
+			self.value = None
+
+	def evaluate(self, expr): # Parse expr and evaluate the expression tree of its first statement
+		return self._eval(ast.parse(expr).body[0].value)
+
+	def _eval(self, node): # Recursively evaluate one AST node; raises TypeError for unsupported node types
+		if isinstance(node, ast.Num): # <number>
+			return node.n
+		elif isinstance(node, ast.operator): # <operator>
+			return self.OPERATORS[type(node)]
+		elif isinstance(node, ast.BinOp): # <left> <operator> <right>
+			return self._eval(node.op)(self._eval(node.left), self._eval(node.right))
+		else:
+			raise TypeError(node)
+
+
 class BlockFile(io.FileIO):
 	'''
 	Abstraction class for accessing binary files.
diff --git a/src/binwalk/extractor.py b/src/binwalk/extractor.py
index 656133a..ece7911 100644
--- a/src/binwalk/extractor.py
+++ b/src/binwalk/extractor.py
@@ -118,8 +118,8 @@ class Extractor:
 			except:
 				pass
 
-			# Verify that the match string and file extension were retrieved.
-			if match and r['extension']:
+			# Verify that the match string was retrieved.
+			if match: 
 				self.append_rule(r)
 
 	def remove_rule(self, text):
diff --git a/src/binwalk/magic/binwalk b/src/binwalk/magic/binwalk
index 17cfbad..ea04ce3 100644
--- a/src/binwalk/magic/binwalk
+++ b/src/binwalk/magic/binwalk
@@ -123,6 +123,14 @@
 # character-header formats and thus are strings, not numbers.
 #0       string          070707          ASCII cpio archive (pre-SVR4 or odc)
 
+# WARNING: The jump-to-offset value in the ASCII cpio signatures below is a terrible hack.
+#          This keyword is not intended to be passed a string (%s), and doing so opens up
+#          the possibility of keyword injection by a malicious file. It is acceptable here because:
+#
+#               1) An injected keyword would result in an invalid CPIO file (invalid size)
+#               2) All valid keywords are longer than 8 bytes, so a valid one can't be
+#                  injected in the %.8s field.
+
 0       string          070701          ASCII cpio archive (SVR4 with no CRC),
 >110	byte		0		invalid
 #>110	byte		!0x2F
@@ -421,7 +429,7 @@
 >>4	lelong		=694224000	\b, invalid date:
 >>4	lelong		>694224000	\b, last modified:
 >4      ledate          x               %s
->4	lelong		x		\b{epoch:%d}
+>4	lelong		x		\b{file-epoch:%d}
 
 # Zlib signatures
 # Too short to be useful on their own; see:
diff --git a/src/binwalk/smartsignature.py b/src/binwalk/smartsignature.py
index 6467e48..921c1f1 100644
--- a/src/binwalk/smartsignature.py
+++ b/src/binwalk/smartsignature.py
@@ -1,6 +1,6 @@
 import re
 from binwalk.compat import *
-from binwalk.common import str2int, get_quoted_strings
+from binwalk.common import str2int, get_quoted_strings, MathExpression
 
 class SmartSignature:
 	'''
@@ -22,12 +22,12 @@ class SmartSignature:
 		'filename'		: '%sfile-name:' % KEYWORD_DELIM_START,
 		'filesize'		: '%sfile-size:' % KEYWORD_DELIM_START,
 		'raw-string'		: '%sraw-string:' % KEYWORD_DELIM_START,	# This one is special and must come last in a signature block
-		'string-len'		: '%sstring-len:' % KEYWORD_DELIM_START,
+		'string-len'		: '%sstring-len:' % KEYWORD_DELIM_START,	# This one is special and must come last in a signature block
 		'raw-size'		: '%sraw-string-length:' % KEYWORD_DELIM_START,
 		'adjust'		: '%soffset-adjust:' % KEYWORD_DELIM_START,
 		'delay'			: '%sextract-delay:' % KEYWORD_DELIM_START,
-		'year'			: '%syear:' % KEYWORD_DELIM_START,
-		'epoch'			: '%sepoch:' % KEYWORD_DELIM_START,
+		'year'			: '%sfile-year:' % KEYWORD_DELIM_START,
+		'epoch'			: '%sfile-epoch:' % KEYWORD_DELIM_START,
 
 		'raw-replace'		: '%sraw-replace%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
 		'one-of-many'		: '%sone-of-many%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
@@ -189,10 +189,10 @@ class SmartSignature:
 
 		arg = self._get_keyword_arg(data, keyword)
 		if arg:
-			if re.match("[0-9\+\-\*]*",arg):
-			    value = eval(arg)
-			else:
-			    self.invalid = True
+			value = MathExpression(arg).value
+			if value is None:
+				value = 0
+				self.invalid = True
 
 		return value
 
@@ -248,19 +248,32 @@ class SmartSignature:
 
 		@data - String to parse.
 
-		Returns strings length.
+		Returns parsed data string.
 		'''
 		if not self.ignore_smart_signatures and self._is_valid(data):
+
 			# Get the raw string  keyword arg
 			raw_string = self._get_keyword_arg(data, 'string-len')
 
 			# Was a string-len  keyword specified?
-			if raw_string:				
-				# Is the raw string  length arg is a numeric value?
-				
-				# Replace all instances of string-len in data with supplied string lenth
-				# Also strip out everything after the string-len keyword, including the keyword itself.
-				data = re.sub(self.KEYWORDS['string-len']+".+?%s" % self.KEYWORD_DELIM_END, str(len(raw_string)),data)
+			if raw_string:
+				# Convert the string to an integer as a sanity check
+				try:
+					string_length = "%d" % str2int(raw_string)
+				except:
+					string_length = '0'
+
+				# If the keyword is nested (e.g., {file-offset:{string-len:%s}}), then the returned
+				# data string needs to end with KEYWORD_DELIM_END. Note that this only allows for
+				# one-level nesting.
+				if data.endswith(self.KEYWORD_DELIM_END*2):
+					end_char = self.KEYWORD_DELIM_END
+				else:
+					end_char = ''
+
+				# Strip out *everything* after the string-len keyword, including the keyword itself.
+				# Failure to do so can potentially allow keyword injection from a maliciously created file.
+				data = data.split(self.KEYWORDS['string-len'])[0] + string_length + end_char
 		return data
 
 	def _strip_tags(self, data):
diff --git a/src/magic/archives b/src/magic/archives
index e80a19d..4151e4f 100644
--- a/src/magic/archives
+++ b/src/magic/archives
@@ -123,6 +123,14 @@
 # character-header formats and thus are strings, not numbers.
 #0       string          070707          ASCII cpio archive (pre-SVR4 or odc)
 
+# WARNING: The jump-to-offset value in the ASCII cpio signatures below is a terrible hack.
+#          This keyword is not intended to be passed a string (%s), and doing so opens up
+#          the possibility of keyword injection by a malicious file. It is acceptable here because:
+#
+#               1) An injected keyword would result in an invalid CPIO file (invalid size)
+#               2) All valid keywords are longer than 8 bytes, so a valid one can't be
+#                  injected in the %.8s field.
+
 0       string          070701          ASCII cpio archive (SVR4 with no CRC),
 >110	byte		0		invalid
 #>110	byte		!0x2F
diff --git a/src/magic/compressed b/src/magic/compressed
index 9a07e95..955c64d 100644
--- a/src/magic/compressed
+++ b/src/magic/compressed
@@ -131,7 +131,7 @@
 >>4	lelong		=694224000	\b, invalid date:
 >>4	lelong		>694224000	\b, last modified:
 >4      ledate          x               %s
->4	lelong		x		\b{epoch:%d}
+>4	lelong		x		\b{file-epoch:%d}
 
 # Zlib signatures
 # Too short to be useful on their own; see:
--
libgit2 0.26.0