Cleaned up smart.Signature code, fixed Python3 ctypes bugs.

5a52a5a8 · devttys0 · 881f0c18 · 5a52a5a8 · 5a52a5a8 · 5a52a5a8
Commit 5a52a5a8 authored Dec 22, 2013 by devttys0
5 changed files
--- a/src/binwalk/core/filter.py
+++ b/src/binwalk/core/filter.py
 import re
 import binwalk.core.common as common
-from binwalk.core.smart import SmartSignature
+from binwalk.core.smart import Signature
 from binwalk.core.compat import *
 class Filter:
@@ -32,7 +32,7 @@ class Filter:
 		self.grep_filters = []
 		self.show_invalid_results = show_invalid_results
 		self.exclusive_filter = False
-		self.smart = SmartSignature(self)
+		self.smart = Signature(self)
 	def include(self, match, exclusive=True):
 		'''
@@ -134,7 +134,7 @@ class Filter:
 		# Don't include quoted strings or keyword arguments in this search, as 
 		# strings from the target file may legitimately contain the INVALID_RESULT text.
-		if self.INVALID_RESULT in common.strip_quoted_strings(self.smart._strip_tags(data)):
+		if self.INVALID_RESULT in common.strip_quoted_strings(self.smart.strip_tags(data)):
 			return False
 		# There should be no non-printable characters in any of the data

--- a/src/binwalk/core/smart.py
+++ b/src/binwalk/core/smart.py
@@ -3,7 +3,36 @@ import binwalk.core.module
 from binwalk.core.compat import *
 from binwalk.core.common import get_quoted_strings, MathExpression
-class SmartSignature:
+class Tag(object):
+	TAG_DELIM_START = "{"
+	TAG_DELIM_END = "}"
+	TAG_ARG_SEPERATOR = ":"
+	def __init__(self, **kwargs):
+		self.name = None
+		self.keyword = None
+		self.type = None
+		self.handler = None
+		self.tag = None
+		for (k,v) in iterator(kwargs):
+			setattr(self, k, v)
+		if self.keyword is not None:
+			self.tag = self.TAG_DELIM_START + self.keyword
+			if self.type is None:
+				self.tag += self.TAG_DELIM_END
+			else:
+				self.tag += self.TAG_ARG_SEPERATOR
+		if self.handler is None:
+			if self.type == int:
+				self.handler = 'get_math_arg'
+			elif self.type == str:
+				self.handler = 'get_keyword_arg'
+class Signature(object):
 	'''
 	Class for parsing smart signature tags in libmagic result strings.
@@ -12,29 +41,28 @@ class SmartSignature:
 		from binwalk import SmartSignature
-		for (i, keyword) in SmartSignature().KEYWORDS.iteritems():
+		for tag in SmartSignature.TAGS:
-			print keyword
+			print tag.keyword
 	'''
-	KEYWORD_DELIM_START = "{"
+	TAGS = [
-	KEYWORD_DELIM_END = "}"
+		Tag(name='raw-string', keyword='raw-string', handler='parse_raw_string'),
-	KEYWORDS = {
+		Tag(name='string-len', keyword='string-len', handler='parse_string_len'),
-		'jump'					: '%sjump-to-offset:' % KEYWORD_DELIM_START,
+		Tag(name='math', keyword='math', handler='parse_math'),
-		'filename'				: '%sfile-name:' % KEYWORD_DELIM_START,
+		Tag(name='one-of-many', keyword='one-of-many', handler='one_of_many'),
-		'filesize'				: '%sfile-size:' % KEYWORD_DELIM_START,
-		'raw-string'			: '%sraw-string:' % KEYWORD_DELIM_START,	# This one is special and must come last in a signature block
+		Tag(name='jump', keyword='jump-to-offset', type=int),
-		'string-len'			: '%sstring-len:' % KEYWORD_DELIM_START,	# This one is special and must come last in a signature block
+		Tag(name='name', keyword='file-name', type=str),
-		'raw-size'				: '%sraw-string-length:' % KEYWORD_DELIM_START,
+		Tag(name='size', keyword='file-size', type=int),
-		'adjust'				: '%soffset-adjust:' % KEYWORD_DELIM_START,
+		Tag(name='adjust', keyword='offset-adjust', type=int),
-		'delay'					: '%sextract-delay:' % KEYWORD_DELIM_START,
+		Tag(name='delay', keyword='extract-delay', type=str),
-		'year'					: '%sfile-year:' % KEYWORD_DELIM_START,
+		Tag(name='year', keyword='file-year', type=str),
-		'epoch'					: '%sfile-epoch:' % KEYWORD_DELIM_START,
+		Tag(name='epoch', keyword='file-epoch', type=int),
-		'math'					: '%smath:' % KEYWORD_DELIM_START,
+		Tag(name='raw-size', keyword='raw-string-length'),
-		'raw-replace'			: '%sraw-replace%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
+		Tag(name='raw-replace', keyword='raw-replace'),
-		'one-of-many'			: '%sone-of-many%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
+		Tag(name='string-len-replace', keyword='string-len'),
-		'string-len-replace'	: '%sstring-len%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
+	]
-	}
 	def __init__(self, filter, ignore_smart_signatures=False):
 		'''
@@ -58,20 +86,7 @@ class SmartSignature:
 		Returns a dictionary of parsed values.
 		'''
-		results = {
+		results = {}
-			'offset'		: '',		# Offset where the match was found, filled in by Binwalk.single_scan.
-			'description'	: '',		# The libmagic data string, stripped of all keywords
-			'name'			: '',		# The original name of the file, if known
-			'delay'			: '',		# Extract delay description
-			'extract'		: '',		# Name of the extracted file, filled in by Binwalk.single_scan.
-			'jump'			: 0,		# The relative offset to resume the scan from
-			'size'			: 0,		# The size of the file, if known
-			'adjust'		: 0,		# The relative offset to add to the reported offset
-			'year'			: 0,		# The file's creation/modification year, if reported in the signature
-			'epoch'			: 0,		# The file's creation/modification epoch time, if reported in the signature
-			'valid'			: True,		# Set to False if parsed numerical values appear invalid
-		}
 		self.valid = True
 		# If smart signatures are disabled, or the result data is not valid (i.e., potentially malicious), 
@@ -79,57 +94,28 @@ class SmartSignature:
 		if self.ignore_smart_signatures:
 			results['description'] = data
 		else:
-			# Calculate and replace special keywords/values
+			for tag in self.TAGS:
-			data = self._parse_raw_strings(data)
+				if tag.handler is not None:
-			data = self._parse_string_len(data)
+					(data, arg) = getattr(self, tag.handler)(data, tag)
-			data = self._replace_maths(data)
-			# Parse the offset-adjust value. This is used to adjust the reported offset at which 
-			# a signature was located due to the fact that MagicParser.match expects all signatures
-			# to be located at offset 0, which some wil not be.
-			results['adjust'] = self._get_math_arg(data, 'adjust')
-			# Parse the file-size value. This is used to determine how many bytes should be extracted
-			# when extraction is enabled. If not specified, everything to the end of the file will be
-			# extracted (see Binwalk.scan).
-			try:
-				results['size'] = int(self._get_math_arg(data, 'filesize'), 0)
-			except KeyboardInterrupt as e:
-				raise e
-			except Exception:
-				pass
-			try:
+					if isinstance(arg, type(False)) and arg == False:
-				results['year'] = int(self._get_keyword_arg(data, 'year'), 0)
+						self.valid = False
-			except KeyboardInterrupt as e:
+					elif tag.type is not None:
-				raise e
+						results[tag.name] = arg
-			except Exception:
-				pass
-			try:
-				results['epoch'] = int(self._get_keyword_arg(data, 'epoch'), 0)
-			except KeyboardInterrupt as e:
-				raise e
-			except Exception:
-				pass
-			results['delay'] = self._get_keyword_arg(data, 'delay')
-			# Parse the string for the jump-to-offset keyword.
-			# This keyword is honored, even if this string result is one of many.
-			results['jump'] = self._get_math_arg(data, 'jump')
-			# If this is one of many, don't do anything and leave description as a blank string.
+			results['description'] = self.strip_tags(data)
-			# Else, strip all keyword tags from the string and process additional keywords as necessary.
-			if not self._one_of_many(data):
-				results['name'] = self._get_keyword_arg(data, 'filename').strip('"')
-				results['description'] = self._strip_tags(data)
 		results['valid'] = self.valid
 		return binwalk.core.module.Result(**results)
-	def _is_valid(self, data):
+	def tag_lookup(self, keyword):
+		for tag in self.TAGS:
+			if tag.keyword == keyword:
+				return tag
+		return None
+	def is_valid(self, data):
 		'''
 		Validates that result data does not contain smart keywords in file-supplied strings.
@@ -144,15 +130,15 @@ class SmartSignature:
 		quoted_data = get_quoted_strings(data)
 		# Check to see if there was any quoted data, and if so, if it contained the keyword starting delimiter
-		if quoted_data and self.KEYWORD_DELIM_START in quoted_data:
+		if quoted_data and Tag.TAG_DELIM_START in quoted_data:
 			# If so, check to see if the quoted data contains any of our keywords.
 			# If any keywords are found inside of quoted data, consider the keywords invalid.
-			for (name, keyword) in iterator(self.KEYWORDS):
+			for tag in self.TAGS:
-				if keyword in quoted_data:
+				if tag.tag in quoted_data:
 					return False
 		return True
-	def _safe_string(self, data):
+	def safe_string(self, data):
 		'''
 		Strips out quoted data (i.e., data taken directly from a file).
 		'''
@@ -161,28 +147,28 @@ class SmartSignature:
 			data = data.replace(quoted_string, "")
 		return data
-	def _one_of_many(self, data):
+	def one_of_many(self, data, tag):
 		'''
 		Determines if a given data string is one result of many.
 		@data - String result data.
-		Returns True if the string result is one of many.
+		Returns False if the string result is one of many and should not be displayed.
-		Returns False if the string result is not one of many.
+		Returns True if the string result is not one of many and should be displayed.
 		'''
 		if self.filter.valid_result(data):
 			if self.last_one_of_many is not None and data.startswith(self.last_one_of_many):
-				return True
+				return (data, False)
-			if self.KEYWORDS['one-of-many'] in data:
+		if tag.tag in data:
 			# Only match on the data before the first comma, as that is typically unique and static
 			self.last_one_of_many = data.split(',')[0]
 		else:
 			self.last_one_of_many = None
-		return False
+		return (data, True)
-	def _get_keyword_arg(self, data, keyword):
+	def get_keyword_arg(self, data, tag):
 		'''
 		Retrieves the argument for keywords that specify arguments.
@@ -193,14 +179,14 @@ class SmartSignature:
 		Returns a blank string on failure.
 		'''
 		arg = ''
-		data = self._safe_string(data)
+		data = self.safe_string(data)
-		if has_key(self.KEYWORDS, keyword) and self.KEYWORDS[keyword] in data:
+		if tag.tag in data:
-			arg = data.split(self.KEYWORDS[keyword])[1].split(self.KEYWORD_DELIM_END)[0]
+			arg = data.split(tag.tag)[1].split(tag.TAG_DELIM_END)[0]
-		return arg
+		return (data, arg)
-	def _get_math_arg(self, data, keyword):
+	def get_math_arg(self, data, tag):
 		'''
 		Retrieves the argument for keywords that specifiy mathematical expressions as arguments.
@@ -211,37 +197,16 @@ class SmartSignature:
 		'''
 		value = 0
-		arg = self._get_keyword_arg(data, keyword)
+		(data, arg) = self.get_keyword_arg(data, tag)
 		if arg:
 			value = MathExpression(arg).value
 			if value is None:
 				value = 0
 				self.valid = False
-		return value
+		return (data, value)
-	def _jump(self, data):
-		'''
-		Obtains the jump-to-offset value of a signature, if any.
-		@data - String result data.
-		Returns the offset to jump to.
+	def parse_math(self, data, tag):
-		'''
-		offset = 0
-		offset_str = self._get_keyword_arg(data, 'jump')
-		if offset_str:
-			try:
-				offset = int(offset_str, 0)
-			except KeyboardInterrupt as e:
-				raise e
-			except Exception:
-				pass
-		return offset
-	def _replace_maths(self, data):
 		'''
 		Replace math keywords with the requested values.
@@ -249,15 +214,15 @@ class SmartSignature:
 		Returns the modified string result data.
 		'''
-		while self.KEYWORDS['math'] in data:
+		while tag.keyword in data:
-			arg = self._get_keyword_arg(data, 'math')
+			(data, arg) = self.get_keyword_arg(data, tag.name)
-			v = '%s%s%s' % (self.KEYWORDS['math'], arg, self.KEYWORD_DELIM_END)
+			v = '%s%s%s' % (tag.keyword, arg, self.TAG_DELIM_END)
-			math_value = "%d" % self._get_math_arg(data, 'math')
+			math_value = "%d" % self.get_math_arg(data, tag.name)
 			data = data.replace(v, math_value)
-		return data
+		return (data, None)
-	def _parse_raw_strings(self, data):
+	def parse_raw_string(self, data, raw_str_tag):
 		'''
 		Process strings that aren't NULL byte terminated, but for which we know the string length.
 		This should be called prior to any other smart parsing functions.
@@ -266,25 +231,27 @@ class SmartSignature:
 		Returns a parsed string.
 		'''
-		if not self.ignore_smart_signatures and self._is_valid(data):
+		if not self.ignore_smart_signatures and self.is_valid(data):
+			raw_size_tag = self.tag_lookup('raw-size')
+			raw_replace_tag = self.tag_lookup('raw-replace')
 			# Get the raw string  keyword arg
-			raw_string = self._get_keyword_arg(data, 'raw-string')
+			(data, raw_string) = self.get_keyword_arg(data, raw_str_tag)
 			# Was a raw string keyword specified?
 			if raw_string:
 				# Get the raw string length arg
-				raw_size = self._get_keyword_arg(data, 'raw-size')
+				(data, raw_size) = self.get_math_arg(data, raw_size_tag)
-				# Is the raw string  length arg is a numeric value?
-				if re.match('^-?[0-9]+$', raw_size):
 				# Replace all instances of raw-replace in data with raw_string[:raw_size]
 				# Also strip out everything after the raw-string keyword, including the keyword itself.
 				# Failure to do so may (will) result in non-printable characters and this string will be 
 				# marked as invalid when it shouldn't be.
-					data = data[:data.find(self.KEYWORDS['raw-string'])].replace(self.KEYWORDS['raw-replace'], '"' + raw_string[:int(raw_size, 0)] + '"')
+				data = data[:data.find(raw_str_tag.tag)].replace(raw_replace_tag.tag, '"' + raw_string[:raw_size] + '"')
-		return data
+		return (data, True)
-	def _parse_string_len(self, data):
+	def parse_string_len(self, data, str_len_tag):
 		'''
 		Process {string-len} macros. 
@@ -292,14 +259,16 @@ class SmartSignature:
 		Returns parsed data string.
 		'''
-		if not self.ignore_smart_signatures and self._is_valid(data):
+		if not self.ignore_smart_signatures and self.is_valid(data):
+			str_len_replace_tag = self.tag_lookup('string-len-replace')
 			# Get the raw string  keyword arg
-			raw_string = self._get_keyword_arg(data, 'string-len')
+			(data, raw_string) = self.get_keyword_arg(data, str_len_tag)
 			# Was a string-len  keyword specified?
 			if raw_string:
-				# Convert the string to an integer as a sanity check
+				# Get the string length
 				try:
 					string_length = '%d' % len(raw_string)
 				except KeyboardInterrupt as e:
@@ -309,10 +278,11 @@ class SmartSignature:
 				# Strip out *everything* after the string-len keyword, including the keyword itself.
 				# Failure to do so can potentially allow keyword injection from a maliciously created file.
-				data = data.split(self.KEYWORDS['string-len'])[0].replace(self.KEYWORDS['string-len-replace'], string_length)
+				data = data.split(str_len_tag.tag)[0].replace(str_len_replace_tag.tag, string_length)
-		return data
+		return (data, True)
-	def _strip_tags(self, data):
+	def strip_tags(self, data):
 		'''
 		Strips the smart tags from a result string.
@@ -321,10 +291,10 @@ class SmartSignature:
 		Returns a sanitized string.
 		'''
 		if not self.ignore_smart_signatures:
-			for (name, keyword) in iterator(self.KEYWORDS):
+			for tag in self.TAGS:
-				start = data.find(keyword)
+				start = data.find(tag.tag)
 				if start != -1:
-					end = data[start:].find(self.KEYWORD_DELIM_END)
+					end = data[start:].find(tag.TAG_DELIM_END)
 					if end != -1:
 						data = data.replace(data[start:start+end+1], "")
 		return data

--- a/src/binwalk/modules/compression.py
+++ b/src/binwalk/modules/compression.py
@@ -3,6 +3,7 @@
 import os
 import ctypes
 import ctypes.util
+from binwalk.core.compat import str2bytes
 from binwalk.core.module import Option, Kwarg, Module
 class Deflate(object):
@@ -46,7 +47,7 @@ class Deflate(object):
 	def decompress(self, data):
 		description = None
-		decomp_size = self.tinfl.is_deflated(data, len(data), 0)
+		decomp_size = self.tinfl.is_deflated(str2bytes(data), len(data), 0)
 		if decomp_size >= self.MIN_DECOMP_SIZE:
 			description = self.DESCRIPTION + ', uncompressed size >= %d' % decomp_size

--- a/src/binwalk/modules/signature.py
+++ b/src/binwalk/modules/signature.py
@@ -48,8 +48,8 @@ class Signature(Module):
 	VERBOSE_HEADER_FORMAT = "%s    %d"
 	def init(self):
-		# Create SmartSignature and MagicParser class instances. These are mostly for internal use.
+		# Create Signature and MagicParser class instances. These are mostly for internal use.
-		self.smart = binwalk.core.smart.SmartSignature(self.config.filter, ignore_smart_signatures=self.dumb_scan)
+		self.smart = binwalk.core.smart.Signature(self.config.filter, ignore_smart_signatures=self.dumb_scan)
 		self.parser = binwalk.core.parser.MagicParser(self.config.filter, self.smart)
 		# If a raw byte sequence was specified, build a magic file from that instead of using the default magic files

--- a/src/binwalk/plugins/zlibvalid.py
+++ b/src/binwalk/plugins/zlibvalid.py
 import ctypes
 import ctypes.util
+from binwalk.core.compat import str2bytes
 from binwalk.core.common import BlockFile
 class Plugin(object):
@@ -23,9 +24,9 @@ class Plugin(object):
 		# If this result is a zlib signature match, try to decompress the data
 		if self.tinfl and result.file and result.description.lower().startswith('zlib'):
 			# Seek to and read the suspected zlib data
-			fd = self.module.config.open_file(result.file.name, offset=result.offset)
+			fd = self.module.config.open_file(result.file.name, offset=result.offset, length=self.MAX_DATA_SIZE)
-			#BlockFile(result.file.name, offset=result.offset, swap=self.module.config.swap_size)
+			# Python3 ctypes needs a bytes object, not a str
-			data = fd.read(self.MAX_DATA_SIZE)
+			data = str2bytes(fd.read(self.MAX_DATA_SIZE))
 			fd.close()
 			# Check if this is valid zlib data