From e3d190b10c4b9016d74598e459293e6e82851b2e Mon Sep 17 00:00:00 2001 From: heffnercj <heffnercj@gmail.com> Date: Sun, 17 Nov 2013 11:07:54 -0500 Subject: [PATCH] Basic signature scans now work in python3; other features TBD --- src/binwalk/__init__.py | 5 ++++- src/binwalk/common.py | 2 ++ src/binwalk/compat.py | 4 ++-- src/binwalk/parser.py | 8 ++++---- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/binwalk/__init__.py b/src/binwalk/__init__.py index 6bb34dc..6451666 100644 --- a/src/binwalk/__init__.py +++ b/src/binwalk/__init__.py @@ -539,8 +539,11 @@ class Binwalk(object): results = [] results_offset = -1 + # In python3 we need a bytes object to pass to magic.buffer + candidate_data = str2bytes(data[i+candidate:i+candidate+fd.MAX_TRAILING_SIZE]) + # Pass the data to libmagic, and split out multiple results into a list - for magic_result in self.parser.split(self.magic.buffer(data[i+candidate:i+candidate+fd.MAX_TRAILING_SIZE])): + for magic_result in self.parser.split(self.magic.buffer(candidate_data)): i_set_results_offset = False diff --git a/src/binwalk/common.py b/src/binwalk/common.py index d192abf..4fa9a4a 100644 --- a/src/binwalk/common.py +++ b/src/binwalk/common.py @@ -158,6 +158,8 @@ class BlockFile(io.FileIO): data = self.read(self.READ_BLOCK_SIZE + self.MAX_TRAILING_SIZE) if data and data is not None: + data = bytes2str(data) + # Get the actual length of the read in data dlen = len(data) seek_offset = dlen - self.READ_BLOCK_SIZE diff --git a/src/binwalk/compat.py b/src/binwalk/compat.py index e81e322..8212651 100644 --- a/src/binwalk/compat.py +++ b/src/binwalk/compat.py @@ -33,7 +33,7 @@ def str2bytes(string): For cross compatibility between Python 2 and Python 3 strings. ''' if isinstance(string, type('')) and sys.version_info.major > 2: - return bytes(string, 'ascii') + return bytes(string, 'latin1') else: return string @@ -42,7 +42,7 @@ def bytes2str(bs): For cross compatibility between Python 2 and Python 3 strings. ''' if isinstance(bs, type(b'')) and sys.version_info.major > 2: - return bs.decode('ascii') + return bs.decode('latin1') else: return bs diff --git a/src/binwalk/parser.py b/src/binwalk/parser.py index 1a4468a..9e323c1 100644 --- a/src/binwalk/parser.py +++ b/src/binwalk/parser.py @@ -141,6 +141,7 @@ class MagicParser: try: for line in io.FileIO(file_name).readlines(): + line = bytes2str(line) line_count += 1 # Check if this is the first line of a signature entry @@ -164,7 +165,7 @@ class MagicParser: # Keep writing lines of the signature to the temporary magic file until # we detect a signature that should not be included. if include: - self.fd.write(line) + self.fd.write(str2bytes(line)) self.build_signature_set() except Exception as e: @@ -191,14 +192,14 @@ class MagicParser: # Quick and dirty pre-filter. We are only concerned with the first line of a # signature, which will always start with a number. Make sure the first byte of # the line is a number; if not, don't process. - if bytes2str(line[:1]) < '0' or bytes2str(line[:1]) > '9': + if line[:1] < '0' or line[:1] > '9': return None try: # Split the line into white-space separated parts. # For this to work properly, replace escaped spaces ('\ ') with '\x20'. # This means the same thing, but doesn't confuse split(). - line_parts = bytes2str(line).replace('\\ ', '\\x20').split() + line_parts = line.replace('\\ ', '\\x20').split() entry['offset'] = line_parts[0] entry['type'] = line_parts[1] # The condition line may contain escaped sequences, so be sure to decode it properly. @@ -281,7 +282,6 @@ class MagicParser: Returns an ordered list of offsets inside of data at which candidate offsets were found. ''' candidate_offsets = [] - data = bytes2str(data) for regex in self.signature_set: candidate_offsets += [match.start() for match in regex.finditer(data) if match.start() < end] -- libgit2 0.26.0