Make sure binwalk always opens files with utf-8 encoding

Under Python 3.4, if the interpreter is launched in an environment such as LANG=C, binwalk fails with a UnicodeDecodeError because it tries to decode its bundled magic files (such as src/binwalk/magic/linux) with the ASCII codec. This patch makes sure that the UTF-8 codec is always used.

Make sure binwalk always opens files with utf-8 encoding
Under Python 3.4, if the interpreter is launched in an environment such as LANG=C, binwalk fails with a UnicodeDecodeError because it tries to decode its bundled magic files (such as src/binwalk/magic/linux) with the ASCII codec. This patch makes sure that the UTF-8 codec is always used.
c5ffe186 · Slavek Kabrda · b195008f · c5ffe186 · c5ffe186 · c5ffe186
Commit c5ffe186 authored Nov 27, 2015 by Slavek Kabrda
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 3 deletions

display.py src/binwalk/core/display.py +2 -1

magic.py src/binwalk/core/magic.py +2 -1

extractor.py src/binwalk/modules/extractor.py +2 -1

No files found.
--- a/src/binwalk/core/display.py
+++ b/src/binwalk/core/display.py
 # Code to handle displaying and logging of results.
 # Anything in binwalk that prints results to screen should use this class.
+import codecs
 import sys
 import csv as pycsv
 import datetime
@@ -29,7 +30,7 @@ class Display(object):
        self._configure_formatting()
        if log:
-            self.fp = open(log, "a")
+            self.fp = codecs.open(log, "a", encoding='utf-8')
            if csv:
                self.csv = pycsv.writer(self.fp)

--- a/src/binwalk/core/magic.py
+++ b/src/binwalk/core/magic.py
@@ -4,6 +4,7 @@
 __all__ = ['Magic']
+import codecs
 import re
 import struct
 import datetime
@@ -789,7 +790,7 @@ class Magic(object):
        Returns None.
        '''
-        fp = open(fname, "r")
+        fp = codecs.open(fname, "r", encoding='utf-8')
        lines = fp.readlines()
        self.parse(lines)
        fp.close()

--- a/src/binwalk/modules/extractor.py
+++ b/src/binwalk/modules/extractor.py
@@ -2,6 +2,7 @@
 # This is automatically invoked by core.module code if extraction has been
 # enabled by the user; other modules need not reference this module directly.
+import codecs
 import os
 import re
 import sys
@@ -317,7 +318,7 @@ class Extractor(Module):
        '''
        try:
            # Process each line from the extract file, ignoring comments
-            with open(fname, 'r') as f:
+            with codecs.open(fname, 'r', encoding='utf-8') as f:
                for rule in f.readlines():
                    self.add_rule(rule.split(self.COMMENT_DELIM, 1)[0])
        except KeyboardInterrupt as e: