Fix issue #149.

This is a regression introduced in #140. When a string in the metadata section contains invalid UTF-8 characters, the behavior in Python 2 is to leave the string exactly as it appears in YARA; in Python 3, however, the invalid characters are removed because Python 3 strings are not handled as bytes like in Python 2: they must have a valid encoding. PR #140 was an attempt to homogenize the behavior in both versions of Python, but it introduced this other issue.

Fix issue #149.
This is a regression introduced in #140. When a string in the metadata section contains invalid UTF-8 characters, the behavior in Python 2 is to leave the string exactly as it appears in YARA; in Python 3, however, the invalid characters are removed because Python 3 strings are not handled as bytes like in Python 2: they must have a valid encoding. PR #140 was an attempt to homogenize the behavior in both versions of Python, but it introduced this other issue.
cfd49c04 · Victor M. Alvarez · 286897d4 · cfd49c04 · cfd49c04
Commit cfd49c04 authored 5 years ago by Victor M. Alvarez
Hide whitespace changes
Inline Side-by-side

Showing with 45 additions and 9 deletions

tests.py tests.py +44 -8

yara-python.c yara-python.c +1 -1

No files found.
--- a/tests.py
+++ b/tests.py
+#!/usr/local/bin/python
+# -*- coding: utf-8 -*-
 #
 # Copyright (c) 2007-2014. The YARA Authors. All Rights Reserved.
 #
@@ -692,24 +694,58 @@ class TestYara(unittest.TestCase):
            'rule test { condition: entrypoint >= 0 }',
        ])

-    def testMeta(self):
-
-        r = yara.compile(source=r'rule test { meta: a = "foo\x80bar" condition: true }')
-        self.assertTrue((list(r)[0].meta['a']) == 'foobar')
-
-    # This test ensures that anything after the NULL character is stripped.
+     # This test ensures that anything after the NULL character is stripped.
    def testMetaNull(self):

        r = yara.compile(source=r'rule test { meta: a = "foo\x00bar\x80" condition: true }')
        self.assertTrue((list(r)[0].meta['a']) == 'foo')

+    def testMeta(self):
+
+        r = yara.compile(source=r"""
+            rule test {
+                meta:
+                    a = "foo\x80bar"
+                    b = "ñ"
+                    c = "\xc3\xb1"
+                condition:
+                    true }
+            """)
+
+        meta = list(r)[0].meta
+
+        if sys.version_info > (3, 0):
+            self.assertTrue(meta['a'] == 'foobar')
+        else:
+            self.assertTrue(meta['a'] == 'foo\x80bar')
+
+        self.assertTrue(meta['b'] == 'ñ')
+        self.assertTrue(meta['c'] == 'ñ')
+
    # This test is similar to testMeta but it tests the meta data generated
    # when a Match object is created.
    def testScanMeta(self):

-        r = yara.compile(source=r'rule test { meta: a = "foo\x80bar" condition: true }')
+        r = yara.compile(source=r"""
+            rule test {
+                meta:
+                    a = "foo\x80bar"
+                    b = "ñ"
+                    c = "\xc3\xb1"
+                condition:
+                    true }
+             """)
+
        m = r.match(data='dummy')
-        self.assertTrue((list(m)[0].meta['a']) == 'foobar')
+        meta = list(m)[0].meta
+
+        if sys.version_info > (3, 0):
+            self.assertTrue(meta['a'] == 'foobar')
+        else:
+            self.assertTrue(meta['a'] == 'foo\x80bar')
+
+        self.assertTrue(meta['b'] == 'ñ')
+        self.assertTrue(meta['c'] == 'ñ')

    def testFilesize(self):


--- a/yara-python.c
+++ b/yara-python.c
@@ -50,7 +50,7 @@ typedef long Py_hash_t;
 #define PY_STRING_TO_C(x) PyUnicode_AsUTF8(x)
 #define PY_STRING_CHECK(x) PyUnicode_Check(x)
 #else
-#define PY_STRING(x) PyString_Decode(x, strlen(x), "utf-8", "ignore")
+#define PY_STRING(x) PyString_FromString(x)
 #define PY_STRING_TO_C(x) PyString_AsString(x)
 #define PY_STRING_CHECK(x) (PyString_Check(x) || PyUnicode_Check(x))
 #endif