Commit cfd49c04 by Victor M. Alvarez

Fix issue #149.

This is a regression introduced in #140. When a string in the metadata section contains invalid UTF-8 characters, the behavior in Python 2 is to leave the string exactly as it appears in YARA; in Python 3, however, the invalid characters are removed because Python 3 strings are not handled as bytes like in Python 2: they must have a valid encoding. PR #140 was an attempt to homogenize the behavior in both versions of Python, but it introduced this other issue.
parent 286897d4
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2007-2014. The YARA Authors. All Rights Reserved.
#
......@@ -692,24 +694,58 @@ class TestYara(unittest.TestCase):
'rule test { condition: entrypoint >= 0 }',
])
def testMeta(self):
    """Check the metadata value of a rule whose string holds a 0x80 byte.

    The rule source is a raw Python string, so the hex escape reaches the
    YARA compiler untouched. The value read back from the first rule's
    ``meta`` mapping is expected to be ``'foobar'``.
    """
    rules = yara.compile(
        source=r'rule test { meta: a = "foo\x80bar" condition: true }')
    # assertEqual shows both operands on failure; assertTrue(a == b) only
    # reports "False is not true".
    self.assertEqual(list(rules)[0].meta['a'], 'foobar')
# This test ensures that anything after the NULL character is stripped.
# This test ensures that anything after the NULL character is stripped.
def testMetaNull(self):
    """Check that a metadata string is cut at its first NULL character.

    The rule's metadata value contains a NULL escape followed by more
    text and a 0x80 byte; only the part before the NULL (``'foo'``) is
    expected in the first rule's ``meta`` mapping.
    """
    rules = yara.compile(
        source=r'rule test { meta: a = "foo\x00bar\x80" condition: true }')
    # assertEqual shows both operands on failure; assertTrue(a == b) only
    # reports "False is not true".
    self.assertEqual(list(rules)[0].meta['a'], 'foo')
def testMeta(self):
    """Check metadata strings with invalid and valid UTF-8 content.

    The rule declares three metadata values: ``a`` holds a lone 0x80 byte
    (not valid UTF-8), ``b`` holds a literal non-ASCII character and ``c``
    holds the same character spelled as hex escapes. The values are read
    from the ``meta`` mapping of the first compiled rule.
    """
    rules = yara.compile(source=r"""
rule test {
meta:
a = "foo\x80bar"
b = "ñ"
c = "\xc3\xb1"
condition:
true }
""")

    meta = list(rules)[0].meta

    # The expectation for the invalid byte depends on the interpreter:
    # under Python 3 it is absent from the value, under Python 2 the value
    # keeps it unchanged.
    if sys.version_info > (3, 0):
        self.assertEqual(meta['a'], 'foobar')
    else:
        self.assertEqual(meta['a'], 'foo\x80bar')

    # assertEqual shows both operands on failure; assertTrue(a == b) only
    # reports "False is not true".
    self.assertEqual(meta['b'], 'ñ')
    self.assertEqual(meta['c'], 'ñ')
# This test is similar to testMeta but it tests the meta data generated
# when a Match object is created.
def testScanMeta(self):
    """Check the metadata attached to a match produced by a scan.

    The rule declares three metadata values: ``a`` holds a lone 0x80 byte
    (not valid UTF-8), ``b`` holds a literal non-ASCII character and ``c``
    holds the same character spelled as hex escapes. The rule always
    matches, so scanning any data yields a match whose ``meta`` mapping is
    inspected.
    """
    # A single compile is enough: an extra single-line compile whose result
    # was immediately overwritten has been dropped.
    rules = yara.compile(source=r"""
rule test {
meta:
a = "foo\x80bar"
b = "ñ"
c = "\xc3\xb1"
condition:
true }
""")

    matches = rules.match(data='dummy')
    meta = list(matches)[0].meta

    # Only the version-dependent expectation is kept for 'a'; an
    # unconditional check for 'foobar' would contradict the Python 2 branch.
    if sys.version_info > (3, 0):
        self.assertEqual(meta['a'], 'foobar')
    else:
        self.assertEqual(meta['a'], 'foo\x80bar')

    # assertEqual shows both operands on failure; assertTrue(a == b) only
    # reports "False is not true".
    self.assertEqual(meta['b'], 'ñ')
    self.assertEqual(meta['c'], 'ñ')
def testFilesize(self):
......
......@@ -50,7 +50,7 @@ typedef long Py_hash_t;
#define PY_STRING_TO_C(x) PyUnicode_AsUTF8(x)
#define PY_STRING_CHECK(x) PyUnicode_Check(x)
#else
#define PY_STRING(x) PyString_Decode(x, strlen(x), "utf-8", "ignore")
#define PY_STRING(x) PyString_FromString(x)
#define PY_STRING_TO_C(x) PyString_AsString(x)
#define PY_STRING_CHECK(x) (PyString_Check(x) || PyUnicode_Check(x))
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment