Add an opt-in allow_duplicate_metadata flag to Rules.match()

By default the `meta` dict of a match keeps a single value per identifier,
so a rule that declares the same identifier more than once exposes only the
last value. When allow_duplicate_metadata=True is passed to match(), every
identifier maps to a list holding all of its declared values.

In yara_callback the reference held on each metadata object is released on
every path, because PyList_Append and PyDict_SetItemString take their own
reference. The flag is initialised to false before argument parsing, so no
reset is needed after parsing.

NOTE(review): this patch was rebuilt from a copy whose whitespace had been
collapsed. Indentation of the context lines is reconstructed and should be
confirmed against the tree (or applied with --ignore-whitespace). The index
line for yara-python.c is omitted because the post-image changed.

diff --git a/tests.py b/tests.py
index 3bb4680..feae88c 100644
--- a/tests.py
+++ b/tests.py
@@ -752,6 +752,28 @@ class TestYara(unittest.TestCase):
         self.assertTrue(meta['b'] == 'ñ')
         self.assertTrue(meta['c'] == 'ñ')
 
+    # This test is similar to testScanMeta but it tests for displaying multiple values in the meta data generated
+    # when a Match object is created (upon request).
+    def testDuplicateMeta(self):
+        r = yara.compile(source="""
+        rule test {
+            meta:
+                a = 1
+                a = 2
+                b = 3
+            condition:
+                true
+        }
+        """)
+
+        # Default behaviour should produce a simple KV map and should use the 'latest' metadata value per field
+        meta = r.match(data="dummy")[0].meta
+        self.assertTrue(meta['a'] == 2 and meta['b'] == 3)
+
+        # `allow_duplicate_metadata` flag should reveal all metadata values per field as a list
+        meta = r.match(data="dummy", allow_duplicate_metadata=True)[0].meta
+        self.assertTrue(meta['a'] == [1, 2] and meta['b'] == [3])
+
     def testFilesize(self):
 
         self.assertTrueRules([
diff --git a/yara-python.c b/yara-python.c
--- a/yara-python.c
+++ b/yara-python.c
@@ -432,6 +432,7 @@ typedef struct _CALLBACK_DATA
   PyObject* warnings_callback;
   PyObject* console_callback;
   int which;
+  bool allow_duplicate_metadata;
 
 } CALLBACK_DATA;
 
@@ -885,7 +886,6 @@ _exit:
 #define CALLBACK_NON_MATCHES 0x02
 #define CALLBACK_ALL CALLBACK_MATCHES | CALLBACK_NON_MATCHES
 
-
 int yara_callback(
     YR_SCAN_CONTEXT* context,
     int message,
@@ -987,8 +987,40 @@ int yara_callback(
     else
       object = PY_STRING(meta->string);
 
-    PyDict_SetItemString(meta_list, meta->identifier, object);
-    Py_DECREF(object);
+    if (((CALLBACK_DATA*) user_data)->allow_duplicate_metadata)
+    {
+      // Every identifier maps to a list of all values declared for it.
+      // PyDict_GetItemString returns a borrowed reference, or NULL when
+      // the identifier has not been seen yet.
+      PyObject* existing_item = PyDict_GetItemString(
+          meta_list, meta->identifier);
+
+      if (existing_item != NULL)
+      {
+        PyList_Append(existing_item, object);
+      }
+      else
+      {
+        PyObject* new_list = PyList_New(0);
+
+        // PyList_New returns NULL on allocation failure; skip the entry
+        // rather than dereference it.
+        if (new_list != NULL)
+        {
+          PyList_Append(new_list, object);
+          PyDict_SetItemString(meta_list, meta->identifier, new_list);
+          Py_DECREF(new_list);
+        }
+      }
+    }
+    else
+    {
+      PyDict_SetItemString(meta_list, meta->identifier, object);
+    }
+
+    // PyList_Append and PyDict_SetItemString add their own reference to
+    // object, so the one held here is released on every path.
+    Py_DECREF(object);
   }
 
   yr_rule_strings_foreach(rule, string)
@@ -1623,7 +1655,7 @@ static PyObject* Rules_match(
       "filepath", "pid", "data", "externals",
       "callback", "fast", "timeout", "modules_data",
       "modules_callback", "which_callbacks", "warnings_callback",
-      "console_callback", NULL
+      "console_callback", "allow_duplicate_metadata", NULL
   };
 
   char* filepath = NULL;
@@ -1648,11 +1680,12 @@ static PyObject* Rules_match(
   callback_data.warnings_callback = NULL;
   callback_data.console_callback = NULL;
   callback_data.which = CALLBACK_ALL;
+  callback_data.allow_duplicate_metadata = false;
 
   if (PyArg_ParseTupleAndKeywords(
         args,
         keywords,
-        "|sis*OOOiOOiOO",
+        "|sis*OOOiOOiOOb",
         kwlist,
         &filepath,
         &pid,
@@ -1665,7 +1698,8 @@ static PyObject* Rules_match(
         &callback_data.modules_callback,
         &callback_data.which,
         &callback_data.warnings_callback,
-        &callback_data.console_callback))
+        &callback_data.console_callback,
+        &callback_data.allow_duplicate_metadata))
   {
     if (filepath == NULL && data.buf == NULL && pid == -1)
     {