Unverified Commit d29ca083 by cccs-rs Committed by GitHub

Allow metadata to contain a list of values (#201)

The `Rules.match` function now accepts an optional `allow_duplicate_metadata=True` argument, which changes the structure of `Match.meta`. By default, `Match.meta` is a dictionary that maps metadata names to their corresponding values; if a metadata name appears more than once in a rule, the last value is used. For example, consider the following rule:

```yara
rule demo {
   meta: 
     foo = "foo #1"
     foo = "foo #2"
     bar = "bar"
   condition:
      false
}
```

In that case `Match.meta` would be `{"foo": "foo #2", "bar": "bar"}` by default (`allow_duplicate_metadata=False`), but with `allow_duplicate_metadata=True` it would be `{"foo": ["foo #1", "foo #2"], "bar": ["bar"]}`. Note that in the second form every value is wrapped in a list, even for names that appear only once.
parent e14f096e
......@@ -752,6 +752,28 @@ class TestYara(unittest.TestCase):
self.assertTrue(meta['b'] == 'ñ')
self.assertTrue(meta['c'] == 'ñ')
# This test is similar to testScanMeta, but it checks that every value of a duplicated metadata field is exposed
# in the Match object's metadata when the caller requests it with `allow_duplicate_metadata=True`.
def testDuplicateMeta(self):
    """Check how a duplicated metadata identifier is exposed on a match.

    The rule declares ``a`` twice and ``b`` once. Without the flag,
    ``Match.meta`` is expected to map each name to a single value (the
    last one declared); with ``allow_duplicate_metadata=True`` it is
    expected to map each name to the list of all declared values.
    """
    r = yara.compile(source="""
rule test {
meta:
a = 1
a = 2
b = 3
condition:
true
}
""")
    # Default behaviour should produce a simple KV map and should use the 'latest' metadata value per field.
    # One assertEqual per field so a failure names the field and shows both values.
    meta = r.match(data="dummy")[0].meta
    self.assertEqual(meta['a'], 2)
    self.assertEqual(meta['b'], 3)
    # `allow_duplicate_metadata` flag should reveal all metadata values per field as a list,
    # including fields that were declared only once.
    meta = r.match(data="dummy", allow_duplicate_metadata=True)[0].meta
    self.assertEqual(meta['a'], [1, 2])
    self.assertEqual(meta['b'], [3])
def testFilesize(self):
self.assertTrueRules([
......
......@@ -432,6 +432,7 @@ typedef struct _CALLBACK_DATA
PyObject* warnings_callback;
PyObject* console_callback;
int which;
bool allow_duplicate_metadata;
} CALLBACK_DATA;
......@@ -885,7 +886,6 @@ _exit:
#define CALLBACK_NON_MATCHES 0x02
#define CALLBACK_ALL CALLBACK_MATCHES | CALLBACK_NON_MATCHES
int yara_callback(
YR_SCAN_CONTEXT* context,
int message,
......@@ -987,8 +987,24 @@ int yara_callback(
else
object = PY_STRING(meta->string);
PyDict_SetItemString(meta_list, meta->identifier, object);
Py_DECREF(object);
if (((CALLBACK_DATA*) user_data)->allow_duplicate_metadata){
// Check if we already have an array under this key
PyObject* existing_item = PyDict_GetItemString(meta_list, meta->identifier);
// Append object to existing list
if (existing_item)
PyList_Append(existing_item, object);
else{
//Otherwise, instantiate array and append object as first item
PyObject* new_list = PyList_New(0);
PyList_Append(new_list, object);
PyDict_SetItemString(meta_list, meta->identifier, new_list);
Py_DECREF(new_list);
}
}
else{
PyDict_SetItemString(meta_list, meta->identifier, object);
Py_DECREF(object);
}
}
yr_rule_strings_foreach(rule, string)
......@@ -1594,8 +1610,9 @@ static PyObject* Rules_next(
else
object = PY_STRING(meta->string);
PyDict_SetItemString(meta_list, meta->identifier, object);
Py_DECREF(object);
PyDict_SetItemString(meta_list, meta->identifier, object);
Py_DECREF(object);
}
rule->global = PyBool_FromLong(rules->iter_current_rule->flags & RULE_FLAGS_GLOBAL);
......@@ -1623,7 +1640,7 @@ static PyObject* Rules_match(
"filepath", "pid", "data", "externals",
"callback", "fast", "timeout", "modules_data",
"modules_callback", "which_callbacks", "warnings_callback",
"console_callback", NULL
"console_callback", "allow_duplicate_metadata", NULL
};
char* filepath = NULL;
......@@ -1648,11 +1665,12 @@ static PyObject* Rules_match(
callback_data.warnings_callback = NULL;
callback_data.console_callback = NULL;
callback_data.which = CALLBACK_ALL;
callback_data.allow_duplicate_metadata = false;
if (PyArg_ParseTupleAndKeywords(
args,
keywords,
"|sis*OOOiOOiOO",
"|sis*OOOiOOiOOb",
kwlist,
&filepath,
&pid,
......@@ -1665,7 +1683,8 @@ static PyObject* Rules_match(
&callback_data.modules_callback,
&callback_data.which,
&callback_data.warnings_callback,
&callback_data.console_callback))
&callback_data.console_callback,
&callback_data.allow_duplicate_metadata))
{
if (filepath == NULL && data.buf == NULL && pid == -1)
{
......@@ -1729,6 +1748,9 @@ static PyObject* Rules_match(
}
}
if (callback_data.allow_duplicate_metadata == NULL)
callback_data.allow_duplicate_metadata = false;
if (yr_scanner_create(object->rules, &scanner) != 0)
{
return PyErr_Format(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment