implemented Cwe789 check (#347)

46bf2bd2 · van den Bosch · GitHub · 99042d01 · 46bf2bd2 · 46bf2bd2
Unverified Commit 46bf2bd2 authored 2 years ago by van den Bosch Committed by GitHub 2 years ago
9 changed files
--- a/src/caller/src/main.rs
+++ b/src/caller/src/main.rs
@@ -176,8 +176,9 @@ fn run_with_ghidra(args: &CmdlineArgs) {
    let analysis_results = AnalysisResults::new(&binary, &control_flow_graph, &project);
    let modules_depending_on_string_abstraction = BTreeSet::from_iter(["CWE78"]);
-    let modules_depending_on_pointer_inference =
+    let modules_depending_on_pointer_inference = BTreeSet::from_iter([
-        BTreeSet::from_iter(["CWE119", "CWE134", "CWE416", "CWE476", "Memory", "CWE190"]);
+        "CWE119", "CWE134", "CWE416", "CWE476", "Memory", "CWE190", "CWE789",
+    ]);
    let string_abstraction_needed = modules
        .iter()

--- a/src/config.json
+++ b/src/config.json
@@ -214,6 +214,16 @@
  "CWE782": {
    "symbols": []
  },
+  "CWE789": {
+    "stack_threshold": 7500,
+    "heap_threshold": 1000000,
+    "symbols": [
+      "xmalloc",
+      "malloc",
+      "realloc",
+      "calloc"
+    ]
+  },
  "check_path": {
    "_comment": "functions that take direct user input",
    "symbols": [

--- a/src/cwe_checker_lib/src/abstract_domain/mod.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/mod.rs
@@ -76,15 +76,19 @@ pub trait HasTop {
 /// It has a *Top* element, which is only characterized by its bytesize.
 pub trait RegisterDomain: AbstractDomain + SizedDomain + HasTop {
    /// Compute the (abstract) result of a binary operation
+    #[must_use]
    fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self;
    /// Compute the (abstract) result of a unary operation
+    #[must_use]
    fn un_op(&self, op: UnOpType) -> Self;
    /// Extract a sub-bitvector
+    #[must_use]
    fn subpiece(&self, low_byte: ByteSize, size: ByteSize) -> Self;
    /// Perform a typecast to extend a bitvector or to cast between integer and floating point types.
+    #[must_use]
    fn cast(&self, kind: CastOpType, width: ByteSize) -> Self;
    /// Return the bytesize of the result of the given binary operation.

--- a/src/cwe_checker_lib/src/checkers.rs
+++ b/src/cwe_checker_lib/src/checkers.rs
@@ -20,3 +20,4 @@ pub mod cwe_560;
 pub mod cwe_676;
 pub mod cwe_78;
 pub mod cwe_782;
+pub mod cwe_789;
--- a/src/cwe_checker_lib/src/checkers/cwe_190.rs
+++ b/src/cwe_checker_lib/src/checkers/cwe_190.rs
@@ -121,8 +121,7 @@ fn calloc_parm_mul_is_top(pir: &PointerInference, jmp_tid: &Tid, parms: Vec<&Arg
        pir.eval_parameter_arg_at_call(jmp_tid, parms[0]),
        pir.eval_parameter_arg_at_call(jmp_tid, parms[1]),
    ) {
-        nmeb.bin_op(BinOpType::IntMult, &size);
+        return !contains_only_non_top_absolute_value(&nmeb.bin_op(BinOpType::IntMult, &size));
-        return !contains_only_non_top_absolute_value(&nmeb);
    }
    false

--- a/src/cwe_checker_lib/src/checkers/cwe_789.rs
+++ b/src/cwe_checker_lib/src/checkers/cwe_789.rs
+//! This module implements a check for CWE-789: Memory Allocation with Excessive Size Value.
+//!
+//! Stack memory allocation and function calls like malloc are covered in this module.
+//! Excessive allocation of memory might destabilize programs on machines with limited resources.
+//!
+//! See <https://cwe.mitre.org/data/definitions/789.html> for a detailed description.
+//!
+//! ## How the check works
+//!
+//! Every instruction is checked for whether it assigns a new value to the stack pointer.
+//! If it does, the value range of the assignment is checked, and a warning is generated
+//! if it exceeds the `stack_threshold` defined in config.json.
+//! For calls like malloc, the provided argument is checked for whether its value exceeds
+//! the `heap_threshold`. The covered function calls are defined in config.json.
+//! Both thresholds are given in bytes.
+//!
+//! ## False Positives
+//!
+//! ## False Negatives
+//!
+//! - At most one warning for stack memory allocation is created for each function. This means that multiple
+//!   weaknesses within the same function are not reported individually.
+use crate::abstract_domain::DataDomain;
+use crate::abstract_domain::IntervalDomain;
+use crate::abstract_domain::RegisterDomain;
+use crate::abstract_domain::TryToInterval;
+use crate::analysis::pointer_inference::PointerInference;
+use crate::analysis::vsa_results::*;
+use crate::intermediate_representation::*;
+use crate::utils::log::CweWarning;
+use crate::utils::log::LogMessage;
+use crate::utils::symbol_utils::get_callsites;
+use crate::utils::symbol_utils::get_symbol_map;
+use crate::AnalysisResults;
+use crate::CweModule;
+use serde::Deserialize;
+use serde::Serialize;
+/// The module name and version, together with the function (`check_cwe`) that runs the check.
+pub static CWE_MODULE: CweModule = CweModule {
+    name: "CWE789",
+    version: "0.1",
+    run: check_cwe,
+};
+/// The configuration struct, deserialized from the JSON parameters passed to `check_cwe`.
+/// A warning is generated whenever one of the two thresholds is exceeded.
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
+pub struct Config {
+    stack_threshold: u64, // limit (in bytes, per the module docs) for stack pointer adjustments
+    heap_threshold: u64, // limit (in bytes, per the module docs) for the size argument of allocation calls
+    symbols: Vec<String>, // names of the extern functions whose call sites are checked
+}
+/// Returns `true` if `def` is an assignment whose target variable is the stack pointer `sp`.
+///
+/// Any other kind of `Def`, or an assignment to a different variable, yields `false`.
+fn is_assign_on_sp(def: &Def, sp: &Variable) -> bool {
+    matches!(def, Def::Assign { var, .. } if var == sp)
+}
+/// Determines whether a stack pointer value indicates a stack allocation above `threshold`.
+///
+/// Only the relative values of `interval` are inspected. The check succeeds as soon as one
+/// relative offset has an interval whose lower bound lies below `-threshold`.
+/// Offsets that cannot be converted to an interval or to an `i128` are ignored.
+fn exceeds_threshold_on_stack(interval: DataDomain<IntervalDomain>, threshold: u64) -> bool {
+    let lower_limit = -i128::from(threshold);
+    interval.get_relative_values().values().any(|rel_offset| {
+        rel_offset
+            .try_to_interval()
+            .ok()
+            .and_then(|offset| offset.start.try_to_i128().ok())
+            .map_or(false, |start| start < lower_limit)
+    })
+}
+/// Determines whether an allocation size argument may exceed `threshold`.
+///
+/// Only the absolute value of `interval` is inspected. The check succeeds if the upper bound
+/// of its interval, read as an unsigned number, is greater than `threshold`.
+/// A missing absolute value or a failed conversion yields `false`.
+fn exceeds_threshold_on_call(interval: DataDomain<IntervalDomain>, threshold: u64) -> bool {
+    let upper_limit = u128::from(threshold);
+    interval
+        .get_absolute_value()
+        .and_then(|abs_value| abs_value.try_to_interval().ok())
+        .and_then(|offset| offset.end.try_to_u128().ok())
+        .map_or(false, |end| end > upper_limit)
+}
+/// Computes the abstract product of the element count and element size parameters of a
+/// `calloc`-like call.
+///
+/// `parms` is expected to hold the element count at index 0 and the element size at index 1.
+/// This function does not compare the product against any threshold.
+///
+/// Returns `None` if `parms` contains fewer than two arguments or if either argument cannot
+/// be evaluated at the call site `jmp_tid`.
+fn multiply_args_for_calloc(
+    pir: &PointerInference,
+    jmp_tid: &Tid,
+    parms: Vec<&Arg>,
+) -> Option<DataDomain<IntervalDomain>> {
+    // Destructure instead of indexing so that a short parameter list cannot cause a panic.
+    let (count_arg, size_arg) = match parms.as_slice() {
+        [count, size, ..] => (*count, *size),
+        _ => return None,
+    };
+    let nmeb = pir.eval_parameter_arg_at_call(jmp_tid, count_arg)?;
+    let size = pir.eval_parameter_arg_at_call(jmp_tid, size_arg)?;
+    Some(nmeb.bin_op(BinOpType::IntMult, &size))
+}
+/// Build the CWE warning for an excessive allocation found at `allocation`.
+///
+/// `is_stack_allocation` selects whether the description mentions stack or heap memory.
+/// The warning carries the TID and address of `allocation` and an empty symbol list.
+fn generate_cwe_warning(allocation: &Tid, is_stack_allocation: bool) -> CweWarning {
+    let memory_kind = if is_stack_allocation {
+        " stack "
+    } else {
+        " heap "
+    };
+    let description = format!(
+        "(Large memory allocation) Potential{}memory exhaustion at 0x{}",
+        memory_kind, allocation.address
+    );
+    CweWarning::new(CWE_MODULE.name, CWE_MODULE.version, description)
+        .tids(vec![allocation.to_string()])
+        .addresses(vec![allocation.address.clone()])
+        .symbols(vec![])
+}
+/// Run the CWE check.
+///
+/// For each function, the call sites of the configured allocation symbols and all
+/// instructions that assign a value to the stack pointer are checked against the
+/// configured thresholds.
+///
+/// Call sites whose symbol has fewer parameters than the position of the size argument
+/// requires are skipped instead of causing an out-of-bounds panic.
+///
+/// Returns an empty list of log messages together with the generated CWE warnings.
+///
+/// # Panics
+///
+/// Panics if `cwe_params` cannot be deserialized into a [`Config`] or if the
+/// pointer inference results are not available in `analysis_results`.
+pub fn check_cwe(
+    analysis_results: &AnalysisResults,
+    cwe_params: &serde_json::Value,
+) -> (Vec<LogMessage>, Vec<CweWarning>) {
+    let project = analysis_results.project;
+    let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
+    let mut cwe_warnings = Vec::new();
+    let pir = analysis_results.pointer_inference.unwrap();
+    let symbol_map = get_symbol_map(project, &config.symbols);
+    'functions: for sub in project.program.term.subs.values() {
+        // Function call allocation case
+        for (_, jump, symbol) in get_callsites(sub, &symbol_map) {
+            // The symbol list is configurable, so the parameter count is not guaranteed.
+            let params = symbol.parameters.as_slice();
+            let requested_size = match symbol.name.as_str() {
+                // For `calloc` the product of the first two parameters is checked.
+                "calloc" => match params {
+                    [count, size, ..] => {
+                        multiply_args_for_calloc(pir, &jump.tid, vec![count, size])
+                    }
+                    _ => None,
+                },
+                // For `realloc` the second parameter is checked.
+                "realloc" => params
+                    .get(1)
+                    .and_then(|size| pir.eval_parameter_arg_at_call(&jump.tid, size)),
+                // For all other symbols the first parameter is checked.
+                _ => params
+                    .first()
+                    .and_then(|size| pir.eval_parameter_arg_at_call(&jump.tid, size)),
+            };
+            if let Some(interval) = requested_size {
+                if exceeds_threshold_on_call(interval, config.heap_threshold) {
+                    cwe_warnings.push(generate_cwe_warning(&jump.tid, false));
+                }
+            }
+        }
+        // Stack allocation case
+        for blk in &sub.term.blocks {
+            let sp_assignments = blk
+                .term
+                .defs
+                .iter()
+                .filter(|def| is_assign_on_sp(&def.term, &project.stack_pointer_register));
+            for assign in sp_assignments {
+                if let Some(interval) = pir.eval_value_at_def(&assign.tid) {
+                    if exceeds_threshold_on_stack(interval, config.stack_threshold) {
+                        cwe_warnings.push(generate_cwe_warning(&assign.tid, true));
+                        // At most one stack allocation warning is generated per function.
+                        continue 'functions;
+                    }
+                }
+            }
+        }
+    }
+    // Note: `dedup` only removes consecutive duplicates.
+    cwe_warnings.dedup();
+    (Vec::new(), cwe_warnings)
+}
--- a/src/cwe_checker_lib/src/lib.rs
+++ b/src/cwe_checker_lib/src/lib.rs
@@ -130,6 +130,7 @@ pub fn get_modules() -> Vec<&'static CweModule> {
        &crate::checkers::cwe_560::CWE_MODULE,
        &crate::checkers::cwe_676::CWE_MODULE,
        &crate::checkers::cwe_782::CWE_MODULE,
+        &crate::checkers::cwe_789::CWE_MODULE,
        &crate::analysis::pointer_inference::CWE_MODULE,
    ]
 }

--- a/test/artificial_samples/cwe_789.c
+++ b/test/artificial_samples/cwe_789.c
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+int main(int argc, char* argv[]){
+	char buff[0xF4250]; // dec: 1000016, larger than the stack_threshold of 7500 set in config.json
+	malloc(0xF4250); // dec: 1000016, larger than the heap_threshold of 1000000 set in config.json; the returned pointer is not used
+	return 0;
+}
\ No newline at end of file
--- a/test/src/lib.rs
+++ b/test/src/lib.rs
@@ -16,6 +16,7 @@ pub const WINDOWS_ARCHITECTURES: &[&str] = &["x64", "x86"];
 pub const WINDOWS_COMPILERS: &[&str] = &["mingw32-gcc"];
 /// A test case containing the necessary information to run an acceptance test.
+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
 pub struct CweTestCase {
    /// The name of the cwe (according to the test file)
    cwe: &'static str,
@@ -640,4 +641,26 @@ mod tests {
            panic!();
        }
    }
+    /// Acceptance test for the CWE789 check on the `cwe_789` sample.
+    ///
+    /// Each non-skipped test case is expected to produce exactly two `[CWE789]` warnings.
+    /// The test fails if at least one test case returns an error.
+    #[test]
+    #[ignore]
+    fn cwe_789() {
+        let mut error_log = Vec::new();
+        let mut tests = all_test_cases("cwe_789", "CWE789");
+        mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
+        mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
+        mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
+        for test_case in tests {
+            let num_expected_occurences = 2;
+            if let Err(error) = test_case.run_test("[CWE789]", num_expected_occurences) {
+                error_log.push((test_case.get_filepath(), error));
+            }
+        }
+        if !error_log.is_empty() {
+            print_errors(error_log);
+            // Without this panic the test would pass even though test cases failed.
+            panic!();
+        }
+    }
 }