Implement new check for CWE-415 and CWE-416 (#318)

242c5325 · Enkelmann · GitHub · 26f9844d · 242c5325 · 242c5325
Unverified Commit 242c5325 authored Apr 28, 2022 by Enkelmann Committed by GitHub Apr 28, 2022
11 changed files
--- a/src/caller/src/main.rs
+++ b/src/caller/src/main.rs
@@ -178,7 +178,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
    let modules_depending_on_string_abstraction = BTreeSet::from_iter(["CWE78"]);
    let modules_depending_on_pointer_inference =
-        BTreeSet::from_iter(["CWE119", "CWE134", "CWE476", "Memory"]);
+        BTreeSet::from_iter(["CWE119", "CWE134", "CWE416", "CWE476", "Memory"]);
    let string_abstraction_needed = modules
        .iter()

--- a/src/cwe_checker_lib/src/analysis/pointer_inference/mod.rs
+++ b/src/cwe_checker_lib/src/analysis/pointer_inference/mod.rs
@@ -29,7 +29,7 @@
 use super::fixpoint::Computation;
 use super::forward_interprocedural_fixpoint::GeneralizedContext;
 use super::interprocedural_fixpoint_generic::NodeValue;
-use crate::abstract_domain::{DataDomain, IntervalDomain, SizedDomain};
+use crate::abstract_domain::{AbstractIdentifier, DataDomain, IntervalDomain, SizedDomain};
 use crate::analysis::forward_interprocedural_fixpoint::Context as _;
 use crate::analysis::graph::{Graph, Node};
 use crate::intermediate_representation::*;
@@ -37,7 +37,7 @@ use crate::prelude::*;
 use crate::utils::log::*;
 use petgraph::graph::NodeIndex;
 use petgraph::visit::IntoNodeReferences;
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 mod context;
 pub mod object;
@@ -96,6 +96,9 @@ pub struct PointerInference<'a> {
    /// Maps certain TIDs like the TIDs of [`Jmp`] instructions to the pointer inference state at that TID.
    /// The map will be filled after the fixpoint computation finished.
    states_at_tids: HashMap<Tid, State>,
+    /// Maps the TIDs of call instructions to a map mapping callee IDs to the corresponding value in the caller.
+    /// The map will be filled after the fixpoint computation finished.
+    id_renaming_maps_at_calls: HashMap<Tid, BTreeMap<AbstractIdentifier, Data>>,
 }
 impl<'a> PointerInference<'a> {
@@ -145,6 +148,7 @@ impl<'a> PointerInference<'a> {
            values_at_defs: HashMap::new(),
            addresses_at_defs: HashMap::new(),
            states_at_tids: HashMap::new(),
+            id_renaming_maps_at_calls: HashMap::new(),
        }
    }
@@ -256,12 +260,12 @@ impl<'a> PointerInference<'a> {
        let context = self.computation.get_context().get_context();
        let graph = self.computation.get_graph();
        for node in graph.node_indices() {
+            match graph[node] {
+                Node::BlkStart(blk, _sub) => {
                    let node_state = match self.computation.get_node_value(node) {
                        Some(NodeValue::Value(value)) => value,
                        _ => continue,
                    };
-            match graph[node] {
-                Node::BlkStart(blk, _sub) => {
                    let mut state = node_state.clone();
                    for def in &blk.term.defs {
                        match &def.term {
@@ -291,12 +295,40 @@ impl<'a> PointerInference<'a> {
                    }
                }
                Node::BlkEnd(blk, _sub) => {
+                    let node_state = match self.computation.get_node_value(node) {
+                        Some(NodeValue::Value(value)) => value,
+                        _ => continue,
+                    };
                    for jmp in &blk.term.jmps {
                        self.states_at_tids
                            .insert(jmp.tid.clone(), node_state.clone());
                    }
                }
-                Node::CallSource { .. } | Node::CallReturn { .. } => (),
+                Node::CallSource { .. } => (),
+                Node::CallReturn {
+                    call: (caller_blk, _caller_sub),
+                    return_: _,
+                } => {
+                    let call_tid = match caller_blk.term.jmps.get(0) {
+                        Some(call) => &call.tid,
+                        _ => continue,
+                    };
+                    let (state_before_call, state_before_return) =
+                        match self.computation.get_node_value(node) {
+                            Some(NodeValue::CallFlowCombinator {
+                                call_stub: Some(state_before_call),
+                                interprocedural_flow: Some(state_before_return),
+                            }) => (state_before_call, state_before_return),
+                            _ => continue,
+                        };
+                    let id_to_data_map = context.create_callee_id_to_caller_data_map(
+                        state_before_call,
+                        state_before_return,
+                        call_tid,
+                    );
+                    self.id_renaming_maps_at_calls
+                        .insert(call_tid.clone(), id_to_data_map);
+                }
            }
        }
    }
@@ -307,6 +339,18 @@ impl<'a> PointerInference<'a> {
        self.states_at_tids.get(jmp_tid)
    }
+    /// Get the mapping from callee IDs to caller values for the given call.
+    /// Returns `None` if no map is stored for `call_tid`; the maps are only filled after the fixpoint has been computed.
+    ///
+    /// Note that the maps may contain mappings from callee IDs to temporary caller IDs that get instantly removed from the caller
+    /// since they are not referenced in any caller object.
+    pub fn get_id_renaming_map_at_call_tid(
+        &self,
+        call_tid: &Tid,
+    ) -> Option<&BTreeMap<AbstractIdentifier, Data>> {
+        self.id_renaming_maps_at_calls.get(call_tid)
+    }
    /// Print information on dead ends in the control flow graph for debugging purposes.
    /// Ignore returns where there is no known caller stack id.
    #[allow(dead_code)]

--- a/src/cwe_checker_lib/src/checkers.rs
+++ b/src/cwe_checker_lib/src/checkers.rs
@@ -12,6 +12,7 @@ pub mod cwe_215;
 pub mod cwe_243;
 pub mod cwe_332;
 pub mod cwe_367;
+pub mod cwe_416;
 pub mod cwe_426;
 pub mod cwe_467;
 pub mod cwe_476;

--- a/src/cwe_checker_lib/src/checkers/cwe_119/context/mod.rs
+++ b/src/cwe_checker_lib/src/checkers/cwe_119/context/mod.rs
@@ -39,19 +39,19 @@ pub struct Context<'a> {
 impl<'a> Context<'a> {
    /// Create a new context object.
-    pub fn new(
+    pub fn new<'b>(
-        project: &'a Project,
+        analysis_results: &'b AnalysisResults<'a>,
-        graph: &'a Graph<'a>,
-        pointer_inference: &'a PointerInference<'a>,
-        function_signatures: &'a BTreeMap<Tid, FunctionSignature>,
-        analysis_results: &AnalysisResults,
        log_collector: crossbeam_channel::Sender<LogThreadMsg>,
-    ) -> Self {
+    ) -> Context<'a>
+    where
+        'a: 'b,
+    {
+        let project = analysis_results.project;
        Context {
            project,
-            graph,
+            graph: analysis_results.control_flow_graph,
-            pointer_inference,
+            pointer_inference: analysis_results.pointer_inference.unwrap(),
-            function_signatures,
+            function_signatures: analysis_results.function_signatures.unwrap(),
            callee_to_callsites_map: compute_callee_to_call_sites_map(project),
            param_replacement_map: compute_param_replacement_map(analysis_results),
            malloc_tid_to_object_size_map: compute_size_values_of_malloc_calls(analysis_results),

--- a/src/cwe_checker_lib/src/checkers/cwe_119/context/tests.rs
+++ b/src/cwe_checker_lib/src/checkers/cwe_119/context/tests.rs
@@ -18,14 +18,7 @@ impl<'a> Context<'a> {
        let analysis_results = Box::leak(analysis_results);
        let (log_collector, _) = crossbeam_channel::unbounded();
-        Context::new(
+        Context::new(analysis_results, log_collector)
-            analysis_results.project,
-            analysis_results.control_flow_graph,
-            analysis_results.pointer_inference.unwrap(),
-            analysis_results.function_signatures.unwrap(),
-            analysis_results,
-            log_collector,
-        )
    }
 }

--- a/src/cwe_checker_lib/src/checkers/cwe_119/mod.rs
+++ b/src/cwe_checker_lib/src/checkers/cwe_119/mod.rs
@@ -65,14 +65,7 @@ pub fn check_cwe(
 ) -> (Vec<LogMessage>, Vec<CweWarning>) {
    let log_thread = LogThread::spawn(LogThread::collect_and_deduplicate);
-    let context = Context::new(
+    let context = Context::new(analysis_results, log_thread.get_msg_sender());
-        analysis_results.project,
-        analysis_results.control_flow_graph,
-        analysis_results.pointer_inference.unwrap(),
-        analysis_results.function_signatures.unwrap(),
-        analysis_results,
-        log_thread.get_msg_sender(),
-    );
    let mut fixpoint_computation =
        crate::analysis::forward_interprocedural_fixpoint::create_computation(context, None);
@@ -91,5 +84,7 @@ pub fn check_cwe(
    fixpoint_computation.compute_with_max_steps(100);
-    log_thread.collect()
+    let (logs, mut cwe_warnings) = log_thread.collect();
+    cwe_warnings.sort();
+    (logs, cwe_warnings)
 }
--- a/src/cwe_checker_lib/src/checkers/cwe_416/context.rs
+++ b/src/cwe_checker_lib/src/checkers/cwe_416/context.rs
--- a/src/cwe_checker_lib/src/checkers/cwe_416/mod.rs
+++ b/src/cwe_checker_lib/src/checkers/cwe_416/mod.rs
+//! This module implements a check for CWE-415: Double Free and CWE-416: Use After Free.
+//!
+//! If a program tries to reference memory objects or other resources after they have been freed
+//! it can lead to crashes, unexpected behaviour or even arbitrary code execution.
+//! The same is true if the program tries to free the same resource more than once
+//! as this can lead to another unrelated resource being freed instead.
+//!
+//! See <https://cwe.mitre.org/data/definitions/415.html> and <https://cwe.mitre.org/data/definitions/416.html> for detailed descriptions.
+//!
+//! ## How the check works
+//!
+//! Using an interprocedural, bottom-up dataflow analysis
+//! based on the results of the [Pointer Inference analysis](`crate::analysis::pointer_inference`)
+//! the check keeps track of memory objects that have already been freed.
+//! If a pointer to an already freed object is used to access memory or provided as a parameter to another function
+//! then a CWE warning is generated.
+//! To prevent duplicate CWE warnings with the same root cause
+//! the check also keeps track of objects for which a CWE warning was already generated.
+//!
+//! ## False Positives
+//!
+//! - Since the analysis is not path-sensitive, infeasible paths may lead to false positives.
+//! - Any analysis imprecision of the pointer inference analysis
+//! that leads to assuming that a pointer can target more memory objects than it actually can target
+//! may lead to false positive CWE warnings in this check.
+//!
+//! ## False Negatives
+//!
+//! - Arrays of memory objects are not tracked by this analysis as we currently cannot distinguish different array elements in the analysis.
+//! Consequently, CWEs corresponding to arrays of memory objects are not detected.
+//! - Memory objects not tracked by the Pointer Inference analysis or pointer targets missed by the Pointer Inference
+//! may lead to missed CWEs in this check.
+//! - The analysis currently only tracks pointers to objects that were freed by a call to `free`.
+//! If a memory object is freed by another external function then this may lead to false negatives in this check.
+use crate::prelude::*;
+use crate::utils::log::CweWarning;
+use crate::utils::log::LogMessage;
+use crate::utils::log::LogThread;
+use crate::CweModule;
+/// The module name and version, together with the function (`check_cwe`) that runs the check.
+pub static CWE_MODULE: CweModule = CweModule {
+    name: "CWE416",
+    version: "0.3",
+    run: check_cwe,
+};
+mod context;
+use context::Context;
+mod state;
+use state::State;
+/// Run the check for CWE-415: Double Free and CWE-416: Use After Free.
+///
+/// This function prepares the bottom-up fixpoint computation
+/// by initializing the state at the start of each function with the empty state (i.e. no dangling objects known)
+/// and then executing the fixpoint algorithm via `compute_with_max_steps(100)`.
+/// Returns the collected log messages and the CWE warnings in sorted order. The `_config` parameter is not used.
+pub fn check_cwe(
+    analysis_results: &AnalysisResults,
+    _config: &serde_json::Value,
+) -> (Vec<LogMessage>, Vec<CweWarning>) {
+    let log_thread = LogThread::spawn(LogThread::collect_and_deduplicate);
+    let context = Context::new(analysis_results, log_thread.get_msg_sender());
+    let mut fixpoint_computation =
+        crate::analysis::forward_interprocedural_fixpoint::create_computation(context, None);
+    for (sub_tid, entry_node_of_sub) in
+        crate::analysis::graph::get_entry_nodes_of_subs(analysis_results.control_flow_graph)
+    {
+        let fn_start_state = State::new(sub_tid);
+        fixpoint_computation.set_node_value(
+            entry_node_of_sub,
+            crate::analysis::interprocedural_fixpoint_generic::NodeValue::Value(fn_start_state),
+        );
+    }
+    fixpoint_computation.compute_with_max_steps(100);
+    let (logs, mut cwe_warnings) = log_thread.collect();
+    cwe_warnings.sort();
+    (logs, cwe_warnings)
+}
--- a/src/cwe_checker_lib/src/checkers/cwe_416/state.rs
+++ b/src/cwe_checker_lib/src/checkers/cwe_416/state.rs
--- a/src/cwe_checker_lib/src/lib.rs
+++ b/src/cwe_checker_lib/src/lib.rs
@@ -123,6 +123,7 @@ pub fn get_modules() -> Vec<&'static CweModule> {
        &crate::checkers::cwe_243::CWE_MODULE,
        &crate::checkers::cwe_332::CWE_MODULE,
        &crate::checkers::cwe_367::CWE_MODULE,
+        &crate::checkers::cwe_416::CWE_MODULE,
        &crate::checkers::cwe_426::CWE_MODULE,
        &crate::checkers::cwe_467::CWE_MODULE,
        &crate::checkers::cwe_476::CWE_MODULE,

--- a/test/src/lib.rs
+++ b/test/src/lib.rs
@@ -423,7 +423,7 @@ mod tests {
    #[ignore]
    fn cwe_415() {
        let mut error_log = Vec::new();
-        let mut tests = all_test_cases("cwe_415", "Memory");
+        let mut tests = all_test_cases("cwe_415", "CWE416");
        mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
        mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
@@ -451,7 +451,7 @@ mod tests {
    #[ignore]
    fn cwe_416() {
        let mut error_log = Vec::new();
-        let mut tests = all_test_cases("cwe_416", "Memory");
+        let mut tests = all_test_cases("cwe_416", "CWE416");
        mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
        mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.