Unverified Commit ded50dd3 by Enkelmann Committed by GitHub

Add cmdline flag for printing analysis statistics (#210)

parent 8fb5f559
......@@ -46,6 +46,11 @@ struct CmdlineArgs {
#[structopt(long, short)]
quiet: bool,
/// Include various statistics in the debug log messages.
/// This can be helpful for assessing the analysis quality for the input binary.
#[structopt(long, conflicts_with("quiet"))]
statistics: bool,
/// Path to a configuration file for analysis of bare metal binaries.
///
/// If this option is set then the input binary is treated as a bare metal binary regardless of its format.
......@@ -172,7 +177,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
.iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name))
{
Some(analysis_results.compute_pointer_inference(&config["Memory"]))
Some(analysis_results.compute_pointer_inference(&config["Memory"], args.statistics))
} else {
None
};
......@@ -189,6 +194,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
&control_flow_graph,
serde_json::from_value(config["Memory"].clone()).unwrap(),
true,
false,
);
return;
}
......
......@@ -45,6 +45,7 @@ mod context;
pub mod object;
mod object_list;
mod state;
mod statistics;
use context::Context;
pub use state::State;
......@@ -94,6 +95,7 @@ impl<'a> PointerInference<'a> {
control_flow_graph: &'a Graph<'a>,
config: Config,
log_sender: crossbeam_channel::Sender<LogThreadMsg>,
print_stats: bool,
) -> PointerInference<'a> {
let context = Context::new(
project,
......@@ -148,10 +150,12 @@ impl<'a> PointerInference<'a> {
.collect();
let mut fixpoint_computation =
super::forward_interprocedural_fixpoint::create_computation(context, None);
if print_stats {
let _ = log_sender.send(LogThreadMsg::Log(LogMessage::new_debug(format!(
"Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len()
))));
}
for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() {
fixpoint_computation.set_node_value(
start_node_index,
......@@ -239,7 +243,12 @@ impl<'a> PointerInference<'a> {
/// and do not have a state assigned to them yet, as additional entry points.
///
/// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as roots.
fn add_speculative_entry_points(&mut self, project: &Project, only_cfg_roots: bool) {
fn add_speculative_entry_points(
&mut self,
project: &Project,
only_cfg_roots: bool,
print_stats: bool,
) {
// TODO: Refactor the fixpoint computation structs, so that the project reference can be extracted from them.
let mut start_block_to_sub_map: HashMap<&Tid, &Term<Sub>> = HashMap::new();
for sub in project.program.term.subs.iter() {
......@@ -272,10 +281,12 @@ impl<'a> PointerInference<'a> {
}
}
}
if print_stats {
self.log_debug(format!(
"Pointer Inference: Adding {} speculative entry points",
new_entry_points.len()
));
}
for entry in new_entry_points {
let sub_tid = start_block_to_sub_map
[&self.computation.get_graph()[entry].get_block().tid]
......@@ -319,17 +330,23 @@ impl<'a> PointerInference<'a> {
/// Compute the results of the pointer inference fixpoint algorithm.
/// Successively adds more functions as possible entry points
/// to increase code coverage.
pub fn compute_with_speculative_entry_points(&mut self, project: &Project) {
pub fn compute_with_speculative_entry_points(&mut self, project: &Project, print_stats: bool) {
self.compute();
if print_stats {
self.count_blocks_with_state();
}
// Now compute again with speculative entry points added
self.add_speculative_entry_points(project, true);
self.add_speculative_entry_points(project, true, print_stats);
self.compute();
if print_stats {
self.count_blocks_with_state();
}
// Now compute again with all missed functions as additional entry points
self.add_speculative_entry_points(project, false);
self.add_speculative_entry_points(project, false, print_stats);
self.compute();
if print_stats {
self.count_blocks_with_state();
}
if !self.computation.has_stabilized() {
let worklist_size = self.computation.get_worklist().len();
......@@ -338,6 +355,9 @@ impl<'a> PointerInference<'a> {
worklist_size
));
}
if print_stats {
statistics::compute_and_log_mem_access_stats(self);
}
}
/// Print information on dead ends in the control flow graph for debugging purposes.
......@@ -438,6 +458,7 @@ pub fn run<'a>(
control_flow_graph: &'a Graph<'a>,
config: Config,
print_debug: bool,
print_stats: bool,
) -> PointerInference<'a> {
let logging_thread = LogThread::spawn(collect_all_logs);
......@@ -447,9 +468,10 @@ pub fn run<'a>(
control_flow_graph,
config,
logging_thread.get_msg_sender(),
print_stats,
);
computation.compute_with_speculative_entry_points(project);
computation.compute_with_speculative_entry_points(project, print_stats);
if print_debug {
computation.print_compact_json();
......@@ -513,7 +535,7 @@ mod tests {
deallocation_symbols: vec!["free".to_string()],
};
let (log_sender, _) = crossbeam_channel::unbounded();
PointerInference::new(project, mem_image, graph, config, log_sender)
PointerInference::new(project, mem_image, graph, config, log_sender, false)
}
pub fn set_node_value(&mut self, node_value: State, node_index: NodeIndex) {
......
use super::*;
use crate::abstract_domain::TryToBitvec;
use crossbeam_channel::Sender;
/// Compute various statistics about how exact memory accesses through `Load` and `Store` instructions are tracked.
/// Print the results as debug-log-messages.
///
/// This is a thin public entry point; the actual counting and log message
/// generation happens in [`MemAccessStats::compute_and_log`].
pub fn compute_and_log_mem_access_stats(pointer_inference: &PointerInference) {
    MemAccessStats::compute_and_log(pointer_inference);
}
/// Counters for memory access statistics.
/// All counters are incremented by [`MemAccessStats::count_for_def`] while
/// walking the `Load`/`Store` definitions of the analyzed program.
#[derive(Default)]
struct MemAccessStats {
    // Total number of `Load`/`Store` defs that were counted.
    all_mem_ops: u64,
    // Address value is partially untracked: it contains `Top` but is not only `Top`.
    contains_top_flag: u64,
    // Address evaluated to the empty value, i.e. an analysis error.
    empty_errors: u64,
    // Address value is completely untracked (`Top` only).
    is_only_top: u64,
    // Address is an absolute (global memory) value.
    global_mem_access: u64,
    // Address points into the stack frame of the current function
    // (unique target ID equals the state's `stack_id`).
    current_stack_access: u64,
    // Address has a unique target other than the current stack frame
    // (heap object or another function's stack frame).
    non_current_stack_access: u64,
    // Unique-target access where the offset is a concrete bitvector.
    exact_target_with_exact_offset: u64,
    // Unique-target access where the offset is `Top` (unknown).
    exact_target_with_top_offset: u64,
}
impl MemAccessStats {
    /// Number of memory operations whose address is at least partially tracked,
    /// i.e. neither pure `Top`, nor flagged as containing `Top`, nor empty.
    fn tracked_mem_ops(&self) -> u64 {
        let untracked_or_error = self.is_only_top + self.contains_top_flag + self.empty_errors;
        self.all_mem_ops - untracked_or_error
    }

    /// Number of memory operations where the accessed abstract memory object
    /// is uniquely known (global memory, current stack, or another unique target).
    fn ops_with_exact_target_known(&self) -> u64 {
        self.current_stack_access + self.non_current_stack_access + self.global_mem_access
    }

    /// Send one debug log message with the overall tracking percentages
    /// over all counted memory operations.
    fn print_general_stats(&self, log_collector: Sender<LogThreadMsg>) {
        let total = self.all_mem_ops as f64;
        let message = format!(
            "Pointer Inference: {} memory operations.\n\
            \t{:.2}% tracked,\n\
            \t{:.2}% partially tracked,\n\
            \t{:.2}% untracked,\n\
            \t{:.2}% errors.",
            self.all_mem_ops,
            self.tracked_mem_ops() as f64 / total * 100.,
            self.contains_top_flag as f64 / total * 100.,
            self.is_only_top as f64 / total * 100.,
            self.empty_errors as f64 / total * 100.,
        );
        // Best-effort logging: a closed channel is deliberately ignored.
        let _ = log_collector.send(LogThreadMsg::Log(LogMessage::new_debug(message)));
    }

    /// Send one debug log message with a breakdown of the memory operations
    /// whose target object is exactly known.
    fn print_tracked_mem_ops_stats(&self, log_collector: Sender<LogThreadMsg>) {
        let total = self.all_mem_ops as f64;
        let exactly_known = self.ops_with_exact_target_known();
        // Hoisted once instead of recomputing the sum for every percentage.
        let exactly_known_f = exactly_known as f64;
        let message = format!(
            "Pointer Inference: {} ({:.2}%) memory operations with exactly known target. Of these are\n\
            \t{:.2}% global memory access,\n\
            \t{:.2}% current stack access,\n\
            \t{:.2}% other (heap or stack) access,\n\
            \t{:.2}% with constant offset,\n\
            \t{:.2}% with unknown offset.",
            exactly_known,
            exactly_known_f / total * 100.,
            self.global_mem_access as f64 / exactly_known_f * 100.,
            self.current_stack_access as f64 / exactly_known_f * 100.,
            self.non_current_stack_access as f64 / exactly_known_f * 100.,
            self.exact_target_with_exact_offset as f64 / exactly_known_f * 100.,
            self.exact_target_with_top_offset as f64 / exactly_known_f * 100.,
        );
        // Best-effort logging: a closed channel is deliberately ignored.
        let _ = log_collector.send(LogThreadMsg::Log(LogMessage::new_debug(message)));
    }

    /// Update the counters for a single definition term.
    /// Only `Load` and `Store` defs are counted; `Assign` defs are ignored.
    fn count_for_def(&mut self, current_state: &State, def: &Term<Def>) {
        use crate::abstract_domain::AbstractDomain;
        // Extract the address expression of the memory access, if any.
        let address = match &def.term {
            Def::Load { address, .. } | Def::Store { address, .. } => address,
            Def::Assign { .. } => return,
        };
        self.all_mem_ops += 1;
        let address_value = current_state.eval(address);
        if address_value.is_empty() {
            self.empty_errors += 1;
        }
        if address_value.is_top() {
            self.is_only_top += 1;
        } else if address_value.contains_top() {
            self.contains_top_flag += 1;
        }
        if let Some(offset) = address_value.get_if_absolute_value() {
            // Access into global memory with an absolute address value.
            self.global_mem_access += 1;
            if offset.try_to_bitvec().is_ok() {
                self.exact_target_with_exact_offset += 1;
            } else if offset.is_top() {
                self.exact_target_with_top_offset += 1;
            }
        } else if let Some((id, offset)) = address_value.get_if_unique_target() {
            // Access into exactly one abstract memory object.
            if *id == current_state.stack_id {
                self.current_stack_access += 1;
            } else {
                self.non_current_stack_access += 1;
            }
            if offset.try_to_bitvec().is_ok() {
                self.exact_target_with_exact_offset += 1;
            } else if offset.is_top() {
                self.exact_target_with_top_offset += 1;
            }
        }
    }

    /// Walk all block-start nodes of the fixpoint graph, count the statistics
    /// for every def in every block with a known state, and log the results.
    fn compute_and_log(pointer_inference: &PointerInference) {
        use crate::analysis::forward_interprocedural_fixpoint::Context as _;
        let mut counters = Self::default();
        let context = pointer_inference.get_context();
        let graph = pointer_inference.computation.get_graph();
        for (node_id, node) in graph.node_references() {
            // Only block-start nodes carry the state before the first def.
            let block = match node {
                Node::BlkStart(block, _sub) => block,
                _ => continue,
            };
            if let Some(node_value) = pointer_inference.computation.get_node_value(node_id) {
                let mut current_state = node_value.unwrap_value().clone();
                for def in &block.term.defs {
                    counters.count_for_def(&current_state, def);
                    // Advance the state past the def; stop at unreachable code.
                    current_state = match context.update_def(&current_state, def) {
                        Some(next_state) => next_state,
                        None => break,
                    };
                }
            }
        }
        counters.print_general_stats(pointer_inference.log_collector.clone());
        counters.print_tracked_mem_ops_stats(pointer_inference.log_collector.clone());
    }
}
......@@ -161,13 +161,18 @@ impl<'a> AnalysisResults<'a> {
/// Compute the pointer inference analysis.
/// The result gets returned, but not saved to the `AnalysisResults` struct itself.
pub fn compute_pointer_inference(&'a self, config: &serde_json::Value) -> PointerInference<'a> {
pub fn compute_pointer_inference(
    &'a self,
    config: &serde_json::Value,
    print_stats: bool, // if true, analysis statistics are printed as debug log messages
) -> PointerInference<'a> {
    crate::analysis::pointer_inference::run(
        self.project,
        self.runtime_memory_image,
        self.control_flow_graph,
        // Panics if `config` does not deserialize into the pointer inference `Config`.
        serde_json::from_value(config.clone()).unwrap(),
        false, // print_debug: the compact JSON debug dump is not requested here
        print_stats,
    )
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.