Unverified Commit ded50dd3 by Enkelmann Committed by GitHub

Add cmdline flag for printing analysis statistics (#210)

parent 8fb5f559
...@@ -46,6 +46,11 @@ struct CmdlineArgs { ...@@ -46,6 +46,11 @@ struct CmdlineArgs {
#[structopt(long, short)] #[structopt(long, short)]
quiet: bool, quiet: bool,
/// Include various statistics in the debug log messages.
/// This can be helpful for assessing the analysis quality for the input binary.
#[structopt(long, conflicts_with("quiet"))]
statistics: bool,
/// Path to a configuration file for analysis of bare metal binaries. /// Path to a configuration file for analysis of bare metal binaries.
/// ///
/// If this option is set then the input binary is treated as a bare metal binary regardless of its format. /// If this option is set then the input binary is treated as a bare metal binary regardless of its format.
...@@ -172,7 +177,7 @@ fn run_with_ghidra(args: &CmdlineArgs) { ...@@ -172,7 +177,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
.iter() .iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name)) .any(|module| modules_depending_on_pointer_inference.contains(&module.name))
{ {
Some(analysis_results.compute_pointer_inference(&config["Memory"])) Some(analysis_results.compute_pointer_inference(&config["Memory"], args.statistics))
} else { } else {
None None
}; };
...@@ -189,6 +194,7 @@ fn run_with_ghidra(args: &CmdlineArgs) { ...@@ -189,6 +194,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
&control_flow_graph, &control_flow_graph,
serde_json::from_value(config["Memory"].clone()).unwrap(), serde_json::from_value(config["Memory"].clone()).unwrap(),
true, true,
false,
); );
return; return;
} }
......
...@@ -45,6 +45,7 @@ mod context; ...@@ -45,6 +45,7 @@ mod context;
pub mod object; pub mod object;
mod object_list; mod object_list;
mod state; mod state;
mod statistics;
use context::Context; use context::Context;
pub use state::State; pub use state::State;
...@@ -94,6 +95,7 @@ impl<'a> PointerInference<'a> { ...@@ -94,6 +95,7 @@ impl<'a> PointerInference<'a> {
control_flow_graph: &'a Graph<'a>, control_flow_graph: &'a Graph<'a>,
config: Config, config: Config,
log_sender: crossbeam_channel::Sender<LogThreadMsg>, log_sender: crossbeam_channel::Sender<LogThreadMsg>,
print_stats: bool,
) -> PointerInference<'a> { ) -> PointerInference<'a> {
let context = Context::new( let context = Context::new(
project, project,
...@@ -148,10 +150,12 @@ impl<'a> PointerInference<'a> { ...@@ -148,10 +150,12 @@ impl<'a> PointerInference<'a> {
.collect(); .collect();
let mut fixpoint_computation = let mut fixpoint_computation =
super::forward_interprocedural_fixpoint::create_computation(context, None); super::forward_interprocedural_fixpoint::create_computation(context, None);
if print_stats {
let _ = log_sender.send(LogThreadMsg::Log(LogMessage::new_debug(format!( let _ = log_sender.send(LogThreadMsg::Log(LogMessage::new_debug(format!(
"Pointer Inference: Adding {} entry points", "Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len() entry_sub_to_entry_node_map.len()
)))); ))));
}
for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() { for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() {
fixpoint_computation.set_node_value( fixpoint_computation.set_node_value(
start_node_index, start_node_index,
...@@ -239,7 +243,12 @@ impl<'a> PointerInference<'a> { ...@@ -239,7 +243,12 @@ impl<'a> PointerInference<'a> {
/// and do not have a state assigned to them yet, as additional entry points. /// and do not have a state assigned to them yet, as additional entry points.
/// ///
/// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as roots. /// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as roots.
fn add_speculative_entry_points(&mut self, project: &Project, only_cfg_roots: bool) { fn add_speculative_entry_points(
&mut self,
project: &Project,
only_cfg_roots: bool,
print_stats: bool,
) {
// TODO: Refactor the fixpoint computation structs, so that the project reference can be extracted from them. // TODO: Refactor the fixpoint computation structs, so that the project reference can be extracted from them.
let mut start_block_to_sub_map: HashMap<&Tid, &Term<Sub>> = HashMap::new(); let mut start_block_to_sub_map: HashMap<&Tid, &Term<Sub>> = HashMap::new();
for sub in project.program.term.subs.iter() { for sub in project.program.term.subs.iter() {
...@@ -272,10 +281,12 @@ impl<'a> PointerInference<'a> { ...@@ -272,10 +281,12 @@ impl<'a> PointerInference<'a> {
} }
} }
} }
if print_stats {
self.log_debug(format!( self.log_debug(format!(
"Pointer Inference: Adding {} speculative entry points", "Pointer Inference: Adding {} speculative entry points",
new_entry_points.len() new_entry_points.len()
)); ));
}
for entry in new_entry_points { for entry in new_entry_points {
let sub_tid = start_block_to_sub_map let sub_tid = start_block_to_sub_map
[&self.computation.get_graph()[entry].get_block().tid] [&self.computation.get_graph()[entry].get_block().tid]
...@@ -319,17 +330,23 @@ impl<'a> PointerInference<'a> { ...@@ -319,17 +330,23 @@ impl<'a> PointerInference<'a> {
/// Compute the results of the pointer inference fixpoint algorithm. /// Compute the results of the pointer inference fixpoint algorithm.
/// Successively adds more functions as possible entry points /// Successively adds more functions as possible entry points
/// to increase code coverage. /// to increase code coverage.
pub fn compute_with_speculative_entry_points(&mut self, project: &Project) { pub fn compute_with_speculative_entry_points(&mut self, project: &Project, print_stats: bool) {
self.compute(); self.compute();
if print_stats {
self.count_blocks_with_state(); self.count_blocks_with_state();
}
// Now compute again with speculative entry points added // Now compute again with speculative entry points added
self.add_speculative_entry_points(project, true); self.add_speculative_entry_points(project, true, print_stats);
self.compute(); self.compute();
if print_stats {
self.count_blocks_with_state(); self.count_blocks_with_state();
}
// Now compute again with all missed functions as additional entry points // Now compute again with all missed functions as additional entry points
self.add_speculative_entry_points(project, false); self.add_speculative_entry_points(project, false, print_stats);
self.compute(); self.compute();
if print_stats {
self.count_blocks_with_state(); self.count_blocks_with_state();
}
if !self.computation.has_stabilized() { if !self.computation.has_stabilized() {
let worklist_size = self.computation.get_worklist().len(); let worklist_size = self.computation.get_worklist().len();
...@@ -338,6 +355,9 @@ impl<'a> PointerInference<'a> { ...@@ -338,6 +355,9 @@ impl<'a> PointerInference<'a> {
worklist_size worklist_size
)); ));
} }
if print_stats {
statistics::compute_and_log_mem_access_stats(self);
}
} }
/// Print information on dead ends in the control flow graph for debugging purposes. /// Print information on dead ends in the control flow graph for debugging purposes.
...@@ -438,6 +458,7 @@ pub fn run<'a>( ...@@ -438,6 +458,7 @@ pub fn run<'a>(
control_flow_graph: &'a Graph<'a>, control_flow_graph: &'a Graph<'a>,
config: Config, config: Config,
print_debug: bool, print_debug: bool,
print_stats: bool,
) -> PointerInference<'a> { ) -> PointerInference<'a> {
let logging_thread = LogThread::spawn(collect_all_logs); let logging_thread = LogThread::spawn(collect_all_logs);
...@@ -447,9 +468,10 @@ pub fn run<'a>( ...@@ -447,9 +468,10 @@ pub fn run<'a>(
control_flow_graph, control_flow_graph,
config, config,
logging_thread.get_msg_sender(), logging_thread.get_msg_sender(),
print_stats,
); );
computation.compute_with_speculative_entry_points(project); computation.compute_with_speculative_entry_points(project, print_stats);
if print_debug { if print_debug {
computation.print_compact_json(); computation.print_compact_json();
...@@ -513,7 +535,7 @@ mod tests { ...@@ -513,7 +535,7 @@ mod tests {
deallocation_symbols: vec!["free".to_string()], deallocation_symbols: vec!["free".to_string()],
}; };
let (log_sender, _) = crossbeam_channel::unbounded(); let (log_sender, _) = crossbeam_channel::unbounded();
PointerInference::new(project, mem_image, graph, config, log_sender) PointerInference::new(project, mem_image, graph, config, log_sender, false)
} }
pub fn set_node_value(&mut self, node_value: State, node_index: NodeIndex) { pub fn set_node_value(&mut self, node_value: State, node_index: NodeIndex) {
......
use super::*;
use crate::abstract_domain::TryToBitvec;
use crossbeam_channel::Sender;
/// Compute various statistics about how exact memory accesses through `Load` and `Store` instructions are tracked.
/// Print the results as debug-log-messages.
///
/// This is the only public entry point of the module;
/// the actual computation is done by the private [`MemAccessStats`] struct.
pub fn compute_and_log_mem_access_stats(pointer_inference: &PointerInference) {
    MemAccessStats::compute_and_log(pointer_inference);
}
/// Counters for classifying how well the addresses of `Load`/`Store` instructions
/// are tracked by the pointer inference analysis.
/// All counters start at zero (via `Default`) and are incremented per visited memory operation.
#[derive(Default)]
struct MemAccessStats {
    // Total number of `Load` and `Store` definitions visited.
    all_mem_ops: u64,
    // Address value contains a `Top` component but is not only `Top`.
    contains_top_flag: u64,
    // Address evaluated to the empty value, i.e. an analysis error.
    empty_errors: u64,
    // Address value is only `Top`, i.e. completely untracked.
    is_only_top: u64,
    // Address is an absolute value, i.e. an access into global memory.
    global_mem_access: u64,
    // Address has a unique target that is the stack frame of the current function.
    current_stack_access: u64,
    // Address has a unique target other than the current stack frame (heap or other stack).
    non_current_stack_access: u64,
    // Exactly known target object and the offset converts to a concrete bitvector.
    exact_target_with_exact_offset: u64,
    // Exactly known target object but the offset into it is `Top`.
    exact_target_with_top_offset: u64,
}
impl MemAccessStats {
    /// Compute `part` as a percentage of `whole`.
    ///
    /// Returns `0.0` when `whole` is zero instead of producing `NaN`,
    /// so that statistics for binaries without (tracked) memory operations
    /// still print readable log messages.
    fn percent(part: u64, whole: u64) -> f64 {
        if whole == 0 {
            0.
        } else {
            part as f64 / whole as f64 * 100.
        }
    }

    /// Number of memory operations whose address was tracked exactly,
    /// i.e. neither erroneous, nor (partially) `Top`.
    fn tracked_mem_ops(&self) -> u64 {
        // Use saturating subtraction: the subtracted categories are counted by
        // independent checks in `count_for_def`, so an unexpected overlap must not
        // cause an integer underflow panic in debug builds.
        self.all_mem_ops
            .saturating_sub(self.is_only_top)
            .saturating_sub(self.contains_top_flag)
            .saturating_sub(self.empty_errors)
    }

    /// Number of memory operations where the accessed memory object is exactly known.
    fn ops_with_exact_target_known(&self) -> u64 {
        self.global_mem_access + self.current_stack_access + self.non_current_stack_access
    }

    /// Send a debug log message with the overall tracking quality:
    /// how many operations are fully tracked, partially tracked (address contains `Top`),
    /// untracked (address is only `Top`), or errors (address evaluated to the empty value).
    fn print_general_stats(&self, log_collector: Sender<LogThreadMsg>) {
        let msg = format!(
            "Pointer Inference: {} memory operations.\n\
            \t{:.2}% tracked,\n\
            \t{:.2}% partially tracked,\n\
            \t{:.2}% untracked,\n\
            \t{:.2}% errors.",
            self.all_mem_ops,
            Self::percent(self.tracked_mem_ops(), self.all_mem_ops),
            Self::percent(self.contains_top_flag, self.all_mem_ops),
            Self::percent(self.is_only_top, self.all_mem_ops),
            Self::percent(self.empty_errors, self.all_mem_ops),
        );
        let log_msg = LogMessage::new_debug(msg);
        let _ = log_collector.send(LogThreadMsg::Log(log_msg));
    }

    /// Send a debug log message with a breakdown of the operations with exactly known target:
    /// which kind of memory object is accessed and whether the offset into it is exactly known.
    fn print_tracked_mem_ops_stats(&self, log_collector: Sender<LogThreadMsg>) {
        // Hoist the repeatedly used denominator out of the format arguments.
        let known_targets = self.ops_with_exact_target_known();
        let msg = format!(
            "Pointer Inference: {} ({:.2}%) memory operations with exactly known target. Of these are\n\
            \t{:.2}% global memory access,\n\
            \t{:.2}% current stack access,\n\
            \t{:.2}% other (heap or stack) access,\n\
            \t{:.2}% with constant offset,\n\
            \t{:.2}% with unknown offset.",
            known_targets,
            Self::percent(known_targets, self.all_mem_ops),
            Self::percent(self.global_mem_access, known_targets),
            Self::percent(self.current_stack_access, known_targets),
            Self::percent(self.non_current_stack_access, known_targets),
            Self::percent(self.exact_target_with_exact_offset, known_targets),
            Self::percent(self.exact_target_with_top_offset, known_targets),
        );
        let log_msg = LogMessage::new_debug(msg);
        let _ = log_collector.send(LogThreadMsg::Log(log_msg));
    }

    /// Update the counters for a single definition term,
    /// given the abstract state directly before the definition.
    /// Only `Load` and `Store` definitions are counted; `Assign` is ignored.
    fn count_for_def(&mut self, state: &State, def: &Term<Def>) {
        use crate::abstract_domain::AbstractDomain;
        match &def.term {
            Def::Load { address, .. } | Def::Store { address, .. } => {
                self.all_mem_ops += 1;
                let address_val = state.eval(address);
                if address_val.is_empty() {
                    self.empty_errors += 1;
                }
                if address_val.is_top() {
                    self.is_only_top += 1;
                } else if address_val.contains_top() {
                    self.contains_top_flag += 1;
                }
                if let Some(offset) = address_val.get_if_absolute_value() {
                    // Absolute address, i.e. an access into global memory.
                    self.global_mem_access += 1;
                    if offset.try_to_bitvec().is_ok() {
                        self.exact_target_with_exact_offset += 1;
                    } else if offset.is_top() {
                        self.exact_target_with_top_offset += 1;
                    }
                } else if let Some((id, offset)) = address_val.get_if_unique_target() {
                    // Exactly one known target memory object.
                    if *id == state.stack_id {
                        self.current_stack_access += 1;
                    } else {
                        self.non_current_stack_access += 1;
                    }
                    if offset.try_to_bitvec().is_ok() {
                        self.exact_target_with_exact_offset += 1;
                    } else if offset.is_top() {
                        self.exact_target_with_top_offset += 1;
                    }
                }
            }
            Def::Assign { .. } => (),
        }
    }

    /// Compute the memory access statistics over all block-start nodes that have
    /// an abstract state assigned to them and send the results
    /// as debug log messages through the log collector of the pointer inference.
    fn compute_and_log(pointer_inference: &PointerInference) {
        use crate::analysis::forward_interprocedural_fixpoint::Context as _;
        let mut stats = Self::default();
        let graph = pointer_inference.computation.get_graph();
        let context = pointer_inference.get_context();
        for (node_id, node) in graph.node_references() {
            if let Node::BlkStart(block, _sub) = node {
                if let Some(state) = pointer_inference.computation.get_node_value(node_id) {
                    // State at the start of the block; updated after each definition
                    // so that every definition is counted with its own predecessor state.
                    let mut state = state.unwrap_value().clone();
                    for def in &block.term.defs {
                        stats.count_for_def(&state, def);
                        state = match context.update_def(&state, def) {
                            Some(new_state) => new_state,
                            // No successor state could be computed;
                            // stop processing the remaining definitions of this block.
                            None => break,
                        }
                    }
                }
            }
        }
        stats.print_general_stats(pointer_inference.log_collector.clone());
        stats.print_tracked_mem_ops_stats(pointer_inference.log_collector.clone());
    }
}
...@@ -161,13 +161,18 @@ impl<'a> AnalysisResults<'a> { ...@@ -161,13 +161,18 @@ impl<'a> AnalysisResults<'a> {
/// Compute the pointer inference analysis. /// Compute the pointer inference analysis.
/// The result gets returned, but not saved to the `AnalysisResults` struct itself. /// The result gets returned, but not saved to the `AnalysisResults` struct itself.
pub fn compute_pointer_inference(&'a self, config: &serde_json::Value) -> PointerInference<'a> { pub fn compute_pointer_inference(
&'a self,
config: &serde_json::Value,
print_stats: bool,
) -> PointerInference<'a> {
crate::analysis::pointer_inference::run( crate::analysis::pointer_inference::run(
self.project, self.project,
self.runtime_memory_image, self.runtime_memory_image,
self.control_flow_graph, self.control_flow_graph,
serde_json::from_value(config.clone()).unwrap(), serde_json::from_value(config.clone()).unwrap(),
false, false,
print_stats,
) )
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment