Unverified Commit ded50dd3 by Enkelmann Committed by GitHub

Add cmdline flag for printing analysis statistics (#210)

parent 8fb5f559
......@@ -46,6 +46,11 @@ struct CmdlineArgs {
#[structopt(long, short)]
quiet: bool,
/// Include various statistics in the debug log messages.
/// This can be helpful for assessing the analysis quality for the input binary.
#[structopt(long, conflicts_with("quiet"))]
statistics: bool,
/// Path to a configuration file for analysis of bare metal binaries.
///
/// If this option is set then the input binary is treated as a bare metal binary regardless of its format.
......@@ -172,7 +177,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
.iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name))
{
Some(analysis_results.compute_pointer_inference(&config["Memory"]))
Some(analysis_results.compute_pointer_inference(&config["Memory"], args.statistics))
} else {
None
};
......@@ -189,6 +194,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
&control_flow_graph,
serde_json::from_value(config["Memory"].clone()).unwrap(),
true,
false,
);
return;
}
......
......@@ -45,6 +45,7 @@ mod context;
pub mod object;
mod object_list;
mod state;
mod statistics;
use context::Context;
pub use state::State;
......@@ -94,6 +95,7 @@ impl<'a> PointerInference<'a> {
control_flow_graph: &'a Graph<'a>,
config: Config,
log_sender: crossbeam_channel::Sender<LogThreadMsg>,
print_stats: bool,
) -> PointerInference<'a> {
let context = Context::new(
project,
......@@ -148,10 +150,12 @@ impl<'a> PointerInference<'a> {
.collect();
let mut fixpoint_computation =
super::forward_interprocedural_fixpoint::create_computation(context, None);
let _ = log_sender.send(LogThreadMsg::Log(LogMessage::new_debug(format!(
"Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len()
))));
if print_stats {
let _ = log_sender.send(LogThreadMsg::Log(LogMessage::new_debug(format!(
"Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len()
))));
}
for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() {
fixpoint_computation.set_node_value(
start_node_index,
......@@ -239,7 +243,12 @@ impl<'a> PointerInference<'a> {
/// and do not have a state assigned to them yet, as additional entry points.
///
/// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as roots.
fn add_speculative_entry_points(&mut self, project: &Project, only_cfg_roots: bool) {
fn add_speculative_entry_points(
&mut self,
project: &Project,
only_cfg_roots: bool,
print_stats: bool,
) {
// TODO: Refactor the fixpoint computation structs, so that the project reference can be extracted from them.
let mut start_block_to_sub_map: HashMap<&Tid, &Term<Sub>> = HashMap::new();
for sub in project.program.term.subs.iter() {
......@@ -272,10 +281,12 @@ impl<'a> PointerInference<'a> {
}
}
}
self.log_debug(format!(
"Pointer Inference: Adding {} speculative entry points",
new_entry_points.len()
));
if print_stats {
self.log_debug(format!(
"Pointer Inference: Adding {} speculative entry points",
new_entry_points.len()
));
}
for entry in new_entry_points {
let sub_tid = start_block_to_sub_map
[&self.computation.get_graph()[entry].get_block().tid]
......@@ -319,17 +330,23 @@ impl<'a> PointerInference<'a> {
/// Compute the results of the pointer inference fixpoint algorithm.
/// Successively adds more functions as possible entry points
/// to increase code coverage.
pub fn compute_with_speculative_entry_points(&mut self, project: &Project) {
pub fn compute_with_speculative_entry_points(&mut self, project: &Project, print_stats: bool) {
self.compute();
self.count_blocks_with_state();
if print_stats {
self.count_blocks_with_state();
}
// Now compute again with speculative entry points added
self.add_speculative_entry_points(project, true);
self.add_speculative_entry_points(project, true, print_stats);
self.compute();
self.count_blocks_with_state();
if print_stats {
self.count_blocks_with_state();
}
// Now compute again with all missed functions as additional entry points
self.add_speculative_entry_points(project, false);
self.add_speculative_entry_points(project, false, print_stats);
self.compute();
self.count_blocks_with_state();
if print_stats {
self.count_blocks_with_state();
}
if !self.computation.has_stabilized() {
let worklist_size = self.computation.get_worklist().len();
......@@ -338,6 +355,9 @@ impl<'a> PointerInference<'a> {
worklist_size
));
}
if print_stats {
statistics::compute_and_log_mem_access_stats(self);
}
}
/// Print information on dead ends in the control flow graph for debugging purposes.
......@@ -438,6 +458,7 @@ pub fn run<'a>(
control_flow_graph: &'a Graph<'a>,
config: Config,
print_debug: bool,
print_stats: bool,
) -> PointerInference<'a> {
let logging_thread = LogThread::spawn(collect_all_logs);
......@@ -447,9 +468,10 @@ pub fn run<'a>(
control_flow_graph,
config,
logging_thread.get_msg_sender(),
print_stats,
);
computation.compute_with_speculative_entry_points(project);
computation.compute_with_speculative_entry_points(project, print_stats);
if print_debug {
computation.print_compact_json();
......@@ -513,7 +535,7 @@ mod tests {
deallocation_symbols: vec!["free".to_string()],
};
let (log_sender, _) = crossbeam_channel::unbounded();
PointerInference::new(project, mem_image, graph, config, log_sender)
PointerInference::new(project, mem_image, graph, config, log_sender, false)
}
pub fn set_node_value(&mut self, node_value: State, node_index: NodeIndex) {
......
use super::*;
use crate::abstract_domain::TryToBitvec;
use crossbeam_channel::Sender;
/// Compute statistics about how exactly the targets of memory accesses
/// through `Load` and `Store` instructions are tracked by the pointer inference analysis.
///
/// This is a thin wrapper that delegates to `MemAccessStats::compute_and_log`.
/// The results are not returned to the caller:
/// they are sent as debug log messages to the log collector of `pointer_inference`.
pub fn compute_and_log_mem_access_stats(pointer_inference: &PointerInference) {
MemAccessStats::compute_and_log(pointer_inference);
}
/// Counters describing how exactly the address values of `Load` and `Store` instructions are known.
///
/// All counters start at zero (via `Default`) and are incremented by `MemAccessStats::count_for_def`.
#[derive(Default)]
struct MemAccessStats {
/// The number of all `Load` and `Store` instructions that were inspected.
all_mem_ops: u64,
/// The number of memory operations whose address value is not `Top`, but reports `contains_top()`.
/// Logged as "partially tracked".
contains_top_flag: u64,
/// The number of memory operations whose address value is empty.
/// Logged as "errors".
empty_errors: u64,
/// The number of memory operations whose address value is `Top`.
/// Logged as "untracked".
is_only_top: u64,
/// The number of memory operations whose address value is an absolute value.
global_mem_access: u64,
/// The number of memory operations with a unique target whose ID equals the stack ID of the current state.
current_stack_access: u64,
/// The number of memory operations with a unique target that is not the stack of the current state.
/// Logged as "other (heap or stack) access".
non_current_stack_access: u64,
/// The number of memory operations with an exactly known target
/// whose offset could be converted to a single bitvector.
exact_target_with_exact_offset: u64,
/// The number of memory operations with an exactly known target whose offset is `Top`.
exact_target_with_top_offset: u64,
}
impl MemAccessStats {
    /// Compute the share of `part` in `whole` as a percentage.
    ///
    /// Returns `0.0` if `whole` is zero.
    /// Without this guard the floating point division would yield `NaN`,
    /// which would then be printed as `NaN%` in the log messages
    /// (e.g. for binaries without any analyzed memory operations).
    fn percentage(part: u64, whole: u64) -> f64 {
        if whole == 0 {
            0.0
        } else {
            part as f64 / whole as f64 * 100.
        }
    }

    /// The number of memory operations whose address value is neither `Top`,
    /// nor partially `Top`, nor empty.
    fn tracked_mem_ops(&self) -> u64 {
        // Saturating arithmetic guards against an underflow in case the subtracted categories overlap.
        // NOTE(review): `empty_errors` is counted independently of the two top-categories
        // in `count_for_def`; presumably an empty value is never also `Top` - confirm.
        self.all_mem_ops
            .saturating_sub(self.is_only_top)
            .saturating_sub(self.contains_top_flag)
            .saturating_sub(self.empty_errors)
    }

    /// The number of memory operations whose target is exactly known,
    /// i.e. the sum of global, current stack and other unique-target accesses.
    fn ops_with_exact_target_known(&self) -> u64 {
        self.global_mem_access + self.current_stack_access + self.non_current_stack_access
    }

    /// Send a debug log message containing the total number of memory operations
    /// and the percentages of tracked, partially tracked, untracked and erroneous ones.
    ///
    /// All percentages are relative to the total number of memory operations.
    /// A failure to send the message is deliberately ignored.
    fn print_general_stats(&self, log_collector: &Sender<LogThreadMsg>) {
        let msg = format!(
            "Pointer Inference: {} memory operations.\n\
            \t{:.2}% tracked,\n\
            \t{:.2}% partially tracked,\n\
            \t{:.2}% untracked,\n\
            \t{:.2}% errors.",
            self.all_mem_ops,
            Self::percentage(self.tracked_mem_ops(), self.all_mem_ops),
            Self::percentage(self.contains_top_flag, self.all_mem_ops),
            Self::percentage(self.is_only_top, self.all_mem_ops),
            Self::percentage(self.empty_errors, self.all_mem_ops),
        );
        let log_msg = LogMessage::new_debug(msg);
        let _ = log_collector.send(LogThreadMsg::Log(log_msg));
    }

    /// Send a debug log message with a breakdown of the memory operations with exactly known target.
    ///
    /// The first percentage is relative to the total number of memory operations.
    /// All following percentages are relative to the number of operations with exactly known target.
    /// A failure to send the message is deliberately ignored.
    fn print_tracked_mem_ops_stats(&self, log_collector: &Sender<LogThreadMsg>) {
        let exact_target_ops = self.ops_with_exact_target_known();
        let msg = format!(
            "Pointer Inference: {} ({:.2}%) memory operations with exactly known target. Of these are\n\
            \t{:.2}% global memory access,\n\
            \t{:.2}% current stack access,\n\
            \t{:.2}% other (heap or stack) access,\n\
            \t{:.2}% with constant offset,\n\
            \t{:.2}% with unknown offset.",
            exact_target_ops,
            Self::percentage(exact_target_ops, self.all_mem_ops),
            Self::percentage(self.global_mem_access, exact_target_ops),
            Self::percentage(self.current_stack_access, exact_target_ops),
            Self::percentage(self.non_current_stack_access, exact_target_ops),
            Self::percentage(self.exact_target_with_exact_offset, exact_target_ops),
            Self::percentage(self.exact_target_with_top_offset, exact_target_ops),
        );
        let log_msg = LogMessage::new_debug(msg);
        let _ = log_collector.send(LogThreadMsg::Log(log_msg));
    }

    /// Update the counters for a single `Def` term.
    ///
    /// For `Load` and `Store` instructions the address expression is evaluated in `state`
    /// and the resulting value is classified. `Assign` instructions are ignored.
    fn count_for_def(&mut self, state: &State, def: &Term<Def>) {
        use crate::abstract_domain::AbstractDomain;
        match &def.term {
            Def::Load { address, .. } | Def::Store { address, .. } => {
                self.all_mem_ops += 1;
                let address_val = state.eval(address);
                // First classification: how much of the address value is known at all.
                if address_val.is_empty() {
                    self.empty_errors += 1;
                }
                if address_val.is_top() {
                    self.is_only_top += 1;
                } else if address_val.contains_top() {
                    self.contains_top_flag += 1;
                }
                // Second classification: if the target is exactly known,
                // count the kind of target and the exactness of the offset.
                // Offsets that are neither a single bitvector nor `Top` are counted in neither offset category.
                if let Some(offset) = address_val.get_if_absolute_value() {
                    self.global_mem_access += 1;
                    if offset.try_to_bitvec().is_ok() {
                        self.exact_target_with_exact_offset += 1;
                    } else if offset.is_top() {
                        self.exact_target_with_top_offset += 1;
                    }
                } else if let Some((id, offset)) = address_val.get_if_unique_target() {
                    if *id == state.stack_id {
                        self.current_stack_access += 1;
                    } else {
                        self.non_current_stack_access += 1;
                    }
                    if offset.try_to_bitvec().is_ok() {
                        self.exact_target_with_exact_offset += 1;
                    } else if offset.is_top() {
                        self.exact_target_with_top_offset += 1;
                    }
                }
            }
            Def::Assign { .. } => (),
        }
    }

    /// Compute the memory access statistics for all blocks that have a state assigned
    /// and send them as debug log messages to the log collector of `pointer_inference`.
    ///
    /// For each `BlkStart` node with a node value the state is replayed through the `Def` terms of the block,
    /// so that every memory operation is counted with the state right before its execution.
    fn compute_and_log(pointer_inference: &PointerInference) {
        use crate::analysis::forward_interprocedural_fixpoint::Context as _;
        let mut stats = Self::default();
        let graph = pointer_inference.computation.get_graph();
        let context = pointer_inference.get_context();
        for (node_id, node) in graph.node_references() {
            if let Node::BlkStart(block, _sub) = node {
                if let Some(state) = pointer_inference.computation.get_node_value(node_id) {
                    let mut state = state.unwrap_value().clone();
                    for def in &block.term.defs {
                        stats.count_for_def(&state, def);
                        state = match context.update_def(&state, def) {
                            Some(new_state) => new_state,
                            // Without a successor state the remaining `Def`s of the block are not counted.
                            None => break,
                        }
                    }
                }
            }
        }
        stats.print_general_stats(&pointer_inference.log_collector);
        stats.print_tracked_mem_ops_stats(&pointer_inference.log_collector);
    }
}
......@@ -161,13 +161,18 @@ impl<'a> AnalysisResults<'a> {
/// Compute the pointer inference analysis.
/// The result gets returned, but not saved to the `AnalysisResults` struct itself.
pub fn compute_pointer_inference(&'a self, config: &serde_json::Value) -> PointerInference<'a> {
pub fn compute_pointer_inference(
&'a self,
config: &serde_json::Value,
print_stats: bool,
) -> PointerInference<'a> {
crate::analysis::pointer_inference::run(
self.project,
self.runtime_memory_image,
self.control_flow_graph,
serde_json::from_value(config.clone()).unwrap(),
false,
print_stats,
)
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment