Unverified Commit 1ebd9bcb by Enkelmann Committed by GitHub

Generate call trace information for CWE-119 check (#365)

parent 9ad700d7
//! Generate call graphs out of a program term.
use std::collections::HashMap;
use crate::intermediate_representation::*;
use petgraph::graph::DiGraph;
use petgraph::{graph::DiGraph, graph::NodeIndex, visit::EdgeRef};
use std::collections::{BTreeSet, HashMap};
/// The graph type of a call graph
pub type CallGraph<'a> = DiGraph<Tid, &'a Term<Jmp>>;
......@@ -40,26 +39,105 @@ pub fn get_program_callgraph(program: &Term<Program>) -> CallGraph {
callgraph
}
/// Gather the TIDs of all calls that lie on a call sequence beginning in the function
/// `source_sub_tid` and ending in the function `target_sub_tid`.
///
/// # Panics
///
/// Panics if no node of the call graph carries `source_sub_tid`.
pub fn find_call_sequences_to_target(
    callgraph: &CallGraph,
    source_sub_tid: &Tid,
    target_sub_tid: &Tid,
) -> BTreeSet<Tid> {
    // Locate the graph node whose weight is the TID of the source function.
    let start_node = match callgraph
        .node_indices()
        .find(|&index| callgraph[index] == *source_sub_tid)
    {
        Some(index) => index,
        None => panic!("Function TID not found in call graph."),
    };
    // The recursive search starts out with an empty set of visited nodes.
    let visited = BTreeSet::new();
    find_call_sequences_from_node_to_target(callgraph, start_node, target_sub_tid, visited)
}
/// Recursive depth-first helper that collects the TIDs of all calls lying on a call sequence
/// from the function at `source_node` to the function `target_sub_tid`.
///
/// `visited_nodes` contains the nodes that the current search path has already descended into.
/// Such nodes are not entered again, so the recursion terminates on cyclic call graphs.
/// A call is part of the result if it either calls the target function directly
/// or leads to a function from which the search found at least one call sequence to the target.
fn find_call_sequences_from_node_to_target(
    callgraph: &CallGraph,
    source_node: NodeIndex,
    target_sub_tid: &Tid,
    visited_nodes: BTreeSet<NodeIndex>,
) -> BTreeSet<Tid> {
    let mut collected = BTreeSet::new();
    for call_edge in callgraph.edges_directed(source_node, petgraph::Direction::Outgoing) {
        let callee = call_edge.target();
        if callgraph[callee] == *target_sub_tid {
            // Direct call to the target function.
            collected.insert(call_edge.weight().tid.clone());
            continue;
        }
        if visited_nodes.contains(&callee) {
            // Already part of the current search path.
            continue;
        }
        // Every branch of the search gets its own copy of the visited set.
        let mut path_nodes = visited_nodes.clone();
        path_nodes.insert(callee);
        let nested = find_call_sequences_from_node_to_target(
            callgraph,
            callee,
            target_sub_tid,
            path_nodes,
        );
        if !nested.is_empty() {
            // The callee reaches the target, so this call is relevant, too.
            collected.insert(call_edge.weight().tid.clone());
            collected.extend(nested);
        }
    }
    collected
}
#[cfg(test)]
pub mod tests {
use super::*;
use std::collections::BTreeMap;
/// Build a mock function named `sub_tid` that contains one call for each entry of `call_targets`.
/// Every call is placed in a block of its own and receives a TID of the form
/// `<sub_tid>_call_<target>_<index>`, so that parallel edges in the call graph stay distinguishable.
fn mock_sub_with_calls(sub_tid: &str, call_targets: &[&str]) -> Term<Sub> {
    let mut sub = Sub::mock(sub_tid);
    let call_blocks = call_targets.iter().enumerate().map(|(index, &callee)| {
        let mut block = Blk::mock();
        block.term.jmps.push(Term {
            tid: Tid::new(format!("{}_call_{}_{}", sub_tid, callee, index)),
            term: Jmp::Call {
                target: Tid::new(callee),
                return_: None,
            },
        });
        block
    });
    sub.term.blocks.extend(call_blocks);
    sub
}
#[test]
fn test_find_call_sequences_to_target() {
    // Call graph under test: sub1 calls sub2 twice, sub2 calls sub3 and sub4 once each.
    let mut project = Project::mock_x64();
    project.program.term.subs = BTreeMap::from([
        (
            Tid::new("sub1"),
            mock_sub_with_calls("sub1", &["sub2", "sub2"]),
        ),
        (
            Tid::new("sub2"),
            mock_sub_with_calls("sub2", &["sub3", "sub4"]),
        ),
        (Tid::new("sub3"), mock_sub_with_calls("sub3", &[])),
        (Tid::new("sub4"), mock_sub_with_calls("sub4", &[])),
    ]);
    let callgraph = get_program_callgraph(&project.program);
    let source = Tid::new("sub1");
    let target = Tid::new("sub3");
    let call_tids: Vec<String> = find_call_sequences_to_target(&callgraph, &source, &target)
        .iter()
        .map(|tid| tid.to_string())
        .collect();
    // Note that the order of elements is important in the sense that it needs to be deterministic.
    // Comparing against the full list checks the number of elements and their order at once.
    assert_eq!(
        call_tids,
        ["sub1_call_sub2_0", "sub1_call_sub2_1", "sub2_call_sub3_0"]
    );
}
#[test]
fn test_get_program_callgraph() {
// Create a program with 2 functions and one call between them
let mut project = Project::mock_x64();
let mut caller = Sub::mock("caller");
let callee = Sub::mock("callee");
let call = Jmp::Call {
target: Tid::new("callee"),
return_: None,
};
let mut call_block = Blk::mock();
call_block.term.jmps.push(Term {
tid: Tid::new("call"),
term: call,
});
caller.term.blocks.push(call_block);
let caller = mock_sub_with_calls("caller", &["callee"]);
let callee = mock_sub_with_calls("callee", &[]);
project.program.term.subs.insert(Tid::new("caller"), caller);
project.program.term.subs.insert(Tid::new("callee"), callee);
// Test correctness of the call graph
......
use crate::abstract_domain::*;
use crate::analysis::callgraph::CallGraph;
use crate::analysis::function_signature::FunctionSignature;
use crate::analysis::graph::Graph;
use crate::analysis::pointer_inference::{Data, PointerInference};
......@@ -38,6 +39,8 @@ pub struct Context<'a> {
pub malloc_tid_to_object_size_map: HashMap<Tid, Data>,
/// A map that maps the TIDs of jump instructions to the function TID of the caller.
pub call_to_caller_fn_map: HashMap<Tid, Tid>,
/// The callgraph corresponding to the project.
pub callgraph: CallGraph<'a>,
/// A sender channel that can be used to collect logs in the corresponding logging thread.
pub log_collector: crossbeam_channel::Sender<LogThreadMsg>,
}
......@@ -52,6 +55,7 @@ impl<'a> Context<'a> {
'a: 'b,
{
let project = analysis_results.project;
let callgraph = crate::analysis::callgraph::get_program_callgraph(&project.program);
Context {
project,
graph: analysis_results.control_flow_graph,
......@@ -63,6 +67,7 @@ impl<'a> Context<'a> {
),
malloc_tid_to_object_size_map: compute_size_values_of_malloc_calls(analysis_results),
call_to_caller_fn_map: compute_call_to_caller_map(project),
callgraph,
log_collector,
}
}
......
......@@ -87,6 +87,18 @@ impl State {
) = context.compute_bounds_of_id(id, &self.stack_id)
{
out_of_bounds_access_warnings.push(format!("The object bound is based on the possible source value {:#} for the object ID.", source.to_json_compact()));
let call_sequence_tids = collect_tids_for_cwe_warning(
source.get_if_unique_target().unwrap().0,
self,
context,
);
out_of_bounds_access_warnings
.push(format!("Relevant callgraph TIDs: [{}]", call_sequence_tids));
} else {
out_of_bounds_access_warnings.push(format!(
"Relevant callgraph TIDs: [{}]",
self.stack_id.get_tid()
));
}
// Replace the bound with `Top` to prevent duplicate CWE warnings with the same root cause.
self.object_lower_bounds
......@@ -110,6 +122,18 @@ impl State {
) = context.compute_bounds_of_id(id, &self.stack_id)
{
out_of_bounds_access_warnings.push(format!("The object bound is based on the possible source value {:#} for the object ID.", source.to_json_compact()));
let call_sequence_tids = collect_tids_for_cwe_warning(
source.get_if_unique_target().unwrap().0,
self,
context,
);
out_of_bounds_access_warnings
.push(format!("Relevant callgraph TIDs: [{}]", call_sequence_tids));
} else {
out_of_bounds_access_warnings.push(format!(
"Relevant callgraph TIDs: [{}]",
self.stack_id.get_tid()
));
}
// Replace the bound with `Top` to prevent duplicate CWE warnings with the same root cause.
self.object_upper_bounds
......@@ -194,6 +218,58 @@ impl State {
}
}
/// Collect all relevant call sequence TIDs corresponding to a CWE warning.
/// This includes:
/// - The TID of a root function from which both the allocation site and the site of the CWE warning can be reached.
/// - All call TIDs that are relevant for reaching the allocation site from the root function.
/// - All call TIDs that are relevant for reaching the site of the CWE warning.
///
/// This list is complete in the sense that all possible paths in the call graph from the root function to the CWE warning site
/// are covered by these calls.
///
/// The resulting list is returned as a string with the TIDs separated by `", "`,
/// as it is currently only used for human-readable context information in the CWE warnings.
///
/// # Panics
///
/// Panics if `id` is not the stack ID of a function and no function containing its root call can be found.
/// The call graph search also panics if the root function is not a node of the call graph.
fn collect_tids_for_cwe_warning(
    id: &AbstractIdentifier,
    state: &State,
    context: &Context,
) -> String {
    use crate::analysis::callgraph::find_call_sequences_to_target;

    let current_fn_tid = state.stack_id.get_tid();
    let caller_tid = if context.project.program.term.subs.contains_key(id.get_tid()) {
        // The ID is the stack ID of some function.
        id.get_tid().clone()
    } else {
        // The ID corresponds to a malloc-like call.
        // The last path hint (if there is one) is the call contained in the root function.
        let root_call_tid = id.get_path_hints().last().unwrap_or_else(|| id.get_tid());
        context
            .project
            .program
            .term
            .find_sub_containing_jump(root_call_tid)
            .expect("Caller corresponding to call does not exist.")
    };
    // The root function always comes first, so the list is never empty.
    let mut tids = vec![caller_tid.clone()];
    tids.extend(id.get_path_hints().iter().cloned());
    if caller_tid != *current_fn_tid {
        // We also need the possible call sequences from the caller to the current function
        tids.extend(find_call_sequences_to_target(
            &context.callgraph,
            &caller_tid,
            current_fn_tid,
        ));
    }
    // Build a string out of the TID list.
    // `join` allocates the result once instead of re-formatting the accumulated string per element.
    tids.iter()
        .map(|tid| tid.to_string())
        .collect::<Vec<_>>()
        .join(", ")
}
#[cfg(test)]
pub mod tests {
use super::*;
......@@ -242,7 +318,7 @@ pub mod tests {
state
.check_address_access(&address, ByteSize::new(8), &context)
.len(),
1
2
);
// subsequent errors are suppressed
let address = Data::from_target(stack_id, Bitvector::from_i64(8).into());
......
......@@ -32,6 +32,22 @@ impl Program {
.flat_map(|(_, sub)| sub.term.blocks.iter())
.find(|block| block.tid == *tid)
}
/// Look up the sub that contains the jump instruction with the given TID
/// (call instructions are jump instructions, too).
/// Returns the TID of the first matching sub or `None` if no sub contains such a jump.
///
/// WARNING: This is a plain linear search through all jumps of all blocks of all subs,
/// i.e. it is very inefficient for large projects!
pub fn find_sub_containing_jump(&self, jmp_tid: &Tid) -> Option<Tid> {
    self.subs
        .values()
        .find(|sub| {
            sub.term
                .blocks
                .iter()
                .flat_map(|blk| blk.term.jmps.iter())
                .any(|jmp| jmp.tid == *jmp_tid)
        })
        .map(|sub| sub.tid.clone())
}
}
#[cfg(test)]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment