Unverified Commit 09e4239c by Enkelmann Committed by GitHub

Refactor of the pointer inference module (#78)

parent e82c19ce
......@@ -43,7 +43,8 @@ impl<T: RegisterDomain> DataDomain<T> {
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
if let Self::Pointer(pointer) = self {
let remaining_targets: BTreeMap<AbstractIdentifier, T> = pointer
.iter_targets()
.targets()
.iter()
.filter_map(|(id, offset)| {
if ids_to_remove.get(id).is_none() {
Some((id.clone(), offset.clone()))
......@@ -98,8 +99,8 @@ impl<T: RegisterDomain> RegisterDomain for DataDomain<T> {
(Pointer(pointer), MINUS, Value(value)) => Pointer(pointer.sub_from_offset(value)),
(Pointer(pointer_lhs), MINUS, Pointer(pointer_rhs)) => {
if pointer_lhs.ids().len() == 1 && pointer_rhs.ids().len() == 1 {
let (id_lhs, offset_lhs) = pointer_lhs.iter_targets().next().unwrap();
let (id_rhs, offset_rhs) = pointer_rhs.iter_targets().next().unwrap();
let (id_lhs, offset_lhs) = pointer_lhs.targets().iter().next().unwrap();
let (id_rhs, offset_rhs) = pointer_rhs.targets().iter().next().unwrap();
if id_lhs == id_rhs {
Self::Value(offset_lhs.bin_op(MINUS, offset_rhs))
} else {
......@@ -214,7 +215,7 @@ impl<T: RegisterDomain + Display> DataDomain<T> {
match self {
Self::Top(bitsize) => serde_json::Value::String(format!("Top:{}", bitsize)),
Self::Pointer(pointer) => {
let target_iter = pointer.iter_targets().map(|(id, offset)| {
let target_iter = pointer.targets().iter().map(|(id, offset)| {
(
format!("{}", id),
serde_json::Value::String(format!("{}", offset)),
......
......@@ -108,9 +108,9 @@ impl<T: RegisterDomain> PointerDomain<T> {
result
}
/// Get an iterator over all possible abstract targets (together with the offset in the target) the pointer may point to.
pub fn iter_targets(&self) -> std::collections::btree_map::Iter<AbstractIdentifier, T> {
self.0.iter()
/// Get all possible abstract targets (together with the offset in the target) the pointer may point to.
pub fn targets(&self) -> &BTreeMap<AbstractIdentifier, T> {
&self.0
}
/// Get an iterator over all abstract IDs that the pointer may target.
......
......@@ -50,21 +50,26 @@ pub type Graph<'a> = DiGraph<Node<'a>, Edge<'a>>;
/// The node type of an interprocedural control flow graph
///
/// Each node carries a pointer to its associated block with it.
/// For `CallReturn`nodes the associated block is the callsite block (containing the call instruction)
/// and *not* the return block (containing the return instruction).
/// For `CallReturn` nodes the associated blocks are both the callsite block (containing the call instruction)
/// and the returning-from block (containing the return instruction).
#[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Node<'a> {
BlkStart(&'a Term<Blk>),
BlkEnd(&'a Term<Blk>),
CallReturn(&'a Term<Blk>),
CallReturn {
call: &'a Term<Blk>,
return_: &'a Term<Blk>,
},
}
impl<'a> Node<'a> {
/// Get the block corresponding to the node.
/// Get the block corresponding to the node for `BlkStart` and `BlkEnd` nodes.
/// Panics if called on a `CallReturn` node.
pub fn get_block(&self) -> &'a Term<Blk> {
use Node::*;
match self {
BlkStart(blk) | BlkEnd(blk) | CallReturn(blk) => blk,
BlkStart(blk) | BlkEnd(blk) => blk,
CallReturn { .. } => panic!("get_block() is undefined for CallReturn nodes"),
}
}
}
......@@ -74,7 +79,11 @@ impl<'a> std::fmt::Display for Node<'a> {
match self {
Self::BlkStart(block) => write!(formatter, "BlkStart @ {}", block.tid),
Self::BlkEnd(block) => write!(formatter, "BlkEnd @ {}", block.tid),
Self::CallReturn(block) => write!(formatter, "CallReturn (caller @ {})", block.tid),
Self::CallReturn { call, return_ } => write!(
formatter,
"CallReturn @ {} (caller @ {})",
return_.tid, call.tid
),
}
}
}
......@@ -203,8 +212,7 @@ impl<'a> GraphBuilder<'a> {
/// Add all outgoing edges generated by calls and interprocedural jumps for a specific block to the graph.
/// Return edges are *not* added by this function.
fn add_outgoing_edges(&mut self, node: NodeIndex) {
let block: &'a Term<Blk> = self.graph[node].get_block();
fn add_outgoing_edges(&mut self, node: NodeIndex, block: &'a Term<Blk>) {
let jumps = block.term.jmps.as_slice();
match jumps {
[] => (), // Blocks without jumps are dead ends corresponding to control flow reconstruction errors.
......@@ -231,13 +239,17 @@ impl<'a> GraphBuilder<'a> {
}
for (call_node, return_to_node) in self.return_addresses[&return_from_sub.tid].iter() {
let call_block = self.graph[*call_node].get_block();
let return_from_block = self.graph[return_source].get_block();
let call_term = call_block
.term
.jmps
.iter()
.find(|jump| matches!(jump.term.kind, JmpKind::Call(_)))
.unwrap();
let cr_combine_node = self.graph.add_node(Node::CallReturn(call_block));
let cr_combine_node = self.graph.add_node(Node::CallReturn {
call: call_block,
return_: return_from_block,
});
self.graph
.add_edge(*call_node, cr_combine_node, Edge::CRCallStub);
self.graph
......@@ -267,8 +279,8 @@ impl<'a> GraphBuilder<'a> {
/// Add all non-return-instruction-related jump edges to the graph.
fn add_jump_and_call_edges(&mut self) {
for node in self.graph.node_indices() {
if let Node::BlkEnd(_) = self.graph[node] {
self.add_outgoing_edges(node);
if let Node::BlkEnd(block) = self.graph[node] {
self.add_outgoing_edges(node, block);
}
}
}
......@@ -298,8 +310,8 @@ pub fn get_indices_of_block_nodes<'a, I: Iterator<Item = &'a Tid>>(
let tids: HashSet<Tid> = block_tids.cloned().collect();
let mut tid_to_indices_map = HashMap::new();
for node_index in graph.node_indices() {
if let Some(tid) = tids.get(&graph[node_index].get_block().tid) {
if let Node::BlkStart(_block_term) = graph[node_index] {
if let Node::BlkStart(block_term) = graph[node_index] {
if let Some(tid) = tids.get(&block_term.tid) {
let start_index = node_index;
let end_index = graph.neighbors(start_index).next().unwrap();
tid_to_indices_map.insert(tid.clone(), (start_index, end_index));
......
......@@ -85,6 +85,7 @@ pub trait Context<'a> {
value: &Self::Value,
value_before_call: Option<&Self::Value>,
call_term: &Term<Jmp>,
return_term: &Term<Jmp>,
) -> Option<Self::Value>;
/// Transition function for calls to functions not contained in the binary.
......@@ -158,9 +159,10 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
) -> Option<Self::NodeValue> {
let graph = self.context.get_graph();
let (start_node, end_node) = graph.edge_endpoints(edge).unwrap();
let block_term = graph.node_weight(start_node).unwrap().get_block();
match graph.edge_weight(edge).unwrap() {
Edge::Block => {
let block_term = graph.node_weight(start_node).unwrap().get_block();
let value = node_value.unwrap_value();
let defs = &block_term.term.defs;
let end_val = defs.iter().try_fold(value.clone(), |accum, def| {
......@@ -183,11 +185,18 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
Edge::CRCombine(call_term) => match node_value {
NodeValue::Value(_) => panic!("Unexpected interprocedural fixpoint graph state"),
NodeValue::CallReturnCombinator { call, return_ } => {
let return_from_block = match graph.node_weight(start_node) {
Some(Node::CallReturn { call: _, return_ }) => return_,
_ => panic!("Malformed Control flow graph"),
};
let return_from_jmp = &return_from_block.term.jmps[0];
if let Some(return_value) = return_ {
match self
.context
.update_return(return_value, call.as_ref(), call_term)
{
match self.context.update_return(
return_value,
call.as_ref(),
call_term,
return_from_jmp,
) {
Some(val) => Some(NodeValue::Value(val)),
None => None,
}
......
//! The pointer inference analysis.
//!
//! The goal of the pointer inference analysis is to keep track of all memory objects and pointers
//! that the program knows about at specific program points during execution.
//! Possible memory management errors, like access to memory that may already have been freed,
//! are reported to the user.
//!
//! Keep in mind that the analysis operates on a best-effort basis.
//! In cases where we cannot know
//! whether an error is due to an error in the memory management of the program under analysis
//! or due to inexactness of the pointer inference analysis itself,
//! we try to treat it as the more likely (but not necessarily true) case of the two.
use super::interprocedural_fixpoint::{Computation, NodeValue};
use crate::abstract_domain::{BitvectorDomain, DataDomain};
use crate::analysis::graph::{Graph, Node};
......@@ -16,15 +29,20 @@ mod state;
use context::Context;
use state::State;
/// The version number of the analysis.
const VERSION: &str = "0.1";
/// The abstract domain type for representing register values.
type Data = DataDomain<BitvectorDomain>;
/// A wrapper struct for the pointer inference computation object.
///
/// Bundles the fixpoint computation with the sending end of the channel used for log messages.
pub struct PointerInference<'a> {
/// The fixpoint computation, parameterized over the pointer inference `Context`.
computation: Computation<'a, Context<'a>>,
/// Sending end of a channel carrying `LogMessage` values.
log_collector: crossbeam_channel::Sender<LogMessage>,
}
impl<'a> PointerInference<'a> {
/// Generate a new pointer inference computation for a project.
pub fn new(
project: &'a Project,
cwe_sender: crossbeam_channel::Sender<CweWarning>,
......@@ -90,12 +108,14 @@ impl<'a> PointerInference<'a> {
}
}
/// Compute the fixpoint of the pointer inference analysis.
///
/// The fixpoint algorithm is run with a `max_steps` bound to prevent infinite loops.
/// The bound passed to `compute_with_max_steps` is currently hard-coded to 100.
pub fn compute(&mut self) {
self.computation.compute_with_max_steps(100); // TODO: make max_steps configurable!
}
/// Print results serialized as YAML to stdout
pub fn print_yaml(&self) {
// Print results serialized as YAML to stdout
let graph = self.computation.get_graph();
for (node_index, value) in self.computation.node_values().iter() {
let node = graph.node_weight(*node_index).unwrap();
......@@ -111,6 +131,9 @@ impl<'a> PointerInference<'a> {
}
}
/// Generate a compacted json representation of the results.
/// Note that this output cannot be used for serialization/deserialization,
/// but is only intended for user output.
pub fn generate_compact_json(&self) -> serde_json::Value {
let graph = self.computation.get_graph();
let mut json_nodes = serde_json::Map::new();
......@@ -136,7 +159,7 @@ impl<'a> PointerInference<'a> {
/// Since indirect jumps and calls are not handled yet (TODO: change that),
/// the analysis may miss a *lot* of code in some cases.
/// To remedy this somewhat,
/// we mark all function starts, that are also roots in the control flow graph,
/// we mark all function starts, that are also roots in the control flow graph
/// and do not have a state assigned to them yet, as additional entry points.
///
/// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as roots.
......@@ -192,6 +215,8 @@ impl<'a> PointerInference<'a> {
}
}
/// Print the number of blocks that have a state associated to them.
/// Intended for debug purposes.
fn count_blocks_with_state(&self) {
let graph = self.computation.get_graph();
let mut stateful_blocks: i64 = 0;
......@@ -220,6 +245,8 @@ impl<'a> PointerInference<'a> {
}
}
/// Generate and execute the pointer inference analysis.
/// Returns a vector of all found CWE warnings and a vector of all log messages generated during analysis.
pub fn run(project: &Project, print_debug: bool) -> (Vec<CweWarning>, Vec<String>) {
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, log_receiver) = crossbeam_channel::unbounded();
......@@ -256,6 +283,7 @@ pub fn run(project: &Project, print_debug: bool) -> (Vec<CweWarning>, Vec<String
)
}
/// Collect CWE warnings from the receiver until the channel is closed. Then return them.
fn collect_cwe_warnings(receiver: crossbeam_channel::Receiver<CweWarning>) -> Vec<CweWarning> {
let mut collected_warnings = HashMap::new();
while let Ok(warning) = receiver.recv() {
......@@ -272,6 +300,7 @@ fn collect_cwe_warnings(receiver: crossbeam_channel::Receiver<CweWarning>) -> Ve
.collect()
}
/// Collect log messages from the receiver until the channel is closed. Then return them.
fn collect_logs(receiver: crossbeam_channel::Receiver<LogMessage>) -> Vec<String> {
let mut logs_with_address = HashMap::new();
let mut general_logs = Vec::new();
......
......@@ -29,7 +29,7 @@ fn run_pointer_inference_and_print_debug(program_jsonbuilder_val: ocaml::Value)
let project: Project =
serde_json::from_value(program_json).expect("Project deserialization failed");
crate::analysis::pointer_inference::run(&project, true); // TODO: This discard all CweWarnings and log messages. Change that?
crate::analysis::pointer_inference::run(&project, true); // Note: This discard all CweWarnings and log messages.
}
caml!(rs_run_pointer_inference_and_print_debug(program_jsonbuilder_val) {
......
......@@ -82,8 +82,6 @@ pub struct Program {
pub entry_points: Vec<Tid>,
}
// TODO: Add deserialization from Ocaml to the FFI module for project!
// TODO: Add other CPU-architecture specific data to this struct!
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
pub program: Term<Program>,
......@@ -93,6 +91,12 @@ pub struct Project {
pub parameter_registers: Vec<String>,
}
impl Project {
    /// Return the bitsize used for pointers in this project.
    ///
    /// The value is taken from the bitsize of the stack pointer register.
    ///
    /// # Panics
    ///
    /// Panics if the bitsize of the stack pointer register cannot be determined.
    pub fn get_pointer_bitsize(&self) -> BitSize {
        // A project without a well-defined stack pointer size is a broken invariant,
        // so state that invariant in the panic message instead of a bare `unwrap`.
        self.stack_pointer_register
            .bitsize()
            .expect("the stack pointer register of a project must have a known bitsize")
    }
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Arg {
pub var: Variable,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment