Commit 4b6ca3be by Enkelmann Committed by Enkelmann

Support basic blocks that are contained in more than one function.

parent c585d9de
......@@ -52,13 +52,18 @@ pub type Graph<'a> = DiGraph<Node<'a>, Edge<'a>>;
/// Each node carries a pointer to its associated block with it.
/// For `CallReturn`nodes the associated blocks are both the callsite block (containing the call instruction)
/// and the returning-from block (containing the return instruction).
///
/// Basic blocks are allowed to be contained in more than one `Sub`.
/// In the control flow graph such basic blocks occur once per subroutine they are contained in.
/// For this reason, the nodes also carry a pointer to the corresponding subroutine with them
/// to allow unambigous node identification.
#[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Node<'a> {
BlkStart(&'a Term<Blk>),
BlkEnd(&'a Term<Blk>),
BlkStart(&'a Term<Blk>, &'a Term<Sub>),
BlkEnd(&'a Term<Blk>, &'a Term<Sub>),
CallReturn {
call: &'a Term<Blk>,
return_: &'a Term<Blk>,
call: (&'a Term<Blk>, &'a Term<Sub>),
return_: (&'a Term<Blk>, &'a Term<Sub>),
},
}
......@@ -68,7 +73,7 @@ impl<'a> Node<'a> {
pub fn get_block(&self) -> &'a Term<Blk> {
use Node::*;
match self {
BlkStart(blk) | BlkEnd(blk) => blk,
BlkStart(blk, _sub) | BlkEnd(blk, _sub) => blk,
CallReturn { .. } => panic!("get_block() is undefined for CallReturn nodes"),
}
}
......@@ -77,12 +82,16 @@ impl<'a> Node<'a> {
impl<'a> std::fmt::Display for Node<'a> {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::BlkStart(block) => write!(formatter, "BlkStart @ {}", block.tid),
Self::BlkEnd(block) => write!(formatter, "BlkEnd @ {}", block.tid),
Self::BlkStart(block, sub) => {
write!(formatter, "BlkStart @ {} (sub {})", block.tid, sub.tid)
}
Self::BlkEnd(block, sub) => {
write!(formatter, "BlkEnd @ {} (sub {})", block.tid, sub.tid)
}
Self::CallReturn { call, return_ } => write!(
formatter,
"CallReturn @ {} (caller @ {})",
return_.tid, call.tid
"CallReturn @ {} (sub {}) (caller @ {} (sub {}))",
return_.0.tid, return_.1.tid, call.0.tid, call.1.tid
),
}
}
......@@ -112,10 +121,16 @@ struct GraphBuilder<'a> {
program: &'a Term<Program>,
extern_subs: HashSet<Tid>,
graph: Graph<'a>,
/// Denotes the NodeIndices of possible jump targets
jump_targets: HashMap<Tid, (NodeIndex, NodeIndex)>,
/// Denotes the NodeIndices of possible call targets
call_targets: HashMap<Tid, (NodeIndex, NodeIndex)>,
/// Denotes the NodeIndices of possible intraprocedural jump targets.
/// The keys are of the form (block_tid, sub_tid).
/// The values are of the form (BlkStart-node-index, BlkEnd-node-index).
jump_targets: HashMap<(Tid, Tid), (NodeIndex, NodeIndex)>,
/// for each function the list of return addresses of the corresponding call sites
return_addresses: HashMap<Tid, Vec<(NodeIndex, NodeIndex)>>,
/// A list of `BlkEnd` nodes for which outgoing edges still have to be added to the graph.
block_worklist: Vec<NodeIndex>,
}
impl<'a> GraphBuilder<'a> {
......@@ -125,35 +140,46 @@ impl<'a> GraphBuilder<'a> {
program,
extern_subs,
graph: Graph::new(),
call_targets: HashMap::new(),
jump_targets: HashMap::new(),
return_addresses: HashMap::new(),
block_worklist: Vec::new(),
}
}
/// add start and end nodes of a block and the connecting edge
fn add_block(&mut self, block: &'a Term<Blk>) {
let start = self.graph.add_node(Node::BlkStart(block));
let end = self.graph.add_node(Node::BlkEnd(block));
self.jump_targets.insert(block.tid.clone(), (start, end));
/// Add start and end nodes of a block and the connecting edge.
/// Also add the end node to the `block_worklist`.
fn add_block(&mut self, block: &'a Term<Blk>, sub: &'a Term<Sub>) -> (NodeIndex, NodeIndex) {
let start = self.graph.add_node(Node::BlkStart(block, sub));
let end = self.graph.add_node(Node::BlkEnd(block, sub));
self.jump_targets
.insert((block.tid.clone(), sub.tid.clone()), (start, end));
self.graph.add_edge(start, end, Edge::Block);
self.block_worklist.push(end);
(start, end)
}
/// add all blocks of the program to the graph
/// Add all blocks of the program to the graph.
///
/// Each block is only added once,
/// i.e. for blocks contained in more than one function the extra nodes have to be added separately later.
/// The `sub` a block is associated with is the `sub` that the block is contained in in the `program` struct.
fn add_program_blocks(&mut self) {
let subs = self.program.term.subs.iter();
let blocks = subs.map(|sub| sub.term.blocks.iter()).flatten();
for block in blocks {
self.add_block(block);
for sub in subs {
for block in sub.term.blocks.iter() {
self.add_block(block, sub);
}
}
}
/// add all subs to the jump targets so that call instructions can be linked to the starting block of the corresponding sub.
/// add all subs to the call targets so that call instructions can be linked to the starting block of the corresponding sub.
fn add_subs_to_jump_targets(&mut self) {
for sub in self.program.term.subs.iter() {
if !sub.term.blocks.is_empty() {
let start_block = &sub.term.blocks[0];
let target_index = self.jump_targets[&start_block.tid];
self.jump_targets.insert(sub.tid.clone(), target_index);
let target_index = self.jump_targets[&(start_block.tid.clone(), sub.tid.clone())];
self.call_targets.insert(sub.tid.clone(), target_index);
}
// TODO: Generate Log-Message for Subs without blocks.
}
......@@ -166,38 +192,62 @@ impl<'a> GraphBuilder<'a> {
jump: &'a Term<Jmp>,
untaken_conditional: Option<&'a Term<Jmp>>,
) {
let sub_term = match self.graph[source] {
Node::BlkEnd(_source_block, sub_term) => sub_term,
_ => panic!(),
};
match &jump.term {
Jmp::Branch(tid)
| Jmp::CBranch {
target: tid,
condition: _,
} => {
self.graph.add_edge(
source,
self.jump_targets[&tid].0,
Edge::Jump(jump, untaken_conditional),
);
if let Some((target_node, _)) =
self.jump_targets.get(&(tid.clone(), sub_term.tid.clone()))
{
self.graph.add_edge(
source,
*target_node,
Edge::Jump(jump, untaken_conditional),
);
} else {
let target_block = self.program.term.find_block(tid).unwrap();
let (target_node, _) = self.add_block(target_block, sub_term);
self.graph
.add_edge(source, target_node, Edge::Jump(jump, untaken_conditional));
}
}
Jmp::BranchInd(_) => (), // TODO: add handling of indirect edges!
Jmp::Call { target, return_ } => {
// first make sure that the return block exists
let return_to_node_option = if let Some(return_tid) = return_ {
if let Some((return_to_node, _)) = self
.jump_targets
.get(&(return_tid.clone(), sub_term.tid.clone()))
{
Some(*return_to_node)
} else {
let return_block = self.program.term.find_block(return_tid).unwrap();
Some(self.add_block(return_block, sub_term).0)
}
} else {
None
};
// now add the call edge
if self.extern_subs.contains(target) {
if let Some(return_tid) = return_ {
self.graph.add_edge(
source,
self.jump_targets[&return_tid].0,
Edge::ExternCallStub(jump),
);
if let Some(return_to_node) = return_to_node_option {
self.graph
.add_edge(source, return_to_node, Edge::ExternCallStub(jump));
}
} else {
if let Some(target_node) = self.jump_targets.get(&target) {
self.graph.add_edge(source, target_node.0, Edge::Call(jump));
if let Some((target_node, _)) = self.call_targets.get(&target) {
self.graph.add_edge(source, *target_node, Edge::Call(jump));
} // TODO: Log message for the else-case?
if let Some(ref return_tid) = return_ {
let return_index = self.jump_targets[return_tid].0;
if let Some(return_node) = return_to_node_option {
self.return_addresses
.entry(target.clone())
.and_modify(|vec| vec.push((source, return_index)))
.or_insert_with(|| vec![(source, return_index)]);
.and_modify(|vec| vec.push((source, return_node)))
.or_insert_with(|| vec![(source, return_node)]);
}
}
}
......@@ -218,12 +268,12 @@ impl<'a> GraphBuilder<'a> {
}
}
/// Add all outgoing edges generated by calls and interprocedural jumps for a specific block to the graph.
/// Add all outgoing edges generated by calls and intraprocedural jumps for a specific block to the graph.
/// Return edges are *not* added by this function.
fn add_outgoing_edges(&mut self, node: NodeIndex, block: &'a Term<Blk>) {
let jumps = block.term.jmps.as_slice();
match jumps {
[] => (), // Blocks without jumps are dead ends corresponding to control flow reconstruction errors.
[] => (), // Blocks without jumps are dead ends corresponding to control flow reconstruction errors or user-inserted dead ends.
[jump] => self.add_jump_edge(node, jump, None),
[if_jump, else_jump] => {
self.add_jump_edge(node, if_jump, None);
......@@ -239,14 +289,17 @@ impl<'a> GraphBuilder<'a> {
/// - an edge from the CallReturn node to the return-to site
fn add_call_return_node_and_edges(
&mut self,
return_from_sub: &Term<Sub>,
return_from_sub: &'a Term<Sub>,
return_source: NodeIndex,
) {
if self.return_addresses.get(&return_from_sub.tid).is_none() {
return;
}
for (call_node, return_to_node) in self.return_addresses[&return_from_sub.tid].iter() {
let call_block = self.graph[*call_node].get_block();
let (call_block, caller_sub) = match self.graph[*call_node] {
Node::BlkEnd(block, sub) => (block, sub),
_ => panic!(),
};
let return_from_block = self.graph[return_source].get_block();
let call_term = call_block
.term
......@@ -255,8 +308,8 @@ impl<'a> GraphBuilder<'a> {
.find(|jump| matches!(jump.term, Jmp::Call{..}))
.unwrap();
let cr_combine_node = self.graph.add_node(Node::CallReturn {
call: call_block,
return_: return_from_block,
call: (call_block, caller_sub),
return_: (return_from_block, return_from_sub),
});
self.graph
.add_edge(*call_node, cr_combine_node, Edge::CRCallStub);
......@@ -269,26 +322,31 @@ impl<'a> GraphBuilder<'a> {
/// Add all return instruction related edges and nodes to the graph (for all return instructions).
fn add_return_edges(&mut self) {
for sub in &self.program.term.subs {
for block in &sub.term.blocks {
let mut return_from_vec = Vec::new();
for node in self.graph.node_indices() {
if let Node::BlkEnd(block, sub) = self.graph[node] {
if block
.term
.jmps
.iter()
.any(|jmp| matches!(jmp.term, Jmp::Return(_)))
{
let return_from_node = self.jump_targets[&block.tid].1;
self.add_call_return_node_and_edges(sub, return_from_node);
return_from_vec.push((node, sub));
}
}
}
for (return_from_node, return_from_sub) in return_from_vec {
self.add_call_return_node_and_edges(return_from_sub, return_from_node);
}
}
/// Add all non-return-instruction-related jump edges to the graph.
fn add_jump_and_call_edges(&mut self) {
for node in self.graph.node_indices() {
if let Node::BlkEnd(block) = self.graph[node] {
self.add_outgoing_edges(node, block);
while !self.block_worklist.is_empty() {
let node = self.block_worklist.pop().unwrap();
match self.graph[node] {
Node::BlkEnd(block, _) => self.add_outgoing_edges(node, block),
_ => panic!(),
}
}
}
......@@ -309,26 +367,6 @@ pub fn get_program_cfg(program: &Term<Program>, extern_subs: HashSet<Tid>) -> Gr
builder.build()
}
/// For a given set of block TIDs generate a map from the TIDs to the indices of the BlkStart and BlkEnd nodes
/// corresponding to the block.
pub fn get_indices_of_block_nodes<'a, I: Iterator<Item = &'a Tid>>(
graph: &'a Graph,
block_tids: I,
) -> HashMap<Tid, (NodeIndex, NodeIndex)> {
let tids: HashSet<Tid> = block_tids.cloned().collect();
let mut tid_to_indices_map = HashMap::new();
for node_index in graph.node_indices() {
if let Node::BlkStart(block_term) = graph[node_index] {
if let Some(tid) = tids.get(&block_term.tid) {
let start_index = node_index;
let end_index = graph.neighbors(start_index).next().unwrap();
tid_to_indices_map.insert(tid.clone(), (start_index, end_index));
}
}
}
tid_to_indices_map
}
#[cfg(test)]
mod tests {
use super::*;
......@@ -371,10 +409,29 @@ mod tests {
blocks: vec![sub1_blk1, sub1_blk2],
},
};
let cond_jump = Jmp::CBranch {
target: Tid::new("sub1_blk1"),
condition: Expression::Const(Bitvector::from_u8(0)),
};
let cond_jump_term = Term {
tid: Tid::new("cond_jump"),
term: cond_jump,
};
let jump_term_2 = Term {
tid: Tid::new("jump2"),
term: Jmp::Branch(Tid::new("sub2_blk2")),
};
let sub2_blk1 = Term {
tid: Tid::new("sub2_blk1"),
term: Blk {
defs: Vec::new(),
jmps: vec![cond_jump_term, jump_term_2],
},
};
let sub2_blk2 = Term {
tid: Tid::new("sub2_blk2"),
term: Blk {
defs: Vec::new(),
jmps: vec![return_term],
},
};
......@@ -382,7 +439,7 @@ mod tests {
tid: Tid::new("sub2"),
term: Sub {
name: "sub2".to_string(),
blocks: vec![sub2_blk1],
blocks: vec![sub2_blk1, sub2_blk2],
},
};
let program = Term {
......@@ -401,7 +458,7 @@ mod tests {
let program = mock_program();
let graph = get_program_cfg(&program, HashSet::new());
println!("{}", serde_json::to_string_pretty(&graph).unwrap());
assert_eq!(graph.node_count(), 7);
assert_eq!(graph.edge_count(), 8);
assert_eq!(graph.node_count(), 14);
assert_eq!(graph.edge_count(), 18);
}
}
......@@ -185,7 +185,10 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
NodeValue::Value(_) => panic!("Unexpected interprocedural fixpoint graph state"),
NodeValue::CallReturnCombinator { call, return_ } => {
let return_from_block = match graph.node_weight(start_node) {
Some(Node::CallReturn { call: _, return_ }) => return_,
Some(Node::CallReturn {
call: _,
return_: (return_from_block, _),
}) => return_from_block,
_ => panic!("Malformed Control flow graph"),
};
let return_from_jmp = &return_from_block.term.jmps[0];
......
......@@ -140,7 +140,14 @@ fn context_problem_implementation() {
jmps: Vec::new(),
},
};
let target_node = crate::analysis::graph::Node::BlkStart(&target_block);
let sub = Term {
tid: Tid::new("caller_sub"),
term: Sub {
name: "caller_sub".into(),
blocks: vec![target_block.clone()],
},
};
let target_node = crate::analysis::graph::Node::BlkStart(&target_block, &sub);
let call = call_term("func");
let mut callee_state = context.update_call(&state, &call, &target_node).unwrap();
assert_eq!(callee_state.stack_id, new_id("func", "RSP"));
......
......@@ -77,20 +77,22 @@ impl<'a> PointerInference<'a> {
.collect();
for sub_tid in project.program.term.entry_points.iter() {
if let Some(sub) = subs.get(sub_tid) {
if let Some(entry_block) = sub.term.blocks.iter().next() {
if let Some(entry_block) = sub.term.blocks.get(0) {
entry_sub_to_entry_blocks_map.insert(sub_tid, entry_block.tid.clone());
}
}
}
let tid_to_graph_indices_map = super::graph::get_indices_of_block_nodes(
&context.graph,
entry_sub_to_entry_blocks_map.values(),
);
let mut tid_to_graph_indices_map = HashMap::new();
for node in context.graph.node_indices() {
if let super::graph::Node::BlkStart(block, sub) = context.graph[node] {
tid_to_graph_indices_map.insert((block.tid.clone(), sub.tid.clone()), node);
}
}
let entry_sub_to_entry_node_map: HashMap<Tid, NodeIndex> = entry_sub_to_entry_blocks_map
.into_iter()
.filter_map(|(sub_tid, block_tid)| {
if let Some((start_node_index, _end_node_index)) =
tid_to_graph_indices_map.get(&block_tid)
if let Some(start_node_index) =
tid_to_graph_indices_map.get(&(block_tid, sub_tid.clone()))
{
Some((sub_tid.clone(), *start_node_index))
} else {
......@@ -200,14 +202,14 @@ impl<'a> PointerInference<'a> {
let graph = self.computation.get_graph();
let mut new_entry_points = Vec::new();
for (node_id, node) in graph.node_references() {
if let Node::BlkStart(block) = node {
if !(start_block_to_sub_map.get(&block.tid).is_none()
|| self.computation.get_node_value(node_id).is_some()
|| only_cfg_roots
&& graph
if let Node::BlkStart(block, sub) = node {
if start_block_to_sub_map.get(&block.tid) == Some(sub)
&& self.computation.get_node_value(node_id).is_none()
&& (!only_cfg_roots
|| graph
.neighbors_directed(node_id, Direction::Incoming)
.next()
.is_some())
.is_none())
{
new_entry_points.push(node_id);
}
......@@ -239,7 +241,7 @@ impl<'a> PointerInference<'a> {
let mut stateful_blocks: i64 = 0;
let mut all_blocks: i64 = 0;
for (node_id, node) in graph.node_references() {
if let Node::BlkStart(_block) = node {
if let Node::BlkStart(_block, _sub) = node {
all_blocks += 1;
if self.computation.get_node_value(node_id).is_some() {
stateful_blocks += 1;
......
......@@ -213,7 +213,7 @@ impl AbstractObjectList {
possible_double_free_ids.push((id.clone(), error));
}
}
} else if let Some(id) = ids.iter().next() {
} else if let Some(id) = ids.get(0) {
if let Err(error) = self.objects.get_mut(&id).unwrap().0.mark_as_freed() {
possible_double_free_ids.push((id.clone(), error));
}
......
......@@ -217,6 +217,19 @@ pub struct Program {
pub entry_points: Vec<Tid>,
}
impl Program {
/// Find a block term by its term identifier.
/// WARNING: The function simply iterates through all blocks,
/// i.e. it is very inefficient for large projects!
pub fn find_block(&self, tid: &Tid) -> Option<&Term<Blk>> {
self.subs
.iter()
.map(|sub| sub.term.blocks.iter())
.flatten()
.find(|block| block.tid == *tid)
}
}
/// The `Project` struct is the main data structure representing a binary.
///
/// It contains information about the disassembled binary
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment