Unverified Commit 7841bf89 by Enkelmann Committed by GitHub

Fix return (#73)

Insert missing return statements into the control flow graph.
parent 5f9d78fb
......@@ -180,6 +180,9 @@ impl<'a> GraphBuilder<'a> {
return_from_sub: &Term<Sub>,
return_source: NodeIndex,
) {
if self.return_addresses.get(&return_from_sub.tid).is_none() {
return;
}
for (call_node, return_to_node) in self.return_addresses[&return_from_sub.tid].iter() {
let call_block = self.graph[*call_node].get_block();
let call_term = call_block
......
......@@ -204,7 +204,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
callee_state.set_register(
&self.project.stack_pointer_register,
super::data::PointerDomain::new(
callee_stack_id,
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap())
.into(),
)
......@@ -213,10 +213,14 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
Some(&call_term.tid),
);
// set the list of caller stack ids to only this caller id
callee_state.caller_ids = BTreeSet::new();
callee_state.caller_ids.insert(new_caller_stack_id.clone());
// remove non-referenced objects from the state
callee_state.caller_stack_ids = BTreeSet::new();
callee_state.caller_stack_ids.insert(new_caller_stack_id.clone());
// Remove non-referenced objects and objects, only the caller knows about, from the state.
callee_state.ids_known_to_caller = BTreeSet::new();
callee_state.remove_unreferenced_objects();
// all remaining objects, except for the callee stack id, are also known to the caller
callee_state.ids_known_to_caller = callee_state.memory.get_all_object_ids();
callee_state.ids_known_to_caller.remove(&callee_stack_id);
return callee_state;
} else {
......@@ -231,6 +235,11 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
state_before_call: Option<&State>,
call_term: &Term<Jmp>,
) -> Option<State> {
// TODO: For the long term we may have to replace the IDs representing callers with something
// that identifies the edge of the call and not just the callsite.
// When indirect calls are handled, the callsite alone is not a unique identifier anymore.
// This may lead to confusion if both caller and callee have the same ID in their respective caller_stack_id sets.
// we only return to functions with a value before the call to prevent returning to dead code
let state_before_call = match state_before_call {
Some(value) => value,
......@@ -244,8 +253,17 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
let callee_stack_id = &state_before_return.stack_id;
let stack_offset_on_call = self.get_current_stack_offset(state_before_call);
// Check whether state_before_return actually knows the caller_stack_id.
// If not, we are returning from a state that cannot correspond to this callsite.
if !state_before_return.caller_stack_ids.contains(&caller_stack_id) {
return None;
}
let mut state_after_return = state_before_return.clone();
state_after_return.remove_virtual_register();
// Remove the IDs of other callers not corresponding to this call
state_after_return.remove_other_caller_stack_ids(&caller_stack_id);
state_after_return.replace_abstract_id(
&caller_stack_id,
original_caller_stack_id,
......@@ -257,7 +275,11 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
&(-stack_offset_on_call.clone()),
);
state_after_return.stack_id = original_caller_stack_id.clone();
state_after_return.caller_ids = state_before_call.caller_ids.clone();
state_after_return.caller_stack_ids = state_before_call.caller_stack_ids.clone();
state_after_return.ids_known_to_caller = state_before_call.ids_known_to_caller.clone();
state_after_return.readd_caller_objects(state_before_call);
// remove non-referenced objects from the state
state_after_return.remove_unreferenced_objects();
......@@ -518,6 +540,21 @@ mod tests {
}
}
/// Build a `Def` term that adds the constant `value` to the register `name`
/// (i.e. `name = name + value`), using `tid_name` as the term ID.
/// Used by tests to simulate stack pointer adjustments.
fn reg_add_term(name: &str, value: i64, tid_name: &str) -> Term<Def> {
    let add_expr = Expression::BinOp {
        op: crate::bil::BinOpType::PLUS,
        lhs: Box::new(Expression::Var(register(name))),
        rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
    };
    Term {
        // `format!("{}", s)` was a needless indirection; `to_string` is equivalent.
        tid: Tid::new(tid_name.to_string()),
        term: Def {
            lhs: register(name),
            rhs: add_expr,
        }
    }
}
fn call_term(target_name: &str) -> Term<Jmp> {
let call = Call {
target: Label::Direct(Tid::new(target_name)),
......@@ -611,9 +648,9 @@ mod tests {
let call = call_term("func");
let mut callee_state = context.update_call(&state, &call, &target_node);
assert_eq!(callee_state.stack_id, new_id("func", "RSP"));
assert_eq!(callee_state.caller_ids.len(), 1);
assert_eq!(callee_state.caller_stack_ids.len(), 1);
assert_eq!(
callee_state.caller_ids.iter().next().unwrap(),
callee_state.caller_stack_ids.iter().next().unwrap(),
&new_id("call_func", "RSP")
);
......@@ -628,7 +665,7 @@ mod tests {
.update_return(&callee_state, Some(&state), &call)
.unwrap();
assert_eq!(return_state.stack_id, new_id("main", "RSP"));
assert_eq!(return_state.caller_ids, BTreeSet::new());
assert_eq!(return_state.caller_stack_ids, BTreeSet::new());
assert_eq!(
return_state.memory.get_internal_id_map(),
state.memory.get_internal_id_map()
......@@ -722,4 +759,50 @@ mod tests {
.unwrap()
.is_top());
}
#[test]
fn update_return() {
use crate::analysis::interprocedural_fixpoint::Problem;
use crate::analysis::pointer_inference::object::ObjectType;
use crate::analysis::pointer_inference::data::*;
// Set up an analysis context from the mock project; the log/CWE receivers are unused.
let project = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, cwe_sender, log_sender);
// Callee state at return time: RSP was adjusted by +8 relative to function entry.
let state_before_return = State::new(&register("RSP"), Tid::new("callee"));
let mut state_before_return = context.update_def(&state_before_return, &reg_add_term("RSP", 8, "stack_offset_on_return_adjustment"));
// The callee knows two possible callers: "call_callee" (the callsite under test) ...
let callsite_id = new_id("call_callee", "RSP");
state_before_return.memory.add_abstract_object(callsite_id.clone(), bv(0).into(), ObjectType::Stack, 64);
state_before_return.caller_stack_ids.insert(callsite_id.clone());
state_before_return.ids_known_to_caller.insert(callsite_id.clone());
// ... and "call_callee_other", which does NOT correspond to this return edge.
let other_callsite_id = new_id("call_callee_other", "RSP");
state_before_return.memory.add_abstract_object(other_callsite_id.clone(), bv(0).into(), ObjectType::Stack, 64);
state_before_return.caller_stack_ids.insert(other_callsite_id.clone());
state_before_return.ids_known_to_caller.insert(other_callsite_id.clone());
// RAX points into the other caller's stack frame; that target should not survive the return.
state_before_return.set_register(&register("RAX"), Data::Pointer(PointerDomain::new(new_id("call_callee_other", "RSP"), bv(-32)))).unwrap();
// Caller state at call time: RSP adjusted by -16; it knows its own caller ("caller_caller").
let state_before_call = State::new(&register("RSP"), Tid::new("original_caller_id"));
let mut state_before_call = context.update_def(&state_before_call, &reg_add_term("RSP", -16, "stack_offset_on_call_adjustment"));
let caller_caller_id = new_id("caller_caller", "RSP");
state_before_call.memory.add_abstract_object(caller_caller_id.clone(), bv(0).into(), ObjectType::Stack, 64);
state_before_call.caller_stack_ids.insert(caller_caller_id.clone());
state_before_call.ids_known_to_caller.insert(caller_caller_id.clone());
// Compute the state after returning through the "callee" call.
let state = context.update_return(&state_before_return, Some(&state_before_call), &call_term("callee")).unwrap();
let mut caller_caller_set = BTreeSet::new();
caller_caller_set.insert(caller_caller_id);
// After the return, only the caller's own caller IDs remain; the callee-side caller IDs are gone.
assert_eq!(state.ids_known_to_caller, caller_caller_set.clone());
assert_eq!(state.caller_stack_ids, caller_caller_set.clone());
assert_eq!(state.stack_id, new_id("original_caller_id", "RSP"));
// The callee state held 3 objects (own stack + 2 caller frames); the returned state holds only
// the caller's stack frame and the caller's-caller frame — the other callsite's frame was dropped.
assert!(state_before_return.memory.get_all_object_ids().len() == 3);
assert!(state.memory.get_all_object_ids().len() == 2);
assert!(state.memory.get_all_object_ids().get(&new_id("original_caller_id", "RSP")).is_some());
assert!(state.memory.get_all_object_ids().get(&new_id("caller_caller", "RSP")).is_some());
// RSP reflects the callee's +8 adjustment applied on top of the caller's frame:
// presumably -16 (at call) + 8 (in callee) = -8 relative to the original caller's stack ID.
assert!(state.get_register(&register("RSP")).is_ok());
let expected_rsp = Data::Pointer(PointerDomain::new(new_id("original_caller_id", "RSP"), bv(-8)));
assert_eq!(state.get_register(&register("RSP")).unwrap(), expected_rsp);
}
}
......@@ -38,6 +38,26 @@ impl Data {
BTreeSet::new()
}
}
/// If *self* is a pointer, remove all provided IDs from the target list of it.
/// If this would leave the pointer without any targets, replace it with Data::Top(..).
///
/// Non-pointer values are left untouched.
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
    // TODO: Some callers don't want to get Top(..) values. Probably has to be handled at the respective callsites.
    if let Data::Pointer(pointer) = self {
        // Keep only the targets that are not scheduled for removal.
        let remaining_targets: BTreeMap<AbstractIdentifier, BitvectorDomain> = pointer
            .iter_targets()
            .filter(|(id, _offset)| !ids_to_remove.contains(*id))
            .map(|(id, offset)| (id.clone(), offset.clone()))
            .collect();
        if remaining_targets.is_empty() {
            // A pointer without any targets carries no information, so degrade to Top.
            *self = Data::new_top(self.bitsize());
        } else {
            *self = Data::Pointer(PointerDomain::with_targets(remaining_targets));
        }
    }
}
}
impl Data {
......
......@@ -109,6 +109,19 @@ impl AbstractObject {
}
}
/// Remove the provided IDs from all possible target lists, including all pointers.
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
    match self {
        Self::Untracked(targets) => {
            // Filter the coarse target set down to the IDs that survive the removal.
            let surviving: BTreeSet<AbstractIdentifier> = targets
                .iter()
                .filter(|id| !ids_to_remove.contains(*id))
                .cloned()
                .collect();
            *targets = surviving;
        }
        // For tracked memory, delegate to the memory object's own ID removal.
        Self::Memory(mem) => mem.remove_ids(ids_to_remove),
    }
}
#[cfg(test)]
pub fn get_state(&self) -> Option<ObjectState> {
match self {
......@@ -238,6 +251,15 @@ impl AbstractObjectInfo {
} // else don't change the state
}
}
/// Remove the provided IDs from the target lists of all pointers in the memory object.
/// Also remove them from the pointer_targets list.
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
    // Drop the removed IDs from the summary set of possible pointer targets.
    let kept_targets: BTreeSet<AbstractIdentifier> = self
        .pointer_targets
        .iter()
        .filter(|id| !ids_to_remove.contains(*id))
        .cloned()
        .collect();
    self.pointer_targets = kept_targets;
    // Scrub the IDs from every value stored inside the memory region itself.
    self.memory
        .iter_values_mut()
        .for_each(|value| value.remove_ids(ids_to_remove));
}
}
impl AbstractDomain for AbstractObjectInfo {
......
......@@ -294,6 +294,11 @@ impl AbstractObjectList {
}
}
/// Return the set of all abstract identifiers that point into this object list.
pub fn get_all_object_ids(&self) -> BTreeSet<AbstractIdentifier> {
    self.ids.iter().map(|(id, _)| id.clone()).collect()
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
pub fn mark_mem_object_as_freed(
......@@ -364,6 +369,48 @@ impl AbstractObjectList {
pub fn get_num_objects(&self) -> usize {
self.objects.len()
}
/// Append those objects from another object list, whose abstract IDs are not known to self.
/// We also add all abstract IDs pointing to the added objects to the ID map.
pub fn append_unknown_objects(&mut self, other_object_list: &AbstractObjectList) {
    // Mark every object of `other_object_list` that `self` already knows via a shared ID.
    let mut objects_already_known = vec![false; other_object_list.objects.len()];
    for (id, (index, _offset)) in other_object_list.ids.iter() {
        if self.ids.contains_key(id) {
            objects_already_known[*index] = true;
        }
    }
    // Append the unknown objects, remembering where each one landed in `self.objects`.
    let mut old_to_new_index_map: BTreeMap<usize, usize> = BTreeMap::new();
    for (old_index, object) in other_object_list.objects.iter().enumerate() {
        if !objects_already_known[old_index] {
            old_to_new_index_map.insert(old_index, self.objects.len());
            self.objects.push(object.clone());
        }
    }
    // Register all IDs that point into one of the newly appended objects.
    // (Single map lookup instead of a `get(..).is_some()` check followed by indexing.)
    for (id, (old_index, offset)) in other_object_list.ids.iter() {
        if let Some(&new_index) = old_to_new_index_map.get(old_index) {
            self.ids.insert(id.clone(), (new_index, offset.clone()));
        }
    }
}
/// Remove the provided IDs as targets from all pointers in all objects.
/// Also forget whether the provided IDs point to objects in the object list.
///
/// This may leave objects without known IDs pointing to them.
/// This function does *not* trim these objects from the object list.
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
for object in self.objects.iter_mut() {
let object = Arc::make_mut(object);
object.remove_ids(ids_to_remove);
}
self.ids = self.ids.iter().filter_map(|(id, (index, offset))| {
if ids_to_remove.get(id).is_none() {
Some((id.clone(), (*index, offset.clone())))
} else {
None
}
}).collect();
}
}
impl AbstractObjectList {
......
......@@ -12,18 +12,25 @@ use std::collections::{BTreeMap, BTreeSet};
/// Notes:
/// - The *stack_id* is the identifier of the current stack frame.
/// Only reads and writes with offset less than 0 are permitted for it
/// - The *caller_ids* contain all known identifier of caller stack frames.
/// - The *caller_stack_ids* contain all known identifier of caller stack frames.
/// If a read to an offset >= 0 corresponding to the current stack frame happens, it is considered
/// a merge read to all caller stack frames.
/// A write to an offset >= 0 corresponding to the current stack frame writes to all caller stack frames.
/// - The caller_ids are given by the stack pointer at time of the call.
/// - The caller_stack_ids are given by the stack pointer at time of the call.
/// This way we can distinguish caller stack frames even if one function calls another several times.
/// - The ids_known_to_caller contains all ids directly known to some caller.
/// Objects referenced by these ids cannot be removed from the state, as some caller may have a reference to them.
/// This is not recursive, i.e. ids only known to the caller of the caller are not included.
/// If a caller does not pass a reference to a memory object to the callee (directly or indirectly),
/// it will not be included in ids_known_to_caller.
/// This way the caller can check on return, which memory objects could not have been accessed by the callee.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct State {
/// The abstract values currently assigned to the registers.
register: BTreeMap<Variable, Data>,
/// The list of all memory objects tracked by this state.
pub memory: AbstractObjectList,
/// Identifier of the current stack frame (see the struct-level notes above).
pub stack_id: AbstractIdentifier,
/// All known identifiers of caller stack frames (see the struct-level notes above).
pub caller_stack_ids: BTreeSet<AbstractIdentifier>,
/// All IDs directly known to some caller; objects referenced by these IDs
/// must not be removed from the state (see the struct-level notes above).
pub ids_known_to_caller: BTreeSet<AbstractIdentifier>,
}
impl State {
......@@ -50,7 +57,8 @@ impl State {
stack_register.bitsize().unwrap(),
),
stack_id,
caller_ids: BTreeSet::new(),
caller_stack_ids: BTreeSet::new(),
ids_known_to_caller: BTreeSet::new(),
}
}
......@@ -356,7 +364,8 @@ impl State {
register: merged_register,
memory: merged_memory_objects,
stack_id: self.stack_id.clone(),
caller_ids: self.caller_ids.union(&other.caller_ids).cloned().collect(),
caller_stack_ids: self.caller_stack_ids.union(&other.caller_stack_ids).cloned().collect(),
ids_known_to_caller: self.ids_known_to_caller.union(&other.ids_known_to_caller).cloned().collect(),
}
}
......@@ -375,8 +384,8 @@ impl State {
if *id == self.stack_id {
match offset {
BitvectorDomain::Value(offset_val) => {
if offset_val.try_to_i64().unwrap() >= 0 && self.caller_ids.len() > 0 {
for caller_id in self.caller_ids.iter() {
if offset_val.try_to_i64().unwrap() >= 0 && self.caller_stack_ids.len() > 0 {
for caller_id in self.caller_stack_ids.iter() {
new_targets.add_target(caller_id.clone(), offset.clone());
}
// Note that the id of the current stack frame was *not* added.
......@@ -385,7 +394,7 @@ impl State {
}
}
BitvectorDomain::Top(_bitsize) => {
for caller_id in self.caller_ids.iter() {
for caller_id in self.caller_stack_ids.iter() {
new_targets.add_target(caller_id.clone(), offset.clone());
}
// Note that we also add the id of the current stack frame
......@@ -427,9 +436,13 @@ impl State {
if &self.stack_id == old_id {
self.stack_id = new_id.clone();
}
if self.caller_ids.get(old_id).is_some() {
self.caller_ids.remove(old_id);
self.caller_ids.insert(new_id.clone());
if self.caller_stack_ids.get(old_id).is_some() {
self.caller_stack_ids.remove(old_id);
self.caller_stack_ids.insert(new_id.clone());
}
if self.ids_known_to_caller.get(old_id).is_some() {
self.ids_known_to_caller.remove(old_id);
self.ids_known_to_caller.insert(new_id.clone());
}
}
......@@ -440,7 +453,8 @@ impl State {
referenced_ids.append(&mut data.referenced_ids());
}
referenced_ids.insert(self.stack_id.clone());
referenced_ids.append(&mut self.caller_ids.clone());
referenced_ids.append(&mut self.caller_stack_ids.clone());
referenced_ids.append(&mut self.ids_known_to_caller.clone());
referenced_ids = self.add_recursively_referenced_ids_to_id_set(referenced_ids);
// remove unreferenced ids
self.memory.remove_unused_ids(&referenced_ids);
......@@ -510,6 +524,29 @@ impl State {
.filter(|(register, _value)| register.is_temp == false)
.collect();
}
/// Remove all caller stack IDs except the given `caller_id` from the state:
/// they are scrubbed from register values, from all memory objects,
/// and from the `ids_known_to_caller` set.
pub fn remove_other_caller_stack_ids(&mut self, caller_id: &AbstractIdentifier) {
    // Everything except the surviving caller ID is scheduled for removal.
    let mut ids_to_remove = self.caller_stack_ids.clone();
    ids_to_remove.remove(caller_id);
    // Scrub the doomed IDs from all pointer values held in registers ...
    self.register
        .values_mut()
        .for_each(|value| value.remove_ids(&ids_to_remove));
    // ... and from all pointers stored inside memory objects.
    self.memory.remove_ids(&ids_to_remove);
    // Only the given caller remains known afterwards.
    let mut remaining = BTreeSet::new();
    remaining.insert(caller_id.clone());
    self.caller_stack_ids = remaining;
    self.ids_known_to_caller = self
        .ids_known_to_caller
        .difference(&ids_to_remove)
        .cloned()
        .collect();
}
/// Add those objects from the caller_state to self, that are not known to self.
///
/// Since self does not know these objects, we assume that the current function could not have accessed
/// them in any way during execution.
/// This means they are unchanged from the moment of the call until the return from the call,
/// thus we can simply copy their object-state from the moment of the call.
pub fn readd_caller_objects(&mut self, caller_state: &State) {
// Delegates to the object list: only objects whose IDs self does not already track are appended.
self.memory.append_unknown_objects(&caller_state.memory);
}
}
impl State {
......@@ -529,9 +566,18 @@ impl State {
Value::String(format!("{}", self.stack_id)),
);
state_map.insert(
"caller_ids".into(),
"caller_stack_ids".into(),
Value::Array(
self.caller_stack_ids
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
state_map.insert(
"ids_known_to_caller".into(),
Value::Array(
self.caller_ids
self.ids_known_to_caller
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
......@@ -646,7 +692,7 @@ mod tests {
state
.memory
.add_abstract_object(new_id("caller".into()), bv(0), ObjectType::Stack, 64);
state.caller_ids.insert(new_id("caller".into()));
state.caller_stack_ids.insert(new_id("caller".into()));
state
.store_value(&stack_addr, &Data::Value(bv(15)))
.unwrap();
......
......@@ -221,11 +221,25 @@ let of_jmp_kind (kind: jmp_kind) (tid_map: word Tid.Map.t) : t =
]
(* Serialize a jump term into the output object format via build_object. *)
let of_jmp (jmp: Jmp.t) (tid_map: word Tid.Map.t) : t =
(* Since BAP 2.0 doesn't emit return statements anymore,
we have to check the is_return hint on the disassembled instruction
to correct the jump kind for return statements. *)
let is_return = match Term.get_attr jmp Disasm.insn with
| None -> false
| Some(insn) -> Insn.(is return) insn in
(* Rewrite indirect calls that the disassembler marked as returns into Ret jumps;
   every other jump kind is passed through unchanged. *)
let jmp_kind = if is_return then
match Jmp.kind jmp with
| Call(call) -> begin match Call.target call with
| Indirect(exp) -> Ret(Indirect(exp))
| _ -> Jmp.kind jmp
end
| _ -> Jmp.kind jmp
else
Jmp.kind jmp in
build_object [
("tid", of_tid (Term.tid jmp) tid_map);
("term", build_object [
(* Emit the condition only for guarded jumps; unconditional jumps get null. *)
("condition", if Option.is_some (Jmp.guard jmp) then of_exp (Jmp.cond jmp) else build_null ());
(* Use the possibly corrected jump kind computed above, not the raw Jmp.kind. *)
("kind", of_jmp_kind jmp_kind tid_map);
]);
]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment