Unverified commit 09e4239c by Enkelmann, committed by GitHub

Refactor of the pointer inference module (#78)

parent e82c19ce
......@@ -43,7 +43,8 @@ impl<T: RegisterDomain> DataDomain<T> {
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
if let Self::Pointer(pointer) = self {
let remaining_targets: BTreeMap<AbstractIdentifier, T> = pointer
.iter_targets()
.targets()
.iter()
.filter_map(|(id, offset)| {
if ids_to_remove.get(id).is_none() {
Some((id.clone(), offset.clone()))
......@@ -98,8 +99,8 @@ impl<T: RegisterDomain> RegisterDomain for DataDomain<T> {
(Pointer(pointer), MINUS, Value(value)) => Pointer(pointer.sub_from_offset(value)),
(Pointer(pointer_lhs), MINUS, Pointer(pointer_rhs)) => {
if pointer_lhs.ids().len() == 1 && pointer_rhs.ids().len() == 1 {
let (id_lhs, offset_lhs) = pointer_lhs.iter_targets().next().unwrap();
let (id_rhs, offset_rhs) = pointer_rhs.iter_targets().next().unwrap();
let (id_lhs, offset_lhs) = pointer_lhs.targets().iter().next().unwrap();
let (id_rhs, offset_rhs) = pointer_rhs.targets().iter().next().unwrap();
if id_lhs == id_rhs {
Self::Value(offset_lhs.bin_op(MINUS, offset_rhs))
} else {
......@@ -214,7 +215,7 @@ impl<T: RegisterDomain + Display> DataDomain<T> {
match self {
Self::Top(bitsize) => serde_json::Value::String(format!("Top:{}", bitsize)),
Self::Pointer(pointer) => {
let target_iter = pointer.iter_targets().map(|(id, offset)| {
let target_iter = pointer.targets().iter().map(|(id, offset)| {
(
format!("{}", id),
serde_json::Value::String(format!("{}", offset)),
......
......@@ -108,9 +108,9 @@ impl<T: RegisterDomain> PointerDomain<T> {
result
}
/// Get an iterator over all possible abstract targets (together with the offset in the target) the pointer may point to.
pub fn iter_targets(&self) -> std::collections::btree_map::Iter<AbstractIdentifier, T> {
self.0.iter()
/// Get all possible abstract targets (together with the offset in the target) the pointer may point to.
pub fn targets(&self) -> &BTreeMap<AbstractIdentifier, T> {
&self.0
}
/// Get an iterator over all abstract IDs that the pointer may target.
......
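For illustration, a minimal self-contained sketch of the new accessor pattern, with `String` keys standing in for `AbstractIdentifier` (plain Rust, not crate code):

use std::collections::BTreeMap;

// Simplified stand-in for the crate's `PointerDomain<T>`.
struct PointerDomain(BTreeMap<String, i64>);

impl PointerDomain {
    // As in the refactor above: hand out the whole map by reference.
    fn targets(&self) -> &BTreeMap<String, i64> {
        &self.0
    }
}

fn main() {
    let mut map = BTreeMap::new();
    map.insert("main/RSP".to_string(), -16);
    let pointer = PointerDomain(map);
    // Former `iter_targets()` call sites become `targets().iter()` ...
    for (id, offset) in pointer.targets().iter() {
        println!("{} -> {}", id, offset);
    }
    // ... and map methods like `len()` or `get()` come for free.
    assert_eq!(pointer.targets().len(), 1);
}

Returning the map by reference also makes `len()`-based checks, like the one in the binary-operation code above, straightforward.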
......@@ -50,21 +50,26 @@ pub type Graph<'a> = DiGraph<Node<'a>, Edge<'a>>;
/// The node type of an interprocedural control flow graph
///
/// Each node carries a pointer to its associated block with it.
/// For `CallReturn` nodes the associated block is the callsite block (containing the call instruction)
/// and *not* the return block (containing the return instruction).
/// For `CallReturn` nodes the associated blocks are both the callsite block (containing the call instruction)
/// and the returning-from block (containing the return instruction).
#[derive(Serialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Node<'a> {
BlkStart(&'a Term<Blk>),
BlkEnd(&'a Term<Blk>),
CallReturn(&'a Term<Blk>),
CallReturn {
call: &'a Term<Blk>,
return_: &'a Term<Blk>,
},
}
impl<'a> Node<'a> {
/// Get the block corresponding to the node.
/// Get the block corresponding to the node for `BlkStart` and `BlkEnd` nodes.
/// Panics if called on a `CallReturn` node.
pub fn get_block(&self) -> &'a Term<Blk> {
use Node::*;
match self {
BlkStart(blk) | BlkEnd(blk) | CallReturn(blk) => blk,
BlkStart(blk) | BlkEnd(blk) => blk,
CallReturn { .. } => panic!("get_block() is undefined for CallReturn nodes"),
}
}
}
......@@ -74,7 +79,11 @@ impl<'a> std::fmt::Display for Node<'a> {
match self {
Self::BlkStart(block) => write!(formatter, "BlkStart @ {}", block.tid),
Self::BlkEnd(block) => write!(formatter, "BlkEnd @ {}", block.tid),
Self::CallReturn(block) => write!(formatter, "CallReturn (caller @ {})", block.tid),
Self::CallReturn { call, return_ } => write!(
formatter,
"CallReturn @ {} (caller @ {})",
return_.tid, call.tid
),
}
}
}
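A self-contained sketch (with `&str` standing in for `&Term<Blk>`) of the new struct variant and why call sites should now match on it rather than rely on the partial `get_block`:

#[derive(Clone, Copy)]
enum Node<'a> {
    BlkStart(&'a str),
    BlkEnd(&'a str),
    // The new struct variant names both associated blocks.
    CallReturn { call: &'a str, return_: &'a str },
}

impl<'a> Node<'a> {
    // `get_block` is no longer total: it panics for `CallReturn`.
    fn get_block(&self) -> &'a str {
        match self {
            Node::BlkStart(blk) | Node::BlkEnd(blk) => blk,
            Node::CallReturn { .. } => panic!("undefined for CallReturn nodes"),
        }
    }
}

fn main() {
    let start = Node::BlkStart("blk_1");
    let _end = Node::BlkEnd("blk_1");
    assert_eq!(start.get_block(), "blk_1");
    // Call sites handling arbitrary nodes match on the variant instead:
    let node = Node::CallReturn { call: "callsite_blk", return_: "return_blk" };
    if let Node::CallReturn { call, return_ } = node {
        println!("CallReturn @ {} (caller @ {})", return_, call);
    }
}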
......@@ -203,8 +212,7 @@ impl<'a> GraphBuilder<'a> {
/// Add all outgoing edges generated by calls and interprocedural jumps for a specific block to the graph.
/// Return edges are *not* added by this function.
fn add_outgoing_edges(&mut self, node: NodeIndex) {
let block: &'a Term<Blk> = self.graph[node].get_block();
fn add_outgoing_edges(&mut self, node: NodeIndex, block: &'a Term<Blk>) {
let jumps = block.term.jmps.as_slice();
match jumps {
[] => (), // Blocks without jumps are dead ends corresponding to control flow reconstruction errors.
......@@ -231,13 +239,17 @@ impl<'a> GraphBuilder<'a> {
}
for (call_node, return_to_node) in self.return_addresses[&return_from_sub.tid].iter() {
let call_block = self.graph[*call_node].get_block();
let return_from_block = self.graph[return_source].get_block();
let call_term = call_block
.term
.jmps
.iter()
.find(|jump| matches!(jump.term.kind, JmpKind::Call(_)))
.unwrap();
let cr_combine_node = self.graph.add_node(Node::CallReturn(call_block));
let cr_combine_node = self.graph.add_node(Node::CallReturn {
call: call_block,
return_: return_from_block,
});
self.graph
.add_edge(*call_node, cr_combine_node, Edge::CRCallStub);
self.graph
......@@ -267,8 +279,8 @@ impl<'a> GraphBuilder<'a> {
/// Add all non-return-instruction-related jump edges to the graph.
fn add_jump_and_call_edges(&mut self) {
for node in self.graph.node_indices() {
if let Node::BlkEnd(_) = self.graph[node] {
self.add_outgoing_edges(node);
if let Node::BlkEnd(block) = self.graph[node] {
self.add_outgoing_edges(node, block);
}
}
}
......@@ -298,8 +310,8 @@ pub fn get_indices_of_block_nodes<'a, I: Iterator<Item = &'a Tid>>(
let tids: HashSet<Tid> = block_tids.cloned().collect();
let mut tid_to_indices_map = HashMap::new();
for node_index in graph.node_indices() {
if let Some(tid) = tids.get(&graph[node_index].get_block().tid) {
if let Node::BlkStart(_block_term) = graph[node_index] {
if let Node::BlkStart(block_term) = graph[node_index] {
if let Some(tid) = tids.get(&block_term.tid) {
let start_index = node_index;
let end_index = graph.neighbors(start_index).next().unwrap();
tid_to_indices_map.insert(tid.clone(), (start_index, end_index));
......
......@@ -85,6 +85,7 @@ pub trait Context<'a> {
value: &Self::Value,
value_before_call: Option<&Self::Value>,
call_term: &Term<Jmp>,
return_term: &Term<Jmp>,
) -> Option<Self::Value>;
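A hedged sketch of the widened trait method, using stand-in `Term` and `State` types rather than the crate's own, to show what the extra `return_term` parameter enables for implementors:

// Stand-in types; the real `Term<Jmp>` and `State` live in the crate.
struct Term {
    tid: String,
}
struct State;

trait Context {
    type Value;
    fn update_return(
        &self,
        value: &Self::Value,
        value_before_call: Option<&Self::Value>,
        call_term: &Term,
        return_term: &Term,
    ) -> Option<Self::Value>;
}

struct Logger;

impl Context for Logger {
    type Value = State;
    fn update_return(
        &self,
        _value: &State,
        value_before_call: Option<&State>,
        call_term: &Term,
        return_term: &Term,
    ) -> Option<State> {
        // Diagnostics can now be located at the return site itself.
        println!("return @ {} for call @ {}", return_term.tid, call_term.tid);
        value_before_call.map(|_| State)
    }
}

fn main() {
    let call = Term { tid: "call_func".into() };
    let ret = Term { tid: "return_1".into() };
    let _ = Logger.update_return(&State, Some(&State), &call, &ret);
}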
/// Transition function for calls to functions not contained in the binary.
......@@ -158,9 +159,10 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
) -> Option<Self::NodeValue> {
let graph = self.context.get_graph();
let (start_node, end_node) = graph.edge_endpoints(edge).unwrap();
let block_term = graph.node_weight(start_node).unwrap().get_block();
match graph.edge_weight(edge).unwrap() {
Edge::Block => {
let block_term = graph.node_weight(start_node).unwrap().get_block();
let value = node_value.unwrap_value();
let defs = &block_term.term.defs;
let end_val = defs.iter().try_fold(value.clone(), |accum, def| {
......@@ -183,11 +185,18 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
Edge::CRCombine(call_term) => match node_value {
NodeValue::Value(_) => panic!("Unexpected interprocedural fixpoint graph state"),
NodeValue::CallReturnCombinator { call, return_ } => {
let return_from_block = match graph.node_weight(start_node) {
Some(Node::CallReturn { call: _, return_ }) => return_,
_ => panic!("Malformed Control flow graph"),
};
let return_from_jmp = &return_from_block.term.jmps[0];
if let Some(return_value) = return_ {
match self
.context
.update_return(return_value, call.as_ref(), call_term)
{
match self.context.update_return(
return_value,
call.as_ref(),
call_term,
return_from_jmp,
) {
Some(val) => Some(NodeValue::Value(val)),
None => None,
}
......
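For orientation, a minimal sketch of the combinator value handled here, with plain integers standing in for the analysis states:

// Sketch of the combinator value kept at `CallReturn` nodes: the
// caller state (over the call stub edge) and the callee exit state
// sit side by side until `update_return` can combine them.
enum NodeValue<T> {
    Value(T),
    CallReturnCombinator { call: Option<T>, return_: Option<T> },
}

fn combine<T: Clone>(node_value: &NodeValue<T>) -> Option<(Option<T>, T)> {
    match node_value {
        NodeValue::Value(_) => panic!("unexpected interprocedural fixpoint graph state"),
        // Nothing flows over the edge until a callee exit state exists.
        NodeValue::CallReturnCombinator { call, return_ } => {
            return_.clone().map(|ret| (call.clone(), ret))
        }
    }
}

fn main() {
    let _block_value: NodeValue<i32> = NodeValue::Value(0);
    let v = NodeValue::CallReturnCombinator { call: Some(1), return_: Some(2) };
    assert_eq!(combine(&v), Some((Some(1), 2)));
}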
use crate::abstract_domain::*;
use crate::analysis::graph::Graph;
use crate::bil::Expression;
use crate::prelude::*;
use crate::term::symbol::ExternSymbol;
use crate::term::*;
use crate::utils::log::*;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use super::state::State;
use super::Data;
pub struct Context<'a> {
pub graph: Graph<'a>,
pub project: &'a Project,
pub extern_symbol_map: BTreeMap<Tid, &'a ExternSymbol>,
pub cwe_collector: crossbeam_channel::Sender<CweWarning>,
pub log_collector: crossbeam_channel::Sender<LogMessage>,
}
impl<'a> Context<'a> {
pub fn new(
project: &Project,
cwe_collector: crossbeam_channel::Sender<CweWarning>,
log_collector: crossbeam_channel::Sender<LogMessage>,
) -> Context {
let mut extern_symbol_map = BTreeMap::new();
for symbol in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(symbol.tid.clone(), symbol);
}
let extern_symbol_tid_set: HashSet<Tid> = project
.program
.term
.extern_symbols
.iter()
.map(|symb| symb.tid.clone())
.collect();
let graph =
crate::analysis::graph::get_program_cfg(&project.program, extern_symbol_tid_set);
Context {
graph,
project,
extern_symbol_map,
cwe_collector,
log_collector,
}
}
pub fn log_debug<'_lt>(&self, result: Result<(), Error>, location: Option<&'_lt Tid>) {
if let Err(err) = result {
let log_message = LogMessage {
text: format!("Pointer Inference: {}", err),
level: LogLevel::Debug,
location: location.cloned(),
};
self.log_collector.send(log_message).unwrap();
}
}
}
impl<'a> crate::analysis::interprocedural_fixpoint::Context<'a> for Context<'a> {
type Value = State;
fn get_graph(&self) -> &Graph<'a> {
&self.graph
}
fn merge(&self, value1: &State, value2: &State) -> State {
value1.merge(value2)
}
fn update_def(&self, state: &Self::Value, def: &Term<Def>) -> Option<Self::Value> {
// first check for use-after-frees
if state.contains_access_of_dangling_memory(&def.term.rhs) {
let warning = CweWarning {
name: "CWE416".to_string(),
version: "0.1".to_string(),
addresses: vec![def.tid.address.clone()],
tids: vec![format!("{}", def.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Use After Free) Access through a dangling pointer at {}",
def.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
}
// TODO: handle loads in the right hand side expression for their side effects!
match &def.term.rhs {
Expression::Store { .. } => {
let mut state = state.clone();
self.log_debug(state.handle_store_exp(&def.term.rhs), Some(&def.tid));
Some(state)
}
Expression::IfThenElse {
condition,
true_exp,
false_exp,
} => {
// IfThenElse needs special handling, because it may encode conditional store instructions.
let mut true_state = state.clone();
if let Expression::Store { .. } = **true_exp {
self.log_debug(true_state.handle_store_exp(true_exp), Some(&def.tid));
} else {
self.log_debug(
true_state.handle_register_assign(&def.term.lhs, true_exp),
Some(&def.tid),
);
};
let mut false_state = state.clone();
if let Expression::Store { .. } = **false_exp {
self.log_debug(false_state.handle_store_exp(false_exp), Some(&def.tid));
} else {
self.log_debug(
false_state.handle_register_assign(&def.term.lhs, false_exp),
Some(&def.tid),
);
};
match state.eval(condition) {
Ok(Data::Value(cond)) if !cond.is_top() => {
if cond == Bitvector::from_bit(true).into() {
Some(true_state)
} else if cond == Bitvector::from_bit(false).into() {
Some(false_state)
} else {
panic!("IfThenElse with wrong condition bitsize encountered")
}
}
Ok(_) => Some(true_state.merge(&false_state)),
Err(err) => panic!("IfThenElse-Condition evaluation failed: {}", err),
}
}
expression => {
let mut new_state = state.clone();
self.log_debug(
new_state.handle_register_assign(&def.term.lhs, expression),
Some(&def.tid),
);
Some(new_state)
}
}
}
fn update_jump(
&self,
value: &State,
_jump: &Term<Jmp>,
_untaken_conditional: Option<&Term<Jmp>>,
_target: &Term<Blk>,
) -> Option<State> {
// TODO: Implement some real specialization of conditionals!
let mut new_value = value.clone();
new_value.remove_virtual_register();
Some(new_value)
}
fn update_call(
&self,
state: &State,
call_term: &Term<Jmp>,
_target_node: &crate::analysis::graph::Node,
) -> Option<State> {
let call = if let JmpKind::Call(ref call) = call_term.term.kind {
call
} else {
panic!("Malformed control flow graph: Encountered call edge with a non-call jump term.")
};
if let Label::Direct(ref callee_tid) = call.target {
let callee_stack_id = AbstractIdentifier::new(
callee_tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let new_caller_stack_id = AbstractIdentifier::new(
call_term.tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let stack_offset_adjustment = self.get_current_stack_offset(state);
let address_bitsize = self.project.stack_pointer_register.bitsize().unwrap();
let mut callee_state = state.clone();
callee_state.remove_virtual_register();
// Replace the caller stack id with one determined by the call instruction.
// This has to be done *before* adding the new callee stack id to avoid confusing caller and callee stack ids in case of recursive calls.
callee_state.replace_abstract_id(
&state.stack_id,
&new_caller_stack_id,
&stack_offset_adjustment,
);
// add a new memory object for the callee stack frame
callee_state.memory.add_abstract_object(
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap()).into(),
super::object::ObjectType::Stack,
address_bitsize,
);
// set the new stack_id
callee_state.stack_id = callee_stack_id.clone();
// Set the stack pointer register to the callee stack id.
// At the beginning of a function this is the only known pointer to the new stack frame.
self.log_debug(
callee_state.set_register(
&self.project.stack_pointer_register,
PointerDomain::new(
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap())
.into(),
)
.into(),
),
Some(&call_term.tid),
);
// set the list of caller stack ids to only this caller id
callee_state.caller_stack_ids = BTreeSet::new();
callee_state.caller_stack_ids.insert(new_caller_stack_id);
// Remove non-referenced objects, and objects that only the caller knows about, from the state.
callee_state.ids_known_to_caller = BTreeSet::new();
callee_state.remove_unreferenced_objects();
// all remaining objects, except for the callee stack id, are also known to the caller
callee_state.ids_known_to_caller = callee_state.memory.get_all_object_ids();
callee_state.ids_known_to_caller.remove(&callee_stack_id);
Some(callee_state)
} else {
panic!("Indirect call edges not yet supported.")
// TODO: Support indirect call edges!
}
}
fn update_return(
&self,
state_before_return: &State,
state_before_call: Option<&State>,
call_term: &Term<Jmp>,
) -> Option<State> {
// TODO: For the long term we may have to replace the IDs representing callers with something
// that identifies the edge of the call and not just the callsite.
// When indirect calls are handled, the callsite alone is not a unique identifier anymore.
// This may lead to confusion if both caller and callee have the same ID in their respective caller_stack_id sets.
// We only return to callsites that have a state before the call, to prevent returning into dead code.
let state_before_call = match state_before_call {
Some(value) => value,
None => return None,
};
let original_caller_stack_id = &state_before_call.stack_id;
let caller_stack_id = AbstractIdentifier::new(
call_term.tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let callee_stack_id = &state_before_return.stack_id;
let stack_offset_on_call = self.get_current_stack_offset(state_before_call);
// Check whether state_before_return actually knows the caller_stack_id.
// If not, we are returning from a state that cannot correspond to this callsite.
if !state_before_return
.caller_stack_ids
.contains(&caller_stack_id)
{
return None;
}
let mut state_after_return = state_before_return.clone();
state_after_return.remove_virtual_register();
// Remove the IDs of other callers not corresponding to this call
state_after_return.remove_other_caller_stack_ids(&caller_stack_id);
state_after_return.replace_abstract_id(
&caller_stack_id,
original_caller_stack_id,
&(-stack_offset_on_call.clone()),
);
state_after_return.merge_callee_stack_to_caller_stack(
callee_stack_id,
original_caller_stack_id,
&(-stack_offset_on_call),
);
state_after_return.stack_id = original_caller_stack_id.clone();
state_after_return.caller_stack_ids = state_before_call.caller_stack_ids.clone();
state_after_return.ids_known_to_caller = state_before_call.ids_known_to_caller.clone();
state_after_return.readd_caller_objects(state_before_call);
// remove non-referenced objects from the state
state_after_return.remove_unreferenced_objects();
// TODO: I need to detect and report cases where pointers to objects on the callee stack get returned, as this has its own CWE number!
// Detect and report cases where knowledge about the offset of the stack pointer gets lost on return!
// Maybe add a fallback repair mechanism in these cases.
Some(state_after_return)
}
fn update_call_stub(&self, state: &State, call: &Term<Jmp>) -> Option<State> {
let mut new_state = state.clone();
let call_target = match &call.term.kind {
JmpKind::Call(call_inner) => &call_inner.target,
_ => panic!("Malformed control flow graph encountered."),
};
// Clear non-callee-saved registers from the state.
new_state.clear_non_callee_saved_register(&self.project.callee_saved_registers[..]);
// Set the stack register value.
// TODO: This is wrong if the extern call clears more from the stack than just the return address.
// TODO: a check on validity of the return address could also be useful here.
let stack_register = &self.project.stack_pointer_register;
{
let stack_pointer = state.get_register(stack_register).unwrap();
let offset = Bitvector::from_u16(stack_register.bitsize().unwrap() / 8)
.into_zero_extend(stack_register.bitsize().unwrap() as usize)
.unwrap();
self.log_debug(
new_state.set_register(
stack_register,
stack_pointer.bin_op(crate::bil::BinOpType::PLUS, &offset.into()),
),
Some(&call.tid),
);
}
match call_target {
Label::Direct(tid) => {
if let Some(extern_symbol) = self.extern_symbol_map.get(tid) {
// TODO: Replace the hardcoded symbol matching by something configurable in config.json!
// TODO: This implementation ignores that allocation functions may return Null,
// since this is not yet representable in the state object.
// Check all parameter register for dangling pointers and report possible use-after-free if one is found.
for argument in extern_symbol
.arguments
.iter()
.filter(|arg| arg.intent.is_input())
{
match state.eval(&argument.location) {
Ok(value) => {
if state.memory.is_dangling_pointer(&value) {
let warning = CweWarning {
name: "CWE416".to_string(),
version: "0.1".to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!("(Use After Free) Call to {} may access freed memory at {}", extern_symbol.name, call.tid.address),
};
self.cwe_collector.send(warning).unwrap();
}
}
Err(err) => self.log_debug(
Err(err.context(format!(
"Function argument expression {:?} could not be evaluated",
argument.location
))),
Some(&call.tid),
),
}
}
match extern_symbol.name.as_str() {
"malloc" | "calloc" | "realloc" | "xmalloc" => {
if let Ok(return_register) = extern_symbol.get_unique_return_register()
{
let object_id = AbstractIdentifier::new(
call.tid.clone(),
AbstractLocation::from_var(return_register).unwrap(),
);
let address_bitsize =
self.project.stack_pointer_register.bitsize().unwrap();
new_state.memory.add_abstract_object(
object_id.clone(),
Bitvector::zero((address_bitsize as usize).into()).into(),
super::object::ObjectType::Heap,
address_bitsize,
);
let pointer = PointerDomain::new(
object_id,
Bitvector::zero((address_bitsize as usize).into()).into(),
);
self.log_debug(
new_state.set_register(return_register, pointer.into()),
Some(&call.tid),
);
Some(new_state)
} else {
// We cannot track the new object, since we do not know where to store the pointer to it.
// TODO: Return a diagnostics message to the user here.
Some(new_state)
}
}
"free" => {
match extern_symbol.get_unique_parameter() {
Ok(parameter_expression) => {
if let Ok(memory_object_pointer) =
state.eval(parameter_expression)
{
if let Data::Pointer(pointer) = memory_object_pointer {
if let Err(possible_double_free_object_ids) =
new_state.mark_mem_object_as_freed(&pointer)
{
let warning = CweWarning {
name: "CWE415".to_string(),
version: "0.1".to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: vec![possible_double_free_object_ids.into_iter().map(|id| {format!("{}", id)}).collect()],
description: format!("(Double Free) Object may have been freed before at {}", call.tid.address),
};
self.cwe_collector.send(warning).unwrap();
}
} // TODO: add diagnostics for else case
new_state.remove_unreferenced_objects();
Some(new_state)
} else {
// TODO: add diagnostics message for the user here
Some(new_state)
}
}
Err(err) => {
// We do not know which memory object to free
self.log_debug(Err(err), Some(&call.tid));
Some(new_state)
}
}
}
_ => {
self.log_debug(
new_state.clear_stack_parameter(extern_symbol),
Some(&call.tid),
);
let mut possible_referenced_ids = BTreeSet::new();
if extern_symbol.arguments.is_empty() {
// TODO: We assume here that we do not know the parameters and approximate them by all parameter registers.
// This approximation is wrong if the function is known but has neither parameters nor return values.
// We need to somehow distinguish these two cases.
// TODO: We need to cleanup stack memory below the current position of the stack pointer.
for parameter_register_name in
self.project.parameter_registers.iter()
{
if let Some(register_value) =
state.get_register_by_name(parameter_register_name)
{
possible_referenced_ids
.append(&mut register_value.referenced_ids());
}
}
} else {
for parameter in extern_symbol
.arguments
.iter()
.filter(|arg| arg.intent.is_input())
{
if let Ok(data) = state.eval(&parameter.location) {
possible_referenced_ids.append(&mut data.referenced_ids());
}
}
}
possible_referenced_ids = state
.add_recursively_referenced_ids_to_id_set(possible_referenced_ids);
// Delete content of all referenced objects, as the function may write to them.
for id in possible_referenced_ids.iter() {
new_state
.memory
.mark_mem_object_as_untracked(id, &possible_referenced_ids);
}
Some(new_state)
}
}
} else {
panic!("Extern symbol not found.");
}
}
Label::Indirect(_) => unimplemented!("Handling of indirect edges not yet implemented"), // Right now this case should not exist. Decide how to handle only after it can actually occur.
}
}
fn specialize_conditional(
&self,
value: &State,
_condition: &Expression,
_is_true: bool,
) -> Option<State> {
// TODO: implement some real specialization of conditionals!
Some(value.clone())
}
}
impl<'a> Context<'a> {
fn get_current_stack_offset(&self, state: &State) -> BitvectorDomain {
if let Ok(Data::Pointer(ref stack_pointer)) =
state.get_register(&self.project.stack_pointer_register)
{
if stack_pointer.iter_targets().len() == 1 {
// TODO: add sanity check that the stack id is the expected id
let (_stack_id, stack_offset_domain) = stack_pointer.iter_targets().next().unwrap();
stack_offset_domain.clone()
} else {
BitvectorDomain::new_top(self.project.stack_pointer_register.bitsize().unwrap())
}
} else {
BitvectorDomain::new_top(self.project.stack_pointer_register.bitsize().unwrap())
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::bil::variable::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(time: &str, reg_name: &str) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new(time),
AbstractLocation::Register(reg_name.to_string(), 64),
)
}
fn mock_extern_symbol(name: &str) -> ExternSymbol {
use crate::bil;
let arg = Arg {
var: register("RAX"),
location: bil::Expression::Var(register("RAX")),
intent: ArgIntent::Both,
};
ExternSymbol {
tid: Tid::new("extern_".to_string() + name),
address: "somewhere".into(),
name: name.into(),
calling_convention: None,
arguments: vec![arg],
}
}
fn register(name: &str) -> Variable {
Variable {
name: name.into(),
type_: crate::bil::variable::Type::Immediate(64),
is_temp: false,
}
}
fn reg_add_term(name: &str, value: i64, tid_name: &str) -> Term<Def> {
let add_expr = Expression::BinOp {
op: crate::bil::BinOpType::PLUS,
lhs: Box::new(Expression::Var(register(name))),
rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
};
Term {
tid: Tid::new(tid_name),
term: Def {
lhs: register(name),
rhs: add_expr,
},
}
}
fn call_term(target_name: &str) -> Term<Jmp> {
let call = Call {
target: Label::Direct(Tid::new(target_name)),
return_: None,
};
Term {
tid: Tid::new(format!("call_{}", target_name)),
term: Jmp {
condition: None,
kind: JmpKind::Call(call),
},
}
}
fn mock_project() -> Project {
let program = Program {
subs: Vec::new(),
extern_symbols: vec![
mock_extern_symbol("malloc"),
mock_extern_symbol("free"),
mock_extern_symbol("other"),
],
entry_points: Vec::new(),
};
let program_term = Term {
tid: Tid::new("program"),
term: program,
};
Project {
program: program_term,
cpu_architecture: "mock_arch".to_string(),
stack_pointer_register: register("RSP"),
callee_saved_registers: vec!["callee_saved_reg".to_string()],
parameter_registers: vec!["RAX".to_string()],
}
}
#[test]
fn context_problem_implementation() {
use crate::analysis::interprocedural_fixpoint::Context as IpFpContext;
use crate::analysis::pointer_inference::Data;
use crate::bil::*;
use Expression::*;
let project = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, cwe_sender, log_sender);
let mut state = State::new(&register("RSP"), Tid::new("main"));
let def = Term {
tid: Tid::new("def"),
term: Def {
lhs: register("RSP"),
rhs: BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Var(register("RSP"))),
rhs: Box::new(Const(Bitvector::from_i64(-16))),
},
},
};
let store_term = Term {
tid: Tid::new("store"),
term: Def {
lhs: register("memory"), // technically false, but not checked at the moment
rhs: Store {
address: Box::new(Var(register("RSP"))),
endian: Endianness::LittleEndian,
memory: Box::new(Var(register("memory"))), // This is technically false, but the field is ignored at the moment
value: Box::new(Const(Bitvector::from_i64(42))),
size: 64,
},
},
};
// test update_def
state = context.update_def(&state, &def).unwrap();
let stack_pointer = Data::Pointer(PointerDomain::new(new_id("main", "RSP"), bv(-16)));
assert_eq!(state.eval(&Var(register("RSP"))).unwrap(), stack_pointer);
state = context.update_def(&state, &store_term).unwrap();
// Test update_call
let target_block = Term {
tid: Tid::new("func_start"),
term: Blk {
defs: Vec::new(),
jmps: Vec::new(),
},
};
let target_node = crate::analysis::graph::Node::BlkStart(&target_block);
let call = call_term("func");
let mut callee_state = context.update_call(&state, &call, &target_node).unwrap();
assert_eq!(callee_state.stack_id, new_id("func", "RSP"));
assert_eq!(callee_state.caller_stack_ids.len(), 1);
assert_eq!(
callee_state.caller_stack_ids.iter().next().unwrap(),
&new_id("call_func", "RSP")
);
callee_state
.memory
.set_value(
PointerDomain::new(new_id("func", "RSP"), bv(-30)),
Data::Value(bv(33).into()),
)
.unwrap();
let return_state = context
.update_return(&callee_state, Some(&state), &call)
.unwrap();
assert_eq!(return_state.stack_id, new_id("main", "RSP"));
assert_eq!(return_state.caller_stack_ids, BTreeSet::new());
assert_eq!(
return_state.memory.get_internal_id_map(),
state.memory.get_internal_id_map()
);
assert_eq!(
return_state.get_register(&register("RSP")).unwrap(),
state.get_register(&register("RSP")).unwrap()
);
state
.set_register(&register("callee_saved_reg"), Data::Value(bv(13)))
.unwrap();
state
.set_register(&register("other_reg"), Data::Value(bv(14)))
.unwrap();
let malloc = call_term("extern_malloc");
let mut state_after_malloc = context.update_call_stub(&state, &malloc).unwrap();
assert_eq!(
state_after_malloc.get_register(&register("RAX")).unwrap(),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0)
))
);
assert_eq!(state_after_malloc.memory.get_num_objects(), 2);
assert_eq!(
state_after_malloc.get_register(&register("RSP")).unwrap(),
state
.get_register(&register("RSP"))
.unwrap()
.bin_op(BinOpType::PLUS, &Data::Value(bv(8)))
);
assert_eq!(
state_after_malloc
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Value(bv(13))
);
assert!(state_after_malloc
.get_register(&register("other_reg"))
.unwrap()
.is_top());
state_after_malloc
.set_register(
&register("callee_saved_reg"),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0),
)),
)
.unwrap();
let free = call_term("extern_free");
let state_after_free = context
.update_call_stub(&state_after_malloc, &free)
.unwrap();
assert!(state_after_free
.get_register(&register("RAX"))
.unwrap()
.is_top());
assert_eq!(state_after_free.memory.get_num_objects(), 2);
assert_eq!(
state_after_free
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0)
))
);
let other_extern_fn = call_term("extern_other");
let state_after_other_fn = context.update_call_stub(&state, &other_extern_fn).unwrap();
assert_eq!(
state_after_other_fn.get_register(&register("RSP")).unwrap(),
state
.get_register(&register("RSP"))
.unwrap()
.bin_op(BinOpType::PLUS, &Data::Value(bv(8)))
);
assert_eq!(
state_after_other_fn
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Value(bv(13))
);
assert!(state_after_other_fn
.get_register(&register("other_reg"))
.unwrap()
.is_top());
}
#[test]
fn update_return() {
use crate::analysis::interprocedural_fixpoint::Context as IpFpContext;
use crate::analysis::pointer_inference::object::ObjectType;
use crate::analysis::pointer_inference::Data;
let project = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, cwe_sender, log_sender);
let state_before_return = State::new(&register("RSP"), Tid::new("callee"));
let mut state_before_return = context
.update_def(
&state_before_return,
&reg_add_term("RSP", 8, "stack_offset_on_return_adjustment"),
)
.unwrap();
let callsite_id = new_id("call_callee", "RSP");
state_before_return.memory.add_abstract_object(
callsite_id.clone(),
bv(0).into(),
ObjectType::Stack,
64,
);
state_before_return
.caller_stack_ids
.insert(callsite_id.clone());
state_before_return
.ids_known_to_caller
.insert(callsite_id.clone());
let other_callsite_id = new_id("call_callee_other", "RSP");
state_before_return.memory.add_abstract_object(
other_callsite_id.clone(),
bv(0).into(),
ObjectType::Stack,
64,
);
state_before_return
.caller_stack_ids
.insert(other_callsite_id.clone());
state_before_return
.ids_known_to_caller
.insert(other_callsite_id.clone());
state_before_return
.set_register(
&register("RAX"),
Data::Pointer(PointerDomain::new(
new_id("call_callee_other", "RSP"),
bv(-32),
)),
)
.unwrap();
let state_before_call = State::new(&register("RSP"), Tid::new("original_caller_id"));
let mut state_before_call = context
.update_def(
&state_before_call,
&reg_add_term("RSP", -16, "stack_offset_on_call_adjustment"),
)
.unwrap();
let caller_caller_id = new_id("caller_caller", "RSP");
state_before_call.memory.add_abstract_object(
caller_caller_id.clone(),
bv(0).into(),
ObjectType::Stack,
64,
);
state_before_call
.caller_stack_ids
.insert(caller_caller_id.clone());
state_before_call
.ids_known_to_caller
.insert(caller_caller_id.clone());
let state = context
.update_return(
&state_before_return,
Some(&state_before_call),
&call_term("callee"),
)
.unwrap();
let mut caller_caller_set = BTreeSet::new();
caller_caller_set.insert(caller_caller_id);
assert_eq!(state.ids_known_to_caller, caller_caller_set.clone());
assert_eq!(state.caller_stack_ids, caller_caller_set.clone());
assert_eq!(state.stack_id, new_id("original_caller_id", "RSP"));
assert!(state_before_return.memory.get_all_object_ids().len() == 3);
assert!(state.memory.get_all_object_ids().len() == 2);
assert!(state
.memory
.get_all_object_ids()
.get(&new_id("original_caller_id", "RSP"))
.is_some());
assert!(state
.memory
.get_all_object_ids()
.get(&new_id("caller_caller", "RSP"))
.is_some());
assert!(state.get_register(&register("RSP")).is_ok());
let expected_rsp = Data::Pointer(PointerDomain::new(
new_id("original_caller_id", "RSP"),
bv(-8),
));
assert_eq!(state.get_register(&register("RSP")).unwrap(), expected_rsp);
}
}
use super::object::ObjectType;
use crate::abstract_domain::*;
use crate::analysis::graph::Graph;
use crate::bil::Expression;
use crate::prelude::*;
use crate::term::symbol::ExternSymbol;
use crate::term::*;
use crate::utils::log::*;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use super::state::State;
use super::{Data, VERSION};
// Contains trait implementations for the `Context` struct,
// especially the implementation of the `interprocedural_fixpoint::Context` trait.
mod trait_impls;
/// Contains all context information needed for the pointer inference fixpoint computation.
///
/// The struct also implements the `interprocedural_fixpoint::Context` trait to enable the fixpoint computation.
pub struct Context<'a> {
/// The program control flow graph on which the fixpoint will be computed
pub graph: Graph<'a>,
/// A reference to the `Project` object representing the binary
pub project: &'a Project,
/// Maps the TIDs of functions that shall be treated as extern symbols to the `ExternSymbol` objects representing them.
pub extern_symbol_map: BTreeMap<Tid, &'a ExternSymbol>,
/// A channel where found CWE warnings should be sent to.
/// The receiver may filter or modify the warnings before presenting them to the user.
/// For example, the same CWE warning will be found several times
/// if the fixpoint computation does not instantly stabilize at the corresponding code point.
/// These duplicates need to be filtered out.
pub cwe_collector: crossbeam_channel::Sender<CweWarning>,
/// A channel where log messages should be sent to.
pub log_collector: crossbeam_channel::Sender<LogMessage>,
}
impl<'a> Context<'a> {
/// Create a new context object for a given project.
/// Also needs two channels as input to know where CWE warnings and log messages should be sent to.
pub fn new(
project: &Project,
cwe_collector: crossbeam_channel::Sender<CweWarning>,
log_collector: crossbeam_channel::Sender<LogMessage>,
) -> Context {
let mut extern_symbol_map = BTreeMap::new();
for symbol in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(symbol.tid.clone(), symbol);
}
let extern_symbol_tid_set: HashSet<Tid> = project
.program
.term
.extern_symbols
.iter()
.map(|symb| symb.tid.clone())
.collect();
let graph =
crate::analysis::graph::get_program_cfg(&project.program, extern_symbol_tid_set);
Context {
graph,
project,
extern_symbol_map,
cwe_collector,
log_collector,
}
}
/// If `result` is an `Err`, log the error message as a debug message through the `log_collector` channel.
pub fn log_debug<'_lt>(&self, result: Result<(), Error>, location: Option<&'_lt Tid>) {
if let Err(err) = result {
let log_message = LogMessage {
text: format!("Pointer Inference: {}", err),
level: LogLevel::Debug,
location: location.cloned(),
};
self.log_collector.send(log_message).unwrap();
}
}
/// Detect and log if the stack pointer is not as expected when returning from a function.
fn detect_stack_pointer_information_loss_on_return(
&self,
state_before_return: &State,
return_term: &Term<Jmp>,
) {
let expected_stack_pointer_offset = match self.project.cpu_architecture.as_str() {
"x86" | "x86_64" => Bitvector::from_u16(self.project.get_pointer_bitsize() / 8)
.into_zero_extend(self.project.get_pointer_bitsize() as usize)
.unwrap(),
_ => Bitvector::zero((self.project.get_pointer_bitsize() as usize).into()),
};
match state_before_return.get_register(&self.project.stack_pointer_register) {
Ok(Data::Pointer(pointer)) => {
if pointer.targets().len() == 1 {
let (id, offset) = pointer.targets().iter().next().unwrap();
if *id != state_before_return.stack_id
|| *offset != expected_stack_pointer_offset.into()
{
self.log_debug(
Err(anyhow!(
"Unexpected stack register value at return instruction"
)),
Some(&return_term.tid),
);
}
}
}
Ok(Data::Top(_)) => self.log_debug(
Err(anyhow!(
"Stack register value lost during function execution"
)),
Some(&return_term.tid),
),
Ok(Data::Value(_)) => self.log_debug(
Err(anyhow!("Unexpected stack register value on return")),
Some(&return_term.tid),
),
Err(err) => self.log_debug(Err(err), Some(&return_term.tid)),
}
}
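A worked numeric example of the expected offset (plain Rust with integers, assuming x86_64 semantics where `ret` pops the 8-byte return address; the function name is ours, not crate code):

// On x86 the return address lives on the stack and `ret` pops it, so a
// well-behaved function exits with SP == frame base + pointer size.
fn expected_stack_pointer_offset(cpu_architecture: &str, pointer_bitsize: u16) -> u64 {
    match cpu_architecture {
        "x86" | "x86_64" => u64::from(pointer_bitsize / 8),
        // Architectures with a link register leave the stack pointer unchanged.
        _ => 0,
    }
}

fn main() {
    assert_eq!(expected_stack_pointer_offset("x86_64", 64), 8);
    assert_eq!(expected_stack_pointer_offset("arm", 32), 0);
}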
/// Add a new abstract object and a pointer to it in the return register of an extern call.
/// This models the behaviour of `malloc`-like functions,
/// except that we cannot represent possible `NULL` pointers as return values yet.
fn add_new_object_in_call_return_register(
&self,
mut state: State,
call: &Term<Jmp>,
extern_symbol: &ExternSymbol,
) -> Option<State> {
match extern_symbol.get_unique_return_register() {
Ok(return_register) => {
let object_id = AbstractIdentifier::new(
call.tid.clone(),
AbstractLocation::from_var(return_register).unwrap(),
);
let address_bitsize = self.project.stack_pointer_register.bitsize().unwrap();
state.memory.add_abstract_object(
object_id.clone(),
Bitvector::zero((address_bitsize as usize).into()).into(),
super::object::ObjectType::Heap,
address_bitsize,
);
let pointer = PointerDomain::new(
object_id,
Bitvector::zero((address_bitsize as usize).into()).into(),
);
self.log_debug(
state.set_register(return_register, pointer.into()),
Some(&call.tid),
);
Some(state)
}
Err(err) => {
// We cannot track the new object, since we do not know where to store the pointer to it.
self.log_debug(Err(err), Some(&call.tid));
Some(state)
}
}
}
/// Mark the object that the parameter of a call is pointing to as freed.
/// If the object may already have been freed, generate a CWE warning.
/// This models the behaviour of `free` and similar functions.
fn mark_parameter_object_as_freed(
&self,
state: &State,
mut new_state: State,
call: &Term<Jmp>,
extern_symbol: &ExternSymbol,
) -> Option<State> {
match extern_symbol.get_unique_parameter() {
Ok(parameter_expression) => match state.eval(parameter_expression) {
Ok(memory_object_pointer) => {
if let Data::Pointer(pointer) = memory_object_pointer {
if let Err(possible_double_frees) =
new_state.mark_mem_object_as_freed(&pointer)
{
let warning = CweWarning {
name: "CWE415".to_string(),
version: VERSION.to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: vec![possible_double_frees
.into_iter()
.map(|(id, err)| format!("{}: {}", id, err))
.collect()],
description: format!(
"(Double Free) Object may have been freed before at {}",
call.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
}
} else {
self.log_debug(
Err(anyhow!("Free on a non-pointer value called.")),
Some(&call.tid),
);
}
new_state.remove_unreferenced_objects();
Some(new_state)
}
Err(err) => {
self.log_debug(Err(err), Some(&call.tid));
Some(new_state)
}
},
Err(err) => {
// We do not know which memory object to free
self.log_debug(Err(err), Some(&call.tid));
Some(new_state)
}
}
}
/// Check all parameter registers of a call for dangling pointers and report possible use-after-frees.
fn check_parameter_register_for_dangling_pointer(
&self,
state: &State,
call: &Term<Jmp>,
extern_symbol: &ExternSymbol,
) {
for argument in extern_symbol
.arguments
.iter()
.filter(|arg| arg.intent.is_input())
{
match state.eval(&argument.location) {
Ok(value) => {
if state.memory.is_dangling_pointer(&value, true) {
let warning = CweWarning {
name: "CWE416".to_string(),
version: VERSION.to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Use After Free) Call to {} may access freed memory at {}",
extern_symbol.name, call.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
}
}
Err(err) => self.log_debug(
Err(err.context(format!(
"Function argument expression {:?} could not be evaluated",
argument.location
))),
Some(&call.tid),
),
}
}
}
/// Handle an extern symbol call whose concrete effect on the state is unknown.
/// Basically, we assume that the call may write to all memory objects and registers that it has access to.
fn handle_generic_extern_call(
&self,
state: &State,
mut new_state: State,
call: &Term<Jmp>,
extern_symbol: &ExternSymbol,
) -> Option<State> {
self.log_debug(
new_state.clear_stack_parameter(extern_symbol),
Some(&call.tid),
);
let mut possible_referenced_ids = BTreeSet::new();
if extern_symbol.arguments.is_empty() {
// We assume here that we do not know the parameters and approximate them by all possible parameter registers.
// This approximation is wrong if the function is known but has neither parameters nor return values.
// We cannot distinguish these two cases yet.
for parameter_register_name in self.project.parameter_registers.iter() {
if let Some(register_value) = state.get_register_by_name(parameter_register_name) {
possible_referenced_ids.append(&mut register_value.referenced_ids());
}
}
} else {
for parameter in extern_symbol
.arguments
.iter()
.filter(|arg| arg.intent.is_input())
{
if let Ok(data) = state.eval(&parameter.location) {
possible_referenced_ids.append(&mut data.referenced_ids());
}
}
}
possible_referenced_ids =
state.add_recursively_referenced_ids_to_id_set(possible_referenced_ids);
// Delete content of all referenced objects, as the function may write to them.
for id in possible_referenced_ids.iter() {
new_state
.memory
.assume_arbitrary_writes_to_object(id, &possible_referenced_ids);
}
Some(new_state)
}
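A self-contained sketch of the "recursively referenced IDs" closure computed above, with string IDs and an explicit points-to map standing in for the crate's state:

use std::collections::{BTreeMap, BTreeSet};

// Transitive closure: start with the IDs visible in the parameter
// registers and add every ID reachable through the pointed-to objects.
fn referenced_ids_closure(
    start: BTreeSet<&'static str>,
    points_to: &BTreeMap<&'static str, Vec<&'static str>>,
) -> BTreeSet<&'static str> {
    let mut ids = start;
    let mut worklist: Vec<&'static str> = ids.iter().copied().collect();
    while let Some(id) = worklist.pop() {
        for &referenced in points_to.get(id).into_iter().flatten() {
            if ids.insert(referenced) {
                worklist.push(referenced);
            }
        }
    }
    ids
}

fn main() {
    let mut points_to = BTreeMap::new();
    points_to.insert("param_obj", vec!["inner_obj"]);
    points_to.insert("inner_obj", vec!["innermost_obj"]);
    let start: BTreeSet<_> = ["param_obj"].iter().copied().collect();
    let closure = referenced_ids_closure(start, &points_to);
    assert_eq!(closure.len(), 3); // all three objects may be written to
}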
/// Get the offset of the current stack pointer to the base of the current stack frame.
fn get_current_stack_offset(&self, state: &State) -> BitvectorDomain {
if let Ok(Data::Pointer(ref stack_pointer)) =
state.get_register(&self.project.stack_pointer_register)
{
if stack_pointer.targets().len() == 1 {
let (stack_id, stack_offset_domain) =
stack_pointer.targets().iter().next().unwrap();
if *stack_id == state.stack_id {
stack_offset_domain.clone()
} else {
BitvectorDomain::new_top(stack_pointer.bitsize())
}
} else {
BitvectorDomain::new_top(self.project.stack_pointer_register.bitsize().unwrap())
}
} else {
BitvectorDomain::new_top(self.project.stack_pointer_register.bitsize().unwrap())
}
}
}
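The single-target check above can be summarized by a small stand-alone sketch (a plain `BTreeMap` with string IDs and integer offsets standing in for `BitvectorDomain`):

use std::collections::BTreeMap;

// The offset is only precise when the pointer has exactly one target
// and that target is the current stack frame; everything else maps to
// `Top` in the analysis above.
fn stack_offset(targets: &BTreeMap<String, i64>, stack_id: &str) -> Option<i64> {
    if targets.len() == 1 {
        let (id, offset) = targets.iter().next().unwrap();
        if id.as_str() == stack_id {
            return Some(*offset);
        }
    }
    None
}

fn main() {
    let mut targets = BTreeMap::new();
    targets.insert("main/RSP".to_string(), -16);
    assert_eq!(stack_offset(&targets, "main/RSP"), Some(-16));
    assert_eq!(stack_offset(&targets, "other/RSP"), None); // -> Top
}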
#[cfg(test)]
mod tests;
use super::*;
use crate::bil::variable::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(time: &str, reg_name: &str) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new(time),
AbstractLocation::Register(reg_name.to_string(), 64),
)
}
fn mock_extern_symbol(name: &str) -> ExternSymbol {
use crate::bil;
let arg = Arg {
var: register("RAX"),
location: bil::Expression::Var(register("RAX")),
intent: ArgIntent::Both,
};
ExternSymbol {
tid: Tid::new("extern_".to_string() + name),
address: "somewhere".into(),
name: name.into(),
calling_convention: None,
arguments: vec![arg],
}
}
fn register(name: &str) -> Variable {
Variable {
name: name.into(),
type_: crate::bil::variable::Type::Immediate(64),
is_temp: false,
}
}
fn reg_add_term(name: &str, value: i64, tid_name: &str) -> Term<Def> {
let add_expr = Expression::BinOp {
op: crate::bil::BinOpType::PLUS,
lhs: Box::new(Expression::Var(register(name))),
rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
};
Term {
tid: Tid::new(tid_name),
term: Def {
lhs: register(name),
rhs: add_expr,
},
}
}
fn call_term(target_name: &str) -> Term<Jmp> {
let call = Call {
target: Label::Direct(Tid::new(target_name)),
return_: None,
};
Term {
tid: Tid::new(format!("call_{}", target_name)),
term: Jmp {
condition: None,
kind: JmpKind::Call(call),
},
}
}
fn return_term(target_name: &str) -> Term<Jmp> {
Term {
tid: Tid::new(format!("return")),
term: Jmp {
condition: None,
kind: JmpKind::Return(Label::Direct(Tid::new(target_name))),
},
}
}
fn mock_project() -> Project {
let program = Program {
subs: Vec::new(),
extern_symbols: vec![
mock_extern_symbol("malloc"),
mock_extern_symbol("free"),
mock_extern_symbol("other"),
],
entry_points: Vec::new(),
};
let program_term = Term {
tid: Tid::new("program"),
term: program,
};
Project {
program: program_term,
cpu_architecture: "x86_64".to_string(),
stack_pointer_register: register("RSP"),
callee_saved_registers: vec!["callee_saved_reg".to_string()],
parameter_registers: vec!["RAX".to_string()],
}
}
#[test]
fn context_problem_implementation() {
use crate::analysis::interprocedural_fixpoint::Context as IpFpContext;
use crate::analysis::pointer_inference::Data;
use crate::bil::*;
use Expression::*;
let project = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, cwe_sender, log_sender);
let mut state = State::new(&register("RSP"), Tid::new("main"));
let def = Term {
tid: Tid::new("def"),
term: Def {
lhs: register("RSP"),
rhs: BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Var(register("RSP"))),
rhs: Box::new(Const(Bitvector::from_i64(-16))),
},
},
};
let store_term = Term {
tid: Tid::new("store"),
term: Def {
lhs: register("memory"), // technically false, but not checked at the moment
rhs: Store {
address: Box::new(Var(register("RSP"))),
endian: Endianness::LittleEndian,
memory: Box::new(Var(register("memory"))), // This is technically false, but the field is ignored at the moment
value: Box::new(Const(Bitvector::from_i64(42))),
size: 64,
},
},
};
// test update_def
state = context.update_def(&state, &def).unwrap();
let stack_pointer = Data::Pointer(PointerDomain::new(new_id("main", "RSP"), bv(-16)));
assert_eq!(state.eval(&Var(register("RSP"))).unwrap(), stack_pointer);
state = context.update_def(&state, &store_term).unwrap();
// Test update_call
let target_block = Term {
tid: Tid::new("func_start"),
term: Blk {
defs: Vec::new(),
jmps: Vec::new(),
},
};
let target_node = crate::analysis::graph::Node::BlkStart(&target_block);
let call = call_term("func");
let mut callee_state = context.update_call(&state, &call, &target_node).unwrap();
assert_eq!(callee_state.stack_id, new_id("func", "RSP"));
assert_eq!(callee_state.caller_stack_ids.len(), 1);
assert_eq!(
callee_state.caller_stack_ids.iter().next().unwrap(),
&new_id("call_func", "RSP")
);
callee_state
.memory
.set_value(
PointerDomain::new(new_id("func", "RSP"), bv(-30)),
Data::Value(bv(33).into()),
)
.unwrap();
let return_state = context
.update_return(
&callee_state,
Some(&state),
&call,
&return_term("return_target"),
)
.unwrap();
assert_eq!(return_state.stack_id, new_id("main", "RSP"));
assert_eq!(return_state.caller_stack_ids, BTreeSet::new());
assert_eq!(
return_state.memory.get_internal_id_map(),
state.memory.get_internal_id_map()
);
assert_eq!(
return_state.get_register(&register("RSP")).unwrap(),
state.get_register(&register("RSP")).unwrap()
);
state
.set_register(&register("callee_saved_reg"), Data::Value(bv(13)))
.unwrap();
state
.set_register(&register("other_reg"), Data::Value(bv(14)))
.unwrap();
let malloc = call_term("extern_malloc");
let mut state_after_malloc = context.update_call_stub(&state, &malloc).unwrap();
assert_eq!(
state_after_malloc.get_register(&register("RAX")).unwrap(),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0)
))
);
assert_eq!(state_after_malloc.memory.get_num_objects(), 2);
assert_eq!(
state_after_malloc.get_register(&register("RSP")).unwrap(),
state
.get_register(&register("RSP"))
.unwrap()
.bin_op(BinOpType::PLUS, &Data::Value(bv(8)))
);
assert_eq!(
state_after_malloc
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Value(bv(13))
);
assert!(state_after_malloc
.get_register(&register("other_reg"))
.unwrap()
.is_top());
state_after_malloc
.set_register(
&register("callee_saved_reg"),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0),
)),
)
.unwrap();
let free = call_term("extern_free");
let state_after_free = context
.update_call_stub(&state_after_malloc, &free)
.unwrap();
assert!(state_after_free
.get_register(&register("RAX"))
.unwrap()
.is_top());
assert_eq!(state_after_free.memory.get_num_objects(), 2);
assert_eq!(
state_after_free
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Pointer(PointerDomain::new(
new_id("call_extern_malloc", "RAX"),
bv(0)
))
);
let other_extern_fn = call_term("extern_other");
let state_after_other_fn = context.update_call_stub(&state, &other_extern_fn).unwrap();
assert_eq!(
state_after_other_fn.get_register(&register("RSP")).unwrap(),
state
.get_register(&register("RSP"))
.unwrap()
.bin_op(BinOpType::PLUS, &Data::Value(bv(8)))
);
assert_eq!(
state_after_other_fn
.get_register(&register("callee_saved_reg"))
.unwrap(),
Data::Value(bv(13))
);
assert!(state_after_other_fn
.get_register(&register("other_reg"))
.unwrap()
.is_top());
}
#[test]
fn update_return() {
use crate::analysis::interprocedural_fixpoint::Context as IpFpContext;
use crate::analysis::pointer_inference::object::ObjectType;
use crate::analysis::pointer_inference::Data;
let project = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, cwe_sender, log_sender);
let state_before_return = State::new(&register("RSP"), Tid::new("callee"));
let mut state_before_return = context
.update_def(
&state_before_return,
&reg_add_term("RSP", 8, "stack_offset_on_return_adjustment"),
)
.unwrap();
let callsite_id = new_id("call_callee", "RSP");
state_before_return.memory.add_abstract_object(
callsite_id.clone(),
bv(0).into(),
ObjectType::Stack,
64,
);
state_before_return
.caller_stack_ids
.insert(callsite_id.clone());
state_before_return
.ids_known_to_caller
.insert(callsite_id.clone());
let other_callsite_id = new_id("call_callee_other", "RSP");
state_before_return.memory.add_abstract_object(
other_callsite_id.clone(),
bv(0).into(),
ObjectType::Stack,
64,
);
state_before_return
.caller_stack_ids
.insert(other_callsite_id.clone());
state_before_return
.ids_known_to_caller
.insert(other_callsite_id.clone());
state_before_return
.set_register(
&register("RAX"),
Data::Pointer(PointerDomain::new(
new_id("call_callee_other", "RSP"),
bv(-32),
)),
)
.unwrap();
let state_before_call = State::new(&register("RSP"), Tid::new("original_caller_id"));
let mut state_before_call = context
.update_def(
&state_before_call,
&reg_add_term("RSP", -16, "stack_offset_on_call_adjustment"),
)
.unwrap();
let caller_caller_id = new_id("caller_caller", "RSP");
state_before_call.memory.add_abstract_object(
caller_caller_id.clone(),
bv(0).into(),
ObjectType::Stack,
64,
);
state_before_call
.caller_stack_ids
.insert(caller_caller_id.clone());
state_before_call
.ids_known_to_caller
.insert(caller_caller_id.clone());
let state = context
.update_return(
&state_before_return,
Some(&state_before_call),
&call_term("callee"),
&return_term("return_target"),
)
.unwrap();
let mut caller_caller_set = BTreeSet::new();
caller_caller_set.insert(caller_caller_id);
assert_eq!(state.ids_known_to_caller, caller_caller_set.clone());
assert_eq!(state.caller_stack_ids, caller_caller_set.clone());
assert_eq!(state.stack_id, new_id("original_caller_id", "RSP"));
assert!(state_before_return.memory.get_all_object_ids().len() == 3);
assert!(state.memory.get_all_object_ids().len() == 2);
assert!(state
.memory
.get_all_object_ids()
.get(&new_id("original_caller_id", "RSP"))
.is_some());
assert!(state
.memory
.get_all_object_ids()
.get(&new_id("caller_caller", "RSP"))
.is_some());
assert!(state.get_register(&register("RSP")).is_ok());
let expected_rsp = Data::Pointer(PointerDomain::new(
new_id("original_caller_id", "RSP"),
bv(-8),
));
assert_eq!(state.get_register(&register("RSP")).unwrap(), expected_rsp);
}
use super::*;
impl<'a> crate::analysis::interprocedural_fixpoint::Context<'a> for Context<'a> {
type Value = State;
/// Get the underlying graph on which the analysis operates.
fn get_graph(&self) -> &Graph<'a> {
&self.graph
}
/// Merge two state values.
fn merge(&self, value1: &State, value2: &State) -> State {
value1.merge(value2)
}
/// Update the state according to the effects of the given `Def` term.
fn update_def(&self, state: &Self::Value, def: &Term<Def>) -> Option<Self::Value> {
// first check for use-after-frees
if state.contains_access_of_dangling_memory(&def.term.rhs) {
let warning = CweWarning {
name: "CWE416".to_string(),
version: VERSION.to_string(),
addresses: vec![def.tid.address.clone()],
tids: vec![format!("{}", def.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Use After Free) Access through a dangling pointer at {}",
def.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
}
match &def.term.rhs {
Expression::IfThenElse {
condition,
true_exp,
false_exp,
} => {
// IfThenElse needs special handling, because it may encode conditional store instructions.
let mut true_state = state.clone();
if let Expression::Store { .. } = **true_exp {
self.log_debug(true_state.handle_store_exp(true_exp), Some(&def.tid));
} else {
self.log_debug(
true_state.handle_register_assign(&def.term.lhs, true_exp),
Some(&def.tid),
);
};
let mut false_state = state.clone();
if let Expression::Store { .. } = **false_exp {
self.log_debug(false_state.handle_store_exp(false_exp), Some(&def.tid));
} else {
self.log_debug(
false_state.handle_register_assign(&def.term.lhs, false_exp),
Some(&def.tid),
);
};
match state.eval(condition) {
Ok(Data::Value(cond)) if !cond.is_top() => {
if cond == Bitvector::from_bit(true).into() {
Some(true_state)
} else if cond == Bitvector::from_bit(false).into() {
Some(false_state)
} else {
panic!("IfThenElse with wrong condition bitsize encountered")
}
}
Ok(_) => Some(true_state.merge(&false_state)),
Err(err) => panic!("IfThenElse-Condition evaluation failed: {}", err),
}
}
Expression::Store { .. } => {
let mut state = state.clone();
self.log_debug(state.handle_store_exp(&def.term.rhs), Some(&def.tid));
Some(state)
}
expression => {
let mut new_state = state.clone();
self.log_debug(
new_state.handle_register_assign(&def.term.lhs, expression),
Some(&def.tid),
);
Some(new_state)
}
}
}
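The `IfThenElse` arm above follows a split-evaluate-merge pattern; a minimal sketch with intervals standing in for `State` and a three-valued condition:

#[derive(Clone, Copy)]
enum Cond { True, False, Unknown }

// States are intervals (lo, hi); `merge` is the join of the domain.
fn merge(a: (i64, i64), b: (i64, i64)) -> (i64, i64) {
    (a.0.min(b.0), a.1.max(b.1))
}

// Apply both branch effects speculatively, then keep the branch the
// condition selects, or merge both states if the condition is unknown.
fn update_if_then_else(state: (i64, i64), cond: Cond) -> (i64, i64) {
    let true_state = (state.0 + 1, state.1 + 1); // true-branch effect
    let false_state = (state.0 - 1, state.1 - 1); // false-branch effect
    match cond {
        Cond::True => true_state,
        Cond::False => false_state,
        Cond::Unknown => merge(true_state, false_state),
    }
}

fn main() {
    assert_eq!(update_if_then_else((0, 0), Cond::True), (1, 1));
    assert_eq!(update_if_then_else((0, 0), Cond::False), (-1, -1));
    assert_eq!(update_if_then_else((0, 0), Cond::Unknown), (-1, 1));
}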
/// Update the state according to the effects of the given `Jmp` term.
/// Right now this only removes virtual registers from the state,
/// as specialization for conditional jumps is not implemented yet.
fn update_jump(
&self,
value: &State,
_jump: &Term<Jmp>,
_untaken_conditional: Option<&Term<Jmp>>,
_target: &Term<Blk>,
) -> Option<State> {
let mut new_value = value.clone();
new_value.remove_virtual_register();
Some(new_value)
}
/// Update the state according to the effects of the given `Call` term.
/// The resulting state is the state at the start of the call target function.
fn update_call(
&self,
state: &State,
call_term: &Term<Jmp>,
_target_node: &crate::analysis::graph::Node,
) -> Option<State> {
let call = if let JmpKind::Call(ref call) = call_term.term.kind {
call
} else {
panic!("Malformed control flow graph: Encountered call edge with a non-call jump term.")
};
if let Label::Direct(ref callee_tid) = call.target {
let callee_stack_id = AbstractIdentifier::new(
callee_tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let new_caller_stack_id = AbstractIdentifier::new(
call_term.tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let stack_offset_adjustment = self.get_current_stack_offset(state);
let address_bitsize = self.project.stack_pointer_register.bitsize().unwrap();
let mut callee_state = state.clone();
callee_state.remove_virtual_register();
// Replace the caller stack ID with one determined by the call instruction.
// This has to be done *before* adding the new callee stack id to avoid confusing caller and callee stack ids in case of recursive calls.
callee_state.replace_abstract_id(
&state.stack_id,
&new_caller_stack_id,
&stack_offset_adjustment,
);
// add a new memory object for the callee stack frame
callee_state.memory.add_abstract_object(
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap()).into(),
ObjectType::Stack,
address_bitsize,
);
// set the new stack_id
callee_state.stack_id = callee_stack_id.clone();
// Set the stack pointer register to the callee stack id.
// At the beginning of a function this is the only known pointer to the new stack frame.
self.log_debug(
callee_state.set_register(
&self.project.stack_pointer_register,
PointerDomain::new(
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap())
.into(),
)
.into(),
),
Some(&call_term.tid),
);
// set the list of caller stack ids to only this caller id
callee_state.caller_stack_ids = BTreeSet::new();
callee_state.caller_stack_ids.insert(new_caller_stack_id);
// Remove non-referenced objects, and objects that only the caller knows about, from the state.
callee_state.ids_known_to_caller = BTreeSet::new();
callee_state.remove_unreferenced_objects();
// all remaining objects, except for the callee stack id, are also known to the caller
callee_state.ids_known_to_caller = callee_state.memory.get_all_object_ids();
callee_state.ids_known_to_caller.remove(&callee_stack_id);
Some(callee_state)
} else {
panic!("Indirect call edges not yet supported.")
}
}
/// Update the state according to the effects of the given return instruction.
/// The `state_before_call` is used to reconstruct caller-specific information like the caller stack frame.
fn update_return(
&self,
state_before_return: &State,
state_before_call: Option<&State>,
call_term: &Term<Jmp>,
return_term: &Term<Jmp>,
) -> Option<State> {
// TODO: For the long term we may have to replace the IDs representing callers with something
// that identifies the edge of the call and not just the callsite.
// When indirect calls are handled, the callsite alone is not a unique identifier anymore.
// This may lead to confusion if both caller and callee have the same ID in their respective caller_stack_id sets.
// We only return to callsites for which a state before the call exists, to prevent returning into dead code.
let state_before_call = match state_before_call {
Some(value) => value,
None => return None,
};
let original_caller_stack_id = &state_before_call.stack_id;
let caller_stack_id = AbstractIdentifier::new(
call_term.tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let callee_stack_id = &state_before_return.stack_id;
let stack_offset_on_call = self.get_current_stack_offset(state_before_call);
// Detect possible information loss on the stack pointer and report it.
self.detect_stack_pointer_information_loss_on_return(state_before_return, return_term);
// Check whether state_before_return actually knows the `caller_stack_id`.
// If not, we are returning from a state that cannot correspond to this callsite.
if !state_before_return
.caller_stack_ids
.contains(&caller_stack_id)
{
return None;
}
let mut state_after_return = state_before_return.clone();
state_after_return.remove_virtual_register();
// Remove the IDs of other callers not corresponding to this call
state_after_return.remove_other_caller_stack_ids(&caller_stack_id);
state_after_return.replace_abstract_id(
&caller_stack_id,
original_caller_stack_id,
&(-stack_offset_on_call.clone()),
);
state_after_return.merge_callee_stack_to_caller_stack(
callee_stack_id,
original_caller_stack_id,
&(-stack_offset_on_call),
);
state_after_return.stack_id = original_caller_stack_id.clone();
state_after_return.caller_stack_ids = state_before_call.caller_stack_ids.clone();
state_after_return.ids_known_to_caller = state_before_call.ids_known_to_caller.clone();
state_after_return.readd_caller_objects(state_before_call);
// remove non-referenced objects from the state
state_after_return.remove_unreferenced_objects();
Some(state_after_return)
}
/// Update the state according to the effect of a call to an extern symbol.
fn update_call_stub(&self, state: &State, call: &Term<Jmp>) -> Option<State> {
let mut new_state = state.clone();
let call_target = match &call.term.kind {
JmpKind::Call(call_inner) => &call_inner.target,
_ => panic!("Malformed control flow graph encountered."),
};
// Clear non-callee-saved registers from the state.
new_state.clear_non_callee_saved_register(&self.project.callee_saved_registers[..]);
// On x86, remove the return address from the stack (other architectures pass the return address in a register, not on the stack).
// Note that in some calling conventions the callee also clears function parameters from the stack.
// We do not detect and handle these cases yet.
let stack_register = &self.project.stack_pointer_register;
let stack_pointer = state.get_register(stack_register).unwrap();
match self.project.cpu_architecture.as_str() {
"x86" | "x86_64" => {
let offset = Bitvector::from_u16(stack_register.bitsize().unwrap() / 8)
.into_zero_extend(stack_register.bitsize().unwrap() as usize)
.unwrap();
self.log_debug(
new_state.set_register(
stack_register,
stack_pointer.bin_op(crate::bil::BinOpType::PLUS, &offset.into()),
),
Some(&call.tid),
);
}
_ => self.log_debug(
new_state.set_register(stack_register, stack_pointer),
Some(&call.tid),
),
}
match call_target {
Label::Direct(tid) => {
if let Some(extern_symbol) = self.extern_symbol_map.get(tid) {
// Check parameter for possible use-after-frees
self.check_parameter_register_for_dangling_pointer(state, call, extern_symbol);
match extern_symbol.name.as_str() {
"malloc" | "calloc" | "realloc" | "xmalloc" => self
.add_new_object_in_call_return_register(new_state, call, extern_symbol),
"free" => self.mark_parameter_object_as_freed(
state,
new_state,
call,
extern_symbol,
),
_ => self.handle_generic_extern_call(state, new_state, call, extern_symbol),
}
} else {
panic!("Extern symbol not found.");
}
}
Label::Indirect(_) => unimplemented!("Handling of indirect edges not yet implemented"), // Right now this case should not exist. Decide how to handle only after it can actually occur.
}
}
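// Sketch of the stack adjustment above for x86_64: the stack pointer register
// has 64 bits, so the popped return address occupies 64 / 8 = 8 bytes and the
// new stack pointer value is the old one plus 8.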
/// Update the state with the knowledge that some conditional evaluated to true or false.
/// Not yet implemented; currently this function just returns the state unchanged.
fn specialize_conditional(
&self,
value: &State,
_condition: &Expression,
_is_true: bool,
) -> Option<State> {
Some(value.clone())
}
}
//! The pointer inference analysis.
//!
//! The goal of the pointer inference analysis is to keep track of all memory objects and pointers
//! that the program knows about at specific program points during execution.
//! Possible memory management errors, like access to memory that may already have been freed,
//! are reported to the user.
//!
//! Keep in mind that the analysis operates on a best-effort basis.
//! In cases where we cannot know
//! whether an error is due to an error in the memory management of the program under analysis
//! or due to inexactness of the pointer inference analysis itself,
//! we try to treat it as the more likely (but not necessarily true) case of the two.
use super::interprocedural_fixpoint::{Computation, NodeValue};
use crate::abstract_domain::{BitvectorDomain, DataDomain};
use crate::analysis::graph::{Graph, Node};
......@@ -16,15 +29,20 @@ mod state;
use context::Context;
use state::State;
/// The version number of the analysis.
const VERSION: &str = "0.1";
/// The abstract domain type for representing register values.
type Data = DataDomain<BitvectorDomain>;
/// A wrapper struct for the pointer inference computation object.
pub struct PointerInference<'a> {
computation: Computation<'a, Context<'a>>,
log_collector: crossbeam_channel::Sender<LogMessage>,
}
impl<'a> PointerInference<'a> {
/// Generate a new pointer inference computation for a project.
pub fn new(
project: &'a Project,
cwe_sender: crossbeam_channel::Sender<CweWarning>,
......@@ -90,12 +108,14 @@ impl<'a> PointerInference<'a> {
}
}
/// Compute the fixpoint of the pointer inference analysis.
/// Has a `max_steps` bound for the fixpoint algorithm to prevent infinite loops.
pub fn compute(&mut self) {
self.computation.compute_with_max_steps(100); // TODO: make max_steps configurable!
}
/// Print results serialized as YAML to stdout
pub fn print_yaml(&self) {
let graph = self.computation.get_graph();
for (node_index, value) in self.computation.node_values().iter() {
let node = graph.node_weight(*node_index).unwrap();
......@@ -111,6 +131,9 @@ impl<'a> PointerInference<'a> {
}
}
/// Generate a compacted json representation of the results.
/// Note that this output cannot be used for serialization/deserialization,
/// but is only intended for user output.
pub fn generate_compact_json(&self) -> serde_json::Value {
let graph = self.computation.get_graph();
let mut json_nodes = serde_json::Map::new();
......@@ -136,7 +159,7 @@ impl<'a> PointerInference<'a> {
/// Since indirect jumps and calls are not handled yet (TODO: change that),
/// the analysis may miss a *lot* of code in some cases.
/// To remedy this somewhat,
/// we mark all function starts, that are also roots in the control flow graph,
/// we mark all function starts that are also roots in the control flow graph
/// and do not yet have a state assigned to them as additional entry points.
///
/// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as roots.
......@@ -192,6 +215,8 @@ impl<'a> PointerInference<'a> {
}
}
/// Print the number of blocks that have a state associated to them.
/// Intended for debug purposes.
fn count_blocks_with_state(&self) {
let graph = self.computation.get_graph();
let mut stateful_blocks: i64 = 0;
......@@ -220,6 +245,8 @@ impl<'a> PointerInference<'a> {
}
}
/// Generate and execute the pointer inference analysis.
/// Returns a vector of all found CWE warnings and a vector of all log messages generated during analysis.
pub fn run(project: &Project, print_debug: bool) -> (Vec<CweWarning>, Vec<String>) {
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, log_receiver) = crossbeam_channel::unbounded();
......@@ -256,6 +283,7 @@ pub fn run(project: &Project, print_debug: bool) -> (Vec<CweWarning>, Vec<String
)
}
/// Collect CWE warnings from the receiver until the channel is closed. Then return them.
fn collect_cwe_warnings(receiver: crossbeam_channel::Receiver<CweWarning>) -> Vec<CweWarning> {
let mut collected_warnings = HashMap::new();
while let Ok(warning) = receiver.recv() {
......@@ -272,6 +300,7 @@ fn collect_cwe_warnings(receiver: crossbeam_channel::Receiver<CweWarning>) -> Ve
.collect()
}
/// Collect log messages from the receiver until the channel is closed. Then return them.
fn collect_logs(receiver: crossbeam_channel::Receiver<LogMessage>) -> Vec<String> {
let mut logs_with_address = HashMap::new();
let mut general_logs = Vec::new();
......
......@@ -2,198 +2,80 @@ use super::Data;
use crate::abstract_domain::*;
use crate::bil::Bitvector;
use crate::prelude::*;
use derive_more::Deref;
use serde::{Deserialize, Serialize};
use std::collections::BTreeSet;
use std::iter::FromIterator;
use std::ops::DerefMut;
use std::sync::Arc;
/// An abstract object is either a tracked or an untracked memory object.
/// In the untracked case we still track whether the object may contain pointers to other objects.
/// This way we do not necessarily need to invalidate all abstract objects
/// if a pointer contained in an untracked object is used for a memory write.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub enum AbstractObject {
Untracked(BTreeSet<AbstractIdentifier>),
Memory(AbstractObjectInfo),
/// A wrapper struct wrapping `AbstractObjectInfo` in an `Arc`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone, Deref)]
#[deref(forward)]
pub struct AbstractObject(Arc<AbstractObjectInfo>);
impl DerefMut for AbstractObject {
fn deref_mut(&mut self) -> &mut AbstractObjectInfo {
Arc::make_mut(&mut self.0)
}
}
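// A minimal sketch (hypothetical, using only std) of the copy-on-write behavior
// that the `Deref`/`DerefMut` pair above provides: cloning is cheap, and mutation
// through `Arc::make_mut` only copies the inner value while it is still shared.
#[cfg(test)]
fn _copy_on_write_sketch() {
    use std::sync::Arc;
    let original = Arc::new(String::from("object"));
    let mut copy = Arc::clone(&original); // cheap: only bumps the reference count
    Arc::make_mut(&mut copy).push_str("-v2"); // the inner value is cloned here
    assert_eq!(*original, "object"); // the shared original stays untouched
    assert_eq!(*copy, "object-v2");
}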
impl AbstractObject {
/// Create a new abstract object with given object type and address bitsize.
pub fn new(type_: ObjectType, address_bitsize: BitSize) -> AbstractObject {
Self::Memory(AbstractObjectInfo {
pointer_targets: BTreeSet::new(),
is_unique: true,
state: Some(ObjectState::Alive),
type_: Some(type_),
memory: MemRegion::new(address_bitsize),
})
}
pub fn get_value(&self, offset: Bitvector, bitsize: BitSize) -> Data {
if let Self::Memory(object_info) = self {
object_info.get_value(offset, bitsize)
} else {
Data::new_top(bitsize)
}
AbstractObject(Arc::new(AbstractObjectInfo::new(type_, address_bitsize)))
}
/// Short-circuits the `AbstractObjectInfo::merge` function if `self==other`.
pub fn merge(&self, other: &Self) -> Self {
match (self, other) {
(Self::Untracked(set1), Self::Untracked(set2)) => {
Self::Untracked(set1.union(set2).cloned().collect())
}
(Self::Untracked(untracked), Self::Memory(memory))
| (Self::Memory(memory), Self::Untracked(untracked)) => {
Self::Untracked(untracked.union(&memory.pointer_targets).cloned().collect())
}
(Self::Memory(left), Self::Memory(right)) => Self::Memory(left.merge(right)),
}
}
pub fn set_value(&mut self, value: Data, offset: BitvectorDomain) -> Result<(), Error> {
match self {
Self::Untracked(target_list) => {
if let Data::Pointer(ref pointer) = value {
target_list.extend(
pointer
.iter_targets()
.map(|(abstract_id, _offset)| abstract_id.clone()),
)
};
}
Self::Memory(memory_object) => {
memory_object.set_value(value, offset)?;
}
};
Ok(())
}
pub fn get_all_possible_pointer_targets(&self) -> BTreeSet<AbstractIdentifier> {
match self {
Self::Untracked(targets) => targets.clone(),
Self::Memory(memory) => memory.get_all_possible_pointer_targets(),
}
}
/// For pointer values replace an abstract identifier with another one and add the offset_adjustment to the pointer offset.
/// This is needed to adjust the stack pointer on call and return instructions.
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
match self {
Self::Untracked(id_set) => {
if id_set.get(old_id).is_some() {
id_set.remove(old_id);
id_set.insert(new_id.clone());
}
}
Self::Memory(mem_object) => {
mem_object.replace_abstract_id(old_id, new_id, offset_adjustment);
}
}
}
pub fn get_referenced_ids(&self) -> BTreeSet<AbstractIdentifier> {
match self {
Self::Untracked(ids) => ids.clone(),
Self::Memory(object_info) => object_info.pointer_targets.clone(),
}
}
pub fn set_state(&mut self, new_state: Option<ObjectState>) {
if let Self::Memory(object_info) = self {
object_info.set_state(new_state)
}
}
/// Remove the provided IDs from all possible target lists, including all pointers.
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
match self {
Self::Untracked(targets) => {
let remaining_targets = targets.difference(ids_to_remove).cloned().collect();
*self = Self::Untracked(remaining_targets);
}
Self::Memory(mem) => {
mem.remove_ids(ids_to_remove);
}
}
}
#[cfg(test)]
pub fn get_state(&self) -> Option<ObjectState> {
match self {
Self::Untracked(_) => None,
Self::Memory(mem) => mem.state,
}
}
}
impl AbstractObject {
pub fn to_json_compact(&self) -> serde_json::Value {
match self {
Self::Untracked(_) => serde_json::Value::String("Untracked".into()),
Self::Memory(object_info) => {
let mut elements = Vec::new();
elements.push((
"is_unique".to_string(),
serde_json::Value::String(format!("{}", object_info.is_unique)),
));
elements.push((
"state".to_string(),
serde_json::Value::String(format!("{:?}", object_info.state)),
));
elements.push((
"type".to_string(),
serde_json::Value::String(format!("{:?}", object_info.type_)),
));
let memory = object_info
.memory
.iter()
.map(|(index, value)| (format!("{}", index), value.to_json_compact()));
elements.push((
"memory".to_string(),
serde_json::Value::Object(serde_json::Map::from_iter(memory)),
));
serde_json::Value::Object(serde_json::Map::from_iter(elements.into_iter()))
}
if self == other {
self.clone()
} else {
AbstractObject(Arc::new(self.0.merge(other)))
}
}
}
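// Sketch: because of the short-circuit, merging an object with an equal one
// reuses the existing allocation instead of building a new `AbstractObjectInfo`:
//
//     let merged = object.merge(&object.clone());
//     assert!(Arc::ptr_eq(&object.0, &merged.0)); // same shared allocation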
/// The abstract object info contains all information that we track for an abstract object.
///
/// Some noteworthy properties:
/// - The field *is_unique* indicates whether the object may represent more than one actual memory object
/// - The *state* indicates whether the object is still alive or not.
/// This can be used to detect "use after free" bugs.
/// - Many fields are wrapped in Option<_> to indicate whether the property is known or not.
/// - The field pointer_targets is a (coarse) upper approximation of all possible targets
/// for which pointers may exist inside the memory region.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct AbstractObjectInfo {
/// An upper approximation of all possible targets for which pointers may exist inside the memory region.
pointer_targets: BTreeSet<AbstractIdentifier>,
/// Is the object unique? If `false`, this may represent more than one actual memory object.
pub is_unique: bool,
pub state: Option<ObjectState>,
/// Is the object alive or already destroyed
state: Option<ObjectState>,
/// Is the object a stack frame or a heap object
type_: Option<ObjectType>,
/// The actual content of the memory object
memory: MemRegion<Data>,
}
impl AbstractObjectInfo {
fn get_value(&self, offset: Bitvector, bitsize: BitSize) -> Data {
// TODO: This function does not check whether a data read is "sound", e.g. that the offset is inside the object.
// Make sure that this is checked somewhere!
/// Create a new abstract object with known object type and address bitsize
pub fn new(type_: ObjectType, address_bitsize: BitSize) -> AbstractObjectInfo {
AbstractObjectInfo {
pointer_targets: BTreeSet::new(),
is_unique: true,
state: Some(ObjectState::Alive),
type_: Some(type_),
memory: MemRegion::new(address_bitsize),
}
}
/// Read the value at the given offset of the given size (in bits, not bytes) inside the memory region.
pub fn get_value(&self, offset: Bitvector, bitsize: BitSize) -> Data {
assert_eq!(bitsize % 8, 0);
self.memory.get(offset, (bitsize / 8) as u64)
}
fn set_value(&mut self, value: Data, offset: BitvectorDomain) -> Result<(), Error> {
/// Write a value at the given offset to the memory region.
///
/// If the abstract object is not unique (i.e. may represent more than one actual object),
/// merge the old value at the given offset with the new value.
pub fn set_value(&mut self, value: Data, offset: &BitvectorDomain) -> Result<(), Error> {
if let Data::Pointer(ref pointer) = value {
self.pointer_targets.extend(
pointer
.iter_targets()
.map(|(abstract_id, _offset)| abstract_id.clone()),
)
self.pointer_targets.extend(pointer.ids().cloned());
};
if let BitvectorDomain::Value(ref concrete_offset) = offset {
if self.is_unique {
......@@ -211,16 +93,25 @@ impl AbstractObjectInfo {
Ok(())
}
fn get_all_possible_pointer_targets(&self) -> BTreeSet<AbstractIdentifier> {
let mut targets = self.pointer_targets.clone();
for elem in self.memory.values() {
if let Data::Pointer(pointer) = elem {
for (id, _) in pointer.iter_targets() {
targets.insert(id.clone());
}
/// Merge `value` at position `offset` with the value currently saved at that position.
pub fn merge_value(&mut self, value: Data, offset: &BitvectorDomain) {
if let Data::Pointer(ref pointer) = value {
self.pointer_targets.extend(pointer.ids().cloned());
};
if let BitvectorDomain::Value(ref concrete_offset) = offset {
let merged_value = self
.memory
.get(concrete_offset.clone(), (value.bitsize() / 8) as u64)
.merge(&value);
self.memory.add(merged_value, concrete_offset.clone());
} else {
self.memory = MemRegion::new(self.memory.get_address_bitsize());
}
}
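// Sketch of the merge-write semantics (mirrored by the unit tests below):
//
//     object.set_value(new_data(4), &bv(-12)).unwrap(); // the cell now holds 4
//     object.merge_value(new_data(5), &bv(-12));        // 4 gets merged with 5
//     // The cell degrades to *Top*, since the two concrete values disagree:
//     assert_eq!(
//         object.get_value(Bitvector::from_i64(-12), 64),
//         Data::Value(BitvectorDomain::new_top(64))
//     );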
targets
/// Get all abstract IDs that the object may contain pointers to.
pub fn get_referenced_ids(&self) -> &BTreeSet<AbstractIdentifier> {
&self.pointer_targets
}
/// For pointer values replace an abstract identifier with another one and add the offset_adjustment to the pointer offsets.
......@@ -241,6 +132,7 @@ impl AbstractObjectInfo {
}
}
/// If `self.is_unique==true`, set the state of the object. Else merge the new state with the old.
pub fn set_state(&mut self, new_state: Option<ObjectState>) {
if self.is_unique {
self.state = new_state;
......@@ -258,25 +150,68 @@ impl AbstractObjectInfo {
.cloned()
.collect();
for value in self.memory.values_mut() {
value.remove_ids(ids_to_remove); // TODO: This may leave *Top* values in the memory object. Remove them.
value.remove_ids(ids_to_remove);
}
self.memory.clear_top_values()
self.memory.clear_top_values(); // In case the previous operation left *Top* values in the memory struct.
}
}
impl HasTop for AbstractObjectInfo {
fn top(&self) -> Self {
AbstractObjectInfo {
pointer_targets: BTreeSet::new(),
is_unique: false,
state: None,
type_: None,
memory: MemRegion::new(self.memory.get_address_bitsize()),
/// Get the state of the memory object.
pub fn get_state(&self) -> Option<ObjectState> {
self.state
}
/// Invalidates all memory and adds the `additional_targets` to the pointer targets.
/// Represents the effect of unknown write instructions to the object
/// which may include writing pointers to targets from the `additional_targets` set to the object.
pub fn assume_arbitrary_writes(&mut self, additional_targets: &BTreeSet<AbstractIdentifier>) {
self.memory = MemRegion::new(self.memory.get_address_bitsize());
self.pointer_targets
.extend(additional_targets.iter().cloned());
}
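// Sketch (using the helper functions from the tests below): all stored values
// are lost, but the new possible pointer targets are recorded:
//
//     let targets = vec![new_id("time_1", "RAX")].into_iter().collect();
//     object.assume_arbitrary_writes(&targets);
//     assert_eq!(object.get_value(Bitvector::from_i64(-15), 64), Data::new_top(64));
//     assert!(object.get_referenced_ids().contains(&new_id("time_1", "RAX")));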
/// Mark the memory object as freed.
/// Returns an error if a possible double free is detected
/// or the memory object may not be a heap object.
pub fn mark_as_freed(&mut self) -> Result<(), Error> {
if self.type_ != Some(ObjectType::Heap) {
self.set_state(Some(ObjectState::Dangling));
return Err(anyhow!("Free operation on possibly non-heap memory object"));
}
match (self.is_unique, self.state) {
(true, Some(ObjectState::Alive)) => {
self.state = Some(ObjectState::Dangling);
Ok(())
}
(true, _) | (false, Some(ObjectState::Dangling)) => {
self.state = Some(ObjectState::Dangling);
Err(anyhow!("Object may already have been freed"))
}
(false, _) => {
self.state = None;
Ok(())
}
}
}
/// Mark the memory object as possibly (but not definitely) freed.
/// Returns an error if the object was definitely freed before
/// or if the object may not be a heap object.
pub fn mark_as_maybe_freed(&mut self) -> Result<(), Error> {
if self.type_ != Some(ObjectType::Heap) {
self.set_state(Some(ObjectState::Dangling));
return Err(anyhow!("Free operation on possibly non-heap memory object"));
}
if self.state != Some(ObjectState::Dangling) {
self.state = None;
Ok(())
} else {
Err(anyhow!("Object may already have been freed"))
}
}
}
impl AbstractDomain for AbstractObjectInfo {
/// Merge two abstract objects
fn merge(&self, other: &Self) -> Self {
AbstractObjectInfo {
pointer_targets: self
......@@ -297,6 +232,36 @@ impl AbstractDomain for AbstractObjectInfo {
}
}
impl AbstractObjectInfo {
/// Get a more compact json-representation of the abstract object.
/// Intended for pretty printing, not usable for serialization/deserialization.
pub fn to_json_compact(&self) -> serde_json::Value {
let mut elements = Vec::new();
elements.push((
"is_unique".to_string(),
serde_json::Value::String(format!("{}", self.is_unique)),
));
elements.push((
"state".to_string(),
serde_json::Value::String(format!("{:?}", self.state)),
));
elements.push((
"type".to_string(),
serde_json::Value::String(format!("{:?}", self.type_)),
));
let memory = self
.memory
.iter()
.map(|(index, value)| (format!("{}", index), value.to_json_compact()));
elements.push((
"memory".to_string(),
serde_json::Value::Object(serde_json::Map::from_iter(memory)),
));
serde_json::Value::Object(serde_json::Map::from_iter(elements.into_iter()))
}
}
/// Helper function for merging two `Option<T>` values (merging to `None` if they are not equal).
fn same_or_none<T: Eq + Clone>(left: &Option<T>, right: &Option<T>) -> Option<T> {
if left.as_ref()? == right.as_ref()? {
Some(left.as_ref().unwrap().clone())
......@@ -306,7 +271,6 @@ fn same_or_none<T: Eq + Clone>(left: &Option<T>, right: &Option<T>) -> Option<T>
}
/// An object is either a stack or a heap object.
/// TODO: add a type for tracking global variables!
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord)]
pub enum ObjectType {
Stack,
......@@ -332,7 +296,7 @@ mod tests {
type_: Some(ObjectType::Heap),
memory: MemRegion::new(64),
};
AbstractObject::Memory(obj_info)
AbstractObject(Arc::new(obj_info))
}
fn new_data(number: i64) -> Data {
......@@ -343,26 +307,38 @@ mod tests {
BitvectorDomain::Value(Bitvector::from_i64(number))
}
fn new_id(tid: &str, reg_name: &str) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new(tid),
AbstractLocation::Register(reg_name.into(), 64),
)
}
#[test]
fn abstract_object() {
let mut object = new_abstract_object();
let three = new_data(3);
let offset = bv(-15);
object.set_value(three, offset).unwrap();
object.set_value(three, &offset).unwrap();
assert_eq!(
object.get_value(Bitvector::from_i64(-16), 64),
Data::Top(64)
);
assert_eq!(object.get_value(Bitvector::from_i64(-15), 64), new_data(3));
object.set_value(new_data(4), bv(-12)).unwrap();
object.set_value(new_data(4), &bv(-12)).unwrap();
assert_eq!(
object.get_value(Bitvector::from_i64(-15), 64),
Data::Top(64)
);
object.merge_value(new_data(5), &bv(-12));
assert_eq!(
object.get_value(Bitvector::from_i64(-12), 64),
Data::Value(BitvectorDomain::new_top(64))
);
let mut other_object = new_abstract_object();
object.set_value(new_data(0), bv(0)).unwrap();
other_object.set_value(new_data(0), bv(0)).unwrap();
object.set_value(new_data(0), &bv(0)).unwrap();
other_object.set_value(new_data(0), &bv(0)).unwrap();
let merged_object = object.merge(&other_object);
assert_eq!(
merged_object.get_value(Bitvector::from_i64(-12), 64),
......@@ -373,4 +349,67 @@ mod tests {
new_data(0)
);
}
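#[test]
fn double_free_is_detected() {
    // Hypothetical extra test, assuming `new_abstract_object()` yields a unique,
    // still alive heap object: the first free succeeds, the second one reports
    // a possible double free.
    let mut object = new_abstract_object();
    assert!(object.mark_as_freed().is_ok());
    assert!(object.mark_as_freed().is_err());
}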
#[test]
fn replace_id() {
use std::collections::BTreeMap;
let mut object = new_abstract_object();
let mut target_map = BTreeMap::new();
target_map.insert(new_id("time_1", "RAX"), bv(20));
target_map.insert(new_id("time_234", "RAX"), bv(30));
target_map.insert(new_id("time_1", "RBX"), bv(40));
let pointer = PointerDomain::with_targets(target_map.clone());
object.set_value(pointer.into(), &bv(-15)).unwrap();
assert_eq!(object.get_referenced_ids().len(), 3);
object.replace_abstract_id(
&new_id("time_1", "RAX"),
&new_id("time_234", "RAX"),
&bv(10),
);
target_map.remove(&new_id("time_1", "RAX"));
let modified_pointer = PointerDomain::with_targets(target_map);
assert_eq!(
object.get_value(Bitvector::from_i64(-15), 64),
modified_pointer.into()
);
object.replace_abstract_id(
&new_id("time_1", "RBX"),
&new_id("time_234", "RBX"),
&bv(10),
);
let mut target_map = BTreeMap::new();
target_map.insert(new_id("time_234", "RAX"), bv(30));
target_map.insert(new_id("time_234", "RBX"), bv(50));
let modified_pointer = PointerDomain::with_targets(target_map);
assert_eq!(
object.get_value(Bitvector::from_i64(-15), 64),
modified_pointer.into()
);
}
#[test]
fn remove_ids() {
use std::collections::BTreeMap;
let mut object = new_abstract_object();
let mut target_map = BTreeMap::new();
target_map.insert(new_id("time_1", "RAX"), bv(20));
target_map.insert(new_id("time_234", "RAX"), bv(30));
target_map.insert(new_id("time_1", "RBX"), bv(40));
let pointer = PointerDomain::with_targets(target_map.clone());
object.set_value(pointer.into(), &bv(-15)).unwrap();
assert_eq!(object.get_referenced_ids().len(), 3);
let ids_to_remove = vec![new_id("time_1", "RAX"), new_id("time_23", "RBX")]
.into_iter()
.collect();
object.remove_ids(&ids_to_remove);
assert_eq!(
object.get_referenced_ids(),
&vec![new_id("time_234", "RAX"), new_id("time_1", "RBX")]
.into_iter()
.collect()
);
}
}
......@@ -5,17 +5,19 @@ use crate::bil::Bitvector;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
use std::ops::Deref;
use std::sync::Arc;
/// The list of all known abstract objects.
///
/// Each abstract object is unique in the sense that each pointer can only point to one abstract object.
/// If a pointer may point to two different abstract objects,
/// these two objects will be merged to one object.
/// Each abstract object is unique in the sense that each abstract identifier can only point to one abstract object.
/// However, an abstract object itself can be marked as non-unique
/// to indicate that it may represent more than one actual memory object.
/// Also, several abstract identifiers may point to the same abstract object.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct AbstractObjectList {
objects: Vec<Arc<AbstractObject>>,
/// The abstract objects
objects: Vec<AbstractObject>,
/// A map from an abstract identifier to the index of the object in the `self.objects` array
/// and the offset (as `BitvectorDomain`) inside the object that the identifier is pointing to.
ids: BTreeMap<AbstractIdentifier, (usize, BitvectorDomain)>,
}
......@@ -28,7 +30,7 @@ impl AbstractObjectList {
) -> AbstractObjectList {
let mut objects = Vec::new();
let stack_object = AbstractObject::new(ObjectType::Stack, address_bitsize);
objects.push(Arc::new(stack_object));
objects.push(stack_object);
let mut ids = BTreeMap::new();
ids.insert(
stack_id,
......@@ -39,18 +41,25 @@ impl AbstractObjectList {
/// Check the state of a memory object at a given address.
/// Returns `true` if at least one of the targets of the pointer is dangling.
/// May lead to false negatives, as objects with unknown object states are treated the same as alive objects.
pub fn is_dangling_pointer(&self, address: &Data) -> bool {
/// If `report_none_states` is `true`,
/// then objects with unknown states get reported if they are unique.
/// I.e. objects representing more than one actual object (e.g. an array of objects) will not get reported,
/// even if their state is unknown and `report_none_states` is `true`.
pub fn is_dangling_pointer(&self, address: &Data, report_none_states: bool) -> bool {
match address {
Data::Value(_) | Data::Top(_) => (),
Data::Pointer(pointer) => {
for (id, _offset) in pointer.iter_targets() {
for id in pointer.ids() {
let (object_index, _offset_id) = self.ids.get(id).unwrap();
if let AbstractObject::Memory(ref object) = *self.objects[*object_index] {
if object.state == Some(ObjectState::Dangling) {
match (report_none_states, self.objects[*object_index].get_state()) {
(_, Some(ObjectState::Dangling)) => return true,
(true, None) => {
if self.objects[*object_index].is_unique {
return true;
}
}
_ => (),
}
}
}
}
......@@ -60,15 +69,14 @@ impl AbstractObjectList {
/// Get the value at a given address.
/// If the address is not unique, merge the value of all possible addresses.
///
/// TODO: document when this function should return errors
/// Returns an error if the address is a `Data::Value`, i.e. not a pointer.
pub fn get_value(&self, address: &Data, size: BitSize) -> Result<Data, Error> {
match address {
Data::Value(value) => Err(anyhow!("Load from non-pointer value:\n{:?}", value)),
Data::Top(_) => Ok(Data::new_top(size)),
Data::Pointer(pointer) => {
// TODO: Document the design decisions behind the implementation!
let mut merged_value: Option<Data> = None;
for (id, offset_pointer_domain) in pointer.iter_targets() {
for (id, offset_pointer_domain) in pointer.targets() {
let (abstract_object_index, offset_identifier) = self.ids.get(id).unwrap();
let offset = offset_pointer_domain.clone() + offset_identifier.clone();
if let BitvectorDomain::Value(concrete_offset) = offset {
......@@ -86,31 +94,28 @@ impl AbstractObjectList {
break;
}
}
merged_value.ok_or_else(|| anyhow!("Pointer without targets encountered."))
merged_value.ok_or_else(|| panic!("Pointer without targets encountered."))
}
}
}
/// Set the value at a given address.
///
/// Returns an error if the given address has no targets.
/// If the address has more than one target, all targets are merged to one untracked object.
// TODO: Implement write-merging to still tracked objects!
/// If the address has more than one target,
/// we merge-write the value to all targets.
pub fn set_value(
&mut self,
pointer: PointerDomain<BitvectorDomain>,
value: Data,
) -> Result<(), Error> {
let mut target_object_set: BTreeSet<usize> = BTreeSet::new();
for (id, _offset) in pointer.iter_targets() {
for id in pointer.ids() {
target_object_set.insert(self.ids.get(id).unwrap().0);
}
if target_object_set.is_empty() {
return Err(anyhow!("Pointer without targets encountered"));
}
assert!(!target_object_set.is_empty());
if target_object_set.len() == 1 {
let mut target_offset: Option<BitvectorDomain> = None;
for (id, pointer_offset) in pointer.iter_targets() {
for (id, pointer_offset) in pointer.targets() {
let adjusted_offset = pointer_offset.clone() + self.ids.get(id).unwrap().1.clone();
target_offset = match target_offset {
Some(offset) => Some(offset.merge(&adjusted_offset)),
......@@ -121,88 +126,25 @@ impl AbstractObjectList {
.objects
.get_mut(*target_object_set.iter().next().unwrap())
.unwrap();
Arc::make_mut(object).set_value(value, target_offset.unwrap())?; // TODO: Write unit test whether this is correctly written to the self.objects vector!
object.set_value(value, &target_offset.unwrap())?;
} else {
// There is more than one object that the pointer may write to.
// We merge all targets to one untracked object
// TODO: Implement merging to a still tracked object!
// Get all pointer targets the object may point to
let mut inner_targets: BTreeSet<AbstractIdentifier> = BTreeSet::new();
for object in target_object_set.iter() {
inner_targets.append(
&mut self
.objects
.get(*object)
.unwrap()
.get_all_possible_pointer_targets(),
);
// We merge-write to all possible targets
for (id, offset) in pointer.targets() {
let (object_index, object_offset) = self.ids.get(id).unwrap();
let adjusted_offset = offset.clone() + object_offset.clone();
self.objects[*object_index].merge_value(value.clone(), &adjusted_offset);
}
// Generate the new (untracked) object that all other objects are merged to
let new_object = AbstractObject::Untracked(inner_targets);
// generate the new map from abstract identifier to index of corresponding memory object
let mut index_map = BTreeMap::new();
let mut new_object_vec: Vec<Arc<AbstractObject>> = Vec::new();
for old_index in 0..self.objects.len() {
if target_object_set.get(&old_index).is_none() {
index_map.insert(old_index, new_object_vec.len());
new_object_vec.push(self.objects.get(old_index).unwrap().clone());
}
}
new_object_vec.push(Arc::new(new_object));
let merged_object_index = new_object_vec.len() - 1;
for old_index in target_object_set {
index_map.insert(old_index, merged_object_index);
}
let mut new_id_map: BTreeMap<AbstractIdentifier, (usize, BitvectorDomain)> =
BTreeMap::new();
for (id, (old_index, offset)) in self.ids.iter() {
new_id_map.insert(id.clone(), (index_map[old_index], offset.clone()));
}
self.objects = new_object_vec;
self.ids = new_id_map;
// now we can do the actual write operation on the newly merged object
// the offset does not matter since the merged object is untracked anyway
Arc::make_mut(self.objects.get_mut(merged_object_index).unwrap())
.set_value(value, BitvectorDomain::new_top(pointer.bitsize()))?;
}
Ok(())
}
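// Sketch of the merge-write above (cf. the updated unit test below): a write
// through a pointer that may target both the stack and a heap object merges
// the value into both objects instead of collapsing them into one untracked
// object, so both objects remain tracked afterwards.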
pub fn merge(&self, other: &Self) -> Self {
let mut merged_objects = self.objects.clone();
let mut merged_ids = self.ids.clone();
for (other_id, (other_index, other_offset)) in other.ids.iter() {
if let Some((index, offset)) = merged_ids.get(&other_id) {
let (index, offset) = (*index, offset.clone());
merged_ids.insert(other_id.clone(), (index, offset.merge(&other_offset)));
if index < self.objects.len() {
// The object already existed in self, so we have to merge it with the object in other
merged_objects[index] =
Arc::new(merged_objects[index].merge(&other.objects[*other_index]));
// TODO: This is still inefficient, since we may end up merging the same objects more than once (if several ids point to it)
}
} else {
merged_objects.push(other.objects.get(*other_index).unwrap().clone());
merged_ids.insert(
other_id.clone(),
(merged_objects.len() - 1, other_offset.clone()),
);
}
}
// merge the underlying abstract objects.
AbstractObjectList {
objects: merged_objects,
ids: merged_ids,
}
}
/// Replace one abstract identifier with another one. Adjust offsets of all pointers accordingly.
///
/// **Example:**
/// Assume the old_id points to offset 0 in the corresponding memory object and the new_id points to offset -32.
/// Assume the `old_id` points to offset 0 in the corresponding memory object and the `new_id` points to offset -32.
/// Then the offset_adjustment is -32.
/// The offset_adjustment gets *added* to the base offset in self.memory.ids (so that it points to offset -32 in the memory object),
/// The offset_adjustment gets *added* to the base offset in `self.memory.ids` (so that it points to offset -32 in the memory object),
/// while it gets *subtracted* from all pointer values (so that they still point to the same spot in the corresponding memory object).
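///
/// Numeric sketch of the example: a cell at byte 8 of the object is addressed
/// as `old_id + 8` before the replacement and as `new_id + 40` afterwards,
/// since the offset_adjustment of -32 gets subtracted from the pointer offset
/// (8 - (-32) = 40), while the base offset stored for the ID becomes 0 + (-32) = -32.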
pub fn replace_abstract_id(
&mut self,
......@@ -210,19 +152,13 @@ impl AbstractObjectList {
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
let negative_offset = -offset_adjustment.clone();
for object in self.objects.iter_mut() {
Arc::make_mut(object).replace_abstract_id(
old_id,
new_id,
&(-offset_adjustment.clone()),
);
object.replace_abstract_id(old_id, new_id, &negative_offset);
}
if let Some((index, offset)) = self.ids.get(old_id) {
let index = *index;
// Note that we have to *subtract* the offset offset_adjustment to get the new offset,
// since the offset_adjustment gets added to all pointers.
// This way all pointers will still point to the same place in memory.
let new_offset = offset.clone() + offset_adjustment.clone();
let (index, offset) = (*index, offset.clone());
let new_offset = offset + offset_adjustment.clone();
self.ids.remove(old_id);
self.ids.insert(new_id.clone(), (index, new_offset));
}
......@@ -234,6 +170,9 @@ impl AbstractObjectList {
}
/// Add a new abstract object to the object list
///
/// If an object with the same ID already exists,
/// the object is marked as non-unique and merged with the newly created object.
pub fn add_abstract_object(
&mut self,
object_id: AbstractIdentifier,
......@@ -245,97 +184,69 @@ impl AbstractObjectList {
if let Some((index, offset)) = self.ids.get(&object_id) {
// If the identifier already exists, we have to assume that more than one object may be referred by this identifier.
let object = Arc::make_mut(&mut self.objects[*index]);
if let AbstractObject::Memory(object_info) = object {
object_info.is_unique = false;
}
let object = &mut self.objects[*index];
object.is_unique = false;
*object = object.merge(&new_object);
let index = *index;
let merged_offset = offset.merge(&initial_offset);
self.ids.insert(object_id, (index, merged_offset));
} else {
let index = self.objects.len();
self.objects.push(Arc::new(new_object));
self.objects.push(new_object);
self.ids.insert(object_id, (index, initial_offset));
}
}
/// return all ids that get referenced by the memory object pointed to by the given id
pub fn get_referenced_ids(&self, id: &AbstractIdentifier) -> BTreeSet<AbstractIdentifier> {
/// Return all IDs that get referenced by the memory object pointed to by the given ID.
pub fn get_referenced_ids(&self, id: &AbstractIdentifier) -> &BTreeSet<AbstractIdentifier> {
if let Some((index, _offset)) = self.ids.get(id) {
self.objects[*index].get_referenced_ids()
} else {
BTreeSet::new()
panic!("Abstract ID not associated to an object")
}
}
/// Remove all abstract identifiers not contained in the provided set of identifiers.
/// Then remove all objects no longer referenced by any identifier.
/// For abstract IDs not contained in the provided set of IDs
/// remove the mapping from the ID to the corresponding abstract object.
/// Then remove all objects no longer referenced by any ID.
///
/// This function does not remove any pointer targets in the contained abstract objects.
pub fn remove_unused_ids(&mut self, ids_to_keep: &BTreeSet<AbstractIdentifier>) {
let all_ids: BTreeSet<AbstractIdentifier> = self.ids.keys().cloned().collect();
let ids_to_remove = all_ids.difference(ids_to_keep);
for id in ids_to_remove {
self.ids.remove(id);
}
let referenced_objects: BTreeSet<usize> =
self.ids.values().map(|(index, _offset)| *index).collect();
if referenced_objects.len() != self.objects.len() {
// We have to remove some objects and map the object indices to new values
let mut new_object_list = Vec::new();
let mut index_map = BTreeMap::new();
for i in 0..self.objects.len() {
if referenced_objects.get(&i).is_some() {
index_map.insert(i, new_object_list.len());
new_object_list.push(self.objects[i].clone());
}
}
self.objects = new_object_list;
// map the object indices to their new values
for (index, _offset) in self.ids.values_mut() {
*index = *index_map.get(index).unwrap();
}
}
self.remove_unreferenced_objects();
}
/// Get all object ids
/// Get all object IDs.
pub fn get_all_object_ids(&self) -> BTreeSet<AbstractIdentifier> {
self.ids.keys().cloned().collect()
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
///
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
/// Returns either a non-empty list of detected errors (like possible double frees) or `Ok(())` if no errors were found.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &PointerDomain<BitvectorDomain>,
) -> Result<(), Vec<AbstractIdentifier>> {
let ids: BTreeSet<AbstractIdentifier> = object_pointer.ids().cloned().collect();
) -> Result<(), Vec<(AbstractIdentifier, Error)>> {
let ids: Vec<AbstractIdentifier> = object_pointer.ids().cloned().collect();
let mut possible_double_free_ids = Vec::new();
if ids.len() > 1 {
for id in ids {
let object = &mut self.objects[self.ids[&id].0];
if let AbstractObject::Memory(tracked_mem) = Arc::deref(object) {
if (tracked_mem.state != Some(ObjectState::Alive) && tracked_mem.is_unique)
|| tracked_mem.state == Some(ObjectState::Dangling)
{
// Possible double free detected
// TODO: Check rate of false positives.
// If too high, only mark those with explicit dangling state.
possible_double_free_ids.push(id.clone());
}
if let Err(error) = self.objects[self.ids[&id].0].mark_as_maybe_freed() {
possible_double_free_ids.push((id.clone(), error));
}
Arc::make_mut(object).set_state(None);
}
} else if let Some(id) = ids.iter().next() {
let object = &mut self.objects[self.ids[&id].0];
if let AbstractObject::Memory(tracked_mem) = Arc::deref(object) {
if tracked_mem.state != Some(ObjectState::Alive) {
// Possible double free detected
// TODO: Check rate of false positives.
// If too high, only mark those with explicit dangling state.
possible_double_free_ids.push(id.clone());
if let Err(error) = self.objects[self.ids[&id].0].mark_as_freed() {
possible_double_free_ids.push((id.clone(), error));
}
}
Arc::make_mut(object).set_state(Some(ObjectState::Dangling));
} else {
panic!("Pointer without targets encountered")
}
if possible_double_free_ids.is_empty() {
Ok(())
......@@ -344,25 +255,21 @@ impl AbstractObjectList {
}
}
/// Mark the memory object behind an abstract identifier as untracked.
/// Also add new possible reference targets to the object.
/// Assume that arbitrary writes happened to a memory object,
/// including adding pointers to targets contained in `new_possible_reference_targets` to it.
///
/// This is used as a very coarse approximation for function calls whose effect is unknown.
/// Since a function may spawn a new thread constantly writing to this memory object,
/// the content of the memory object may not become known later on.
/// The new reference targets are added because we also do not know whether the function adds pointers to the memory object.
pub fn mark_mem_object_as_untracked(
/// This is used as a coarse approximation for function calls whose effect is unknown.
/// Note that this may still underestimate the effect of a function call:
/// We do not assume that the state of the object changes (i.e. no memory freed), which may not be true.
/// We assume that pointers to the object are *not* given to other threads or the operating system,
/// which could result in arbitrary writes to the object even after the function call returned.
pub fn assume_arbitrary_writes_to_object(
&mut self,
object_id: &AbstractIdentifier,
new_possible_reference_targets: &BTreeSet<AbstractIdentifier>,
) {
let object_index = self.ids[object_id].0;
let reference_targets = self.objects[object_index]
.get_all_possible_pointer_targets()
.union(new_possible_reference_targets)
.cloned()
.collect();
self.objects[object_index] = Arc::new(AbstractObject::Untracked(reference_targets));
self.objects[object_index].assume_arbitrary_writes(new_possible_reference_targets);
}
/// Get the number of objects that are currently tracked.
......@@ -398,13 +305,10 @@ impl AbstractObjectList {
}
/// Remove the provided IDs as targets from all pointers in all objects.
/// Also forget whether the provided IDs point to objects in the object list.
///
/// This may leave objects without known IDs pointing to them.
/// This function does *not* trim these objects from the object list.
/// Also forget whether the provided IDs point to objects in the object list
/// and remove objects that no longer have any ID pointing at them.
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
for object in self.objects.iter_mut() {
let object = Arc::make_mut(object);
object.remove_ids(ids_to_remove);
}
self.ids = self
......@@ -418,10 +322,88 @@ impl AbstractObjectList {
}
})
.collect();
self.remove_unreferenced_objects();
}
/// Remove those objects from the object list that have no abstract ID pointing at them.
fn remove_unreferenced_objects(&mut self) {
let referenced_objects: BTreeSet<usize> =
self.ids.values().map(|(index, _offset)| *index).collect();
if referenced_objects.len() != self.objects.len() {
// We have to remove some objects and map the object indices to new values
let mut new_object_list = Vec::new();
let mut index_map = BTreeMap::new();
for i in 0..self.objects.len() {
if referenced_objects.get(&i).is_some() {
index_map.insert(i, new_object_list.len());
new_object_list.push(self.objects[i].clone());
}
}
self.objects = new_object_list;
// map the object indices to their new values
for (index, _offset) in self.ids.values_mut() {
*index = *index_map.get(index).unwrap();
}
}
}
}
impl AbstractDomain for AbstractObjectList {
/// Merge two abstract object lists.
///
/// Right now this function is only sound if for each abstract object there exists only one ID pointing to it.
/// Violations of this will be detected and result in panics.
/// Further investigation into the problem is needed
/// to decide how to correctly represent and handle cases
/// where more than one ID should point to the same object.
fn merge(&self, other: &Self) -> Self {
let mut merged_objects = self.objects.clone();
let mut merged_ids = self.ids.clone();
for object_index in 0..other.objects.len() {
if other
.ids
.values()
.filter(|(index, _offset)| *index == object_index)
.count()
> 1
{
unimplemented!("Object list with more than one ID pointing to the same object encountered. This is not yet supported.")
}
}
for (other_id, (other_index, other_offset)) in other.ids.iter() {
if let Some((index, offset)) = merged_ids.get(&other_id) {
let (index, offset) = (*index, offset.clone());
merged_ids.insert(other_id.clone(), (index, offset.merge(&other_offset)));
if index < self.objects.len() {
// The object already existed in self, so we have to merge it with the object in other
merged_objects[index] =
merged_objects[index].merge(&other.objects[*other_index]);
}
} else {
merged_objects.push(other.objects.get(*other_index).unwrap().clone());
merged_ids.insert(
other_id.clone(),
(merged_objects.len() - 1, other_offset.clone()),
);
}
}
AbstractObjectList {
objects: merged_objects,
ids: merged_ids,
}
}
/// Always returns `false`, since abstract object lists have no *Top* element.
fn is_top(&self) -> bool {
false
}
}
impl AbstractObjectList {
/// Get a more compact json-representation of the abstract object list.
/// Intended for pretty printing, not usable for serialization/deserialization.
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let mut object_list = Vec::new();
......@@ -463,8 +445,11 @@ mod tests {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(name: String) -> AbstractIdentifier {
AbstractIdentifier::new(Tid::new("time0"), AbstractLocation::Register(name, 64))
fn new_id(name: &str) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new("time0"),
AbstractLocation::Register(name.into(), 64),
)
}
#[test]
......@@ -535,10 +520,15 @@ mod tests {
merged
.get_value(&Data::Pointer(pointer.clone()), 64)
.unwrap(),
Data::new_top(64)
Data::Value(BitvectorDomain::new_top(64))
);
assert_eq!(
merged
.get_value(&Data::Pointer(heap_pointer.clone()), 64)
.unwrap(),
Data::Value(bv(3))
);
// assert_eq!(merged.get_value(&Data::Pointer(heap_pointer.clone()), 64).unwrap(), Data::Value(bv(3)));
assert_eq!(merged.objects.len(), 1); // This will fail in the future when the set_value function does no automatic merging to untracked objects anymore.
assert_eq!(merged.objects.len(), 2);
other_obj_list
.set_value(pointer.clone(), Data::Pointer(heap_pointer.clone()))
......@@ -594,4 +584,17 @@ mod tests {
Some(crate::analysis::pointer_inference::object::ObjectState::Dangling)
);
}
#[test]
fn append_unknown_objects_test() {
let mut obj_list = AbstractObjectList::from_stack_id(new_id("stack"), 64);
let mut other_obj_list = AbstractObjectList::from_stack_id(new_id("stack"), 64);
other_obj_list.add_abstract_object(new_id("heap_obj"), bv(0).into(), ObjectType::Heap, 64);
obj_list.append_unknown_objects(&other_obj_list);
assert_eq!(obj_list.objects.len(), 2);
assert!(obj_list.ids.get(&new_id("stack")).is_some());
assert!(obj_list.ids.get(&new_id("heap_obj")).is_some());
}
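#[test]
fn add_abstract_object_twice_sketch() {
    // Hypothetical extra test: registering a second object under an already
    // existing ID merges the two objects and marks the result as non-unique.
    let mut obj_list = AbstractObjectList::from_stack_id(new_id("stack"), 64);
    obj_list.add_abstract_object(new_id("heap_obj"), bv(0).into(), ObjectType::Heap, 64);
    obj_list.add_abstract_object(new_id("heap_obj"), bv(0).into(), ObjectType::Heap, 64);
    assert_eq!(obj_list.objects.len(), 2); // the stack object plus one heap object
    assert!(!obj_list.objects[1].is_unique);
}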
}
use super::object_list::AbstractObjectList;
use super::Data;
use crate::abstract_domain::*;
use crate::bil::*;
use crate::prelude::*;
use crate::term::symbol::ExternSymbol;
use std::collections::{BTreeMap, BTreeSet};
/// This struct contains all information known about the state at a specific point of time.
///
/// Notes:
/// - The *stack_id* is the identifier of the current stack frame.
/// Only reads and writes with offset less than 0 are permitted for it.
/// - The *caller_stack_ids* contain all known identifiers of caller stack frames.
/// If a read at an offset >= 0 of the current stack frame happens, it is considered
/// a merge read of all caller stack frames.
/// A write at an offset >= 0 of the current stack frame writes to all caller stack frames.
/// - The caller_stack_ids are given by the stack pointer at the time of the call.
/// This way we can distinguish caller stack frames even if one function calls another several times.
/// - The ids_known_to_caller contains all IDs directly known to some caller.
/// Objects referenced by these IDs cannot be removed from the state, as some caller may still hold a reference to them.
/// This is not recursive, i.e. IDs known only to the caller of the caller are not included.
/// If a caller does not pass a reference to a memory object to the callee (directly or indirectly),
/// its ID will not be included in ids_known_to_caller.
/// This way the caller can check on return, which memory objects could not have been accessed by the callee.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct State {
register: BTreeMap<Variable, Data>,
pub memory: AbstractObjectList,
pub stack_id: AbstractIdentifier,
pub caller_stack_ids: BTreeSet<AbstractIdentifier>,
pub ids_known_to_caller: BTreeSet<AbstractIdentifier>,
}
impl State {
/// Create a new state that contains only one memory object corresponding to the stack.
/// The stack offset will be set to zero.
pub fn new(stack_register: &Variable, function_tid: Tid) -> State {
let stack_id = AbstractIdentifier::new(
function_tid,
AbstractLocation::from_var(stack_register).unwrap(),
);
let mut register: BTreeMap<Variable, Data> = BTreeMap::new();
register.insert(
stack_register.clone(),
PointerDomain::new(
stack_id.clone(),
Bitvector::zero((stack_register.bitsize().unwrap() as usize).into()).into(),
)
.into(),
);
State {
register,
memory: AbstractObjectList::from_stack_id(
stack_id.clone(),
stack_register.bitsize().unwrap(),
),
stack_id,
caller_stack_ids: BTreeSet::new(),
ids_known_to_caller: BTreeSet::new(),
}
}
/// Get the value of a register or Top() if no value is known.
///
/// Returns an error if the variable is not a register.
pub fn get_register(&self, variable: &Variable) -> Result<Data, Error> {
if let Some(data) = self.register.get(variable) {
Ok(data.clone())
} else {
Ok(Data::new_top(variable.bitsize()?))
}
}
/// Get the value of a register by its name.
///
/// Returns None if no value is set for the register.
pub fn get_register_by_name(&self, reg_name: &str) -> Option<Data> {
self.register.iter().find_map(|(key, value)| {
if key.name == reg_name {
Some(value.clone())
} else {
None
}
})
}
/// Set the value of a register.
///
/// Returns an error if the variable is not a register.
pub fn set_register(&mut self, variable: &Variable, value: Data) -> Result<(), Error> {
if let variable::Type::Immediate(_bitsize) = variable.type_ {
if !value.is_top() {
self.register.insert(variable.clone(), value);
} else {
self.register.remove(variable);
}
Ok(())
} else {
Err(anyhow!("Variable is not a register type"))
}
}
/// Evaluate expression on the given state and write the result to the target register.
pub fn handle_register_assign(
&mut self,
target: &Variable,
expression: &Expression,
) -> Result<(), Error> {
if let Expression::Var(variable) = expression {
if target == variable {
// The assignment does nothing. It occurs as the "do nothing" path in conditional stores.
// Needs special handling, since it is the only case where the target is allowed
// to denote memory instead of a register.
return Ok(());
}
}
match self.eval(expression) {
Ok(new_value) => {
self.set_register(target, new_value)?;
Ok(())
}
Err(err) => {
self.set_register(target, Data::new_top(target.bitsize()?))?;
Err(err)
}
}
}
/// Clear all non-callee-saved registers from the state.
/// This automatically also removes all virtual registers.
/// The parameter is a list of callee-saved register names.
pub fn clear_non_callee_saved_register(&mut self, callee_saved_register_names: &[String]) {
let register = self
.register
.iter()
.filter_map(|(register, value)| {
if callee_saved_register_names
.iter()
.any(|reg_name| **reg_name == register.name)
{
Some((register.clone(), value.clone()))
} else {
None
}
})
.collect();
self.register = register;
}
/// Evaluate the value of an expression in the current state.
pub fn eval(&self, expression: &Expression) -> Result<Data, Error> {
use Expression::*;
match expression {
Var(variable) => self.get_register(&variable),
Const(bitvector) => Ok(bitvector.clone().into()),
// TODO: implement handling of endianness for loads and writes!
Load {
memory: _,
address,
endian: _,
size,
} => Ok(self
.memory
.get_value(&self.adjust_pointer_for_read(&self.eval(address)?), *size)?),
Store { .. } => {
// This does not return an error, but panics outright.
// If this returned an error, it would hide a side effect, which must not happen.
panic!("Store expression cannot be evaluated!")
}
BinOp { op, lhs, rhs } => {
if *op == crate::bil::BinOpType::XOR && lhs == rhs {
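// `x XOR x` is always zero, even when the value of `x` is unknown.
// Compilers commonly emit this idiom to zero a register.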
// TODO: implement bitsize() for expressions to remove the state.eval(lhs) hack
return Ok(Data::Value(BitvectorDomain::Value(Bitvector::zero(
apint::BitWidth::new(self.eval(lhs)?.bitsize() as usize)?,
))));
}
let (left, right) = (self.eval(lhs)?, self.eval(rhs)?);
Ok(left.bin_op(*op, &right))
}
UnOp { op, arg } => Ok(self.eval(arg)?.un_op(*op)),
Cast { kind, width, arg } => Ok(self.eval(arg)?.cast(*kind, *width)),
Let {
var: _,
bound_exp: _,
body_exp: _,
} => Err(anyhow!("Let binding expression handling not implemented")),
Unknown { description, type_ } => {
if let crate::bil::variable::Type::Immediate(bitsize) = type_ {
Ok(Data::new_top(*bitsize))
} else {
Err(anyhow!("Unknown Memory operation: {}", description))
}
}
IfThenElse {
condition: _,
true_exp,
false_exp,
} => Ok(self.eval(true_exp)?.merge(&self.eval(false_exp)?)),
Extract {
low_bit,
high_bit,
arg,
} => Ok(self.eval(arg)?.extract(*low_bit, *high_bit)),
Concat { left, right } => Ok(self.eval(left)?.concat(&self.eval(right)?)),
}
}
/// Check if an expression contains a use-after-free
pub fn contains_access_of_dangling_memory(&self, expression: &Expression) -> bool {
use Expression::*;
match expression {
Var(_) | Const(_) | Unknown { .. } => false,
Load {
address: address_exp,
..
} => {
if let Ok(pointer) = self.eval(address_exp) {
self.memory.is_dangling_pointer(&pointer)
|| self.contains_access_of_dangling_memory(address_exp)
} else {
false
}
}
Store {
memory: _,
address: address_exp,
value: value_exp,
..
} => {
let address_check = if let Ok(pointer) = self.eval(address_exp) {
self.memory.is_dangling_pointer(&pointer)
} else {
false
};
address_check
|| self.contains_access_of_dangling_memory(address_exp)
|| self.contains_access_of_dangling_memory(value_exp)
}
BinOp { op: _, lhs, rhs } => {
self.contains_access_of_dangling_memory(lhs)
|| self.contains_access_of_dangling_memory(rhs)
}
UnOp { op: _, arg } => self.contains_access_of_dangling_memory(arg),
Cast {
kind: _,
width: _,
arg,
} => self.contains_access_of_dangling_memory(arg),
Let {
var: _,
bound_exp,
body_exp,
} => {
self.contains_access_of_dangling_memory(bound_exp)
|| self.contains_access_of_dangling_memory(body_exp)
}
IfThenElse {
condition,
true_exp,
false_exp,
} => {
self.contains_access_of_dangling_memory(condition)
|| self.contains_access_of_dangling_memory(true_exp)
|| self.contains_access_of_dangling_memory(false_exp)
}
Extract {
low_bit: _,
high_bit: _,
arg,
} => self.contains_access_of_dangling_memory(arg),
Concat { left, right } => {
self.contains_access_of_dangling_memory(left)
|| self.contains_access_of_dangling_memory(right)
}
}
}
pub fn store_value(&mut self, address: &Data, value: &Data) -> Result<(), Error> {
if let Data::Pointer(pointer) = self.adjust_pointer_for_read(address) {
// TODO: This is a very inexact shortcut, as this write will unnecessarily merge caller memory regions.
// A more precise solution would write to every caller memory region separately,
// but would also need to check first whether the target memory region is unique or not.
self.memory.set_value(pointer, value.clone())?;
Ok(())
} else {
// TODO: Implement recognition of stores to global memory.
// Needs implementation of reads from global data first.
Err(anyhow!("Memory write to non-pointer data"))
}
}
/// Write a value to the address one gets when evaluating the address expression.
pub fn write_to_address(&mut self, address: &Expression, value: &Data) -> Result<(), Error> {
// TODO: Depending on the separation logic, some memory may need to be invalidated in the error case.
match self.eval(address) {
Ok(address_data) => self.store_value(&address_data, value),
Err(err) => Err(err),
}
}
/// Evaluate the given store expression and update the state accordingly.
///
/// The function panics if given anything other than a store expression.
pub fn handle_store_exp(&mut self, store_exp: &Expression) -> Result<(), Error> {
if let Expression::Store {
memory: _,
address,
value,
endian: _,
size,
} = store_exp
{
let data = self.eval(value).unwrap_or_else(|_| Data::new_top(*size));
assert_eq!(data.bitsize(), *size);
// TODO: At the moment, both memory and endianness are ignored. Change that!
self.write_to_address(address, &data)
} else {
panic!("Expected store expression")
}
}
/// Mark those parameter values of an extern function call that are passed on the stack
/// as unknown data (since the function may modify them).
pub fn clear_stack_parameter(&mut self, extern_call: &ExternSymbol) -> Result<(), Error> {
// TODO: This needs a unit test to check whether stack parameters are cleared as expected!
let mut result_log = Ok(());
for arg in &extern_call.arguments {
match &arg.location {
Expression::Var(_) => {}
location_expression => {
let arg_size = arg
.var
.bitsize()
.expect("Encountered argument with unknown size");
let data_top = Data::new_top(arg_size);
if let Err(err) = self.write_to_address(location_expression, &data_top) {
result_log = Err(err);
}
}
}
}
// We only return the last error encountered.
result_log
}
/// Merge two states.
pub fn merge(&self, other: &Self) -> Self {
assert_eq!(self.stack_id, other.stack_id);
let mut merged_register = BTreeMap::new();
for (register, other_value) in other.register.iter() {
if let Some(value) = self.register.get(register) {
let merged_value = value.merge(other_value);
if !merged_value.is_top() {
// We only have to keep non-top elements.
merged_register.insert(register.clone(), merged_value);
}
}
}
let merged_memory_objects = self.memory.merge(&other.memory);
State {
register: merged_register,
memory: merged_memory_objects,
stack_id: self.stack_id.clone(),
caller_stack_ids: self
.caller_stack_ids
.union(&other.caller_stack_ids)
.cloned()
.collect(),
ids_known_to_caller: self
.ids_known_to_caller
.union(&other.ids_known_to_caller)
.cloned()
.collect(),
}
}
/// If the pointer contains a reference to the stack with offset >= 0, replace it with a pointer
/// pointing to all possible caller ids.
fn adjust_pointer_for_read(&self, address: &Data) -> Data {
// TODO: There is a rare special case that is not handled correctly
// and might need a change in the way caller_ids get tracked to fix:
// If no caller_id is present, one can read (and write) to addresses on the stack with positive offset.
// But if such a state gets merged with a state that has caller_ids,
// then the values at positive offsets get overshadowed by the new callers
// and do not get properly merged with the values from the other callers!
if let Data::Pointer(pointer) = address {
let mut new_targets = BTreeMap::new();
for (id, offset) in pointer.iter_targets() {
if *id == self.stack_id {
match offset {
BitvectorDomain::Value(offset_val) => {
if offset_val.try_to_i64().unwrap() >= 0
&& !self.caller_stack_ids.is_empty()
{
for caller_id in self.caller_stack_ids.iter() {
new_targets.insert(caller_id.clone(), offset.clone());
}
// Note that the id of the current stack frame was *not* added.
} else {
new_targets.insert(id.clone(), offset.clone());
}
}
BitvectorDomain::Top(_bitsize) => {
for caller_id in self.caller_stack_ids.iter() {
new_targets.insert(caller_id.clone(), offset.clone());
}
// Note that we also add the id of the current stack frame
new_targets.insert(id.clone(), offset.clone());
}
}
} else {
new_targets.insert(id.clone(), offset.clone());
}
}
Data::Pointer(PointerDomain::with_targets(new_targets))
} else {
address.clone()
}
}
/// Replace all occurrences of old_id with new_id and adjust offsets accordingly.
/// This is needed to replace stack/caller IDs on call and return instructions.
///
/// **Example:**
/// Assume the old_id points to offset 0 in the corresponding memory object and the new_id points to offset -32.
/// Then the offset_adjustment is -32.
/// The offset_adjustment gets *added* to the base offset in self.memory.ids (so that it points to offset -32 in the memory object),
/// while it gets *subtracted* from all pointer values (so that they still point to the same spot in the corresponding memory object).
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
// TODO: This function does not adjust stack frame/caller stack frame relations!
// Refactor so that the corresponding logic is contained in State.
// Else this function can be used to generate invalid state on improper use!
for register_data in self.register.values_mut() {
register_data.replace_abstract_id(old_id, new_id, &(-offset_adjustment.clone()));
}
self.memory
.replace_abstract_id(old_id, new_id, offset_adjustment);
if &self.stack_id == old_id {
self.stack_id = new_id.clone();
}
if self.caller_stack_ids.get(old_id).is_some() {
self.caller_stack_ids.remove(old_id);
self.caller_stack_ids.insert(new_id.clone());
}
if self.ids_known_to_caller.get(old_id).is_some() {
self.ids_known_to_caller.remove(old_id);
self.ids_known_to_caller.insert(new_id.clone());
}
}
pub fn remove_unreferenced_objects(&mut self) {
// get all referenced ids
let mut referenced_ids = BTreeSet::new();
for (_reg_name, data) in self.register.iter() {
referenced_ids.append(&mut data.referenced_ids());
}
referenced_ids.insert(self.stack_id.clone());
referenced_ids.append(&mut self.caller_stack_ids.clone());
referenced_ids.append(&mut self.ids_known_to_caller.clone());
referenced_ids = self.add_recursively_referenced_ids_to_id_set(referenced_ids);
// remove unreferenced ids
self.memory.remove_unused_ids(&referenced_ids);
}
pub fn add_recursively_referenced_ids_to_id_set(
&self,
mut ids: BTreeSet<AbstractIdentifier>,
) -> BTreeSet<AbstractIdentifier> {
let mut unsearched_ids = ids.clone();
while let Some(id) = unsearched_ids.iter().next() {
let id = id.clone();
unsearched_ids.remove(&id);
let memory_ids = self.memory.get_referenced_ids(&id);
for mem_id in memory_ids {
if ids.get(&mem_id).is_none() {
ids.insert(mem_id.clone());
unsearched_ids.insert(mem_id);
}
}
}
ids
}
/// Merge the callee stack with the caller stack.
///
/// This deletes the pointer from the callee_id to the corresponding memory object
/// and updates all other references pointing to the callee_id to point to the caller_id.
/// The offset adjustment is handled as in `replace_abstract_id`.
///
/// Note that right now the content of the callee memory object is not merged into the caller memory object.
/// In general this is the correct behaviour as the content below the stack pointer should be considered uninitialized memory after returning to the caller.
/// TODO: Check whether compilers may deviate from this convention when optimizing aggressively.
/// TODO: Also merge the memory objects!
// TODO: write unit tests
pub fn merge_callee_stack_to_caller_stack(
&mut self,
callee_id: &AbstractIdentifier,
caller_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
self.memory.remove_object_pointer(callee_id);
self.replace_abstract_id(callee_id, caller_id, offset_adjustment);
// TODO: Add a check that makes sure no other ids point to the now obsolete callee stack object!
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
///
/// If this may cause double frees (i.e. the object in question may have been freed already),
/// an error with the list of possibly already freed objects is returned.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &PointerDomain<BitvectorDomain>,
) -> Result<(), Vec<AbstractIdentifier>> {
self.memory.mark_mem_object_as_freed(object_pointer)
}
/// Remove all virtual registers from the state.
/// This should only be done in cases where it is known that no virtual registers can be alive.
/// Example: At the start of a basic block no virtual registers should be alive.
pub fn remove_virtual_register(&mut self) {
self.register = self
.register
.clone()
.into_iter()
.filter(|(register, _value)| !register.is_temp)
.collect();
}
/// Recursively remove all caller_stack_ids not corresponding to the given caller.
pub fn remove_other_caller_stack_ids(&mut self, caller_id: &AbstractIdentifier) {
let mut ids_to_remove = self.caller_stack_ids.clone();
ids_to_remove.remove(caller_id);
for register_value in self.register.values_mut() {
register_value.remove_ids(&ids_to_remove); // TODO: This may leave *Top* elements in the register_value map. Should I remove them?
}
self.memory.remove_ids(&ids_to_remove);
self.caller_stack_ids = BTreeSet::new();
self.caller_stack_ids.insert(caller_id.clone());
self.ids_known_to_caller = self
.ids_known_to_caller
.difference(&ids_to_remove)
.cloned()
.collect();
}
/// Add those objects from the caller_state to self that are not known to self.
///
/// Since self does not know these objects, we assume that the current function could not have accessed
/// them in any way during execution.
/// This means they are unchanged from the moment of the call until the return from the call,
/// thus we can simply copy their object-state from the moment of the call.
pub fn readd_caller_objects(&mut self, caller_state: &State) {
self.memory.append_unknown_objects(&caller_state.memory);
}
}
impl State {
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let mut state_map = Map::new();
let register = self
.register
.iter()
.map(|(var, data)| (var.name.clone(), data.to_json_compact()))
.collect();
let register = Value::Object(register);
state_map.insert("register".into(), register);
state_map.insert("memory".into(), self.memory.to_json_compact());
state_map.insert(
"stack_id".into(),
Value::String(format!("{}", self.stack_id)),
);
state_map.insert(
"caller_stack_ids".into(),
Value::Array(
self.caller_stack_ids
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
state_map.insert(
"ids_known_to_caller".into(),
Value::Array(
self.ids_known_to_caller
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
Value::Object(state_map)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(name: String) -> AbstractIdentifier {
AbstractIdentifier::new(Tid::new("time0"), AbstractLocation::Register(name, 64))
}
fn register(name: &str) -> Variable {
Variable {
name: name.into(),
type_: crate::bil::variable::Type::Immediate(64),
is_temp: false,
}
}
fn reg_add(name: &str, value: i64) -> Expression {
Expression::BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Expression::Var(register(name))),
rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
}
}
fn reg_sub(name: &str, value: i64) -> Expression {
Expression::BinOp {
op: BinOpType::MINUS,
lhs: Box::new(Expression::Var(register(name))),
rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
}
}
fn store_exp(address: Expression, value: Expression) -> Expression {
let mem_var = Variable {
name: "mem".into(),
type_: crate::bil::variable::Type::Memory {
addr_size: 64,
elem_size: 64,
},
is_temp: false,
};
Expression::Store {
memory: Box::new(Expression::Var(mem_var)),
address: Box::new(address),
value: Box::new(value),
endian: Endianness::LittleEndian,
size: 64,
}
}
fn load_exp(address: Expression) -> Expression {
let mem_var = Variable {
name: "mem".into(),
type_: crate::bil::variable::Type::Memory {
addr_size: 64,
elem_size: 64,
},
is_temp: false,
};
Expression::Load {
memory: Box::new(Expression::Var(mem_var)),
address: Box::new(address),
endian: Endianness::LittleEndian,
size: 64,
}
}
#[test]
fn state() {
use crate::analysis::pointer_inference::object::*;
use crate::bil::Expression::*;
let mut state = State::new(&register("RSP"), Tid::new("time0"));
let stack_id = new_id("RSP".into());
let stack_addr = Data::Pointer(PointerDomain::new(stack_id.clone(), bv(8)));
state
.store_value(&stack_addr, &Data::Value(bv(42)))
.unwrap();
state.register.insert(register("RSP"), stack_addr.clone());
let load_expr = Load {
memory: Box::new(Var(register("RSP"))), // This is wrong, but the memory var is not checked at the moment (since we have only the one for RAM)
address: Box::new(Var(register("RSP"))),
endian: Endianness::LittleEndian,
size: 64 as BitSize,
};
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(42)));
let mut other_state = State::new(&register("RSP"), Tid::new("time0"));
state.register.insert(register("RAX"), Data::Value(bv(42)));
other_state
.register
.insert(register("RSP"), stack_addr.clone());
other_state
.register
.insert(register("RAX"), Data::Value(bv(42)));
other_state
.register
.insert(register("RBX"), Data::Value(bv(35)));
let merged_state = state.merge(&other_state);
assert_eq!(merged_state.register[&register("RAX")], Data::Value(bv(42)));
assert_eq!(merged_state.register.get(&register("RBX")), None);
assert_eq!(merged_state.eval(&load_expr).unwrap(), Data::new_top(64));
// Test pointer adjustment on reads
state
.memory
.add_abstract_object(new_id("caller".into()), bv(0), ObjectType::Stack, 64);
state.caller_stack_ids.insert(new_id("caller".into()));
state
.store_value(&stack_addr, &Data::Value(bv(15)))
.unwrap();
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("caller".into()), bv(8))),
64
)
.unwrap(),
Data::Value(bv(15))
);
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(15)));
// Test replace_abstract_id
let pointer = Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-16)));
state.register.insert(register("RSP"), pointer.clone());
state.store_value(&pointer, &Data::Value(bv(7))).unwrap();
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(7)));
state.replace_abstract_id(&stack_id, &new_id("callee".into()), &bv(-8));
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(7)));
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("callee".into()), bv(-8))),
64
)
.unwrap(),
Data::Value(bv(7))
);
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("callee".into()), bv(-16))),
64
)
.unwrap(),
Data::new_top(64)
);
state
.memory
.add_abstract_object(new_id("heap_obj".into()), bv(0), ObjectType::Heap, 64);
assert_eq!(state.memory.get_num_objects(), 3);
state.remove_unreferenced_objects();
assert_eq!(state.memory.get_num_objects(), 2);
}
#[test]
fn handle_store() {
use crate::bil::Expression::*;
let mut state = State::new(&register("RSP"), Tid::new("time0"));
let stack_id = new_id("RSP".into());
assert_eq!(
state.eval(&Var(register("RSP"))).unwrap(),
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(0)))
);
state
.handle_register_assign(&register("RSP"), &reg_sub("RSP", 32))
.unwrap();
assert_eq!(
state.eval(&Var(register("RSP"))).unwrap(),
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-32)))
);
state
.handle_register_assign(&register("RSP"), &reg_add("RSP", -8))
.unwrap();
assert_eq!(
state.eval(&Var(register("RSP"))).unwrap(),
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-40)))
);
state
.handle_store_exp(&store_exp(reg_add("RSP", 8), Const(Bitvector::from_i64(1))))
.unwrap();
state
.handle_store_exp(&store_exp(reg_sub("RSP", 8), Const(Bitvector::from_i64(2))))
.unwrap();
state
.handle_store_exp(&store_exp(
reg_add("RSP", -16),
Const(Bitvector::from_i64(3)),
))
.unwrap();
state
.handle_register_assign(&register("RSP"), &reg_sub("RSP", 4))
.unwrap();
assert_eq!(
state.eval(&load_exp(reg_add("RSP", 12))).unwrap(),
bv(1).into()
);
assert_eq!(
state.eval(&load_exp(reg_sub("RSP", 4))).unwrap(),
bv(2).into()
);
assert_eq!(
state.eval(&load_exp(reg_add("RSP", -12))).unwrap(),
bv(3).into()
);
}
}
use super::*;
impl State {
/// Get the value of a register or Top() if no value is known.
///
/// Returns an error if the variable is not a register.
pub fn get_register(&self, variable: &Variable) -> Result<Data, Error> {
if let Some(data) = self.register.get(variable) {
Ok(data.clone())
} else {
Ok(Data::new_top(variable.bitsize()?))
}
}
/// Get the value of a register by its name.
///
/// Returns None if no value is set for the register.
pub fn get_register_by_name(&self, reg_name: &str) -> Option<Data> {
self.register.iter().find_map(|(key, value)| {
if key.name == reg_name {
Some(value.clone())
} else {
None
}
})
}
/// Set the value of a register.
///
/// Returns an error if the variable is not a register.
pub fn set_register(&mut self, variable: &Variable, value: Data) -> Result<(), Error> {
if let variable::Type::Immediate(_bitsize) = variable.type_ {
if !value.is_top() {
self.register.insert(variable.clone(), value);
} else {
self.register.remove(variable);
}
Ok(())
} else {
Err(anyhow!("Variable is not a register type"))
}
}
/// Evaluate expression on the given state and write the result to the target register.
pub fn handle_register_assign(
&mut self,
target: &Variable,
expression: &Expression,
) -> Result<(), Error> {
if let Expression::Var(variable) = expression {
if target == variable {
// The assignment does nothing. This occurs as the "do nothing" path in conditional stores.
// It needs special handling, since it is the only case where the target is allowed
// to denote memory instead of a register.
return Ok(());
}
}
match self.eval(expression) {
Ok(new_value) => {
self.set_register(target, new_value)?;
Ok(())
}
Err(err) => {
self.set_register(target, Data::new_top(target.bitsize()?))?;
Err(err)
}
}
}
/// Store `value` at the given `address`.
pub fn store_value(&mut self, address: &Data, value: &Data) -> Result<(), Error> {
// If the address is a unique caller stack address, write to *all* caller stacks.
if let Some(offset) = self.unwrap_offset_if_caller_stack_address(address) {
let caller_addresses: Vec<_> = self
.caller_stack_ids
.iter()
.map(|caller_stack_id| {
PointerDomain::new(caller_stack_id.clone(), offset.clone()).into()
})
.collect();
let mut result = Ok(());
for address in caller_addresses {
if let Err(err) = self.store_value(&address, value) {
result = Err(err);
}
}
// Note that this only returns the last error that was detected.
result
} else if let Data::Pointer(pointer) = self.adjust_pointer_for_read(address) {
self.memory.set_value(pointer, value.clone())?;
Ok(())
} else {
// TODO: Implement recognition of stores to global memory.
Err(anyhow!("Memory write to non-pointer data"))
}
}
/// Write a value to the address one gets when evaluating the address expression.
pub fn write_to_address(&mut self, address: &Expression, value: &Data) -> Result<(), Error> {
match self.eval(address) {
Ok(address_data) => self.store_value(&address_data, value),
Err(err) => Err(err),
}
}
/// Evaluate the given store expression and update the state accordingly.
///
/// The function panics if given anything other than a store expression.
pub fn handle_store_exp(&mut self, store_exp: &Expression) -> Result<(), Error> {
if let Expression::Store {
memory: _,
address,
value,
endian: _,
size,
} = store_exp
{
match self.eval(value) {
Ok(data) => {
assert_eq!(data.bitsize(), *size);
self.write_to_address(address, &data)
}
Err(err) => {
// we still need to write to the target location before reporting the error
self.write_to_address(address, &Data::new_top(*size))?;
Err(err)
}
}
} else {
panic!("Expected store expression")
}
}
/// If the pointer contains a reference to the stack with offset >= 0, replace it with a pointer
/// pointing to all possible caller IDs.
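/// A minimal sketch of the redirection, assuming the `bv` helper from the test module:
/// ```ignore
/// // With caller_stack_ids == {caller_id}, a read through (stack_id, +8)
/// // is redirected to (caller_id, +8) and the current stack_id is dropped.
/// let address = Data::Pointer(PointerDomain::new(state.stack_id.clone(), bv(8)));
/// let adjusted = state.adjust_pointer_for_read(&address);
/// // Pointers with negative offsets like (stack_id, -8) are returned unchanged.
/// ```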
fn adjust_pointer_for_read(&self, address: &Data) -> Data {
if let Data::Pointer(pointer) = address {
let mut new_targets = BTreeMap::new();
for (id, offset) in pointer.targets() {
if *id == self.stack_id {
match offset {
BitvectorDomain::Value(offset_val) => {
if offset_val.try_to_i64().unwrap() >= 0
&& !self.caller_stack_ids.is_empty()
{
for caller_id in self.caller_stack_ids.iter() {
new_targets.insert(caller_id.clone(), offset.clone());
}
// Note that the id of the current stack frame was *not* added.
} else {
new_targets.insert(id.clone(), offset.clone());
}
}
BitvectorDomain::Top(_bitsize) => {
for caller_id in self.caller_stack_ids.iter() {
new_targets.insert(caller_id.clone(), offset.clone());
}
// Note that we also add the id of the current stack frame
new_targets.insert(id.clone(), offset.clone());
}
}
} else {
new_targets.insert(id.clone(), offset.clone());
}
}
Data::Pointer(PointerDomain::with_targets(new_targets))
} else {
address.clone()
}
}
/// Evaluate the value of an expression in the current state
pub fn eval(&self, expression: &Expression) -> Result<Data, Error> {
use Expression::*;
match expression {
Var(variable) => self.get_register(&variable),
Const(bitvector) => Ok(bitvector.clone().into()),
// TODO: implement handling of endianness for loads and writes!
Load {
memory: _,
address,
endian: _,
size,
} => Ok(self
.memory
.get_value(&self.adjust_pointer_for_read(&self.eval(address)?), *size)?),
Store { .. } => {
// This does not return an error, but panics outright.
// If it returned an error instead, a side effect would be silently hidden, which must not happen.
panic!("Store expression cannot be evaluated!")
}
BinOp { op, lhs, rhs } => {
if *op == crate::bil::BinOpType::XOR && lhs == rhs {
// the result of `x XOR x` is always zero.
return Ok(Bitvector::zero(apint::BitWidth::new(
self.eval(lhs)?.bitsize() as usize
)?)
.into());
}
let (left, right) = (self.eval(lhs)?, self.eval(rhs)?);
Ok(left.bin_op(*op, &right))
}
UnOp { op, arg } => Ok(self.eval(arg)?.un_op(*op)),
Cast { kind, width, arg } => Ok(self.eval(arg)?.cast(*kind, *width)),
Let {
var: _,
bound_exp: _,
body_exp: _,
} => Err(anyhow!("Let binding expression handling not implemented")),
Unknown { description, type_ } => {
if let crate::bil::variable::Type::Immediate(bitsize) = type_ {
Ok(Data::new_top(*bitsize))
} else {
Err(anyhow!("Unknown Memory operation: {}", description))
}
}
IfThenElse {
condition,
true_exp,
false_exp,
} => match self.eval(condition)? {
x if x == Bitvector::from_bit(false).into() => self.eval(false_exp),
x if x == Bitvector::from_bit(true).into() => self.eval(true_exp),
_ => Ok(self.eval(true_exp)?.merge(&self.eval(false_exp)?)),
},
Extract {
low_bit,
high_bit,
arg,
} => Ok(self.eval(arg)?.extract(*low_bit, *high_bit)),
Concat { left, right } => Ok(self.eval(left)?.concat(&self.eval(right)?)),
}
}
/// Check if an expression contains a use-after-free
pub fn contains_access_of_dangling_memory(&self, expression: &Expression) -> bool {
use Expression::*;
match expression {
Var(_) | Const(_) | Unknown { .. } => false,
Load {
address: address_exp,
..
} => {
if let Ok(pointer) = self.eval(address_exp) {
self.memory.is_dangling_pointer(&pointer, true)
|| self.contains_access_of_dangling_memory(address_exp)
} else {
false
}
}
Store {
memory: _,
address: address_exp,
value: value_exp,
..
} => {
let address_check = if let Ok(pointer) = self.eval(address_exp) {
self.memory.is_dangling_pointer(&pointer, true)
} else {
false
};
address_check
|| self.contains_access_of_dangling_memory(address_exp)
|| self.contains_access_of_dangling_memory(value_exp)
}
BinOp { op: _, lhs, rhs } => {
self.contains_access_of_dangling_memory(lhs)
|| self.contains_access_of_dangling_memory(rhs)
}
UnOp { op: _, arg } => self.contains_access_of_dangling_memory(arg),
Cast {
kind: _,
width: _,
arg,
} => self.contains_access_of_dangling_memory(arg),
Let {
var: _,
bound_exp,
body_exp,
} => {
self.contains_access_of_dangling_memory(bound_exp)
|| self.contains_access_of_dangling_memory(body_exp)
}
IfThenElse {
condition,
true_exp,
false_exp,
} => {
self.contains_access_of_dangling_memory(condition)
|| self.contains_access_of_dangling_memory(true_exp)
|| self.contains_access_of_dangling_memory(false_exp)
}
Extract {
low_bit: _,
high_bit: _,
arg,
} => self.contains_access_of_dangling_memory(arg),
Concat { left, right } => {
self.contains_access_of_dangling_memory(left)
|| self.contains_access_of_dangling_memory(right)
}
}
}
/// If the given address is a positive stack offset and `self.caller_stack_ids` is non-empty,
/// i.e. it is an access to the caller stack, return the offset.
///
/// In all other cases, including the case that the address has more than one target, return `None`.
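/// A minimal sketch, assuming the `bv` test helper and a non-empty `caller_stack_ids`:
/// ```ignore
/// let address: Data = PointerDomain::new(state.stack_id.clone(), bv(8)).into();
/// assert_eq!(state.unwrap_offset_if_caller_stack_address(&address), Some(bv(8)));
/// // A negative offset or a pointer with several targets yields None instead.
/// ```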
fn unwrap_offset_if_caller_stack_address(&self, address: &Data) -> Option<BitvectorDomain> {
if self.caller_stack_ids.is_empty() {
return None;
}
if let Data::Pointer(pointer) = address {
match (pointer.targets().len(), pointer.targets().iter().next()) {
(1, Some((id, offset))) if self.stack_id == *id => {
if let BitvectorDomain::Value(offset_val) = offset {
if offset_val.try_to_i64().unwrap() >= 0 {
return Some(offset.clone());
}
}
}
_ => (),
}
}
None
}
}
use super::object_list::AbstractObjectList;
use super::Data;
use crate::abstract_domain::*;
use crate::bil::*;
use crate::prelude::*;
use crate::term::symbol::ExternSymbol;
use std::collections::{BTreeMap, BTreeSet};
mod access_handling;
/// Contains all information known about the state of a program at a specific point in time.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct State {
/// Maps a register variable to the data known about its content.
/// A variable not contained in the map has value `Data::Top(..)`, i.e. nothing is known about its content.
register: BTreeMap<Variable, Data>,
/// The list of all known memory objects.
pub memory: AbstractObjectList,
/// The abstract identifier of the current stack frame.
/// It points to the base of the stack frame, i.e. only negative offsets point into the current stack frame.
pub stack_id: AbstractIdentifier,
/// All known IDs of caller stack frames.
/// Note that these IDs are named after the callsite,
/// i.e. we can distinguish every callsite, and for recursive functions the caller and current stack frames have different IDs.
///
/// Writes to the current stack frame with offset >= 0 are written to *all* caller stack frames.
/// Reads from the current stack frame with offset >= 0 are handled as a merge-read from all caller stack frames.
pub caller_stack_ids: BTreeSet<AbstractIdentifier>,
/// All IDs of objects that are known to some caller.
/// This is an overapproximation of all object IDs that may have been passed as parameters to the function.
/// The corresponding objects are not allowed to be deleted (even if no pointer to them exists anymore)
/// so that after returning from a call the caller can recover their modified contents
/// and the callee does not accidentally delete this information if it loses all pointers to an object.
///
/// Note that IDs that the callee should not have access to are not included here.
/// For these IDs the caller can assume that the contents of the corresponding memory object were not accessed or modified by the call.
pub ids_known_to_caller: BTreeSet<AbstractIdentifier>,
}
impl State {
/// Create a new state that contains only one memory object corresponding to the stack.
/// The stack offset will be set to zero.
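/// A minimal usage sketch, assuming the `register` helper from the test module:
/// ```ignore
/// let state = State::new(&register("RSP"), Tid::new("func"));
/// // The stack register now holds a pointer to offset 0 of the new stack object.
/// assert!(state.get_register_by_name("RSP").is_some());
/// ```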
pub fn new(stack_register: &Variable, function_tid: Tid) -> State {
let stack_id = AbstractIdentifier::new(
function_tid,
AbstractLocation::from_var(stack_register).unwrap(),
);
let mut register: BTreeMap<Variable, Data> = BTreeMap::new();
register.insert(
stack_register.clone(),
PointerDomain::new(
stack_id.clone(),
Bitvector::zero((stack_register.bitsize().unwrap() as usize).into()).into(),
)
.into(),
);
State {
register,
memory: AbstractObjectList::from_stack_id(
stack_id.clone(),
stack_register.bitsize().unwrap(),
),
stack_id,
caller_stack_ids: BTreeSet::new(),
ids_known_to_caller: BTreeSet::new(),
}
}
/// Clear all non-callee-saved registers from the state.
/// This automatically also removes all virtual registers.
/// The parameter is a list of callee-saved register names.
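/// A minimal sketch for the x86-64 System V calling convention
/// (the register name list is an assumption, not read from a project config):
/// ```ignore
/// let callee_saved: Vec<String> = ["RBX", "RBP", "R12", "R13", "R14", "R15"]
///     .iter()
///     .map(|name| name.to_string())
///     .collect();
/// state.clear_non_callee_saved_register(&callee_saved);
/// // Afterwards only the values of the six listed registers are still known.
/// ```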
pub fn clear_non_callee_saved_register(&mut self, callee_saved_register_names: &[String]) {
let register = self
.register
.iter()
.filter_map(|(register, value)| {
if callee_saved_register_names
.iter()
.any(|reg_name| **reg_name == register.name)
{
Some((register.clone(), value.clone()))
} else {
None
}
})
.collect();
self.register = register;
}
/// Mark those parameter values of an extern function call that are passed on the stack
/// as unknown data (since the function may modify them).
pub fn clear_stack_parameter(&mut self, extern_call: &ExternSymbol) -> Result<(), Error> {
let mut result_log = Ok(());
for arg in &extern_call.arguments {
match &arg.location {
Expression::Var(_) => {}
location_expression => {
let arg_size = arg
.var
.bitsize()
.expect("Encountered argument with unknown size");
let data_top = Data::new_top(arg_size);
if let Err(err) = self.write_to_address(location_expression, &data_top) {
result_log = Err(err);
}
}
}
}
// We only return the last error encountered.
result_log
}
/// Replace all occurrences of old_id with new_id and adjust offsets accordingly.
/// This is needed to replace stack/caller IDs on call and return instructions.
///
/// **Example:**
/// Assume the old_id points to offset 0 in the corresponding memory object and the new_id points to offset -32.
/// Then the offset_adjustment is -32.
/// The offset_adjustment gets *added* to the base offset in self.memory.ids (so that it points to offset -32 in the memory object),
/// while it gets *subtracted* from all pointer values (so that they still point to the same spot in the corresponding memory object).
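/// A worked sketch of the offset arithmetic, assuming the `bv` test helper:
/// ```ignore
/// // Suppose new_id points to offset -8 of the object, i.e. offset_adjustment == bv(-8).
/// state.replace_abstract_id(&old_id, &new_id, &bv(-8));
/// // A pointer that was (old_id, -16) is now (new_id, -16 - (-8)) == (new_id, -8),
/// // so it still addresses the same byte of the underlying memory object.
/// ```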
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
for register_data in self.register.values_mut() {
register_data.replace_abstract_id(old_id, new_id, &(-offset_adjustment.clone()));
}
self.memory
.replace_abstract_id(old_id, new_id, offset_adjustment);
if &self.stack_id == old_id {
self.stack_id = new_id.clone();
}
if self.caller_stack_ids.get(old_id).is_some() {
self.caller_stack_ids.remove(old_id);
self.caller_stack_ids.insert(new_id.clone());
}
if self.ids_known_to_caller.get(old_id).is_some() {
self.ids_known_to_caller.remove(old_id);
self.ids_known_to_caller.insert(new_id.clone());
}
}
/// Remove all objects that can no longer be reached by any known pointer.
/// This does not remove objects where some caller may still know a pointer to the object.
///
/// Right now this uses a conservative overapproximation of all possible pointer targets contained in a memory object,
/// which will sometimes prevent memory objects from being removed
/// even if no actual pointer to them can be reconstructed from the state.
/// This may change in the future if memory consumption is too high (TODO: measure that).
pub fn remove_unreferenced_objects(&mut self) {
// get all referenced IDs
let mut referenced_ids = BTreeSet::new();
for (_reg_name, data) in self.register.iter() {
referenced_ids.append(&mut data.referenced_ids());
}
referenced_ids.insert(self.stack_id.clone());
referenced_ids.append(&mut self.caller_stack_ids.clone());
referenced_ids.append(&mut self.ids_known_to_caller.clone());
referenced_ids = self.add_recursively_referenced_ids_to_id_set(referenced_ids);
// remove unreferenced IDs
self.memory.remove_unused_ids(&referenced_ids);
}
/// Search (recursively) through all memory objects referenced by the given IDs
/// and add all IDs contained in them to the set of IDs.
///
/// This uses an overapproximation of the referenced IDs of a memory object,
/// i.e. for a memory object it may add IDs as possible references
/// even when the corresponding reference is no longer present in the memory object.
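/// A minimal sketch, assuming a hypothetical heap object `heap_id` that is only
/// reachable through a pointer stored inside the stack memory object:
/// ```ignore
/// let start: BTreeSet<AbstractIdentifier> = vec![state.stack_id.clone()].into_iter().collect();
/// let ids = state.add_recursively_referenced_ids_to_id_set(start);
/// // `ids` now contains both state.stack_id and heap_id.
/// ```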
pub fn add_recursively_referenced_ids_to_id_set(
&self,
mut ids: BTreeSet<AbstractIdentifier>,
) -> BTreeSet<AbstractIdentifier> {
let mut unsearched_ids = ids.clone();
while let Some(id) = unsearched_ids.iter().next() {
let id = id.clone();
unsearched_ids.remove(&id);
let memory_ids = self.memory.get_referenced_ids(&id);
for mem_id in memory_ids {
if ids.get(mem_id).is_none() {
ids.insert(mem_id.clone());
unsearched_ids.insert(mem_id.clone());
}
}
}
ids
}
/// Merge the callee stack with the caller stack.
///
/// This deletes the pointer from the callee_id to the corresponding memory object
/// and updates all other references pointing to the callee_id to point to the caller_id.
/// The offset adjustment is handled as in `replace_abstract_id`.
///
/// Note that right now the content of the callee memory object is *not* merged into the caller memory object.
/// In general this is the correct behaviour
/// as the content below the stack pointer should be considered uninitialized memory after returning to the caller.
/// However, an aggressively optimizing compiler or an unknown calling convention may deviate from this.
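/// A minimal sketch mirroring the unit test below, assuming the `new_id` and `bv` helpers:
/// ```ignore
/// // The caller pointer enters the callee frame at offset +52,
/// // so the callee frame sits at offset -52 relative to the caller frame.
/// state.merge_callee_stack_to_caller_stack(
///     &new_id("callee", "RSP"),
///     &new_id("callsite", "RSP"),
///     &bv(-52),
/// );
/// ```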
pub fn merge_callee_stack_to_caller_stack(
&mut self,
callee_id: &AbstractIdentifier,
caller_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
self.memory.remove_object_pointer(callee_id);
self.replace_abstract_id(callee_id, caller_id, offset_adjustment);
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
///
/// If this may cause double frees (i.e. the object in question may have been freed already),
/// an error with the list of possibly already freed objects is returned.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &PointerDomain<BitvectorDomain>,
) -> Result<(), Vec<(AbstractIdentifier, Error)>> {
self.memory.mark_mem_object_as_freed(object_pointer)
}
/// Remove all virtual registers from the state.
/// This should only be done in cases where it is known that no virtual registers can be alive.
///
/// Example: At the start of a basic block no virtual registers should be alive.
pub fn remove_virtual_register(&mut self) {
self.register = self
.register
.clone()
.into_iter()
.filter(|(register, _value)| !register.is_temp)
.collect();
}
/// Recursively remove all `caller_stack_ids` not corresponding to the given caller.
pub fn remove_other_caller_stack_ids(&mut self, caller_id: &AbstractIdentifier) {
let mut ids_to_remove = self.caller_stack_ids.clone();
ids_to_remove.remove(caller_id);
for register_value in self.register.values_mut() {
register_value.remove_ids(&ids_to_remove);
}
self.memory.remove_ids(&ids_to_remove);
self.caller_stack_ids = BTreeSet::new();
self.caller_stack_ids.insert(caller_id.clone());
self.ids_known_to_caller = self
.ids_known_to_caller
.difference(&ids_to_remove)
.cloned()
.collect();
}
/// Add those objects from the `caller_state` to `self` that are not known to `self`.
///
/// Since self does not know these objects, we assume that the current function could not have accessed
/// them in any way during execution.
/// This means they are unchanged from the moment of the call until the return from the call,
/// thus we can simply copy their object-state from the moment of the call.
pub fn readd_caller_objects(&mut self, caller_state: &State) {
self.memory.append_unknown_objects(&caller_state.memory);
}
}
impl AbstractDomain for State {
/// Merge two states
fn merge(&self, other: &Self) -> Self {
assert_eq!(self.stack_id, other.stack_id);
let mut merged_register = BTreeMap::new();
for (register, other_value) in other.register.iter() {
if let Some(value) = self.register.get(register) {
let merged_value = value.merge(other_value);
if !merged_value.is_top() {
// We only have to keep non-*Top* elements.
merged_register.insert(register.clone(), merged_value);
}
}
}
let merged_memory_objects = self.memory.merge(&other.memory);
State {
register: merged_register,
memory: merged_memory_objects,
stack_id: self.stack_id.clone(),
caller_stack_ids: self
.caller_stack_ids
.union(&other.caller_stack_ids)
.cloned()
.collect(),
ids_known_to_caller: self
.ids_known_to_caller
.union(&other.ids_known_to_caller)
.cloned()
.collect(),
}
}
/// A state has no *Top* element
fn is_top(&self) -> bool {
false
}
}
impl State {
/// Get a more compact json-representation of the state.
/// Intended for pretty printing, not usable for serialization/deserialization.
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let mut state_map = Map::new();
let register = self
.register
.iter()
.map(|(var, data)| (var.name.clone(), data.to_json_compact()))
.collect();
let register = Value::Object(register);
state_map.insert("register".into(), register);
state_map.insert("memory".into(), self.memory.to_json_compact());
state_map.insert(
"stack_id".into(),
Value::String(format!("{}", self.stack_id)),
);
state_map.insert(
"caller_stack_ids".into(),
Value::Array(
self.caller_stack_ids
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
state_map.insert(
"ids_known_to_caller".into(),
Value::Array(
self.ids_known_to_caller
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
Value::Object(state_map)
}
}
#[cfg(test)]
mod tests;
use super::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
fn new_id(time: &str, register: &str) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new(time),
AbstractLocation::Register(register.into(), 64),
)
}
fn register(name: &str) -> Variable {
Variable {
name: name.into(),
type_: crate::bil::variable::Type::Immediate(64),
is_temp: false,
}
}
fn reg_add(name: &str, value: i64) -> Expression {
Expression::BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Expression::Var(register(name))),
rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
}
}
fn reg_sub(name: &str, value: i64) -> Expression {
Expression::BinOp {
op: BinOpType::MINUS,
lhs: Box::new(Expression::Var(register(name))),
rhs: Box::new(Expression::Const(Bitvector::from_i64(value))),
}
}
fn store_exp(address: Expression, value: Expression) -> Expression {
let mem_var = Variable {
name: "mem".into(),
type_: crate::bil::variable::Type::Memory {
addr_size: 64,
elem_size: 64,
},
is_temp: false,
};
Expression::Store {
memory: Box::new(Expression::Var(mem_var)),
address: Box::new(address),
value: Box::new(value),
endian: Endianness::LittleEndian,
size: 64,
}
}
fn load_exp(address: Expression) -> Expression {
let mem_var = Variable {
name: "mem".into(),
type_: crate::bil::variable::Type::Memory {
addr_size: 64,
elem_size: 64,
},
is_temp: false,
};
Expression::Load {
memory: Box::new(Expression::Var(mem_var)),
address: Box::new(address),
endian: Endianness::LittleEndian,
size: 64,
}
}
#[test]
fn state() {
use crate::analysis::pointer_inference::object::*;
use crate::bil::Expression::*;
let mut state = State::new(&register("RSP"), Tid::new("time0"));
let stack_id = new_id("time0", "RSP");
let stack_addr = Data::Pointer(PointerDomain::new(stack_id.clone(), bv(8)));
state
.store_value(&stack_addr, &Data::Value(bv(42)))
.unwrap();
state.register.insert(register("RSP"), stack_addr.clone());
let load_expr = Load {
memory: Box::new(Var(register("RSP"))), // This is wrong, but the memory var is not checked at the moment (since we have only the one for RAM)
address: Box::new(Var(register("RSP"))),
endian: Endianness::LittleEndian,
size: 64 as BitSize,
};
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(42)));
let mut other_state = State::new(&register("RSP"), Tid::new("time0"));
state.register.insert(register("RAX"), Data::Value(bv(42)));
other_state
.register
.insert(register("RSP"), stack_addr.clone());
other_state
.register
.insert(register("RAX"), Data::Value(bv(42)));
other_state
.register
.insert(register("RBX"), Data::Value(bv(35)));
let merged_state = state.merge(&other_state);
assert_eq!(merged_state.register[&register("RAX")], Data::Value(bv(42)));
assert_eq!(merged_state.register.get(&register("RBX")), None);
assert_eq!(merged_state.eval(&load_expr).unwrap(), Data::new_top(64));
// Test pointer adjustment on reads
state
.memory
.add_abstract_object(new_id("time0", "caller"), bv(0), ObjectType::Stack, 64);
state.caller_stack_ids.insert(new_id("time0", "caller"));
state
.store_value(&stack_addr, &Data::Value(bv(15)))
.unwrap();
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("time0", "caller"), bv(8))),
64
)
.unwrap(),
Data::Value(bv(15))
);
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(15)));
// Test replace_abstract_id
let pointer = Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-16)));
state.register.insert(register("RSP"), pointer.clone());
state.store_value(&pointer, &Data::Value(bv(7))).unwrap();
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(7)));
state.replace_abstract_id(&stack_id, &new_id("time0", "callee"), &bv(-8));
assert_eq!(state.eval(&load_expr).unwrap(), Data::Value(bv(7)));
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("time0", "callee"), bv(-8))),
64
)
.unwrap(),
Data::Value(bv(7))
);
assert_eq!(
state
.memory
.get_value(
&Data::Pointer(PointerDomain::new(new_id("time0", "callee"), bv(-16))),
64
)
.unwrap(),
Data::new_top(64)
);
state
.memory
.add_abstract_object(new_id("time0", "heap_obj"), bv(0), ObjectType::Heap, 64);
assert_eq!(state.memory.get_num_objects(), 3);
state.remove_unreferenced_objects();
assert_eq!(state.memory.get_num_objects(), 2);
}
#[test]
fn handle_store() {
use crate::bil::Expression::*;
let mut state = State::new(&register("RSP"), Tid::new("time0"));
let stack_id = new_id("time0", "RSP");
assert_eq!(
state.eval(&Var(register("RSP"))).unwrap(),
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(0)))
);
state
.handle_register_assign(&register("RSP"), &reg_sub("RSP", 32))
.unwrap();
assert_eq!(
state.eval(&Var(register("RSP"))).unwrap(),
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-32)))
);
state
.handle_register_assign(&register("RSP"), &reg_add("RSP", -8))
.unwrap();
assert_eq!(
state.eval(&Var(register("RSP"))).unwrap(),
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-40)))
);
state
.handle_store_exp(&store_exp(reg_add("RSP", 8), Const(Bitvector::from_i64(1))))
.unwrap();
state
.handle_store_exp(&store_exp(reg_sub("RSP", 8), Const(Bitvector::from_i64(2))))
.unwrap();
state
.handle_store_exp(&store_exp(
reg_add("RSP", -16),
Const(Bitvector::from_i64(3)),
))
.unwrap();
state
.handle_register_assign(&register("RSP"), &reg_sub("RSP", 4))
.unwrap();
assert_eq!(
state.eval(&load_exp(reg_add("RSP", 12))).unwrap(),
bv(1).into()
);
assert_eq!(
state.eval(&load_exp(reg_sub("RSP", 4))).unwrap(),
bv(2).into()
);
assert_eq!(
state.eval(&load_exp(reg_add("RSP", -12))).unwrap(),
bv(3).into()
);
}
#[test]
fn handle_caller_stack_stores() {
use super::super::object::ObjectType;
use crate::bil::Expression::*;
let mut state = State::new(&register("RSP"), Tid::new("time0"));
state
.memory
.add_abstract_object(new_id("caller1", "RSP"), bv(0), ObjectType::Stack, 64);
state
.memory
.add_abstract_object(new_id("caller2", "RSP"), bv(0), ObjectType::Stack, 64);
state.caller_stack_ids.insert(new_id("caller1", "RSP"));
state.caller_stack_ids.insert(new_id("caller2", "RSP"));
// store something on the caller stack
state
.handle_store_exp(&store_exp(
reg_add("RSP", 8),
Const(Bitvector::from_i64(42)),
))
.unwrap();
// check that it was saved in all caller objects and not on the callee stack object
let pointer = PointerDomain::new(new_id("time0", "RSP"), bv(8)).into();
assert_eq!(
state.memory.get_value(&pointer, 64).unwrap(),
Data::new_top(64)
);
let pointer = PointerDomain::new(new_id("caller1", "RSP"), bv(8)).into();
assert_eq!(state.memory.get_value(&pointer, 64).unwrap(), bv(42).into());
let pointer = PointerDomain::new(new_id("caller2", "RSP"), bv(8)).into();
assert_eq!(state.memory.get_value(&pointer, 64).unwrap(), bv(42).into());
// accessing through a positive stack register offset should yield the value of the caller stacks
assert_eq!(
state.eval(&load_exp(reg_add("RSP", 8))).unwrap(),
bv(42).into()
);
}
#[test]
fn clear_parameters_on_the_stack_on_extern_calls() {
use crate::bil::Expression::*;
use crate::term::{Arg, ArgIntent};
let mut state = State::new(&register("RSP"), Tid::new("time0"));
state.register.insert(
register("RSP"),
PointerDomain::new(new_id("time0", "RSP"), bv(-20)).into(),
);
// write something onto the stack
state
.handle_store_exp(&store_exp(
reg_add("RSP", 8),
Const(Bitvector::from_i64(42)),
))
.unwrap();
// create an extern symbol which uses the value on the stack as a parameter
let argument = Arg {
var: register("my_argument"),
location: reg_add("RSP", 8),
intent: ArgIntent::Input,
};
let extern_symbol = ExternSymbol {
tid: Tid::new("symbol"),
address: "some_address".into(),
name: "my_extern_symbol".into(),
calling_convention: None,
arguments: vec![argument],
};
// check the value before
let pointer = PointerDomain::new(new_id("time0", "RSP"), bv(-12)).into();
assert_eq!(state.memory.get_value(&pointer, 64).unwrap(), bv(42).into());
// clear stack parameter
state.clear_stack_parameter(&extern_symbol).unwrap();
// check the value after
assert_eq!(
state.memory.get_value(&pointer, 64).unwrap(),
Data::new_top(64)
);
}
#[test]
fn merge_callee_stack_to_caller_stack() {
use super::super::object::ObjectType;
let mut state = State::new(&register("RSP"), Tid::new("callee"));
state
.memory
.add_abstract_object(new_id("callsite", "RSP"), bv(52), ObjectType::Stack, 64);
state.caller_stack_ids.insert(new_id("callsite", "RSP"));
// check the state before merging to the caller stack
assert_eq!(
state.register.get(&register("RSP")).unwrap(),
&PointerDomain::new(new_id("callee", "RSP"), bv(0)).into()
);
assert_eq!(state.memory.get_all_object_ids().len(), 2);
// check state after merging to the caller stack
state.merge_callee_stack_to_caller_stack(
&new_id("callee", "RSP"),
&new_id("callsite", "RSP"),
&bv(-52),
);
assert_eq!(
state.register.get(&register("RSP")).unwrap(),
&PointerDomain::new(new_id("callsite", "RSP"), bv(52)).into()
);
assert_eq!(state.memory.get_all_object_ids().len(), 1);
}
......@@ -29,7 +29,7 @@ fn run_pointer_inference_and_print_debug(program_jsonbuilder_val: ocaml::Value)
let project: Project =
serde_json::from_value(program_json).expect("Project deserialization failed");
crate::analysis::pointer_inference::run(&project, true); // TODO: This discard all CweWarnings and log messages. Change that?
crate::analysis::pointer_inference::run(&project, true); // Note: This discards all CweWarnings and log messages.
}
caml!(rs_run_pointer_inference_and_print_debug(program_jsonbuilder_val) {
......
......@@ -82,8 +82,6 @@ pub struct Program {
pub entry_points: Vec<Tid>,
}
// TODO: Add deserialization from Ocaml to the FFI module for project!
// TODO: Add other CPU-architecture specific data to this struct!
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
pub program: Term<Program>,
......@@ -93,6 +91,12 @@ pub struct Project {
pub parameter_registers: Vec<String>,
}
impl Project {
pub fn get_pointer_bitsize(&self) -> BitSize {
self.stack_pointer_register.bitsize().unwrap()
}
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Arg {
pub var: Variable,
......