Unverified Commit 9f09ebeb by Enkelmann Committed by GitHub

Refactor caller stack handling in PointerInference analysis (#287)

parent 0662acda
......@@ -78,6 +78,26 @@ impl<T: RegisterDomain> DataDomain<T> {
}
}
/// Replace all abstract IDs in self with the corresponding values given by the `replacement_map`.
///
/// For IDs without a replacement value the `contains_top_values` flag will be set.
/// Replace all abstract IDs in self with the corresponding values given by the `replacement_map`.
///
/// For IDs without a replacement value the `contains_top_values` flag will be set.
pub fn replace_all_ids(&mut self, replacement_map: &BTreeMap<AbstractIdentifier, Self>) {
    // Start from a copy of `self` that has no relative values;
    // absolute values and the Top-flag are carried over unchanged.
    let mut replaced = DataDomain {
        size: self.size,
        relative_values: BTreeMap::new(),
        absolute_value: self.absolute_value.clone(),
        contains_top_values: self.contains_top_values,
    };
    for (id, offset) in &self.relative_values {
        match replacement_map.get(id) {
            // The replacement value is shifted by the old offset before merging.
            Some(replacement) => {
                replaced = replaced.merge(&(replacement.clone() + offset.clone().into()))
            }
            // No replacement known: the value becomes unknown (Top).
            None => replaced.contains_top_values = true,
        }
    }
    *self = replaced;
}
/// Return an iterator over all referenced abstract IDs.
pub fn referenced_ids(&self) -> impl Iterator<Item = &AbstractIdentifier> {
self.relative_values.keys()
......@@ -262,14 +282,33 @@ mod tests {
let mut targets = BTreeMap::new();
targets.insert(new_id("Rax"), bv(1));
targets.insert(new_id("Rbx"), bv(2));
let mut data = DataDomain::mock_from_target_map(targets);
targets.insert(new_id("Rcx"), bv(3));
// Test replacing exactly one ID.
let mut data = DataDomain::mock_from_target_map(targets.clone());
data.replace_abstract_id(&new_id("Rbx"), &new_id("replaced_Rbx"), &bv(10));
assert_eq!(data.relative_values.len(), 2);
assert_eq!(data.relative_values.len(), 3);
assert_eq!(*data.relative_values.get(&new_id("Rax")).unwrap(), bv(1));
assert_eq!(
*data.relative_values.get(&new_id("replaced_Rbx")).unwrap(),
bv(12)
);
// Test replacing all IDs using a replacement map.
let mut data = DataDomain::mock_from_target_map(targets);
let replacement_map = BTreeMap::from_iter([
(
new_id("Rax"),
DataDomain::from_target(new_id("replaced_Rax"), bv(0)),
),
(new_id("Rbx"), bv(10).into()),
]);
data.replace_all_ids(&replacement_map);
assert_eq!(data.relative_values.len(), 1);
assert_eq!(
*data.relative_values.get(&new_id("replaced_Rax")).unwrap(),
bv(1)
);
assert!(data.contains_top());
assert_eq!(data.absolute_value.unwrap(), bv(12));
}
#[test]
......
......@@ -69,6 +69,24 @@ where
}
}
impl<K, V, S> FromIterator<(K, V)> for DomainMap<K, V, S>
where
    K: PartialOrd + Ord + Clone,
    V: AbstractDomain,
    S: MapMergeStrategy<K, V>,
{
    /// Build a new `DomainMap` containing exactly the key-value pairs
    /// yielded by the given iterator.
    fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
        let collected = iter.into_iter().collect();
        DomainMap {
            inner: Arc::new(collected),
            phantom: PhantomData,
        }
    }
}
impl<K, V, S> AbstractDomain for DomainMap<K, V, S>
where
K: PartialOrd + Ord + Clone,
......
......@@ -20,7 +20,12 @@ use std::sync::Arc;
/// E.g. it may represent the union of all values at the specific *location* for each time the program point is visited during an execution trace
/// or it may only represent the value at the last time the program point was visited.
///
/// An abstract identifier is given by a time identifier and a location identifier.
/// Alternatively one can also add path hints to an identifier to further distinguish points in time in an execution trace.
/// Path hints are given as a possibly empty array of time identifiers.
/// To prevent infinitely long path hints, each time identifier is only allowed to appear at most once in the array.
/// The specific meaning of the path hints depends upon the use case.
///
/// An abstract identifier is given by a time identifier, a location identifier and a path hints array (containing time identifiers).
///
/// For the location identifier see `AbstractLocation`.
/// The time identifier is given by a `Tid`.
......@@ -35,23 +40,71 @@ pub struct AbstractIdentifier(Arc<AbstractIdentifierData>);
pub struct AbstractIdentifierData {
    // The point in time (e.g. a function or call TID) that the identifier refers to.
    time: Tid,
    // The abstract location (register or memory location) that the identifier refers to.
    location: AbstractLocation,
    // Optional path hints (time identifiers) that further distinguish points in time.
    // Each TID is allowed to appear at most once in the array (enforced by `with_path_hint`).
    path_hints: Vec<Tid>,
}
impl AbstractIdentifier {
/// Create a new abstract identifier from the given time and location identifiers.
///
/// The path hints array of the new identifier is empty.
pub fn new(time: Tid, location: AbstractLocation) -> AbstractIdentifier {
    // Note: a stale pre-refactoring one-line constructor body (without the
    // `path_hints` field) was left in this span by the diff; only the
    // current body initializing `path_hints` is kept here.
    AbstractIdentifier(Arc::new(AbstractIdentifierData {
        time,
        location,
        path_hints: Vec::new(),
    }))
}
/// Create a new abstract identifier where the abstract location is a register.
/// Panics if the register is a temporary register.
pub fn new_from_var(time: Tid, variable: &Variable) -> AbstractIdentifier {
pub fn from_var(time: Tid, variable: &Variable) -> AbstractIdentifier {
AbstractIdentifier(Arc::new(AbstractIdentifierData {
time,
location: AbstractLocation::from_var(variable).unwrap(),
path_hints: Vec::new(),
}))
}
/// Create an abstract identifier from a parameter argument.
///
/// If the argument is a sub-register, then the created identifier contains the whole base register.
pub fn from_arg(time: &Tid, arg: &Arg) -> AbstractIdentifier {
    // Extract the single input variable of the argument expression
    // and remember whether the argument lives on the stack.
    let (expr, stack_size) = match arg {
        Arg::Register { expr, .. } => (expr, None),
        Arg::Stack { address, size, .. } => (address, Some(*size)),
    };
    let register = match &expr.input_vars()[..] {
        [var] => *var,
        _ => panic!("Malformed argument expression encountered"),
    };
    let location = match stack_size {
        // Register argument: the location is the (base) register itself.
        None => AbstractLocation::from_var(register).unwrap(),
        // Stack argument: the location is a stack position relative to the register.
        Some(size) => AbstractLocation::from_stack_position(
            register,
            arg.eval_stack_offset().unwrap().try_to_i64().unwrap(),
            size,
        ),
    };
    AbstractIdentifier::new(time.clone(), location)
}
/// Create a new abstract identifier
/// by pushing the given path hint to the array of path hints of `self`.
///
/// Returns an error if the path hint is already contained in the path hints of `self`,
/// since duplicate hints are forbidden (this bounds the length of the hints array).
pub fn with_path_hint(&self, path_hint: Tid) -> Result<Self, Error> {
    // Guard clause: reject duplicate path hints.
    if self.path_hints.contains(&path_hint) {
        return Err(anyhow!("Path hint already contained."));
    }
    let mut new_id = self.clone();
    // Copy-on-write: only clone the inner data if the Arc is shared.
    Arc::make_mut(&mut new_id.0).path_hints.push(path_hint);
    Ok(new_id)
}
/// Return the (possibly empty) array of path hints of `self`.
pub fn get_path_hints(&self) -> &[Tid] {
    self.path_hints.as_slice()
}
/// Get the register associated to the abstract location.
/// Panics if the abstract location is a memory location and not a register.
pub fn unwrap_register(&self) -> &Variable {
......@@ -74,7 +127,15 @@ impl AbstractIdentifier {
impl std::fmt::Display for AbstractIdentifier {
    /// Format as `time @ location`, or as `time(->hint1->hint2) @ location`
    /// if the identifier carries path hints.
    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self.0.path_hints.as_slice() {
            [] => write!(formatter, "{} @ {}", self.0.time, self.0.location),
            hints => {
                write!(formatter, "{}(", self.0.time)?;
                for hint in hints {
                    write!(formatter, "->{}", hint)?;
                }
                write!(formatter, ") @ {}", self.0.location)
            }
        }
    }
}
......@@ -159,3 +220,27 @@ impl std::fmt::Display for AbstractMemoryLocation {
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Check the invariants enforced by the constructors of `AbstractLocation`
    /// and `AbstractIdentifier`.
    #[test]
    fn test_constraint_enforcements() {
        // Test that no temporary registers are allowed as abstract locations.
        assert!(AbstractLocation::from_var(&Variable {
            name: "var".to_string(),
            size: ByteSize::new(8),
            is_temp: true,
        })
        .is_err());
        // Test uniqueness of TIDs in path hint array.
        let id = AbstractIdentifier::new(
            Tid::new("time_id"),
            AbstractLocation::from_var(&Variable::mock("var", 8)).unwrap(),
        );
        // Pushing two distinct hints succeeds ...
        let id = id.with_path_hint(Tid::new("first_hint")).unwrap();
        let id = id.with_path_hint(Tid::new("second_hint")).unwrap();
        // ... but pushing an already contained hint must fail.
        assert!(id.with_path_hint(Tid::new("first_hint")).is_err());
    }
}
......@@ -107,18 +107,6 @@ impl<T: AbstractDomain + SizedDomain + HasTop + std::fmt::Debug> MemRegion<T> {
}
}
/// Clear all values that might be fully or partially overwritten if one writes a value with byte size `value_size`
/// to an offset contained in the interval from `start` to `end` (both bounds included in the interval).
///
/// This represents the effect of writing arbitrary values (with known byte size)
/// to arbitrary offsets contained in the interval.
/// Note that if one only wants to mark values in the interval as potentially overwritten without deleting them,
/// then one should use the [`MemRegion::mark_interval_values_as_top`] method instead.
pub fn clear_offset_interval(&mut self, start: i64, end: i64, value_size: ByteSize) {
    // A write of `value_size` bytes at offset `end` touches bytes up to
    // `end + value_size - 1`, so that is the length of the cleared region.
    let region_length = (end - start) + u64::from(value_size) as i64;
    self.clear_interval(start, region_length);
}
/// Add a value to the memory region.
pub fn add(&mut self, value: T, position: Bitvector) {
assert_eq!(
......@@ -259,6 +247,19 @@ impl<T: AbstractDomain + SizedDomain + HasTop + std::fmt::Debug> MemRegion<T> {
self.clear_top_values();
}
/// Add the given offset to the indices of all values contained in the memory region.
pub fn add_offset_to_all_indices(&mut self, offset: i64) {
    // A zero offset is a no-op; skip the rebuild (and the copy-on-write).
    if offset == 0 {
        return;
    }
    let shifted_values: BTreeMap<_, _> = self
        .inner
        .values
        .iter()
        .map(|(index, value)| (*index + offset, value.clone()))
        .collect();
    Arc::make_mut(&mut self.inner).values = shifted_values;
}
/// Merge two memory regions.
///
/// Values at the same position and with the same size get merged via their merge function.
......
......@@ -99,7 +99,7 @@ impl<'a> Context<'a> {
// If the Top-flag of the return value was set we replace it with an ID representing the return register
// to indicate where the unknown value originated from.
if return_value.contains_top() {
let id = AbstractIdentifier::new_from_var(call.tid.clone(), return_register);
let id = AbstractIdentifier::from_var(call.tid.clone(), return_register);
let value =
DataDomain::from_target(id, Bitvector::zero(return_register.size.into()).into());
return_value = return_value.merge(&value);
......
......@@ -22,19 +22,19 @@ fn test_compute_return_values_of_call() {
let return_values =
context.compute_return_values_of_call(&mut caller_state, &callee_state, &cconv, &call);
let expected_val = DataDomain::from_target(
AbstractIdentifier::new_from_var(Tid::new("call_tid"), &Variable::mock("RAX", 8)),
AbstractIdentifier::from_var(Tid::new("call_tid"), &Variable::mock("RAX", 8)),
Bitvector::from_i64(0).into(),
);
assert_eq!(return_values.iter().len(), 3);
assert_eq!(return_values[0], (&Variable::mock("RAX", 8), expected_val));
// Test returning a known value.
let param_ref = DataDomain::from_target(
AbstractIdentifier::new_from_var(Tid::new("callee"), &Variable::mock("RDI", 8)),
AbstractIdentifier::from_var(Tid::new("callee"), &Variable::mock("RDI", 8)),
Bitvector::from_i64(0).into(),
);
callee_state.set_register(&Variable::mock("RAX", 8), param_ref);
let expected_val = DataDomain::from_target(
AbstractIdentifier::new_from_var(Tid::new("caller"), &Variable::mock("RDI", 8)),
AbstractIdentifier::from_var(Tid::new("caller"), &Variable::mock("RDI", 8)),
Bitvector::from_i64(0).into(),
);
let return_values =
......
......@@ -57,14 +57,19 @@ fn generate_fixpoint_computation<'a>(
if let Some(entry_block) = sub.term.blocks.get(0) {
if entry_block.tid == block.tid {
// The node of a function entry point
let calling_convention = project
.get_specific_calling_convention(&sub.term.calling_convention)
.unwrap_or_else(|| {
project
.get_standard_calling_convention()
.expect("No standard calling convention found.")
});
computation.set_node_value(
node,
NodeValue::Value(State::new(
&sub.tid,
&project.stack_pointer_register,
project
.get_specific_calling_convention(&sub.term.calling_convention)
.unwrap(),
calling_convention,
)),
)
}
......@@ -166,6 +171,20 @@ impl FunctionSignature {
}
}
/// The returned number is the maximum of stack offset plus parameter size
/// taken over all stack parameters in the function signature.
///
/// Parameters whose stack offset cannot be evaluated are ignored;
/// if there are no stack parameters, zero is returned.
pub fn get_stack_params_total_size(&self) -> i64 {
    self.parameters
        .keys()
        .filter_map(|param| {
            // Only parameters with an evaluable stack offset contribute.
            let offset = param.eval_stack_offset().ok()?.try_to_i64().unwrap();
            Some(offset + u64::from(param.bytesize()) as i64)
        })
        // Fold with 0 as the base so the result is never negative.
        .fold(0i64, i64::max)
}
/// Merge the parameter list of `self` with the given parameter list.
fn merge_parameter_list(&mut self, params: &[(Arg, AccessPattern)]) {
for (arg, sig_new) in params {
......@@ -213,7 +232,7 @@ impl FunctionSignature {
if *size != stack_register.size {
return Err(anyhow!("Unexpected stack parameter size"));
}
if let Ok(offset) = arg.eval_stack_offset(stack_register) {
if let Ok(offset) = arg.eval_stack_offset() {
if offset.try_to_u64()? % u64::from(stack_register.size) != 0 {
return Err(anyhow!("Unexpected stack parameter alignment"));
}
......@@ -229,3 +248,27 @@ impl Default for FunctionSignature {
Self::new()
}
}
#[cfg(test)]
pub mod tests {
    use super::*;

    impl FunctionSignature {
        /// Create a mock x64 function signature with 2 parameters:
        /// `RDI` with a default (read-only) access pattern
        /// and `RSI` with unknown access flags set (i.e. accessed mutably).
        pub fn mock_x64() -> FunctionSignature {
            let mut mutable_access = AccessPattern::new();
            mutable_access.set_unknown_access_flags();
            FunctionSignature {
                parameters: HashMap::from_iter([
                    (
                        Arg::from_var(Variable::mock("RDI", 8), None),
                        AccessPattern::new(),
                    ),
                    (
                        Arg::from_var(Variable::mock("RSI", 8), None),
                        mutable_access,
                    ),
                ]),
            }
        }
    }
}
......@@ -41,7 +41,7 @@ impl State {
let mut tracked_ids = BTreeMap::new();
// Generate tracked IDs for all parameters and also add them to the register map
for var in calling_convention.get_all_parameter_register() {
let id = AbstractIdentifier::new_from_var(func_tid.clone(), var);
let id = AbstractIdentifier::from_var(func_tid.clone(), var);
let value =
DataDomain::from_target(id.clone(), Bitvector::zero(var.size.into()).into());
register_map.insert(var.clone(), value);
......@@ -50,7 +50,7 @@ impl State {
}
}
// Generate all stack-related objects
let stack_id = AbstractIdentifier::new_from_var(func_tid.clone(), stack_register);
let stack_id = AbstractIdentifier::from_var(func_tid.clone(), stack_register);
let stack_value = DataDomain::from_target(
stack_id.clone(),
Bitvector::zero(stack_register.size.into()).into(),
......
......@@ -112,7 +112,7 @@ impl State {
{
if var.size == generic_pointer_size {
let specific_target = DataDomain::from_target(
AbstractIdentifier::new_from_var(call_tid.clone(), var),
AbstractIdentifier::from_var(call_tid.clone(), var),
Bitvector::zero(var.size.into()).into(),
);
let output = generic_output.merge(&specific_target);
......
......@@ -22,7 +22,7 @@ impl State {
/// Mock an abstract ID representing the stack.
fn mock_stack_id() -> AbstractIdentifier {
    // Note: a stale pre-refactoring line calling the renamed `new_from_var`
    // was left in this span by the diff; only the `from_var` call is kept.
    AbstractIdentifier::from_var(Tid::new("mock_fn"), &Variable::mock("sp", 4))
}
/// Mock an abstract ID of a stack parameter
......@@ -149,9 +149,9 @@ fn test_extern_symbol_handling() {
return_: Some(Tid::new("return_tid")),
},
};
let param_id = AbstractIdentifier::new_from_var(Tid::new("mock_fn"), &Variable::mock("r0", 4));
let param_id = AbstractIdentifier::from_var(Tid::new("mock_fn"), &Variable::mock("r0", 4));
let return_val_id =
AbstractIdentifier::new_from_var(Tid::new("call_tid"), &Variable::mock("r0", 4));
AbstractIdentifier::from_var(Tid::new("call_tid"), &Variable::mock("r0", 4));
// Test extern symbol handling.
state.handle_extern_symbol(&call, &extern_symbol, &cconv);
assert_eq!(
......
use super::*;
use crate::analysis::function_signature::AccessPattern;
impl<'a> Context<'a> {
    /// Create a map that maps each abstract ID known to the callee
    /// to the value that represents it in the caller.
    ///
    /// For parameter IDs this is the value of the parameter on function call.
    /// For IDs of objects created in the callee it is the ID together with a path hint given by the call TID.
    /// For other IDs (including the callee stack frame ID) it is a `Top` value,
    /// i.e. the value of the ID should be unknown to the caller.
    pub fn create_callee_id_to_caller_data_map(
        &self,
        state_before_call: &State,
        state_before_return: &State,
        call_tid: &Tid,
    ) -> BTreeMap<AbstractIdentifier, Data> {
        let stack_register = &self.project.stack_pointer_register;
        let mut id_map = BTreeMap::new();
        let callee_tid = state_before_return.get_fn_tid();
        let callee_fn_sig = self.fn_signatures.get(callee_tid).unwrap();
        // Map each parameter ID to the value of the parameter at the callsite,
        // or to `Top` if the parameter value cannot be evaluated.
        for param in callee_fn_sig.parameters.keys() {
            let param_id = AbstractIdentifier::from_arg(callee_tid, param);
            if let Ok(param_value) =
                state_before_call.eval_parameter_arg(param, self.runtime_memory_image)
            {
                id_map.insert(param_id, param_value);
            } else {
                id_map.insert(param_id, Data::new_top(param.bytesize()));
            }
        }
        // Map IDs of memory objects created in the callee to the same ID
        // with the call TID pushed as a path hint.
        for object_id in state_before_return.memory.get_all_object_ids() {
            if object_id.get_tid() != callee_tid || !object_id.get_path_hints().is_empty() {
                // Object is neither a parameter object nor the stack frame of the callee.
                if let Ok(new_object_id) = object_id.with_path_hint(call_tid.clone()) {
                    id_map.insert(
                        object_id,
                        Data::from_target(
                            new_object_id,
                            Bitvector::zero(stack_register.size.into()).into(),
                        ),
                    );
                } else {
                    // The call TID is already contained in the path hints,
                    // so the object becomes unknown to the caller.
                    id_map.insert(object_id, Data::new_top(stack_register.size));
                }
            }
        }
        // The callee stack frame is always unknown (Top) from the caller's point of view.
        id_map.insert(
            state_before_return.stack_id.clone(),
            Data::new_top(stack_register.size),
        );
        id_map
    }

    /// Create a map from the parameter IDs (of the function that the given state corresponds to)
    /// to the corresponding access patterns.
    pub fn create_id_to_access_pattern_map(
        &self,
        state: &State,
    ) -> BTreeMap<AbstractIdentifier, &AccessPattern> {
        let mut id_to_access_pattern_map = BTreeMap::new();
        let fn_tid = state.get_fn_tid();
        let callee_fn_sig = self.fn_signatures.get(fn_tid).unwrap();
        for (param, access_pattern) in &callee_fn_sig.parameters {
            let param_id = AbstractIdentifier::from_arg(fn_tid, param);
            id_to_access_pattern_map.insert(param_id.clone(), access_pattern);
        }
        id_to_access_pattern_map
    }

    /// Identify caller IDs used in more than one parameter,
    /// for which at least one parameter has write access to the corresponding memory object.
    /// For these IDs the analysis in the callee is unsound for the corresponding callsite!
    pub fn get_unsound_caller_ids(
        &self,
        callee_id_to_caller_data_map: &BTreeMap<AbstractIdentifier, Data>,
        callee_id_to_access_pattern_map: &BTreeMap<AbstractIdentifier, &AccessPattern>,
    ) -> BTreeSet<AbstractIdentifier> {
        let mut ids_touched = BTreeSet::new();
        let mut ids_modified = BTreeSet::new();
        let mut unsound_caller_ids = BTreeSet::new();
        for (callee_id, access_pattern) in callee_id_to_access_pattern_map {
            for id in callee_id_to_caller_data_map
                .get(callee_id)
                .unwrap()
                .referenced_ids()
            {
                // A caller ID is unsound if it was already modified through another parameter,
                // or if it was touched through another parameter and is now mutably dereferenced.
                if ids_modified.contains(id)
                    || (access_pattern.is_mutably_dereferenced() && ids_touched.contains(id))
                {
                    unsound_caller_ids.insert(id.clone());
                }
                ids_touched.insert(id.clone());
                if access_pattern.is_mutably_dereferenced() {
                    ids_modified.insert(id.clone());
                }
            }
        }
        unsound_caller_ids
    }
}
use super::object::ObjectType;
use crate::analysis::function_signature::FunctionSignature;
use crate::analysis::graph::Graph;
use crate::intermediate_representation::*;
......@@ -11,13 +10,15 @@ use super::state::State;
use super::ValueDomain;
use super::{Config, Data, VERSION};
// contains trait implementations for the `Context` struct,
// especially the implementation of the `interprocedural_fixpoint::Context` trait.
/// Contains methods of the `Context` struct that deal with the manipulation of abstract IDs.
mod id_manipulation;
/// Contains trait implementations for the `Context` struct,
/// especially the implementation of the [`forward_interprocedural_fixpoint::Context`](crate::analysis::forward_interprocedural_fixpoint::Context) trait.
mod trait_impls;
/// Contains all context information needed for the pointer inference fixpoint computation.
///
/// The struct also implements the `interprocedural_fixpoint::Context` trait to enable the fixpoint computation.
/// The struct also implements the [`forward_interprocedural_fixpoint::Context`](crate::analysis::forward_interprocedural_fixpoint::Context) trait to enable the fixpoint computation.
pub struct Context<'a> {
/// The program control flow graph on which the fixpoint will be computed
pub graph: &'a Graph<'a>,
......@@ -185,9 +186,8 @@ impl<'a> Context<'a> {
);
new_state.memory.add_abstract_object(
object_id.clone(),
Bitvector::zero(apint::BitWidth::from(address_bytesize)).into(),
super::object::ObjectType::Heap,
address_bytesize,
Some(super::object::ObjectType::Heap),
);
new_state.memory.set_lower_index_bound(
&object_id,
......@@ -266,13 +266,15 @@ impl<'a> Context<'a> {
}
/// Check all parameter registers of a call for dangling pointers and report possible use-after-frees.
fn check_parameter_register_for_dangling_pointer(
fn check_parameter_register_for_dangling_pointer<'iter, I>(
&self,
state: &mut State,
call: &Term<Jmp>,
extern_symbol: &ExternSymbol,
) {
for parameter in extern_symbol.parameters.iter() {
parameters: I,
) where
I: Iterator<Item = &'iter Arg>,
{
for parameter in parameters {
match state.eval_parameter_arg(parameter, self.runtime_memory_image) {
Ok(value) => {
if state.memory.is_dangling_pointer(&value, true) {
......@@ -287,8 +289,8 @@ impl<'a> Context<'a> {
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Use After Free) Call to {} may access freed memory at {}",
extern_symbol.name, call.tid.address
"(Use After Free) Call at {} may access freed memory",
call.tid.address
),
};
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
......@@ -354,14 +356,14 @@ impl<'a> Context<'a> {
}
}
/// Adjust the stack register after a call to an extern function.
/// Adjust the stack register after a call to a function.
///
/// On x86, this removes the return address from the stack
/// (other architectures pass the return address in a register, not on the stack).
/// On other architectures the stack register retains the value it had before the call.
/// Note that in some calling conventions the callee also clears function parameters from the stack.
/// We do not detect and handle these cases yet.
fn adjust_stack_register_on_extern_call(
fn adjust_stack_register_on_return_from_call(
&self,
state_before_call: &State,
new_state: &mut State,
......@@ -383,7 +385,7 @@ impl<'a> Context<'a> {
}
/// Handle an extern symbol call, whose concrete effect on the state is unknown.
/// Basically, we assume that the call may write to all memory objects and register that is has access to.
/// Basically, we assume that the call may write to all memory objects and registers that it has access to.
fn handle_generic_extern_call(
&self,
state: &State,
......@@ -438,7 +440,7 @@ impl<'a> Context<'a> {
let mut new_state = state_before_call.clone();
new_state.clear_non_callee_saved_register(&calling_conv.callee_saved_register[..]);
// Adjust stack register value (for x86 architecture).
self.adjust_stack_register_on_extern_call(state_before_call, &mut new_state);
self.adjust_stack_register_on_return_from_call(state_before_call, &mut new_state);
let mut possible_referenced_ids = BTreeSet::new();
for parameter_register in calling_conv.integer_parameter_register.iter() {
......@@ -463,19 +465,6 @@ impl<'a> Context<'a> {
}
}
/// Get the offset of the current stack pointer to the base of the current stack frame.
///
/// Returns `Top` if the stack pointer does not point uniquely
/// into the stack frame of the current function.
fn get_current_stack_offset(&self, state: &State) -> ValueDomain {
    let stack_register = &self.project.stack_pointer_register;
    match state.get_register(stack_register).get_if_unique_target() {
        // The stack pointer must point into the current stack frame for the offset to be known.
        Some((stack_id, stack_offset_domain)) if *stack_id == state.stack_id => {
            stack_offset_domain.clone()
        }
        _ => ValueDomain::new_top(stack_register.size),
    }
}
/// Report a NULL dereference CWE at the address of the given TID.
fn report_null_deref(&self, tid: &Tid) {
let warning = CweWarning {
......
......@@ -13,10 +13,7 @@ fn new_id(time: &str, reg_name: &str) -> AbstractIdentifier {
}
fn mock_extern_symbol(name: &str) -> (Tid, ExternSymbol) {
let arg = Arg::Register {
expr: Expression::Var(register("RDX")),
data_type: None,
};
let arg = Arg::from_var(register("RDX"), None);
let tid = Tid::new("extern_".to_string() + name);
(
tid.clone(),
......@@ -89,14 +86,7 @@ fn mock_project() -> (Project, Config) {
tid: Tid::new("program"),
term: program,
};
let cconv = CallingConvention {
name: "__cdecl".to_string(),
integer_parameter_register: vec![Variable::mock("RDX", 8)],
float_parameter_register: vec![Expression::Var(Variable::mock("XMMO", 16))],
integer_return_register: vec![Variable::mock("RDX", 8)],
float_return_register: vec![],
callee_saved_register: vec![Variable::mock("callee_saved_reg", 8)],
};
let cconv = CallingConvention::mock_x64();
let register_set = vec!["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"]
.into_iter()
.map(|name| Variable::mock(name, ByteSize::new(8)))
......@@ -117,16 +107,32 @@ fn mock_project() -> (Project, Config) {
)
}
/// Create a mock context for unit tests.
/// Note that the function leaks memory!
fn mock_context() -> Context<'static> {
    let (project, config) = mock_project();
    // Leak the project and the analysis results to obtain 'static references.
    let project = Box::leak(Box::new(project));
    let analysis_results = Box::leak(Box::new(AnalysisResults::mock_from_project(project)));
    let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
    let mut context = Context::new(analysis_results, config, log_sender);
    // Insert mocked function signatures for the mock callee (also leaked).
    let fn_sigs = Box::leak(Box::new(BTreeMap::from_iter([(
        Tid::new("callee"),
        FunctionSignature::mock_x64(),
    )])));
    context.fn_signatures = fn_sigs;
    context
}
#[test]
fn context_problem_implementation() {
use crate::analysis::forward_interprocedural_fixpoint::Context as IpFpContext;
use crate::analysis::pointer_inference::Data;
use Expression::*;
let (project, config) = mock_project();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let analysis_results = AnalysisResults::mock_from_project(&project);
let context = Context::new(&analysis_results, config, log_sender);
let context = mock_context();
let mut state = State::new(&register("RSP"), Tid::new("main"));
let def = Term {
......@@ -150,79 +156,9 @@ fn context_problem_implementation() {
assert_eq!(state.eval(&Var(register("RSP"))), stack_pointer);
state = context.update_def(&state, &store_term).unwrap();
// Test update_call
let target_block = Term {
tid: Tid::new("func_start"),
term: Blk {
defs: Vec::new(),
jmps: Vec::new(),
indirect_jmp_targets: Vec::new(),
},
};
let sub = Term {
tid: Tid::new("caller_sub"),
term: Sub {
name: "caller_sub".into(),
blocks: vec![target_block.clone()],
calling_convention: None,
},
};
let target_node = crate::analysis::graph::Node::BlkStart(&target_block, &sub);
let call = call_term("func");
let mut callee_state = context
.update_call(&state, &call, &target_node, &None)
.unwrap();
assert_eq!(callee_state.stack_id, new_id("func", "RSP"));
assert_eq!(callee_state.caller_stack_ids.len(), 1);
assert_eq!(
callee_state.caller_stack_ids.iter().next().unwrap(),
&new_id("call_func", "RSP")
);
callee_state
.memory
.set_value(
Data::from_target(new_id("func", "RSP"), bv(-30)),
bv(33).into(),
)
.unwrap();
// Emulate removing the return pointer from the stack for x64
let stack_pointer_update_def = Term {
tid: Tid::new("stack_pointer_update_def"),
term: Def::Assign {
var: register("RSP"),
value: BinOp {
op: BinOpType::IntAdd,
lhs: Box::new(Var(register("RSP"))),
rhs: Box::new(Const(Bitvector::from_i64(8))),
},
},
};
callee_state = context
.update_def(&callee_state, &stack_pointer_update_def)
.unwrap();
// Test update_return
let return_state = context
.update_return(
Some(&callee_state),
Some(&state),
&call,
&return_term("return_target"),
&None,
)
.unwrap();
assert_eq!(return_state.stack_id, new_id("main", "RSP"));
assert_eq!(return_state.caller_stack_ids, BTreeSet::new());
assert_eq!(return_state.memory, state.memory);
assert_eq!(
return_state.get_register(&register("RSP")),
state
.get_register(&register("RSP"))
.bin_op(BinOpType::IntAdd, &Bitvector::from_i64(8).into())
);
state.set_register(&register("callee_saved_reg"), bv(13).into());
state.set_register(&register("other_reg"), bv(14).into());
// Test extern function handling
state.set_register(&register("RBP"), bv(13).into());
state.set_register(&register("RSI"), bv(14).into());
let malloc = call_term("extern_malloc");
let mut state_after_malloc = context.update_call_stub(&state, &malloc).unwrap();
......@@ -238,15 +174,13 @@ fn context_problem_implementation() {
.bin_op(BinOpType::IntAdd, &bv(8).into())
);
assert_eq!(
state_after_malloc.get_register(&register("callee_saved_reg")),
state_after_malloc.get_register(&register("RBP")),
bv(13).into()
);
assert!(state_after_malloc
.get_register(&register("other_reg"))
.is_top());
assert!(state_after_malloc.get_register(&register("RSI")).is_top());
state_after_malloc.set_register(
&register("callee_saved_reg"),
&register("RBP"),
Data::from_target(new_id("call_extern_malloc", "RDX"), bv(0)),
);
let free = call_term("extern_free");
......@@ -256,7 +190,7 @@ fn context_problem_implementation() {
assert!(state_after_free.get_register(&register("RDX")).is_top());
assert_eq!(state_after_free.memory.get_num_objects(), 2);
assert_eq!(
state_after_free.get_register(&register("callee_saved_reg")),
state_after_free.get_register(&register("RBP")),
Data::from_target(new_id("call_extern_malloc", "RDX"), bv(0))
);
......@@ -270,12 +204,10 @@ fn context_problem_implementation() {
.bin_op(BinOpType::IntAdd, &bv(8).into())
);
assert_eq!(
state_after_other_fn.get_register(&register("callee_saved_reg")),
state_after_other_fn.get_register(&register("RBP")),
bv(13).into()
);
assert!(state_after_other_fn
.get_register(&register("other_reg"))
.is_top());
assert!(state_after_other_fn.get_register(&register("RSI")).is_top());
}
#[test]
......@@ -283,11 +215,13 @@ fn update_return() {
use crate::analysis::forward_interprocedural_fixpoint::Context as IpFpContext;
use crate::analysis::pointer_inference::object::ObjectType;
use crate::analysis::pointer_inference::Data;
let (project, config) = mock_project();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let analysis_results = AnalysisResults::mock_from_project(&project);
let context = Context::new(&analysis_results, config, log_sender);
let state_before_return = State::new(&register("RSP"), Tid::new("callee"));
let context = mock_context();
let callee_tid = Tid::new("callee");
let state_before_return = State::from_fn_sig(
context.fn_signatures.get(&callee_tid).unwrap(),
&register("RSP"),
callee_tid.clone(),
);
let mut state_before_return = context
.update_def(
&state_before_return,
......@@ -295,58 +229,42 @@ fn update_return() {
)
.unwrap();
let callsite_id = new_id("call_callee", "RSP");
let callee_created_heap_id = new_id("callee_created_heap", "RAX");
state_before_return.memory.add_abstract_object(
callsite_id.clone(),
bv(0).into(),
ObjectType::Stack,
callee_created_heap_id.clone(),
ByteSize::new(8),
Some(ObjectType::Heap),
);
state_before_return
.caller_stack_ids
.insert(callsite_id.clone());
state_before_return
.ids_known_to_caller
.insert(callsite_id.clone());
let other_callsite_id = new_id("call_callee_other", "RSP");
state_before_return.memory.add_abstract_object(
other_callsite_id.clone(),
bv(0).into(),
ObjectType::Stack,
ByteSize::new(8),
state_before_return.set_register(
&register("RAX"),
Data::from_target(callee_created_heap_id.clone(), bv(16)),
);
state_before_return
.caller_stack_ids
.insert(other_callsite_id.clone());
state_before_return
.ids_known_to_caller
.insert(other_callsite_id.clone());
state_before_return.set_register(
&register("RDX"),
Data::from_target(new_id("call_callee_other", "RSP"), bv(-32)),
Data::from_target(new_id("callee", "RDI"), bv(0)),
);
let state_before_call = State::new(&register("RSP"), Tid::new("original_caller_id"));
let state_before_call = State::new(&register("RSP"), Tid::new("caller"));
let mut state_before_call = context
.update_def(
&state_before_call,
&reg_add_term("RSP", -16, "stack_offset_on_call_adjustment"),
)
.unwrap();
let caller_caller_id = new_id("caller_caller", "RSP");
let param_obj_id = new_id("caller_created_heap", "RAX");
state_before_call.memory.add_abstract_object(
caller_caller_id.clone(),
bv(0).into(),
ObjectType::Stack,
param_obj_id.clone(),
ByteSize::new(8),
Some(ObjectType::Heap),
);
state_before_call.set_register(
&register("RDI"),
Data::from_target(param_obj_id.clone(), bv(0).into()),
);
state_before_call.set_register(
&register("RBX"),
Data::from_target(param_obj_id.clone(), bv(0).into()),
);
state_before_call
.caller_stack_ids
.insert(caller_caller_id.clone());
state_before_call
.ids_known_to_caller
.insert(caller_caller_id.clone());
let state = context
.update_return(
......@@ -358,25 +276,43 @@ fn update_return() {
)
.unwrap();
let mut caller_caller_set = BTreeSet::new();
caller_caller_set.insert(caller_caller_id);
assert_eq!(state.ids_known_to_caller, caller_caller_set.clone());
assert_eq!(state.caller_stack_ids, caller_caller_set.clone());
assert_eq!(state.stack_id, new_id("original_caller_id", "RSP"));
assert!(state_before_return.memory.get_all_object_ids().len() == 3);
assert!(state.memory.get_all_object_ids().len() == 2);
assert_eq!(state.stack_id, new_id("caller", "RSP"));
assert_eq!(
state.get_register(&register("RAX")),
Data::from_target(
callee_created_heap_id
.with_path_hint(Tid::new("call_callee"))
.unwrap(),
bv(16).into()
)
);
assert_eq!(
state.get_register(&register("RBX")),
Data::from_target(param_obj_id.clone(), bv(0).into())
);
assert_eq!(
state.get_register(&register("RDX")),
Data::from_target(param_obj_id.clone(), bv(0).into())
);
assert_eq!(
state.get_register(&register("RSP")),
Data::from_target(new_id("caller", "RSP"), bv(-8).into())
);
assert!(state.memory.get_all_object_ids().len() == 3);
assert!(state
.memory
.get_all_object_ids()
.get(&new_id("original_caller_id", "RSP"))
.get(&param_obj_id)
.is_some());
assert!(state
.memory
.get_all_object_ids()
.get(&new_id("caller_caller", "RSP"))
.get(
&callee_created_heap_id
.with_path_hint(Tid::new("call_callee"))
.unwrap()
)
.is_some());
let expected_rsp = Data::from_target(new_id("original_caller_id", "RSP"), bv(-8));
assert_eq!(state.get_register(&register("RSP")), expected_rsp);
}
#[test]
......@@ -418,3 +354,31 @@ fn specialize_conditional() {
let result = context.specialize_conditional(&state, &condition, &block, false);
assert!(result.is_none());
}
#[test]
fn get_unsound_caller_ids() {
    let context = mock_context();
    // Two distinct callee parameter IDs that both resolve to the same caller object.
    let callee_id_to_caller_data_map = BTreeMap::from_iter([
        (
            new_id("callee", "RDI"),
            Data::from_target(new_id("caller", "RAX"), bv(1).into()),
        ),
        (
            new_id("callee", "RSI"),
            Data::from_target(new_id("caller", "RAX"), bv(2).into()),
        ),
    ]);
    let callee_tid = Tid::new("callee");
    let callee_state = State::from_fn_sig(
        context.fn_signatures.get(&callee_tid).unwrap(),
        &register("RSP"),
        callee_tid.clone(),
    );
    let callee_id_to_access_pattern_map = context.create_id_to_access_pattern_map(&callee_state);
    let unsound_ids = context.get_unsound_caller_ids(
        &callee_id_to_caller_data_map,
        &callee_id_to_access_pattern_map,
    );
    // Since two callee IDs point into the same caller object,
    // the callee-side analysis of that object may be unsound.
    assert_eq!(unsound_ids, BTreeSet::from_iter([new_id("caller", "RAX")]));
}
......@@ -43,19 +43,19 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
}
// check for out-of-bounds memory access
if new_state.contains_out_of_bounds_mem_access(&def.term, self.runtime_memory_image) {
let (warning_name, warning_description) = match def.term {
let (warning_name, warning_description) = match &def.term {
Def::Load { .. } => (
"CWE125",
format!(
"(Out-of-bounds Read) Memory load at {} may be out of bounds",
def.tid.address
def.tid.address,
),
),
Def::Store { .. } => (
"CWE787",
format!(
"(Out-of-bounds Write) Memory write at {} may be out of bounds",
def.tid.address
def.tid.address,
),
),
Def::Assign { .. } => panic!(),
......@@ -101,16 +101,15 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
}
/// Update the state according to the effects of the given `Jmp` term.
/// Right now the state is not changed.
fn update_jump(
&self,
value: &State,
state: &State,
_jump: &Term<Jmp>,
_untaken_conditional: Option<&Term<Jmp>>,
_target: &Term<Blk>,
) -> Option<State> {
let new_value = value.clone();
Some(new_value)
let new_state = state.clone();
Some(new_state)
}
/// Update the state according to the effects of the given `Call` term.
......@@ -120,84 +119,22 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
state: &State,
call_term: &Term<Jmp>,
_target_node: &crate::analysis::graph::Node,
calling_convention: &Option<String>,
_calling_convention: &Option<String>,
) -> Option<State> {
if let Jmp::Call {
target: ref callee_tid,
return_: _,
} = call_term.term
{
let callee_stack_id = AbstractIdentifier::new(
callee_tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let new_caller_stack_id = AbstractIdentifier::new(
call_term.tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let stack_offset_adjustment = self.get_current_stack_offset(state);
let address_bytesize = self.project.stack_pointer_register.size;
let mut callee_state = state.clone();
// Remove virtual register since they do no longer exist in the callee
callee_state.remove_virtual_register();
// Remove callee-saved register, since the callee should not use their values anyway.
// This should prevent recursive references to all stack frames in the call tree
// since the source for it, the stack frame base pointer, is callee-saved.
if let Some(cconv) = self
.project
.get_specific_calling_convention(calling_convention)
{
// Note that this may lead to analysis errors if the function uses another calling convention.
callee_state.remove_callee_saved_register(cconv);
}
// Set the lower index bound for the caller stack frame.
callee_state
.memory
.set_lower_index_bound(&state.stack_id, &stack_offset_adjustment);
// Replace the caller stack ID with one determined by the call instruction.
// This has to be done *before* adding the new callee stack id
// to avoid confusing caller and callee stack ids in case of recursive calls.
callee_state.replace_abstract_id(
&state.stack_id,
&new_caller_stack_id,
&stack_offset_adjustment,
);
// add a new memory object for the callee stack frame
callee_state.memory.add_abstract_object(
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::from(address_bytesize)).into(),
ObjectType::Stack,
address_bytesize,
);
// set the new stack_id
callee_state.stack_id = callee_stack_id.clone();
// Set the stack pointer register to the callee stack id.
// At the beginning of a function this is the only known pointer to the new stack frame.
callee_state.set_register(
&self.project.stack_pointer_register,
Data::from_target(
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::from(address_bytesize)).into(),
),
// Check call parameters for dangling pointers
let callee_fn_sig = self.fn_signatures.get(callee_tid).unwrap();
self.check_parameter_register_for_dangling_pointer(
&mut state.clone(),
call_term,
callee_fn_sig.parameters.keys(),
);
// For MIPS architecture only: Ensure that the t9 register contains the address of the called function
if self.project.cpu_architecture.contains("MIPS") {
let _ = callee_state
.set_mips_link_register(callee_tid, self.project.stack_pointer_register.size);
}
// set the list of caller stack ids to only this caller id
callee_state.caller_stack_ids = BTreeSet::new();
callee_state.caller_stack_ids.insert(new_caller_stack_id);
// Remove non-referenced objects and objects, only the caller knows about, from the state.
callee_state.ids_known_to_caller = BTreeSet::new();
callee_state.remove_unreferenced_objects();
// all remaining objects, except for the callee stack id, are also known to the caller
callee_state.ids_known_to_caller = callee_state.memory.get_all_object_ids();
callee_state.ids_known_to_caller.remove(&callee_stack_id);
Some(callee_state)
// No information flows from caller to the callee in the analysis.
None
} else if let Jmp::CallInd { .. } = call_term.term {
panic!("Indirect call edges not yet supported.")
} else {
......@@ -213,13 +150,8 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
state_before_call: Option<&State>,
call_term: &Term<Jmp>,
return_term: &Term<Jmp>,
calling_convention: &Option<String>,
calling_convention_opt: &Option<String>,
) -> Option<State> {
// TODO: For the long term we may have to replace the IDs representing callers with something
// that identifies the edge of the call and not just the callsite.
// When indirect calls are handled, the callsite alone is not a unique identifier anymore.
// This may lead to confusion if both caller and callee have the same ID in their respective caller_stack_id sets.
let (state_before_call, state_before_return) =
match (state_before_call, state_before_return) {
(Some(state_call), Some(state_return)) => (state_call, state_return),
......@@ -238,13 +170,17 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
(None, None) => return None,
};
let original_caller_stack_id = &state_before_call.stack_id;
let caller_stack_id = AbstractIdentifier::new(
call_term.tid.clone(),
AbstractLocation::from_var(&self.project.stack_pointer_register).unwrap(),
);
let callee_stack_id = &state_before_return.stack_id;
let stack_offset_on_call = self.get_current_stack_offset(state_before_call);
let cconv = match self
.project
.get_specific_calling_convention(calling_convention_opt)
{
Some(cconv) => cconv,
None => {
// If we neither know the specific nor a default calling convention for the function,
// then we treat it as a dead end in the control flow graph.
return None;
}
};
// Detect possible information loss on the stack pointer and report it.
if let Err(err) = self.detect_stack_pointer_information_loss_on_return(state_before_return)
......@@ -255,61 +191,80 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
return None;
}
// Check whether state_before_return actually knows the `caller_stack_id`.
// If not, we are returning from a state that cannot correspond to this callsite.
if !state_before_return
.caller_stack_ids
.contains(&caller_stack_id)
{
return None;
}
let mut state_after_return = state_before_return.clone();
state_after_return.remove_virtual_register();
// Remove the IDs of other callers not corresponding to this call
state_after_return.remove_other_caller_stack_ids(&caller_stack_id);
state_after_return.replace_abstract_id(
&caller_stack_id,
original_caller_stack_id,
&(-stack_offset_on_call.clone()),
);
state_after_return.merge_callee_stack_to_caller_stack(
callee_stack_id,
original_caller_stack_id,
&(-stack_offset_on_call),
// Create a mapping of IDs from the callee to IDs that should be used in the caller.
let id_map = self.create_callee_id_to_caller_data_map(
state_before_call,
state_before_return,
&call_term.tid,
);
state_after_return.stack_id = original_caller_stack_id.clone();
state_after_return.caller_stack_ids = state_before_call.caller_stack_ids.clone();
state_after_return.ids_known_to_caller = state_before_call.ids_known_to_caller.clone();
let callee_id_to_access_pattern_map =
self.create_id_to_access_pattern_map(state_before_return);
// Identify caller IDs for which the callee analysis may be unsound for this callsite.
let unsound_caller_ids =
self.get_unsound_caller_ids(&id_map, &callee_id_to_access_pattern_map);
// TODO: Unsound caller IDs occur too often to log the cases right now.
// We have to investigate the reasons for it (maybe too many parameters on the caller stack?)
// and find better heuristics to prevent them poisoning the analysis soundness.
state_after_return.readd_caller_objects(state_before_call);
let mut state_after_return = state_before_call.clone();
// Adjust register values of state_after_return
state_after_return.remove_non_callee_saved_register(cconv);
self.adjust_stack_register_on_return_from_call(state_before_call, &mut state_after_return);
for return_reg in cconv.get_all_return_register() {
let mut return_value = state_before_return.get_register(return_reg);
return_value.replace_all_ids(&id_map);
if !return_value.is_top() {
state_after_return.set_register(return_reg, return_value);
}
}
// Merge or add memory objects from the callee to the caller state.
for (callee_object_id, callee_object) in state_before_return.memory.iter() {
if *callee_object_id == state_before_return.stack_id {
// The callee stack frame does not exist anymore after return to the caller.
continue;
}
if Some(false)
== callee_id_to_access_pattern_map
.get(callee_object_id)
.map(|access_pattern| access_pattern.is_mutably_dereferenced())
{
// We do not have to modify anything for parameter objects that are only read but not written to.
continue;
}
let mut callee_object = callee_object.clone();
callee_object.replace_ids(&id_map);
if let Some(cconv) = self
.project
.get_specific_calling_convention(calling_convention)
if callee_id_to_access_pattern_map
.get(callee_object_id)
.is_none()
{
// Restore information about callee-saved register from the caller state.
// TODO: Implement some kind of check to ensure that the callee adheres to the given calling convention!
// The current workaround should be reasonably exact for programs written in C,
// but may introduce a lot of errors
// if the compiler often uses other calling conventions for internal function calls.
state_after_return.restore_callee_saved_register(
state_before_call,
cconv,
&self.project.stack_pointer_register,
// Add a callee object that does not correspond to a parameter to the caller or the stack of the callee.
if let Ok(new_object_id) = callee_object_id.with_path_hint(call_term.tid.clone()) {
state_after_return
.memory
.insert(new_object_id, callee_object);
}
} else {
// The callee object is a parameter object.
self.log_debug(
state_after_return.add_param_object_from_callee(
callee_object,
id_map.get(callee_object_id).unwrap(),
),
Some(&call_term.tid),
);
}
// remove non-referenced objects from the state
}
// Additionally assume arbitrary writes for every caller ID where the callee handling might be unsound.
for id in &unsound_caller_ids {
state_after_return
.memory
.assume_arbitrary_writes_to_object(id, &BTreeSet::new());
// TODO: We should specify more possible reference targets.
}
// Cleanup
state_after_return.remove_unreferenced_objects();
// remove the lower index bound of the stack frame
state_after_return.memory.set_lower_index_bound(
original_caller_stack_id,
&IntervalDomain::new_top(self.project.stack_pointer_register.size),
);
Some(state_after_return)
}
......@@ -341,14 +296,14 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
self.check_parameter_register_for_dangling_pointer(
&mut new_state,
call,
extern_symbol,
extern_symbol.parameters.iter(),
);
}
// Clear non-callee-saved registers from the state.
let cconv = self.project.get_calling_convention(extern_symbol);
new_state.clear_non_callee_saved_register(&cconv.callee_saved_register[..]);
// Adjust stack register value (for x86 architecture).
self.adjust_stack_register_on_extern_call(state, &mut new_state);
self.adjust_stack_register_on_return_from_call(state, &mut new_state);
match extern_symbol.name.as_str() {
malloc_like_fn if self.allocation_symbols.iter().any(|x| x == malloc_like_fn) => {
......
......@@ -36,7 +36,6 @@ use crate::prelude::*;
use crate::utils::log::*;
use petgraph::graph::NodeIndex;
use petgraph::visit::IntoNodeReferences;
use petgraph::Direction;
use std::collections::{BTreeMap, HashMap};
mod context;
......@@ -86,7 +85,7 @@ pub struct PointerInference<'a> {
}
impl<'a> PointerInference<'a> {
/// Generate a new pointer inference compuation for a project.
/// Generate a new pointer inference computation for a project.
pub fn new(
analysis_results: &'a AnalysisResults<'a>,
config: Config,
......@@ -95,13 +94,12 @@ impl<'a> PointerInference<'a> {
) -> PointerInference<'a> {
let context = Context::new(analysis_results, config, log_sender.clone());
let project = analysis_results.project;
let function_signatures = analysis_results.function_signatures.unwrap();
let mut entry_sub_to_entry_blocks_map = HashMap::new();
for sub_tid in project.program.term.entry_points.iter() {
if let Some(sub) = project.program.term.subs.get(sub_tid) {
let mut sub_to_entry_blocks_map = HashMap::new();
for (sub_tid, sub) in project.program.term.subs.iter() {
if let Some(entry_block) = sub.term.blocks.get(0) {
entry_sub_to_entry_blocks_map.insert(sub_tid, entry_block.tid.clone());
}
sub_to_entry_blocks_map.insert(sub_tid, entry_block.tid.clone());
}
}
let mut tid_to_graph_indices_map = HashMap::new();
......@@ -110,26 +108,12 @@ impl<'a> PointerInference<'a> {
tid_to_graph_indices_map.insert((block.tid.clone(), sub.tid.clone()), node);
}
}
let entry_sub_to_entry_node_map: HashMap<Tid, NodeIndex> = entry_sub_to_entry_blocks_map
let sub_to_entry_node_map: HashMap<Tid, NodeIndex> = sub_to_entry_blocks_map
.into_iter()
.filter_map(|(sub_tid, block_tid)| {
if let Some(start_node_index) =
tid_to_graph_indices_map.get(&(block_tid, sub_tid.clone()))
{
// We only add entry points that are also control flow graph roots
if context
.graph
.neighbors_directed(*start_node_index, Direction::Incoming)
.next()
.is_none()
{
Some((sub_tid.clone(), *start_node_index))
} else {
None
}
} else {
None
}
tid_to_graph_indices_map
.get(&(block_tid, sub_tid.clone()))
.map(|start_node_index| (sub_tid.clone(), *start_node_index))
})
.collect();
let mut fixpoint_computation =
......@@ -138,22 +122,18 @@ impl<'a> PointerInference<'a> {
let _ = log_sender.send(LogThreadMsg::Log(
LogMessage::new_info(format!(
"Adding {} entry points",
entry_sub_to_entry_node_map.len()
sub_to_entry_node_map.len()
))
.source("Pointer Inference"),
));
}
for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() {
let mut fn_entry_state = if let Some(cconv) = project.get_standard_calling_convention()
{
State::new_with_generic_parameter_objects(
for (sub_tid, start_node_index) in sub_to_entry_node_map.into_iter() {
let fn_signature = function_signatures.get(&sub_tid).unwrap();
let mut fn_entry_state = State::from_fn_sig(
fn_signature,
&project.stack_pointer_register,
sub_tid.clone(),
&cconv.integer_parameter_register,
)
} else {
State::new(&project.stack_pointer_register, sub_tid.clone())
};
);
if project.cpu_architecture.contains("MIPS") {
let _ = fn_entry_state
.set_mips_link_register(&sub_tid, project.stack_pointer_register.size);
......@@ -172,8 +152,23 @@ impl<'a> PointerInference<'a> {
/// Compute the fixpoint of the pointer inference analysis.
/// Has a `max_steps` bound for the fixpoint algorithm to prevent infinite loops.
pub fn compute(&mut self) {
///
/// If `print_stats` is `true` then some extra log messages with statistics about the computation are generated.
pub fn compute(&mut self, print_stats: bool) {
self.computation.compute_with_max_steps(100); // TODO: make max_steps configurable!
if print_stats {
self.count_blocks_with_state();
}
if !self.computation.has_stabilized() {
let worklist_size = self.computation.get_worklist().len();
let _ = self.log_info(format!(
"Fixpoint did not stabilize. Remaining worklist size: {}",
worklist_size,
));
}
if print_stats {
statistics::compute_and_log_mem_access_stats(self);
}
}
/// Print results serialized as YAML to stdout
......@@ -232,79 +227,6 @@ impl<'a> PointerInference<'a> {
self.computation.get_node_value(node_id)
}
/// Add speculative entry points to the fixpoint algorithm state.
///
/// Since indirect jumps and calls are not handled yet (TODO: change that),
/// the analysis may miss a *lot* of code in some cases.
/// To remedy this somewhat,
/// we mark all function starts, that are also roots in the control flow graph
/// and do not have a state assigned to them yet, as additional entry points.
///
/// If `only_cfg_roots` is set to `false`, then all function starts without a state are marked as roots.
fn add_speculative_entry_points(
&mut self,
project: &Project,
only_cfg_roots: bool,
print_stats: bool,
) {
// TODO: Refactor the fixpoint computation structs, so that the project reference can be extracted from them.
let mut start_block_to_sub_map: HashMap<&Tid, &Term<Sub>> = HashMap::new();
for sub in project.program.term.subs.values() {
if project.program.term.extern_symbols.contains_key(&sub.tid) {
continue; // We ignore functions marked as extern symbols.
}
if let Some(start_block) = sub.term.blocks.first() {
start_block_to_sub_map.insert(&start_block.tid, sub);
}
}
let graph = self.computation.get_graph();
let mut new_entry_points = Vec::new();
for (node_id, node) in graph.node_references() {
if let Node::BlkStart(block, sub) = node {
if start_block_to_sub_map.get(&block.tid) == Some(sub)
&& self.computation.get_node_value(node_id).is_none()
&& (!only_cfg_roots
|| graph
.neighbors_directed(node_id, Direction::Incoming)
.next()
.is_none())
{
new_entry_points.push(node_id);
}
}
}
if print_stats {
self.log_info(format!(
"Adding {} speculative entry points",
new_entry_points.len()
));
}
for entry in new_entry_points {
let sub_tid = start_block_to_sub_map
[&self.computation.get_graph()[entry].get_block().tid]
.tid
.clone();
let mut fn_entry_state = if let Some(cconv) = project.get_standard_calling_convention()
{
State::new_with_generic_parameter_objects(
&project.stack_pointer_register,
sub_tid.clone(),
&cconv.integer_parameter_register,
)
} else {
State::new(&project.stack_pointer_register, sub_tid.clone())
};
if project.cpu_architecture.contains("MIPS") {
let _ = fn_entry_state
.set_mips_link_register(&sub_tid, project.stack_pointer_register.size);
}
self.computation.set_node_value(
entry,
super::interprocedural_fixpoint_generic::NodeValue::Value(fn_entry_state),
);
}
}
/// Print the number of blocks that have a state associated to them.
/// Intended for debug purposes.
fn count_blocks_with_state(&self) {
......@@ -330,39 +252,6 @@ impl<'a> PointerInference<'a> {
let _ = self.log_collector.send(LogThreadMsg::Log(log_msg));
}
/// Compute the results of the pointer inference fixpoint algorithm.
/// Successively adds more functions as possible entry points
/// to increase code coverage.
pub fn compute_with_speculative_entry_points(&mut self, project: &Project, print_stats: bool) {
self.compute();
if print_stats {
self.count_blocks_with_state();
}
// Now compute again with speculative entry points added
self.add_speculative_entry_points(project, true, print_stats);
self.compute();
if print_stats {
self.count_blocks_with_state();
}
// Now compute again with all missed functions as additional entry points
self.add_speculative_entry_points(project, false, print_stats);
self.compute();
if print_stats {
self.count_blocks_with_state();
}
if !self.computation.has_stabilized() {
let worklist_size = self.computation.get_worklist().len();
let _ = self.log_info(format!(
"Fixpoint did not stabilize. Remaining worklist size: {}",
worklist_size,
));
}
if print_stats {
statistics::compute_and_log_mem_access_stats(self);
}
}
/// Print information on dead ends in the control flow graph for debugging purposes.
/// Ignore returns where there is no known caller stack id.
#[allow(dead_code)]
......@@ -399,14 +288,7 @@ impl<'a> PointerInference<'a> {
return_.is_some()
);
}
Jmp::Return(_) => {
if !state.caller_stack_ids.is_empty() {
println!(
"{}: Return dead end despite known caller ids",
jmp.tid
)
}
}
Jmp::Return(_) => {}
_ => println!(
"{}: Unexpected Jmp dead end: {:?}",
jmp.tid, jmp.term
......@@ -470,7 +352,7 @@ pub fn run<'a>(
print_stats,
);
computation.compute_with_speculative_entry_points(analysis_results.project, print_stats);
computation.compute(print_stats);
if print_debug {
computation.print_compact_json();
......
use super::*;
use std::collections::BTreeMap;
impl AbstractObject {
/// Get all abstract IDs that the object may contain pointers to.
......@@ -18,25 +19,6 @@ impl AbstractObject {
referenced_ids
}
/// For pointer values replace an abstract identifier with another one and add the offset_adjustment to the pointer offsets.
/// This is needed to adjust stack pointers on call and return instructions.
pub fn replace_abstract_id(
&mut self,
old_id: &AbstractIdentifier,
new_id: &AbstractIdentifier,
offset_adjustment: &ValueDomain,
) {
let inner = Arc::make_mut(&mut self.inner);
for elem in inner.memory.values_mut() {
elem.replace_abstract_id(old_id, new_id, offset_adjustment);
}
inner.memory.clear_top_values();
if inner.pointer_targets.get(old_id).is_some() {
inner.pointer_targets.remove(old_id);
inner.pointer_targets.insert(new_id.clone());
}
}
/// Remove the provided IDs from the target lists of all pointers in the memory object.
/// Also remove them from the pointer_targets list.
///
......@@ -56,4 +38,23 @@ impl AbstractObject {
}
inner.memory.clear_top_values(); // In case the previous operation left *Top* values in the memory struct.
}
/// Replace all abstract IDs in `self` with the values given by the replacement map.
/// IDs not contained as keys in the replacement map are replaced by `Top` values.
pub fn replace_ids(&mut self, replacement_map: &BTreeMap<AbstractIdentifier, Data>) {
    let inner = Arc::make_mut(&mut self.inner);
    // Substitute the IDs inside every value stored in the memory region.
    for value in inner.memory.values_mut() {
        value.replace_all_ids(replacement_map);
    }
    // The substitution may have left plain `Top` entries behind; drop them.
    inner.memory.clear_top_values();
    // Rebuild the pointer target set: each old target contributes the IDs
    // referenced by its replacement value. Targets without a replacement vanish,
    // which matches replacing them by `Top`.
    let new_targets: BTreeSet<AbstractIdentifier> = inner
        .pointer_targets
        .iter()
        .filter_map(|old_target| replacement_map.get(old_target))
        .flat_map(|replacement| replacement.referenced_ids().cloned())
        .collect();
    inner.pointer_targets = new_targets;
}
}
......@@ -87,12 +87,12 @@ impl std::convert::Into<AbstractObject> for Inner {
impl AbstractObject {
/// Create a new abstract object with given object type and address bytesize.
pub fn new(type_: ObjectType, address_bytesize: ByteSize) -> AbstractObject {
pub fn new(type_: Option<ObjectType>, address_bytesize: ByteSize) -> AbstractObject {
let inner = Inner {
pointer_targets: BTreeSet::new(),
is_unique: true,
state: ObjectState::Alive,
type_: Some(type_),
type_,
memory: MemRegion::new(address_bytesize),
lower_index_bound: BitvectorDomain::Top(address_bytesize),
upper_index_bound: BitvectorDomain::Top(address_bytesize),
......@@ -124,6 +124,14 @@ impl AbstractObject {
inner.upper_index_bound = upper_bound;
}
/// Add an offset to the upper index bound that is still considered to be contained in the abstract object.
pub fn add_to_upper_index_bound(&mut self, offset: i64) {
    let inner = Arc::make_mut(&mut self.inner);
    // Resize the raw offset to the bytesize of the bound before adding it.
    let summand =
        Bitvector::from_i64(offset).into_resize_signed(inner.upper_index_bound.bytesize());
    inner.upper_index_bound = inner.upper_index_bound.clone() + summand.into();
}
/// Get the state of the memory object.
pub fn get_state(&self) -> ObjectState {
self.inner.state
......@@ -193,6 +201,80 @@ impl AbstractObject {
}
}
}
/// Overwrite the values in `self` with those in `other`
/// under the assumption that the zero offset in `other` corresponds to the offset `offset_other` in `self`.
///
/// If `self` is not a unique memory object or if `offset_other` is not a precisely known offset,
/// then the function tries to merge `self` and `other`,
/// since we do not exactly know which values of `self` were overwritten by `other`.
///
/// All values of `self` are marked as possibly overwritten, i.e. `Top`,
/// but they are only deleted if they intersect a non-`Top` value of `other`.
/// This approximates the fact that we currently do not track exactly which indices
/// in `other` were overwritten with a `Top` element and which indices simply were not
/// accessed at all in `other`.
///
/// The upper and lower index bounds of `self` are kept and not overwritten.
pub fn overwrite_with(&mut self, other: &AbstractObject, offset_other: &ValueDomain) {
if let Ok(obj_offset) = offset_other.try_to_offset() {
// Case 1: The offset is exact and `self` represents a single unique object,
// so we know exactly where the overwrite happens.
if self.inner.is_unique {
let inner = Arc::make_mut(&mut self.inner);
// Overwrite values in the memory region of self with those of other.
// Marking everything as `Top` first models that any old value may have been clobbered.
inner.memory.mark_all_values_as_top();
for (elem_offset, elem) in other.inner.memory.iter() {
inner
.memory
.insert_at_byte_index(elem.clone(), obj_offset + elem_offset);
}
// Merge all other properties with those of other.
inner.is_unique &= other.inner.is_unique;
inner.state = inner.state.merge(other.inner.state);
inner
.pointer_targets
.append(&mut other.inner.pointer_targets.clone());
// TODO: We should log cases where the index bounds are violated by `other`.
} else {
// Case 2: The offset is exact but `self` may represent several objects,
// so we can only merge in `other` (shifted by the known offset) instead of overwriting.
let inner = Arc::make_mut(&mut self.inner);
let mut other = other.clone();
let other_inner = Arc::make_mut(&mut other.inner);
other_inner.memory.add_offset_to_all_indices(obj_offset);
inner.memory = inner.memory.merge(&other_inner.memory);
inner.is_unique &= other.inner.is_unique;
inner.state = inner.state.merge(other.inner.state);
inner
.pointer_targets
.append(&mut other.inner.pointer_targets.clone());
// TODO: We should log cases where the index bounds are violated by `other`.
}
} else {
// Case 3: The write offset is imprecise, so any value of `self` may have been
// overwritten; mark everything as `Top` and merge the remaining object properties.
let inner = Arc::make_mut(&mut self.inner);
inner.memory.mark_all_values_as_top();
inner.is_unique &= other.inner.is_unique;
inner.state = inner.state.merge(other.inner.state);
inner
.pointer_targets
.append(&mut other.inner.pointer_targets.clone());
}
}
/// Add an offset to all values contained in the abstract object.
/// The offset is also added to the lower and upper index bounds.
pub fn add_offset_to_all_indices(&mut self, offset: &ValueDomain) {
    let inner = Arc::make_mut(&mut self.inner);
    match offset.try_to_offset() {
        Ok(concrete_offset) => {
            // Shift every stored value and both index bounds by the concrete offset.
            inner.memory.add_offset_to_all_indices(concrete_offset);
            let summand = Bitvector::from_i64(concrete_offset)
                .into_resize_signed(inner.lower_index_bound.bytesize());
            inner.lower_index_bound = inner.lower_index_bound.clone() + summand.clone().into();
            inner.upper_index_bound = inner.upper_index_bound.clone() + summand.into();
        }
        Err(_) => {
            // The offset is not precisely known: forget all contents and both bounds.
            inner.memory = MemRegion::new(inner.memory.get_address_bytesize());
            inner.lower_index_bound = inner.lower_index_bound.top();
            inner.upper_index_bound = inner.upper_index_bound.top();
        }
    }
}
}
impl AbstractDomain for AbstractObject {
......
use crate::intermediate_representation::Variable;
use super::*;
use crate::intermediate_representation::Variable;
use std::collections::BTreeMap;
fn new_abstract_object() -> AbstractObject {
let inner = Inner {
......@@ -75,45 +75,6 @@ fn abstract_object() {
}
#[test]
fn replace_id() {
use std::collections::BTreeMap;
let mut object = new_abstract_object();
let mut target_map = BTreeMap::new();
target_map.insert(new_id("time_1", "RAX"), bv(20));
target_map.insert(new_id("time_234", "RAX"), bv(30));
target_map.insert(new_id("time_1", "RBX"), bv(40));
let pointer = DataDomain::mock_from_target_map(target_map.clone());
object.set_value(pointer, &bv(-15)).unwrap();
assert_eq!(object.get_referenced_ids_overapproximation().len(), 3);
object.replace_abstract_id(
&new_id("time_1", "RAX"),
&new_id("time_234", "RAX"),
&bv(10),
);
target_map.remove(&new_id("time_1", "RAX"));
let modified_pointer = DataDomain::mock_from_target_map(target_map);
assert_eq!(
object.get_value(Bitvector::from_i64(-15), ByteSize::new(8)),
modified_pointer
);
object.replace_abstract_id(
&new_id("time_1", "RBX"),
&new_id("time_234", "RBX"),
&bv(10),
);
let mut target_map = BTreeMap::new();
target_map.insert(new_id("time_234", "RAX"), bv(30));
target_map.insert(new_id("time_234", "RBX"), bv(50));
let modified_pointer = DataDomain::mock_from_target_map(target_map);
assert_eq!(
object.get_value(Bitvector::from_i64(-15), ByteSize::new(8)),
modified_pointer
);
}
#[test]
fn remove_ids() {
use std::collections::BTreeMap;
let mut object = new_abstract_object();
......@@ -145,3 +106,53 @@ fn access_contained_in_bounds() {
assert!(object.access_contained_in_bounds(&IntervalDomain::mock(92, 92), ByteSize::new(8)));
assert!(!object.access_contained_in_bounds(&IntervalDomain::mock(93, 93), ByteSize::new(8)));
}
#[test]
fn overwrite_with() {
    // Base object with values at offsets 0 and 8.
    let mut base = new_abstract_object();
    base.set_value(bv(1).into(), &bv(0).into()).unwrap();
    base.set_value(bv(2).into(), &bv(8).into()).unwrap();
    // Overwriting object with values at offsets 0 and 8 relative to its own base.
    let mut overwriter = new_abstract_object();
    overwriter.set_value(bv(3).into(), &bv(0).into()).unwrap();
    overwriter.set_value(bv(4).into(), &bv(8).into()).unwrap();
    // Overwrite `base`, with the overwriter's zero offset landing at offset 8 of `base`.
    base.overwrite_with(&overwriter, &bv(8).into());
    // The value at offset 0 survives but is marked as possibly overwritten,
    // while the overwriter's values land at offsets 8 and 16.
    let mut expected = new_abstract_object();
    let mut possibly_clobbered: Data = bv(1).into();
    possibly_clobbered.set_contains_top_flag();
    expected.set_value(possibly_clobbered, &bv(0).into()).unwrap();
    expected.set_value(bv(3).into(), &bv(8).into()).unwrap();
    expected.set_value(bv(4).into(), &bv(16).into()).unwrap();
    assert_eq!(base, expected);
}
#[test]
fn replace_ids() {
    let mut object = new_abstract_object();
    // Store pointers to `before::RAX` and `before::RBX` at offsets 0 and 8.
    object
        .set_value(
            Data::from_target(new_id("before", "RAX"), bv(0).into()),
            &bv(0).into(),
        )
        .unwrap();
    object
        .set_value(
            Data::from_target(new_id("before", "RBX"), bv(0).into()),
            &bv(8).into(),
        )
        .unwrap();
    // Only `before::RAX` has a replacement; `before::RBX` is dropped by `replace_ids`.
    let replacement_map = BTreeMap::from_iter([(
        new_id("before", "RAX"),
        Data::from_target(new_id("after", "RCX"), bv(0).into()),
    )]);
    let mut expected_result = new_abstract_object();
    expected_result
        .set_value(
            Data::from_target(new_id("after", "RCX"), bv(0).into()),
            &bv(0).into(),
        )
        .unwrap();
    object.replace_ids(&replacement_map);
    assert_eq!(object, expected_result);
}
......@@ -5,7 +5,15 @@ impl AbstractObject {
/// and with the given size of the accessed value is contained in the bounds of the memory object.
/// If `offset` contains more than one possible index value,
/// then only return `true` if the access is contained in the abstract object for all possible offset values.
///
/// If `offset` is a `Top` value, then the function assumes this to be due to analysis inaccuracies
/// and does not flag them as possible out-of-bounds access.
pub fn access_contained_in_bounds(&self, offset: &ValueDomain, size: ByteSize) -> bool {
if offset.is_top() {
// Currently TOP offsets happen a lot due to inaccuracies in the analysis.
// So for the time being we do not flag them as possible CWEs.
return true;
}
if let Ok(offset_interval) = offset.try_to_interval() {
if let Ok(lower_bound) = self.inner.lower_index_bound.try_to_bitvec() {
if lower_bound.checked_sgt(&offset_interval.start).unwrap() {
......
......@@ -13,7 +13,7 @@ impl AbstractObjectList {
/// even if their state is unknown and `report_unknown_states` is `true`.
pub fn is_dangling_pointer(&self, address: &Data, report_unknown_states: bool) -> bool {
for id in address.referenced_ids() {
if let Some((object, _offset_id)) = self.objects.get(id) {
if let Some(object) = self.objects.get(id) {
match (report_unknown_states, object.get_state()) {
(_, ObjectState::Dangling) => return true,
(true, ObjectState::Unknown) => {
......@@ -34,7 +34,7 @@ impl AbstractObjectList {
/// as flagged.
pub fn mark_dangling_pointer_targets_as_flagged(&mut self, address: &Data) {
for id in address.referenced_ids() {
let (object, _) = self.objects.get_mut(id).unwrap();
let object = self.objects.get_mut(id).unwrap();
if matches!(
object.get_state(),
ObjectState::Unknown | ObjectState::Dangling
......@@ -69,11 +69,8 @@ impl AbstractObjectList {
}
}
for (id, offset) in address.get_relative_values() {
if let Some((object, base_offset)) = self.objects.get(id) {
let adjusted_offset = offset.clone() + base_offset.clone();
if !adjusted_offset.is_top()
&& !object.access_contained_in_bounds(&adjusted_offset, size)
{
if let Some(object) = self.objects.get(id) {
if !object.access_contained_in_bounds(offset, size) {
return true;
}
}
......@@ -86,8 +83,9 @@ impl AbstractObjectList {
///
/// Any `bound` value other than a constant bitvector is interpreted as the memory object not having a lower bound.
pub fn set_lower_index_bound(&mut self, object_id: &AbstractIdentifier, bound: &ValueDomain) {
let (object, base_offset) = self.objects.get_mut(object_id).unwrap();
let bound = (bound.clone() + base_offset.clone())
let object = self.objects.get_mut(object_id).unwrap();
let bound = bound
.clone()
.try_to_bitvec()
.map(|bitvec| bitvec.into())
.unwrap_or_else(|_| BitvectorDomain::new_top(bound.bytesize()));
......@@ -99,8 +97,9 @@ impl AbstractObjectList {
///
/// Any `bound` value other than a constant bitvector is interpreted as the memory object not having an upper bound.
pub fn set_upper_index_bound(&mut self, object_id: &AbstractIdentifier, bound: &ValueDomain) {
let (object, base_offset) = self.objects.get_mut(object_id).unwrap();
let bound = (bound.clone() + base_offset.clone())
let object = self.objects.get_mut(object_id).unwrap();
let bound = bound
.clone()
.try_to_bitvec()
.map(|bitvec| bitvec.into())
.unwrap_or_else(|_| BitvectorDomain::new_top(bound.bytesize()));
......@@ -117,14 +116,17 @@ impl AbstractObjectList {
) -> Result<(), Vec<(AbstractIdentifier, Error)>> {
let ids: Vec<AbstractIdentifier> = object_pointer.referenced_ids().cloned().collect();
let mut possible_double_free_ids = Vec::new();
if ids.len() > 1 {
if ids.len() > 1
|| object_pointer.contains_top()
|| object_pointer.get_absolute_value().is_some()
{
for id in ids {
if let Err(error) = self.objects.get_mut(&id).unwrap().0.mark_as_maybe_freed() {
if let Err(error) = self.objects.get_mut(&id).unwrap().mark_as_maybe_freed() {
possible_double_free_ids.push((id.clone(), error));
}
}
} else if let Some(id) = ids.get(0) {
if let Err(error) = self.objects.get_mut(id).unwrap().0.mark_as_freed() {
if let Err(error) = self.objects.get_mut(id).unwrap().mark_as_freed() {
possible_double_free_ids.push((id.clone(), error));
}
}
......
......@@ -3,36 +3,13 @@
use super::*;
impl AbstractObjectList {
/// Replace one abstract identifier with another one. Adjust offsets of all pointers accordingly.
///
/// **Example:**
/// Assume the `old_id` points to offset 0 in the corresponding memory object and the `new_id` points to offset -32.
/// Then the offset_adjustment is -32.
/// The offset_adjustment gets *added* to the base offset in `self.memory.ids` (so that it points to offset -32 in the memory object),
/// while it gets *subtracted* from all pointer values (so that they still point to the same spot in the corresponding memory object).
pub fn replace_abstract_id(
    &mut self,
    old_id: &AbstractIdentifier,
    new_id: &AbstractIdentifier,
    offset_adjustment: &ValueDomain,
) {
    // Pointers stored inside the objects need the inverse of the base-offset adjustment.
    let pointer_adjustment = -offset_adjustment.clone();
    for (object, _base_offset) in self.objects.values_mut() {
        object.replace_abstract_id(old_id, new_id, &pointer_adjustment);
    }
    // Re-key the object itself under the new ID and shift its base offset.
    if let Some((object, base_offset)) = self.objects.remove(old_id) {
        self.objects.insert(
            new_id.clone(),
            (object, base_offset + offset_adjustment.clone()),
        );
    }
}
/// Return all IDs that may be referenced by the memory object pointed to by the given ID.
/// The returned set is an overapproximation of the actual referenced IDs.
pub fn get_referenced_ids_overapproximation(
&self,
id: &AbstractIdentifier,
) -> BTreeSet<AbstractIdentifier> {
if let Some((object, _offset)) = self.objects.get(id) {
if let Some(object) = self.objects.get(id) {
object.get_referenced_ids_overapproximation().clone()
} else {
BTreeSet::new()
......@@ -46,10 +23,10 @@ impl AbstractObjectList {
&self,
id: &AbstractIdentifier,
) -> BTreeSet<AbstractIdentifier> {
if let Some((object, _offset)) = self.objects.get(id) {
if let Some(object) = self.objects.get(id) {
object.get_referenced_ids_underapproximation()
} else {
panic!("Abstract ID not associated to an object")
BTreeSet::new()
}
}
}
......@@ -4,9 +4,10 @@
use super::*;
impl AbstractObjectList {
/// Remove the memory object that `object_id` points to from the object list.
pub fn remove_object(&mut self, object_id: &AbstractIdentifier) {
self.objects.remove(object_id);
/// Get a reference to the object corresponding to the given ID.
#[cfg(test)]
pub fn get_object(&self, id: &AbstractIdentifier) -> Option<&AbstractObject> {
self.objects.get(id)
}
/// Add a new abstract object to the object list
......@@ -16,18 +17,28 @@ impl AbstractObjectList {
pub fn add_abstract_object(
&mut self,
object_id: AbstractIdentifier,
initial_offset: ValueDomain,
type_: ObjectType,
address_bytesize: ByteSize,
generic_address_bytesize: ByteSize,
type_: Option<ObjectType>,
) {
let new_object = AbstractObject::new(type_, address_bytesize);
if let Some((object, offset)) = self.objects.get_mut(&object_id) {
let new_object = AbstractObject::new(type_, generic_address_bytesize);
if let Some(object) = self.objects.get_mut(&object_id) {
// If the identifier already exists, we have to assume that more than one object may be referenced by this identifier.
object.mark_as_not_unique();
*object = object.merge(&new_object);
*offset = offset.merge(&initial_offset);
} else {
self.objects.insert(object_id, (new_object, initial_offset));
self.objects.insert(object_id, new_object);
}
}
/// Insert an existing object to the object list.
/// If the object identifier already exists, the object is marked as non-unique
/// and merged with the corresponding object already present in the object list.
pub fn insert(&mut self, id: AbstractIdentifier, object: AbstractObject) {
    match self.objects.get_mut(&id) {
        Some(present_object) => {
            // The ID now refers to more than one possible allocation:
            // mark it as non-unique and merge the two objects.
            present_object.mark_as_not_unique();
            *present_object = present_object.merge(&object);
        }
        None => {
            self.objects.insert(id, object);
        }
    }
}
......@@ -48,32 +59,14 @@ impl AbstractObjectList {
self.objects.keys().cloned().collect()
}
/// Get an iterator over the contained abstract objects in `self`.
///
/// The iterator yields `(identifier, object)` pairs
/// in the key order of the underlying `BTreeMap`.
pub fn iter(&self) -> std::collections::btree_map::Iter<AbstractIdentifier, AbstractObject> {
    self.objects.iter()
}
/// Get the number of objects that are currently tracked.
///
/// Only available in test builds (`#[cfg(test)]`).
#[cfg(test)]
pub fn get_num_objects(&self) -> usize {
    self.objects.len()
}
/// Append those objects from another object list, whose abstract IDs are not known to self.
///
/// Objects whose IDs are already tracked in `self` are left untouched.
pub fn append_unknown_objects(&mut self, other_object_list: &AbstractObjectList) {
    for (id, (other_object, other_offset)) in other_object_list.objects.iter() {
        // `contains_key` replaces the non-idiomatic `get(id) == None` comparison
        // (clippy: partialeq_to_none) and avoids constructing an `Option` to compare.
        if !self.objects.contains_key(id) {
            self.objects
                .insert(id.clone(), (other_object.clone(), other_offset.clone()));
        }
    }
}
/// Remove the provided IDs as targets from all pointers in all objects.
/// Also remove the objects, that these IDs point to.
pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
    // `BTreeMap::remove` is a no-op for missing keys,
    // so the former `get(id).is_some()` pre-check was a redundant second lookup.
    for id in ids_to_remove {
        self.objects.remove(id);
    }
    // Also strip the IDs as pointer targets inside the remaining objects.
    for (object, _base_offset) in self.objects.values_mut() {
        object.remove_ids(ids_to_remove);
    }
}
}
......@@ -17,12 +17,7 @@ mod list_manipulation;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct AbstractObjectList {
/// The abstract objects.
///
/// Each abstract object comes with an offset given as a [`ValueDomain`].
/// This offset determines where the zero offset corresponding to the abstract identifier inside the object is.
/// Note that this offset may be a `Top` element
/// if the exact offset corresponding to the identifier is unknown.
objects: BTreeMap<AbstractIdentifier, (AbstractObject, ValueDomain)>,
objects: BTreeMap<AbstractIdentifier, AbstractObject>,
}
impl AbstractObjectList {
......@@ -35,15 +30,9 @@ impl AbstractObjectList {
address_bytesize: ByteSize,
) -> AbstractObjectList {
let mut objects = BTreeMap::new();
let mut stack_object = AbstractObject::new(ObjectType::Stack, address_bytesize);
let mut stack_object = AbstractObject::new(Some(ObjectType::Stack), address_bytesize);
stack_object.set_upper_index_bound(Bitvector::zero(address_bytesize.into()).into());
objects.insert(
stack_id,
(
stack_object,
Bitvector::zero(apint::BitWidth::from(address_bytesize)).into(),
),
);
objects.insert(stack_id, stack_object);
AbstractObjectList { objects }
}
......@@ -54,9 +43,8 @@ impl AbstractObjectList {
/// If the address does not contain any relative targets an empty value is returned.
pub fn get_value(&self, address: &Data, size: ByteSize) -> Data {
let mut merged_value = Data::new_empty(size);
for (id, offset_pointer) in address.get_relative_values() {
if let Some((object, offset_identifier)) = self.objects.get(id) {
let offset = offset_pointer.clone() + offset_identifier.clone();
for (id, offset) in address.get_relative_values() {
if let Some(object) = self.objects.get(id) {
if let Ok(concrete_offset) = offset.try_to_bitvec() {
let value = object.get_value(concrete_offset, size);
merged_value = merged_value.merge(&value);
......@@ -73,32 +61,35 @@ impl AbstractObjectList {
merged_value
}
/// Get a mutable reference to the object with the given abstract ID.
///
/// Returns `None` if no object is tracked under the given ID.
pub fn get_object_mut(&mut self, id: &AbstractIdentifier) -> Option<&mut AbstractObject> {
    self.objects.get_mut(id)
}
/// Set the value at a given address.
///
/// If the address has more than one target,
/// we merge-write the value to all targets.
pub fn set_value(&mut self, pointer: Data, value: Data) -> Result<(), Error> {
let targets = pointer.get_relative_values();
match targets.len() {
0 => Ok(()),
1 => {
let (id, pointer_offset) = targets.iter().next().unwrap();
let (object, id_offset) = self.objects.get_mut(id).unwrap();
let adjusted_offset = pointer_offset.clone() + id_offset.clone();
object.set_value(value, &adjusted_offset)
}
_ => {
// There is more than one object that the pointer may write to.
if let Some((id, offset)) = pointer.get_if_unique_target() {
let object = self
.objects
.get_mut(id)
.ok_or_else(|| anyhow!("Abstract object does not exist."))?;
object.set_value(value, offset)
} else {
// There may be more than one object that the pointer may write to.
// We merge-write to all possible targets
for (id, offset) in targets {
let (object, object_offset) = self.objects.get_mut(id).unwrap();
let adjusted_offset = offset.clone() + object_offset.clone();
object.merge_value(value.clone(), &adjusted_offset);
for (id, offset) in pointer.get_relative_values() {
let object = self
.objects
.get_mut(id)
.ok_or_else(|| anyhow!("Abstract object does not exist."))?;
object.merge_value(value.clone(), offset);
}
Ok(())
}
}
}
/// Assume that arbitrary writes happened to a memory object,
/// including adding pointers to targets contained in `new_possible_reference_targets` to it.
......@@ -113,7 +104,7 @@ impl AbstractObjectList {
object_id: &AbstractIdentifier,
new_possible_reference_targets: &BTreeSet<AbstractIdentifier>,
) {
if let Some((object, _)) = self.objects.get_mut(object_id) {
if let Some(object) = self.objects.get_mut(object_id) {
object.assume_arbitrary_writes(new_possible_reference_targets);
}
}
......@@ -125,7 +116,7 @@ impl AbstractObjectList {
object_id: &AbstractIdentifier,
) -> Result<Option<ObjectType>, ()> {
match self.objects.get(object_id) {
Some((object, _)) => Ok(object.get_object_type()),
Some(object) => Ok(object.get_object_type()),
None => Err(()),
}
}
......@@ -135,7 +126,7 @@ impl AbstractObjectList {
/// Returns an error if the ID is not contained in the object list.
pub fn is_unique_object(&self, object_id: &AbstractIdentifier) -> Result<bool, Error> {
match self.objects.get(object_id) {
Some((object, _)) => Ok(object.is_unique()),
Some(object) => Ok(object.is_unique()),
None => Err(anyhow!("Object ID not contained in object list.")),
}
}
......@@ -151,12 +142,11 @@ impl AbstractDomain for AbstractObjectList {
/// where more than one ID should point to the same object.
fn merge(&self, other: &Self) -> Self {
let mut merged_objects = self.objects.clone();
for (id, (other_object, other_offset)) in other.objects.iter() {
if let Some((object, offset)) = merged_objects.get_mut(id) {
for (id, other_object) in other.objects.iter() {
if let Some(object) = merged_objects.get_mut(id) {
*object = object.merge(other_object);
*offset = offset.merge(other_offset);
} else {
merged_objects.insert(id.clone(), (other_object.clone(), other_offset.clone()));
merged_objects.insert(id.clone(), other_object.clone());
}
}
AbstractObjectList {
......@@ -176,11 +166,8 @@ impl AbstractObjectList {
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let mut object_map = Map::new();
for (id, (object, offset)) in self.objects.iter() {
object_map.insert(
format!("{} (base offset {})", id, offset),
object.to_json_compact(),
);
for (id, object) in self.objects.iter() {
object_map.insert(format!("{}", id), object.to_json_compact());
}
Value::Object(object_map)
}
......
......@@ -17,7 +17,6 @@ fn new_id(name: &str) -> AbstractIdentifier {
fn abstract_object_list() {
let mut obj_list = AbstractObjectList::from_stack_id(new_id("RSP".into()), ByteSize::new(8));
assert_eq!(obj_list.objects.len(), 1);
assert_eq!(obj_list.objects.values().next().unwrap().1, bv(0));
let pointer = DataDomain::from_target(new_id("RSP".into()), bv(8));
obj_list.set_value(pointer.clone(), bv(42).into()).unwrap();
......@@ -42,9 +41,8 @@ fn abstract_object_list() {
other_obj_list.add_abstract_object(
new_id("RAX".into()),
bv(0),
ObjectType::Heap,
ByteSize::new(8),
Some(ObjectType::Heap),
);
let heap_pointer = DataDomain::from_target(new_id("RAX".into()), bv(8));
other_obj_list
......@@ -94,66 +92,25 @@ fn abstract_object_list() {
new_id("RAX".into())
);
let modified_heap_pointer = DataDomain::from_target(new_id("ID2".into()), bv(8));
other_obj_list.replace_abstract_id(&new_id("RAX".into()), &new_id("ID2".into()), &bv(0));
assert_eq!(
other_obj_list.get_value(&pointer, ByteSize::new(8)),
modified_heap_pointer.clone()
);
assert_eq!(other_obj_list.objects.get(&new_id("RAX".into())), None);
assert!(matches!(
other_obj_list.objects.get(&new_id("ID2".into())),
Some(_)
));
let mut ids_to_keep = BTreeSet::new();
ids_to_keep.insert(new_id("ID2".into()));
ids_to_keep.insert(new_id("RAX".into()));
other_obj_list.remove_unused_objects(&ids_to_keep);
assert_eq!(other_obj_list.objects.len(), 1);
assert_eq!(
other_obj_list.objects.iter().next().unwrap().0,
&new_id("ID2".into())
&new_id("RAX".into())
);
assert_eq!(
other_obj_list
.objects
.values()
.next()
.unwrap()
.0
.get_state(),
other_obj_list.objects.values().next().unwrap().get_state(),
crate::analysis::pointer_inference::object::ObjectState::Alive
);
let modified_heap_pointer = DataDomain::from_target(new_id("RAX".into()), bv(8));
other_obj_list
.mark_mem_object_as_freed(&modified_heap_pointer)
.unwrap();
assert_eq!(
other_obj_list
.objects
.values()
.next()
.unwrap()
.0
.get_state(),
other_obj_list.objects.values().next().unwrap().get_state(),
crate::analysis::pointer_inference::object::ObjectState::Dangling
);
}
#[test]
fn append_unknown_objects_test() {
    let mut target_list = AbstractObjectList::from_stack_id(new_id("stack"), ByteSize::new(8));
    let mut source_list = AbstractObjectList::from_stack_id(new_id("stack"), ByteSize::new(8));
    // The source list has one object (the heap object) that the target list lacks.
    source_list.add_abstract_object(
        new_id("heap_obj"),
        bv(0).into(),
        ObjectType::Heap,
        ByteSize::new(8),
    );
    target_list.append_unknown_objects(&source_list);
    // The target list now tracks both its own stack object and the appended heap object.
    assert_eq!(target_list.objects.len(), 2);
    assert!(target_list.objects.contains_key(&new_id("stack")));
    assert!(target_list.objects.contains_key(&new_id("heap_obj")));
}
......@@ -35,25 +35,8 @@ impl State {
value: &Data,
global_memory: &RuntimeMemoryImage,
) -> Result<(), Error> {
// If the address is a unique caller stack address, write to *all* caller stacks.
if let Some(offset) = self.unwrap_offset_if_caller_stack_address(address) {
let caller_addresses: Vec<_> = self
.caller_stack_ids
.iter()
.map(|caller_stack_id| Data::from_target(caller_stack_id.clone(), offset.clone()))
.collect();
let mut result = Ok(());
for address in caller_addresses {
if let Err(err) = self.store_value(&address, &value.clone(), global_memory) {
result = Err(err);
}
}
// Note that this only returns the last error that was detected.
result
} else {
let pointer = self.adjust_pointer_for_read(address);
self.memory.set_value(pointer.clone(), value.clone())?;
if let Some(absolute_address) = pointer.get_absolute_value() {
self.memory.set_value(address.clone(), value.clone())?;
if let Some(absolute_address) = address.get_absolute_value() {
if let Ok(address_to_global_data) = absolute_address.try_to_bitvec() {
match global_memory.is_address_writeable(&address_to_global_data) {
Ok(true) => Ok(()),
......@@ -74,7 +57,6 @@ impl State {
Ok(())
}
}
}
/// Write a value to the address one gets when evaluating the address expression.
pub fn write_to_address(
......@@ -105,7 +87,7 @@ impl State {
size: ByteSize,
global_memory: &RuntimeMemoryImage,
) -> Result<Data, Error> {
let address = self.adjust_pointer_for_read(&self.eval(address));
let address = self.eval(address);
let mut result = if let Some(global_address) = address.get_absolute_value() {
if let Ok(address_bitvector) = global_address.try_to_bitvec() {
match global_memory.read(&address_bitvector, size) {
......@@ -160,38 +142,6 @@ impl State {
}
}
/// If the pointer contains a reference to the stack with offset >= 0, replace it with a pointer
/// pointing to all possible caller IDs.
fn adjust_pointer_for_read(&self, address: &Data) -> Data {
    let mut adjusted_address = address.clone();
    let mut new_targets = BTreeMap::new();
    for (id, offset) in address.get_relative_values() {
        // Non-stack targets are passed through unchanged.
        if *id != self.stack_id {
            new_targets.insert(id.clone(), offset.clone());
            continue;
        }
        match offset.try_to_offset_interval() {
            Ok((interval_start, interval_end))
                if interval_start >= 0
                    && interval_end >= 0
                    && !self.caller_stack_ids.is_empty() =>
            {
                // Known non-negative stack offset: redirect to all caller stacks.
                // Note that the ID of the current stack frame is *not* added.
                for caller_id in self.caller_stack_ids.iter() {
                    new_targets.insert(caller_id.clone(), offset.clone());
                }
            }
            Ok(_) => {
                // Negative (or partially negative) offset or no caller stacks:
                // keep the current stack frame as target.
                new_targets.insert(id.clone(), offset.clone());
            }
            Err(_) => {
                // Unknown offset: it may point into any caller stack
                // as well as the current stack frame, so add all of them.
                for caller_id in self.caller_stack_ids.iter() {
                    new_targets.insert(caller_id.clone(), offset.clone());
                }
                new_targets.insert(id.clone(), offset.clone());
            }
        }
    }
    adjusted_address.set_relative_values(new_targets);
    adjusted_address
}
/// Evaluate the value of an expression in the current state
pub fn eval(&self, expression: &Expression) -> Data {
use Expression::*;
......@@ -257,16 +207,11 @@ impl State {
def: &Def,
global_data: &RuntimeMemoryImage,
) -> bool {
let (raw_address, size) = match def {
let (address, size) = match def {
Def::Load { address, var } => (self.eval(address), var.size),
Def::Store { address, value } => (self.eval(address), value.bytesize()),
_ => return false,
};
if self.is_stack_pointer_with_nonnegative_offset(&raw_address) {
// Access to a parameter or the return address of the function
return false;
}
let address = self.adjust_pointer_for_read(&raw_address);
self.memory
.is_out_of_bounds_mem_access(&address, size, global_data)
}
......@@ -279,47 +224,12 @@ impl State {
data: &Data,
global_data: &RuntimeMemoryImage,
) -> bool {
let mut data = self.adjust_pointer_for_read(data);
let mut data = data.clone();
data.set_absolute_value(None); // Do not check absolute_values
self.memory
.is_out_of_bounds_mem_access(&data, ByteSize::new(1), global_data)
}
/// Return `true` if `data` is a pointer to the current stack frame with a constant positive address,
/// i.e. if it accesses a stack parameter (or the return-to address for x86) of the current function.
pub fn is_stack_pointer_with_nonnegative_offset(&self, data: &Data) -> bool {
    match data.get_if_unique_target() {
        // Only a unique target equal to the current stack ID can qualify.
        Some((target, offset)) if *target == self.stack_id => {
            // True exactly when the offset is a known constant >= 0.
            matches!(offset.try_to_offset(), Ok(offset_val) if offset_val >= 0)
        }
        _ => false,
    }
}
/// If the given address is a positive stack offset and `self.caller_stack_ids` is non-empty,
/// i.e. it is an access to the caller stack, return the offset.
///
/// In all other cases, including the case that the address has more than one target, return `None`.
fn unwrap_offset_if_caller_stack_address(&self, address: &Data) -> Option<ValueDomain> {
    // Without known caller stacks there is nothing to redirect to.
    if self.caller_stack_ids.is_empty() {
        return None;
    }
    let (id, offset) = address.get_if_unique_target()?;
    if *id != self.stack_id {
        return None;
    }
    let (interval_start, _interval_end) = offset.try_to_offset_interval().ok()?;
    // Only non-negative offsets reach into the caller stack frame.
    (interval_start >= 0).then(|| offset.clone())
}
/// Check whether the given `def` could result in a memory access through a NULL pointer.
///
/// If no NULL pointer dereference is detected then `Ok(false)` is returned.
......
//! Methods of [`State`] for manipulating abstract IDs.
use super::*;
use crate::analysis::pointer_inference::object::AbstractObject;
impl State {
/// Replace all occurences of old_id with new_id and adjust offsets accordingly.
/// This is needed to replace stack/caller IDs on call and return instructions.
///
/// **Example:**
/// Assume the old_id points to offset 0 in the corresponding memory object and the new_id points to offset -32.
/// Then the offset_adjustment is -32.
/// The offset_adjustment gets *added* to the base offset in self.memory.ids (so that it points to offset -32 in the memory object),
/// while it gets *subtracted* from all pointer values (so that they still point to the same spot in the corresponding memory object).
pub fn replace_abstract_id(
    &mut self,
    old_id: &AbstractIdentifier,
    new_id: &AbstractIdentifier,
    offset_adjustment: &ValueDomain,
) {
    // Hoist the loop-invariant negation out of the register loop
    // instead of recomputing it for every register value.
    let negative_offset = -offset_adjustment.clone();
    for register_data in self.register.values_mut() {
        register_data.replace_abstract_id(old_id, new_id, &negative_offset);
    }
    self.memory
        .replace_abstract_id(old_id, new_id, offset_adjustment);
    if &self.stack_id == old_id {
        self.stack_id = new_id.clone();
    }
    // `BTreeSet::remove` reports whether the value was present,
    // so the former `get(..).is_some()` pre-check was a redundant second lookup.
    if self.caller_stack_ids.remove(old_id) {
        self.caller_stack_ids.insert(new_id.clone());
    }
    if self.ids_known_to_caller.remove(old_id) {
        self.ids_known_to_caller.insert(new_id.clone());
    }
}
/// Search (recursively) through all memory objects referenced by the given IDs
/// and add all IDs reachable through concrete pointers contained in them to the set of IDs.
///
......@@ -84,23 +53,40 @@ impl State {
ids
}
/// Recursively remove all `caller_stack_ids` not corresponding to the given caller.
pub fn remove_other_caller_stack_ids(&mut self, caller_id: &AbstractIdentifier) {
let mut ids_to_remove = self.caller_stack_ids.clone();
ids_to_remove.remove(caller_id);
for register_value in self.register.values_mut() {
register_value.remove_ids(&ids_to_remove);
if register_value.is_empty() {
*register_value = register_value.top();
}
}
self.memory.remove_ids(&ids_to_remove);
self.caller_stack_ids = BTreeSet::new();
self.caller_stack_ids.insert(caller_id.clone());
self.ids_known_to_caller = self
.ids_known_to_caller
.difference(&ids_to_remove)
.cloned()
.collect();
/// Add the given `param_object` from the callee state to `self`
/// (where `self` represents the state after returning from the callee).
///
/// `param_value_at_call` is the value that the parameter had at the callsite.
/// It is assumed that all IDs contained in the `param_object` are already replaced with values relative to the caller.
///
/// If the `param_object` corresponds to a unique object in `self`
/// then the contents of that object are overwritten with those of `param_object`.
/// Else the contents are only merged with all possible caller objects,
/// since the exact object that corresponds to the callee object is unknown.
pub fn add_param_object_from_callee(
    &mut self,
    param_object: AbstractObject,
    param_value_at_call: &Data,
) -> Result<(), Error> {
    match param_value_at_call.get_if_unique_target() {
        Some((caller_id, offset)) => {
            // Exactly one caller object: its contents can be overwritten outright.
            let caller_object = self
                .memory
                .get_object_mut(caller_id)
                .ok_or_else(|| anyhow!("Missing caller memory object"))?;
            caller_object.overwrite_with(&param_object, offset);
        }
        None => {
            // The matching caller object is ambiguous:
            // merge the (offset-shifted) callee object into every candidate.
            for (caller_id, offset) in param_value_at_call.get_relative_values() {
                let caller_object = self
                    .memory
                    .get_object_mut(caller_id)
                    .ok_or_else(|| anyhow!("Missing caller memory object"))?;
                let mut shifted_object = param_object.clone();
                shifted_object.add_offset_to_all_indices(offset);
                *caller_object = caller_object.merge(&shifted_object);
            }
        }
    }
    Ok(())
}
}
use super::object_list::AbstractObjectList;
use super::{Data, ValueDomain};
use super::Data;
use crate::abstract_domain::*;
use crate::analysis::function_signature::FunctionSignature;
use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::binary::RuntimeMemoryImage;
......@@ -15,28 +16,12 @@ mod value_specialization;
pub struct State {
/// Maps a register variable to the data known about its content.
/// A variable not contained in the map has value `Data::Top(..)`, i.e. nothing is known about its content.
register: BTreeMap<Variable, Data>,
register: DomainMap<Variable, Data, MergeTopStrategy>,
/// The list of all known memory objects.
pub memory: AbstractObjectList,
/// The abstract identifier of the current stack frame.
/// It points to the base of the stack frame, i.e. only negative offsets point into the current stack frame.
pub stack_id: AbstractIdentifier,
/// All known IDs of caller stack frames.
/// Note that these IDs are named after the callsite,
/// i.e. we can distinguish every callsite and for recursive functions the caller and current stack frames have different IDs.
///
/// Writes to the current stack frame with offset >= 0 are written to *all* caller stack frames.
/// Reads to the current stack frame with offset >= 0 are handled as merge-read from all caller stack frames.
pub caller_stack_ids: BTreeSet<AbstractIdentifier>,
/// All IDs of objects that are known to some caller.
/// This is an overapproximation of all object IDs that may have been passed as parameters to the function.
/// The corresponding objects are not allowed to be deleted (even if no pointer to them exists anymore)
/// so that after returning from a call the caller can recover their modified contents
/// and the callee does not accidentally delete this information if it loses all pointers to an object.
///
/// Note that IDs that the callee should not have access to are not included here.
/// For these IDs the caller can assume that the contents of the corresponding memory object were not accessed or modified by the call.
pub ids_known_to_caller: BTreeSet<AbstractIdentifier>,
}
impl State {
......@@ -47,7 +32,7 @@ impl State {
function_tid,
AbstractLocation::from_var(stack_register).unwrap(),
);
let mut register: BTreeMap<Variable, Data> = BTreeMap::new();
let mut register = DomainMap::from(BTreeMap::new());
register.insert(
stack_register.clone(),
Data::from_target(
......@@ -59,39 +44,57 @@ impl State {
register,
memory: AbstractObjectList::from_stack_id(stack_id.clone(), stack_register.size),
stack_id,
caller_stack_ids: BTreeSet::new(),
ids_known_to_caller: BTreeSet::new(),
}
}
/// Create a new state that contains one memory object corresponding to the stack
/// and one memory object for each provided parameter register.
/// Create a new state from a function signature.
///
/// This function can be used to approximate states of entry points
/// where the number and types of its parameters is unknown.
/// Note that this may also cause analysis errors,
/// e.g. if two parameters point to the same memory object instead of different ones.
pub fn new_with_generic_parameter_objects(
/// The created state contains one memory object for the stack frame of the function
/// and one memory object for each parameter that is dereferenced by the function
/// (according to the function signature).
pub fn from_fn_sig(
fn_sig: &FunctionSignature,
stack_register: &Variable,
function_tid: Tid,
params: &[Variable],
) -> State {
let mock_global_memory = RuntimeMemoryImage::empty(true);
let mut state = State::new(stack_register, function_tid.clone());
for param in params {
let param_id = AbstractIdentifier::new(
function_tid.clone(),
AbstractLocation::from_var(param).unwrap(),
);
state.memory.add_abstract_object(
param_id.clone(),
Bitvector::zero(stack_register.size.into()).into(),
super::object::ObjectType::Heap,
stack_register.size,
);
state.set_register(
param,
DataDomain::from_target(param_id, Bitvector::zero(param.size.into()).into()),
)
// Adjust the upper bound of the stack frame to include all stack parameters
// (and the return address at stack offset 0 for x86).
let stack_upper_bound: i64 = match stack_register.name.as_str() {
"ESP" => 4,
"RSP" => 8,
_ => 0,
};
let stack_upper_bound =
std::cmp::max(stack_upper_bound, fn_sig.get_stack_params_total_size());
let stack_obj = state.memory.get_object_mut(&state.stack_id).unwrap();
stack_obj.add_to_upper_index_bound(stack_upper_bound);
// Set parameter values and create parameter memory objects.
for (arg, access_pattern) in &fn_sig.parameters {
let param_id = AbstractIdentifier::from_arg(&function_tid, arg);
match arg {
Arg::Register {
expr: Expression::Var(var),
..
} => state.set_register(
var,
Data::from_target(param_id.clone(), Bitvector::zero(var.size.into()).into()),
),
Arg::Register { .. } => continue, // Parameters in floating point registers are currently ignored.
Arg::Stack { address, size, .. } => {
let param_data =
Data::from_target(param_id.clone(), Bitvector::zero((*size).into()).into());
state
.write_to_address(address, &param_data, &mock_global_memory)
.unwrap();
}
}
if access_pattern.is_dereferenced() {
state
.memory
.add_abstract_object(param_id, stack_register.size, None);
}
}
state
}
......@@ -171,39 +174,22 @@ impl State {
/// The function uses an underapproximation of all possible pointer targets contained in a memory object.
/// This keeps the number of tracked objects reasonably small.
pub fn remove_unreferenced_objects(&mut self) {
    // Gather all abstract IDs directly referenced by register values.
    let mut referenced_ids = BTreeSet::new();
    for (_reg_name, data) in self.register.iter() {
        referenced_ids.extend(data.referenced_ids().cloned());
    }
    // Keep all parameter objects and the current stack frame alive:
    // their IDs carry the TID of the current function and have no path hints.
    for id in self.memory.get_all_object_ids() {
        if id.get_tid() == self.stack_id.get_tid() && id.get_path_hints().is_empty() {
            referenced_ids.insert(id);
        }
    }
    // Extend the set with IDs reachable through the contents of already referenced objects.
    // Note that this uses an underapproximation of reachability (see doc comment above).
    referenced_ids = self.add_directly_reachable_ids_to_id_set(referenced_ids);
    // Drop every memory object whose ID is not in the computed set.
    self.memory.remove_unused_objects(&referenced_ids);
}
/// Merge the callee stack with the caller stack.
///
/// This deletes the memory object corresponding to the callee_id
/// and updates all other references pointing to the callee_id to point to the caller_id.
/// The offset adjustment is handled as in `replace_abstract_id`.
///
/// Note that right now the content of the callee memory object is *not* merged into the caller memory object.
/// In general this is the correct behaviour
/// as the content below the stack pointer should be considered uninitialized memory after returning to the caller.
/// However, an aggressively optimizing compiler or an unknown calling convention may deviate from this.
pub fn merge_callee_stack_to_caller_stack(
    &mut self,
    callee_id: &AbstractIdentifier,
    caller_id: &AbstractIdentifier,
    offset_adjustment: &ValueDomain,
) {
    // Drop the callee stack frame itself first; its contents are deliberately discarded
    // (see the note in the doc comment above).
    self.memory.remove_object(callee_id);
    // Redirect all remaining references to `callee_id` so that they point to `caller_id`,
    // with offsets adjusted by `offset_adjustment`.
    self.replace_abstract_id(callee_id, caller_id, offset_adjustment);
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
///
......@@ -216,56 +202,20 @@ impl State {
self.memory.mark_mem_object_as_freed(object_pointer)
}
/// Remove all virtual register from the state.
/// This should only be done in cases where it is known that no virtual registers can be alive.
///
/// Example: At the start of a basic block no virtual registers should be alive.
pub fn remove_virtual_register(&mut self) {
self.register = self
.register
.clone()
.into_iter()
.filter(|(register, _value)| !register.is_temp)
.collect();
/// Remove all knowledge about the contents of non-callee-saved registers from the state.
pub fn remove_non_callee_saved_register(&mut self, cconv: &CallingConvention) {
let mut callee_saved_register = BTreeMap::new();
for var in &cconv.callee_saved_register {
if let Some(value) = self.register.get(var) {
callee_saved_register.insert(var.clone(), value.clone());
}
/// Add those objects from the `caller_state` to `self`, that are not known to `self`.
///
/// Since self does not know these objects, we assume that the current function could not have accessed
/// them in any way during execution.
/// This means they are unchanged from the moment of the call until the return from the call,
/// thus we can simply copy their object-state from the moment of the call.
pub fn readd_caller_objects(&mut self, caller_state: &State) {
    // Objects that `self` does not know about were never touched by the current function,
    // so their state from the moment of the call can be copied over unchanged.
    let caller_memory = &caller_state.memory;
    self.memory.append_unknown_objects(caller_memory);
}
/// Restore the content of callee-saved registers from the caller state
/// with the exception of the stack register.
///
/// This function does not check what the callee state currently contains in these registers.
/// If the callee does not adhere to the given calling convention, this may introduce analysis errors!
/// It will also mask cases
/// where a callee-saved register was incorrectly modified (e.g. because of a bug in the callee).
pub fn restore_callee_saved_register(
&mut self,
caller_state: &State,
cconv: &CallingConvention,
stack_register: &Variable,
) {
for register in cconv
.callee_saved_register
.iter()
.filter(|reg| *reg != stack_register)
{
self.set_register(register, caller_state.get_register(register));
}
self.register = callee_saved_register.into();
}
/// Remove all knowledge about the contents of callee-saved registers from the state.
pub fn remove_callee_saved_register(&mut self, cconv: &CallingConvention) {
for register in &cconv.callee_saved_register {
self.register.remove(register);
}
/// Return the `Tid` of the function that this state belongs to.
///
/// The function TID is the TID component of the state's stack identifier.
pub fn get_fn_tid(&self) -> &Tid {
    let stack_id = &self.stack_id;
    stack_id.get_tid()
}
}
......@@ -273,31 +223,11 @@ impl AbstractDomain for State {
/// Merge two states.
///
/// Both states must belong to the same function, i.e. they must have the same stack ID.
fn merge(&self, other: &Self) -> Self {
    assert_eq!(self.stack_id, other.stack_id);
    // Registers and memory objects are merged component-wise via their domain merge operations.
    let merged_memory_objects = self.memory.merge(&other.memory);
    State {
        register: self.register.merge(&other.register),
        memory: merged_memory_objects,
        stack_id: self.stack_id.clone(),
    }
}
......@@ -325,24 +255,6 @@ impl State {
"stack_id".into(),
Value::String(format!("{}", self.stack_id)),
);
state_map.insert(
"caller_stack_ids".into(),
Value::Array(
self.caller_stack_ids
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
state_map.insert(
"ids_known_to_caller".into(),
Value::Array(
self.ids_known_to_caller
.iter()
.map(|id| Value::String(format!("{}", id)))
.collect(),
),
);
Value::Object(state_map)
}
......
use super::super::ValueDomain;
use super::*;
use crate::analysis::pointer_inference::object::*;
use crate::utils::binary::RuntimeMemoryImage;
use Expression::*;
fn bv(value: i64) -> ValueDomain {
ValueDomain::from(Bitvector::from_i64(value))
......@@ -34,8 +37,6 @@ fn reg_sub(name: &str, value: i64) -> Expression {
#[test]
fn state() {
use crate::analysis::pointer_inference::object::*;
use Expression::*;
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("time0"));
let stack_id = new_id("time0", "RSP");
......@@ -60,85 +61,31 @@ fn state() {
other_state.register.insert(register("RBX"), bv(35).into());
let merged_state = state.merge(&other_state);
assert_eq!(merged_state.register[&register("RAX")], bv(42).into());
assert_eq!(merged_state.register.get(&register("RBX")), None);
assert!(merged_state
.load_value(&Var(register("RSP")), ByteSize::new(8), &global_memory)
.unwrap()
.contains_top());
// Test pointer adjustment on reads
state.memory.add_abstract_object(
new_id("time0", "caller"),
bv(0),
ObjectType::Stack,
ByteSize::new(8),
);
state.caller_stack_ids.insert(new_id("time0", "caller"));
state
.store_value(&stack_addr, &bv(15).into(), &global_memory)
.unwrap();
assert_eq!(
state.memory.get_value(
&Data::from_target(new_id("time0", "caller"), bv(8)),
ByteSize::new(8)
),
bv(15).into()
);
assert_eq!(
state
.load_value(&Var(register("RSP")), ByteSize::new(8), &global_memory)
.unwrap(),
bv(15).into()
);
// Test replace_abstract_id
let pointer = Data::from_target(stack_id.clone(), bv(-16));
state.register.insert(register("RSP"), pointer.clone());
state
.store_value(&pointer, &bv(7).into(), &global_memory)
.unwrap();
assert_eq!(
state
.load_value(&Var(register("RSP")), ByteSize::new(8), &global_memory)
merged_state
.get_register(&register("RBX"))
.get_absolute_value()
.unwrap(),
bv(7).into()
&bv(35).into()
);
state.replace_abstract_id(&stack_id, &new_id("time0", "callee"), &bv(-8));
assert_eq!(
state
assert!(merged_state.get_register(&register("RBX")).contains_top());
assert!(merged_state
.load_value(&Var(register("RSP")), ByteSize::new(8), &global_memory)
.unwrap(),
bv(7).into()
);
assert_eq!(
state.memory.get_value(
&Data::from_target(new_id("time0", "callee"), bv(-8)),
ByteSize::new(8)
),
bv(7).into()
);
assert_eq!(
state.memory.get_value(
&Data::from_target(new_id("time0", "callee"), bv(-16)),
ByteSize::new(8)
),
Data::new_top(ByteSize::new(8))
);
.unwrap()
.contains_top());
state.memory.add_abstract_object(
new_id("time0", "heap_obj"),
bv(0),
ObjectType::Heap,
new_id("heap_time", "heap_obj"),
ByteSize::new(8),
Some(ObjectType::Heap),
);
assert_eq!(state.memory.get_num_objects(), 3);
state.remove_unreferenced_objects();
assert_eq!(state.memory.get_num_objects(), 2);
state.remove_unreferenced_objects();
assert_eq!(state.memory.get_num_objects(), 1);
}
#[test]
fn handle_store() {
use Expression::*;
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("time0"));
let stack_id = new_id("time0", "RSP");
......@@ -202,61 +149,7 @@ fn handle_store() {
}
#[test]
// Test that a store through a positive stack offset is written to the caller stack
// objects instead of the callee's own stack object.
fn handle_caller_stack_stores() {
    use super::super::object::ObjectType;
    use Expression::*;
    let global_memory = RuntimeMemoryImage::mock();
    let mut state = State::new(&register("RSP"), Tid::new("time0"));
    // Register two caller stack objects and mark their IDs as caller stack IDs.
    state.memory.add_abstract_object(
        new_id("caller1", "RSP"),
        bv(0),
        ObjectType::Stack,
        ByteSize::new(8),
    );
    state.memory.add_abstract_object(
        new_id("caller2", "RSP"),
        bv(0),
        ObjectType::Stack,
        ByteSize::new(8),
    );
    state.caller_stack_ids.insert(new_id("caller1", "RSP"));
    state.caller_stack_ids.insert(new_id("caller2", "RSP"));
    // store something on the caller stack
    state
        .handle_store(
            &reg_add("RSP", 8),
            &Const(Bitvector::from_i64(42)),
            &global_memory,
        )
        .unwrap();
    // check that it was saved in all caller objects and not on the callee stack object
    let pointer = Data::from_target(new_id("time0", "RSP"), bv(8));
    assert_eq!(
        state.memory.get_value(&pointer, ByteSize::new(8)),
        Data::new_top(ByteSize::new(8))
    );
    let pointer = Data::from_target(new_id("caller1", "RSP"), bv(8));
    assert_eq!(
        state.memory.get_value(&pointer, ByteSize::new(8)),
        bv(42).into()
    );
    let pointer = Data::from_target(new_id("caller2", "RSP"), bv(8));
    assert_eq!(
        state.memory.get_value(&pointer, ByteSize::new(8)),
        bv(42).into()
    );
    // accessing through a positive stack register offset should yield the value of the caller stacks
    assert_eq!(
        state
            .load_value(&reg_add("RSP", 8), ByteSize::new(8), &global_memory)
            .unwrap(),
        bv(42).into()
    );
}
#[test]
fn clear_parameters_on_the_stack_on_extern_calls() {
use Expression::*;
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("time0"));
state.register.insert(
......@@ -305,59 +198,6 @@ fn clear_parameters_on_the_stack_on_extern_calls() {
}
#[test]
// Test that merging the callee stack into the caller stack removes the callee object
// and redirects references to it (with adjusted offsets) to the caller object.
fn merge_callee_stack_to_caller_stack() {
    use super::super::object::ObjectType;
    let mut state = State::new(&register("RSP"), Tid::new("callee"));
    // Register the caller stack object and mark its ID as a caller stack ID.
    state.memory.add_abstract_object(
        new_id("callsite", "RSP"),
        bv(52),
        ObjectType::Stack,
        ByteSize::new(8),
    );
    state.caller_stack_ids.insert(new_id("callsite", "RSP"));
    // check the state before merging to the caller stack
    assert_eq!(
        state.register.get(&register("RSP")).unwrap(),
        &Data::from_target(new_id("callee", "RSP"), bv(0))
    );
    assert_eq!(state.memory.get_all_object_ids().len(), 2);
    // check state after merging to the caller stack
    state.merge_callee_stack_to_caller_stack(
        &new_id("callee", "RSP"),
        &new_id("callsite", "RSP"),
        &bv(-52),
    );
    // The stack register now points into the caller stack object with the adjusted offset.
    assert_eq!(
        state.register.get(&register("RSP")).unwrap(),
        &Data::from_target(new_id("callsite", "RSP"), bv(52))
    );
    // The callee stack object has been removed; only the caller object remains.
    assert_eq!(state.memory.get_all_object_ids().len(), 1);
}
#[test]
// Check that removing callee-saved registers forgets their values
// and that restoring them copies the values back from the caller state.
fn remove_and_restore_callee_saved_register() {
    let mut caller_state = State::new(&register("RSP"), Tid::new("func_tid"));
    let saved_value: Data = Bitvector::from_u64(42).into();
    let calling_conv = CallingConvention::mock_x64();
    // RBP is callee-saved under the mock x64 convention; RAX is not.
    caller_state.set_register(&register("RBP"), saved_value.clone());
    caller_state.set_register(&register("RAX"), saved_value.clone());
    let mut callee_state = caller_state.clone();
    callee_state.remove_callee_saved_register(&calling_conv);
    // Only the callee-saved register was forgotten.
    assert_eq!(
        callee_state.get_register(&register("RBP")),
        Data::new_top(ByteSize::new(8))
    );
    assert_eq!(
        callee_state.get_register(&register("RAX")),
        saved_value.clone()
    );
    let overwritten_value: Data = Bitvector::from_u64(13).into();
    callee_state.set_register(&register("RAX"), overwritten_value.clone());
    callee_state.restore_callee_saved_register(&caller_state, &calling_conv, &register("RSP"));
    // Restoring brings back the caller value for RBP but leaves RAX untouched.
    assert_eq!(callee_state.get_register(&register("RBP")), saved_value);
    assert_eq!(callee_state.get_register(&register("RAX")), overwritten_value);
}
#[test]
fn reachable_ids_under_and_overapproximation() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("func_tid"));
......@@ -366,12 +206,9 @@ fn reachable_ids_under_and_overapproximation() {
let stack_address: Data = Data::from_target(stack_id.clone(), Bitvector::from_i64(-8).into());
let heap_address: Data = Data::from_target(heap_id.clone(), Bitvector::from_i64(0).into());
// Add the heap object to the state, so that it can be recursively searched.
state.memory.add_abstract_object(
heap_id.clone(),
Bitvector::from_i64(0).into(),
crate::analysis::pointer_inference::object::ObjectType::Heap,
ByteSize::new(8),
);
state
.memory
.add_abstract_object(heap_id.clone(), ByteSize::new(8), Some(ObjectType::Heap));
state
.store_value(&stack_address, &heap_address, &global_memory)
......@@ -998,26 +835,14 @@ fn specialize_by_unsigned_comparison_op() {
}
#[test]
// Only pointers into the own stack frame with a *constant*, nonnegative offset qualify.
fn stack_pointer_with_nonnegative_offset() {
    let state = State::new(&register("RSP"), Tid::new("func_tid"));
    // Negative constant offset: rejected.
    let negative_pointer =
        Data::from_target(state.stack_id.clone(), Bitvector::from_i64(-1).into());
    assert!(!state.is_stack_pointer_with_nonnegative_offset(&negative_pointer));
    // Nonnegative constant offset: accepted.
    let positive_pointer =
        Data::from_target(state.stack_id.clone(), Bitvector::from_i64(5).into());
    assert!(state.is_stack_pointer_with_nonnegative_offset(&positive_pointer));
    // Non-constant offset: rejected even though every contained value is nonnegative.
    let interval_pointer = Data::from_target(state.stack_id.clone(), IntervalDomain::mock(2, 3));
    assert!(!state.is_stack_pointer_with_nonnegative_offset(&interval_pointer));
}
#[test]
fn out_of_bounds_access_recognition() {
let mut state = State::new(&register("RSP"), Tid::new("func_tid"));
let global_data = RuntimeMemoryImage::mock();
let heap_obj_id = new_id("heap_malloc", "RAX");
state.memory.add_abstract_object(
heap_obj_id.clone(),
Bitvector::from_u64(0).into(),
crate::analysis::pointer_inference::object::ObjectType::Heap,
ByteSize::new(8),
Some(ObjectType::Heap),
);
state
.memory
......@@ -1050,7 +875,7 @@ fn out_of_bounds_access_recognition() {
let address = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(1).into());
state.set_register(&Variable::mock("RAX", 8), address);
assert!(state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
let address = Data::from_target(state.stack_id.clone(), Bitvector::from_u64(8).into());
let address = Data::from_target(state.stack_id.clone(), Bitvector::from_i64(-8).into());
state.set_register(&Variable::mock("RAX", 8), address);
assert!(!state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
}
......@@ -1119,11 +944,81 @@ fn test_check_def_for_null_dereferences() {
}
#[test]
fn test_new_with_generic_parameter_objects() {
let params = vec![Variable::mock("param1", 8), Variable::mock("param2", 8)];
let state =
State::new_with_generic_parameter_objects(&register("RSP"), Tid::new("func_tid"), &params);
assert_eq!(state.memory.get_num_objects(), 3);
assert!(!state.get_register(&Variable::mock("param1", 8)).is_top());
assert!(!state.get_register(&Variable::mock("param1", 8)).is_top());
fn from_fn_sig() {
let fn_sig = FunctionSignature::mock_x64();
let state = State::from_fn_sig(&fn_sig, &Variable::mock("RSP", 8), Tid::new("func"));
assert_eq!(state.memory.get_num_objects(), 2);
assert_eq!(
*state.memory.get_object(&new_id("func", "RSI")).unwrap(),
AbstractObject::new(None, ByteSize::new(8))
);
assert_eq!(
state.get_register(&Variable::mock("RSP", 8)),
Data::from_target(new_id("func", "RSP"), bv(0).into())
);
assert_eq!(
state.get_register(&Variable::mock("RDI", 8)),
Data::from_target(new_id("func", "RDI"), bv(0).into())
);
assert_eq!(
state.get_register(&Variable::mock("RSI", 8)),
Data::from_target(new_id("func", "RSI"), bv(0).into())
);
}
#[test]
// Test inserting a parameter memory object (returned from a callee) into the caller state:
// a unique parameter pointer overwrites the target, a non-unique one merges weakly.
fn add_param_object_from_callee() {
    let global_memory = RuntimeMemoryImage::empty(true);
    let mut generic_state = State::new(&Variable::mock("RSP", 8), Tid::new("func"));
    // Pre-populate stack offset -8 with the value 1.
    generic_state
        .write_to_address(
            &Expression::Var(Variable::mock("RSP", 8)).plus_const(-8),
            &bv(1).into(),
            &global_memory,
        )
        .unwrap();
    // The callee's parameter object holds the value 2 at offset 0.
    let mut param_object = AbstractObject::new(None, ByteSize::new(8));
    param_object.set_value(bv(2).into(), &bv(0).into()).unwrap();
    // The parameter points to stack offset -16 of the caller stack frame.
    let mut param_value = Data::from_target(new_id("func", "RSP"), bv(-16).into());
    // Testcase 1: param object is unique
    let mut state = generic_state.clone();
    state
        .add_param_object_from_callee(param_object.clone(), &param_value)
        .unwrap();
    // The unrelated value at offset -8 is weakened (merged with Top) but still present.
    let value = state
        .load_value(
            &Expression::Var(Variable::mock("RSP", 8)).plus_const(-8),
            ByteSize::new(8),
            &global_memory,
        )
        .unwrap();
    assert_eq!(value.get_absolute_value().unwrap(), &bv(1).into());
    assert!(value.contains_top());
    // The parameter value at offset -16 is written exactly (no Top).
    let value = state
        .load_value(
            &Expression::Var(Variable::mock("RSP", 8)).plus_const(-16),
            ByteSize::new(8),
            &global_memory,
        )
        .unwrap();
    assert_eq!(value.get_absolute_value().unwrap(), &bv(2).into());
    assert!(!value.contains_top());
    // Testcase 2: param object is not unique
    let mut state = generic_state.clone();
    // Marking the pointer as possibly pointing elsewhere makes the target non-unique.
    param_value.set_contains_top_flag();
    state
        .add_param_object_from_callee(param_object.clone(), &param_value)
        .unwrap();
    // The parameter value is now only merged in, so the result also contains Top.
    let value = state
        .load_value(
            &Expression::Var(Variable::mock("RSP", 8)).plus_const(-16),
            ByteSize::new(8),
            &global_memory,
        )
        .unwrap();
    assert_eq!(value.get_absolute_value().unwrap(), &bv(2).into());
    assert!(value.contains_top());
}
......@@ -86,7 +86,7 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
domain_input_string: T,
) {
for (target, offset) in pointer.iter() {
if pi_state.caller_stack_ids.contains(target) || pi_state.stack_id == *target {
if pi_state.stack_id == *target {
if let Ok(offset_value) = offset.try_to_offset() {
state.add_new_stack_offset_to_string_entry(
offset_value,
......@@ -115,7 +115,7 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
let mut domains: Vec<T> = Vec::new();
for (target, offset) in pointer.iter() {
// Check the stack offset map if the target points to a stack position.
if pi_state.caller_stack_ids.contains(target) || pi_state.stack_id == *target {
if pi_state.stack_id == *target {
if let Ok(offset_value) = offset.try_to_offset() {
if let Some(domain) = state.get_stack_offset_to_string_map().get(&offset_value)
{
......
......@@ -220,7 +220,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -283,7 +283,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -308,7 +308,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -336,7 +336,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -359,7 +359,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -401,7 +401,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -447,7 +447,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
let input_target = DataDomain::from(Bitvector::from_i32(0x7000));
......
......@@ -207,7 +207,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -290,7 +290,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -366,7 +366,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -439,7 +439,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -481,7 +481,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -502,7 +502,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -523,7 +523,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -576,7 +576,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -658,7 +658,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -716,7 +716,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......
......@@ -19,7 +19,7 @@ fn test_handle_sprintf_and_snprintf_calls() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -78,7 +78,7 @@ fn test_parse_format_string_and_add_new_string_domain() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -118,7 +118,7 @@ fn test_create_string_domain_for_sprintf_snprintf() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -151,7 +151,7 @@ fn test_create_string_domain_using_data_type_approximations() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -194,7 +194,7 @@ fn test_create_string_domain_using_constants_and_sub_domains() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -358,7 +358,7 @@ fn test_fetch_constant_and_domain_for_format_specifier() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -539,7 +539,7 @@ fn test_fetch_subdomains_if_available() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -597,7 +597,7 @@ fn test_fetch_constant_domain_if_available() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
let string_data: DataDomain<IntervalDomain> = DataDomain::from(Bitvector::from_i32(0x7000));
......
......@@ -119,7 +119,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -153,7 +153,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -204,7 +204,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -226,7 +226,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -253,7 +253,7 @@ mod tests {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......
......@@ -115,7 +115,7 @@ fn test_handle_generic_symbol_calls() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -139,7 +139,7 @@ fn test_handle_unknown_symbol_calls() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -167,7 +167,7 @@ fn test_add_new_string_abstract_domain() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -222,7 +222,7 @@ fn test_merge_domains_from_multiple_pointer_targets() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -304,7 +304,7 @@ fn test_handle_sprintf_and_snprintf_calls_known_format_string() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -339,7 +339,7 @@ fn test_handle_sprintf_and_snprintf_calls_unknown_format_string() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -374,7 +374,7 @@ fn test_insert_constant_char_into_format_string() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -406,7 +406,7 @@ fn test_insert_constant_string_into_format_string() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -435,7 +435,7 @@ fn test_handle_free() {
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......
......@@ -25,7 +25,7 @@ fn test_update_def() {
);
let mem_image = RuntimeMemoryImage::mock();
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
setup.context.block_first_def_set = HashSet::new();
......@@ -138,7 +138,7 @@ fn test_update_jump() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -163,7 +163,7 @@ fn test_update_return() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......@@ -219,7 +219,7 @@ fn test_update_call_stub() {
"func",
);
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let setup: Setup<CharacterInclusionDomain> = Setup::new(&pi_results);
......
......@@ -626,7 +626,7 @@ impl<T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> State<T>
/// Checks whether a target refers to the Stack.
///
/// A target refers to the stack exactly when it is the stack ID
/// of the current function in the given pointer inference state.
pub fn is_stack_pointer(pi_state: &PointerInferenceState, target: &AbstractIdentifier) -> bool {
    pi_state.stack_id == *target
}
}
......
......@@ -329,42 +329,36 @@ fn test_add_pointer_to_variable_maps_if_tracked() {
#[test]
fn test_pointer_targets_partially_tracked() {
let sp_reg = Variable::mock("sp", 4);
let mut mock_state =
State::<CharacterInclusionDomain>::mock_with_default_pi_state(Sub::mock("func"));
let pi_state = mock_state.get_pointer_inference_state().unwrap().clone();
let stack_id = AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::from_var(&sp_reg).unwrap(),
);
let caller_stack_id = AbstractIdentifier::new(
Tid::new("caller_func"),
AbstractLocation::from_var(&sp_reg).unwrap(),
let heap_id = AbstractIdentifier::new(
Tid::new("heap"),
AbstractLocation::from_var(&Variable::mock("r0", 4)).unwrap(),
);
let stack_id = pi_state.stack_id.clone();
let mut string_pointer = DataDomain::from_target(
stack_id,
heap_id.clone(),
IntervalDomain::new(Bitvector::from_i32(0), Bitvector::from_i32(0)),
);
string_pointer.insert_relative_value(
caller_stack_id.clone(),
stack_id.clone(),
IntervalDomain::new(Bitvector::from_i32(-8), Bitvector::from_i32(-8)),
);
let mut pi_state = mock_state.get_pointer_inference_state().unwrap().clone();
pi_state.caller_stack_ids.insert(caller_stack_id);
mock_state.set_pointer_inference_state(Some(pi_state.clone()));
assert!(!mock_state.pointer_targets_partially_tracked(&pi_state, &string_pointer));
mock_state
.stack_offset_to_string_map
.insert(0, CharacterInclusionDomain::Top);
.insert(-8, CharacterInclusionDomain::Top);
assert!(mock_state.pointer_targets_partially_tracked(&pi_state, &string_pointer));
assert!(mock_state.stack_offset_to_string_map.contains_key(&(-8)));
assert!(mock_state.heap_to_string_map.contains_key(&heap_id));
}
#[test]
......
......@@ -223,7 +223,7 @@ pub mod tests {
let project = mock_project();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project);
pi_results.compute();
pi_results.compute(false);
let mut format_string_index: HashMap<String, usize> = HashMap::new();
format_string_index.insert("sprintf".to_string(), 1);
// Get the BlkEnd node with the function call.
......
......@@ -58,42 +58,41 @@ impl Arg {
/// If the argument is a stack argument,
/// return its offset relative to the current stack register value.
/// Return an error for register arguments or if the offset could not be computed.
pub fn eval_stack_offset(&self, stack_register: &Variable) -> Result<Bitvector, Error> {
pub fn eval_stack_offset(&self) -> Result<Bitvector, Error> {
let expression = match self {
Arg::Register { .. } => return Err(anyhow!("The argument is not a stack argument.")),
Arg::Stack { address, .. } => address,
};
Self::eval_stack_offset_expression(expression, stack_register)
Self::eval_stack_offset_expression(expression)
}
/// If the given expression computes a constant offset to the given stack register,
/// then return the offset.
/// Else return an error.
fn eval_stack_offset_expression(
expression: &Expression,
stack_register: &Variable,
) -> Result<Bitvector, Error> {
fn eval_stack_offset_expression(expression: &Expression) -> Result<Bitvector, Error> {
match expression {
Expression::Var(var) => {
if var == stack_register {
Ok(Bitvector::zero(var.size.into()))
} else {
Err(anyhow!("Input register is not the stack register"))
}
}
Expression::Var(var) => Ok(Bitvector::zero(var.size.into())),
Expression::Const(bitvec) => Ok(bitvec.clone()),
Expression::BinOp { op, lhs, rhs } => {
let lhs = Self::eval_stack_offset_expression(lhs, stack_register)?;
let rhs = Self::eval_stack_offset_expression(rhs, stack_register)?;
let lhs = Self::eval_stack_offset_expression(lhs)?;
let rhs = Self::eval_stack_offset_expression(rhs)?;
lhs.bin_op(*op, &rhs)
}
Expression::UnOp { op, arg } => {
let arg = Self::eval_stack_offset_expression(arg, stack_register)?;
let arg = Self::eval_stack_offset_expression(arg)?;
arg.un_op(*op)
}
_ => Err(anyhow!("Expression type not supported for argument values")),
}
}
/// Return the bytesize of the argument.
pub fn bytesize(&self) -> ByteSize {
match self {
Arg::Register { expr, .. } => expr.bytesize(),
Arg::Stack { size, .. } => *size,
}
}
}
/// An extern symbol represents a function that is dynamically linked from another binary.
......
......@@ -145,6 +145,15 @@ impl MemorySegment {
}
impl RuntimeMemoryImage {
/// Generate a runtime memory image containing no memory segments.
/// Primarily useful in situations where any access to global memory would be an error.
pub fn empty(is_little_endian: bool) -> RuntimeMemoryImage {
RuntimeMemoryImage {
memory_segments: Vec::new(),
is_little_endian,
}
}
/// Generate a runtime memory image for a given binary.
///
/// The function can parse ELF and PE files as input.
......
......@@ -17,10 +17,22 @@ void print_array_sum(int* array) {
int main() {
int* array = calloc(5, sizeof(int));
// intraprocedural buffer overflow
for(int i = 0; i<= 10; i++) {
array[i] = i*i; // Out-of-bounds write for arrays that are too small.
}
// interprocedural buffer overflow
set_array_elements(array);
free(array);
array = malloc(5 * sizeof(int));
// intraprocedural buffer overflow
int sum = 0;
for(int i = 0; i<= 10; i++) {
sum += array[i]; // Out-of-bounds read for arrays that are too small.
}
printf("%d\n", sum);
// interprocedural buffer overflow
print_array_sum(array);
puts((void*) array - 1); // Parameter is an out-of-bounds pointer.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment