Unverified Commit b4697385 by Enkelmann Committed by GitHub

Implement global memory tracking for Pointer Inference (#361)

parent 37c02d9b
...@@ -94,7 +94,22 @@ impl<'a> Context<'a> { ...@@ -94,7 +94,22 @@ impl<'a> Context<'a> {
// If yes, we can compute it relative to the value of the parameter at the callsite and add the result to the return value. // If yes, we can compute it relative to the value of the parameter at the callsite and add the result to the return value.
// Else we just set the Top-flag of the return value to indicate some value originating in the callee. // Else we just set the Top-flag of the return value to indicate some value originating in the callee.
for (callee_id, callee_offset) in callee_value.get_relative_values() { for (callee_id, callee_offset) in callee_value.get_relative_values() {
if let Some(param_arg) = callee_state.get_arg_corresponding_to_id(callee_id) { if callee_id.get_tid() == callee_state.get_current_function_tid()
&& matches!(
callee_id.get_location(),
AbstractLocation::GlobalAddress { .. }
)
{
// Globals get the same ID as if the global pointer originated in the caller.
let caller_global_id = AbstractIdentifier::new(
caller_state.get_current_function_tid().clone(),
callee_id.get_location().clone(),
);
caller_state.add_id_to_tracked_ids(&caller_global_id);
let caller_global =
DataDomain::from_target(caller_global_id, callee_offset.clone());
return_value = return_value.merge(&caller_global);
} else if let Some(param_arg) = callee_state.get_arg_corresponding_to_id(callee_id) {
let param_value = caller_state.eval_parameter_arg(&param_arg); let param_value = caller_state.eval_parameter_arg(&param_arg);
let param_value = caller_state let param_value = caller_state
.substitute_global_mem_address(param_value, &self.project.runtime_memory_image); .substitute_global_mem_address(param_value, &self.project.runtime_memory_image);
...@@ -335,6 +350,8 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> { ...@@ -335,6 +350,8 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
var.size, var.size,
Some(&self.project.runtime_memory_image), Some(&self.project.runtime_memory_image),
); );
let value = new_state
.substitute_global_mem_address(value, &self.project.runtime_memory_image);
new_state.set_register(var, value); new_state.set_register(var, value);
} }
Def::Store { address, value } => { Def::Store { address, value } => {
......
...@@ -142,7 +142,7 @@ impl<'a> Context for GlobalsPropagationContext<'a> { ...@@ -142,7 +142,7 @@ impl<'a> Context for GlobalsPropagationContext<'a> {
let caller_globals: Self::NodeValue = callee_globals let caller_globals: Self::NodeValue = callee_globals
.iter() .iter()
.filter_map(|(address, access_pattern)| { .filter_map(|(address, access_pattern)| {
if caller_known_globals.contains(address) && access_pattern.is_accessed() { if caller_known_globals.contains(address) {
Some((*address, *access_pattern)) Some((*address, *access_pattern))
} else { } else {
None None
...@@ -176,13 +176,7 @@ fn propagate_globals_bottom_up( ...@@ -176,13 +176,7 @@ fn propagate_globals_bottom_up(
let globals = fn_sig let globals = fn_sig
.global_parameters .global_parameters
.iter() .iter()
.filter_map(|(address, access_pattern)| { .map(|(address, access_pattern)| (*address, *access_pattern))
if access_pattern.is_accessed() {
Some((*address, *access_pattern))
} else {
None
}
})
.collect(); .collect();
computation.set_node_value(node, globals); computation.set_node_value(node, globals);
} }
......
...@@ -69,14 +69,16 @@ fn generate_fixpoint_computation<'a>( ...@@ -69,14 +69,16 @@ fn generate_fixpoint_computation<'a>(
.get_standard_calling_convention() .get_standard_calling_convention()
.expect("No standard calling convention found.") .expect("No standard calling convention found.")
}); });
computation.set_node_value( let mut fn_start_state = State::new(
node, &sub.tid,
NodeValue::Value(State::new( &project.stack_pointer_register,
&sub.tid, calling_convention,
&project.stack_pointer_register, );
calling_convention, if project.cpu_architecture.contains("MIPS") {
)), let _ = fn_start_state
) .set_mips_link_register(&sub.tid, project.stack_pointer_register.size);
}
computation.set_node_value(node, NodeValue::Value(fn_start_state))
} }
} }
} }
...@@ -277,7 +279,9 @@ pub mod tests { ...@@ -277,7 +279,9 @@ pub mod tests {
use super::*; use super::*;
impl FunctionSignature { impl FunctionSignature {
/// Create a mock x64 function signature with 2 parameters, one of which is accessed mutably. /// Create a mock x64 function signature with 2 parameters, one of which is accessed mutably,
/// one mutably accessed global variable at address 0x2000
/// and one immutably accessed global variable at address 0x3000.
pub fn mock_x64() -> FunctionSignature { pub fn mock_x64() -> FunctionSignature {
let mut write_access_pattern = AccessPattern::new(); let mut write_access_pattern = AccessPattern::new();
write_access_pattern.set_unknown_access_flags(); write_access_pattern.set_unknown_access_flags();
...@@ -293,7 +297,10 @@ pub mod tests { ...@@ -293,7 +297,10 @@ pub mod tests {
]); ]);
FunctionSignature { FunctionSignature {
parameters, parameters,
global_parameters: HashMap::new(), global_parameters: HashMap::from([
(0x2000, AccessPattern::new_unknown_access()),
(0x3000, AccessPattern::new().with_dereference_flag()),
]),
} }
} }
} }
......
...@@ -66,6 +66,31 @@ impl State { ...@@ -66,6 +66,31 @@ impl State {
} }
} }
/// Initialize the MIPS register `t9` with the address encoded in the given function TID.
///
/// The System V ABI for MIPS requires the caller to place the callee address in `t9`
/// when calling position-independent code.
/// This method sets `t9` by hand to that address for the function denoted by `fn_tid`.
///
/// # Errors
///
/// Returns an error if the address string of `fn_tid` is not a valid hexadecimal number
/// (e.g. for `UNKNOWN` addresses). In that case the state is left untouched.
pub fn set_mips_link_register(
    &mut self,
    fn_tid: &Tid,
    generic_pointer_size: ByteSize,
) -> Result<(), Error> {
    // Parse first, so that a malformed address aborts before anything is written.
    let raw_address = u64::from_str_radix(&fn_tid.address, 16)?;
    let callee_address =
        Bitvector::from_u64(raw_address).into_resize_unsigned(generic_pointer_size);
    let t9 = Variable {
        name: "t9".into(),
        size: generic_pointer_size,
        is_temp: false,
    };
    // The absolute value is deliberately kept as is and not replaced by a relative value
    // representing a global memory pointer.
    // Otherwise every global variable would risk being assigned the same abstract ID.
    self.set_register(&t9, callee_address.into());
    Ok(())
}
/// Get the value of the given register in the current state. /// Get the value of the given register in the current state.
pub fn get_register(&self, register: &Variable) -> DataDomain<BitvectorDomain> { pub fn get_register(&self, register: &Variable) -> DataDomain<BitvectorDomain> {
self.register self.register
......
...@@ -156,7 +156,7 @@ impl State { ...@@ -156,7 +156,7 @@ impl State {
pub fn get_global_mem_params_of_current_function(&self) -> Vec<(u64, AccessPattern)> { pub fn get_global_mem_params_of_current_function(&self) -> Vec<(u64, AccessPattern)> {
let mut global_params = Vec::new(); let mut global_params = Vec::new();
for (id, access_pattern) in self.tracked_ids.iter() { for (id, access_pattern) in self.tracked_ids.iter() {
if id.get_tid() == self.get_current_function_tid() && access_pattern.is_accessed() { if id.get_tid() == self.get_current_function_tid() {
match id.get_location() { match id.get_location() {
AbstractLocation::GlobalPointer(address, _) AbstractLocation::GlobalPointer(address, _)
| AbstractLocation::GlobalAddress { address, .. } => { | AbstractLocation::GlobalAddress { address, .. } => {
......
...@@ -49,6 +49,14 @@ impl<'a> Context<'a> { ...@@ -49,6 +49,14 @@ impl<'a> Context<'a> {
state_before_return.stack_id.clone(), state_before_return.stack_id.clone(),
Data::new_top(stack_register.size), Data::new_top(stack_register.size),
); );
// Also insert the global memory IDs to the map.
id_map.insert(
state_before_return.get_global_mem_id(),
Data::from_target(
state_before_call.get_global_mem_id(),
Bitvector::zero(stack_register.size.into()).into(),
),
);
id_map id_map
} }
......
...@@ -7,6 +7,7 @@ use crate::prelude::*; ...@@ -7,6 +7,7 @@ use crate::prelude::*;
use crate::utils::log::*; use crate::utils::log::*;
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use super::object::AbstractObject;
use super::state::State; use super::state::State;
use super::{Config, Data, VERSION}; use super::{Config, Data, VERSION};
...@@ -295,6 +296,115 @@ impl<'a> Context<'a> { ...@@ -295,6 +296,115 @@ impl<'a> Context<'a> {
}; };
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning)); let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
} }
/// Merge global memory data from the callee global memory object to the caller global memory object
/// if the corresponding global variable is marked as mutable in both the caller and callee.
///
/// The merge works in three steps:
/// 1. Compute the access intervals of all global variables known to the caller or the callee.
/// 2. Mark every value of the caller inside a mutably accessed interval as potentially overwritten.
/// 3. Copy each callee value lying inside a mutably accessed interval into the caller memory,
///    after translating its abstract IDs with `replacement_map`.
///
/// Callee values that lie below every known interval start are not copied.
/// A debug message referencing `call_tid` is logged for each of them.
///
/// # Panics
///
/// Panics if no function signature is known for the function of `caller_state`
/// or if `caller_state` does not contain a global memory object.
fn merge_global_mem_from_callee(
    &self,
    caller_state: &mut State,
    callee_global_mem: &AbstractObject,
    replacement_map: &BTreeMap<AbstractIdentifier, Data>,
    callee_fn_sig: &FunctionSignature,
    call_tid: &Tid,
) {
    let caller_global_mem_id = caller_state.get_global_mem_id();
    let caller_fn_sig = self.fn_signatures.get(caller_state.get_fn_tid()).unwrap();
    let caller_global_mem = caller_state
        .memory
        .get_object_mut(&caller_global_mem_id)
        .unwrap();
    // Get the intervals corresponding to global variables
    // and the access pattern that denotes which globals should be overwritten by callee data.
    let intervals =
        compute_call_return_global_var_access_intervals(caller_fn_sig, callee_fn_sig);

    let mut caller_mem_region = caller_global_mem.get_mem_region().clone();
    mark_values_in_caller_global_mem_as_potentially_overwritten(
        &mut caller_mem_region,
        &intervals,
    );
    // Insert values from the callee into the memory object.
    let mut referenced_ids = BTreeSet::new();
    for (index, value) in callee_global_mem.get_mem_region().iter() {
        // Find the interval containing `index`, i.e. the one with the largest start <= index.
        // The inclusive upper bound is computed after the cast to `u64`:
        // `(*index + 1) as u64` would overflow for `i64::MAX`
        // and wrap around to an empty range for the index `-1`.
        if let Some((_interval_start, access_pattern)) =
            intervals.range(..=(*index as u64)).next_back()
        {
            if access_pattern.is_mutably_dereferenced() {
                let mut value = value.clone();
                value.replace_all_ids(replacement_map);
                referenced_ids.extend(value.referenced_ids().cloned());
                caller_mem_region.insert_at_byte_index(value, *index);
            }
        } else {
            self.log_debug(
                Err(anyhow!("Unexpected occurrence of global variables.")),
                Some(call_tid),
            );
        }
    }
    // `overwrite_mem_region` does not update the pointer targets, so add them explicitly.
    caller_global_mem.overwrite_mem_region(caller_mem_region);
    caller_global_mem.add_ids_to_pointer_targets(referenced_ids);
}
}
/// Build the map of global variable start indices known to the caller or the callee.
///
/// The access pattern of an index has the mutably-dereferenced flag set
/// if and only if the index is known to both functions
/// and is mutably dereferenced in both the caller and the callee.
/// All other indices are mapped to a fresh, empty access pattern.
///
/// Each index is supposed to denote the interval from that index until the next index in the map.
/// This is a heuristic approximation, since the actual sizes of the global variables are not known here.
fn compute_call_return_global_var_access_intervals(
    caller_fn_sig: &FunctionSignature,
    callee_fn_sig: &FunctionSignature,
) -> BTreeMap<u64, AccessPattern> {
    let caller_globals = &caller_fn_sig.global_parameters;
    let callee_globals = &callee_fn_sig.global_parameters;
    caller_globals
        .keys()
        .chain(callee_globals.keys())
        .map(|address| {
            let mutable_in_both = matches!(
                (caller_globals.get(address), callee_globals.get(address)),
                (Some(caller_pattern), Some(callee_pattern))
                    if caller_pattern.is_mutably_dereferenced()
                        && callee_pattern.is_mutably_dereferenced()
            );
            let mut merged_pattern = AccessPattern::new();
            if mutable_in_both {
                merged_pattern.set_mutably_dereferenced_flag();
            }
            // Addresses known to both functions are visited twice but yield the same entry.
            (*address, merged_pattern)
        })
        .collect()
}
/// Mark all values in the caller memory object representing global memory,
/// that may have been overwritten by the callee, as potential `Top` values.
fn mark_values_in_caller_global_mem_as_potentially_overwritten(
caller_global_mem_region: &mut MemRegion<Data>,
access_intervals: &BTreeMap<u64, AccessPattern>,
) {
let mut interval_iter = access_intervals.iter().peekable();
while let Some((index, access_pattern)) = interval_iter.next() {
if access_pattern.is_mutably_dereferenced() {
if let Some((next_index, _next_pattern)) = interval_iter.peek() {
caller_global_mem_region.mark_interval_values_as_top(
*index as i64,
(**next_index - 1) as i64,
ByteSize::new(1),
);
} else {
caller_global_mem_region.mark_interval_values_as_top(
*index as i64,
std::i64::MAX - 1,
ByteSize::new(1),
);
}
}
}
} }
#[cfg(test)] #[cfg(test)]
......
...@@ -72,7 +72,10 @@ fn mock_context() -> Context<'static> { ...@@ -72,7 +72,10 @@ fn mock_context() -> Context<'static> {
let (log_sender, _log_receiver) = crossbeam_channel::unbounded(); let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let mut mock_context = Context::new(analysis_results, config, log_sender); let mut mock_context = Context::new(analysis_results, config, log_sender);
// Create mocked function signatures // Create mocked function signatures
let fn_sigs = BTreeMap::from_iter([(Tid::new("callee"), FunctionSignature::mock_x64())]); let fn_sigs = BTreeMap::from_iter([
(Tid::new("caller"), FunctionSignature::mock_x64()),
(Tid::new("callee"), FunctionSignature::mock_x64()),
]);
let fn_sigs = Box::new(fn_sigs); let fn_sigs = Box::new(fn_sigs);
let fn_sigs = Box::leak(fn_sigs); let fn_sigs = Box::leak(fn_sigs);
mock_context.fn_signatures = fn_sigs; mock_context.fn_signatures = fn_sigs;
...@@ -87,7 +90,7 @@ fn context_problem_implementation() { ...@@ -87,7 +90,7 @@ fn context_problem_implementation() {
use Expression::*; use Expression::*;
let context = mock_context(); let context = mock_context();
let mut state = State::new(&register("RSP"), Tid::new("main")); let mut state = State::new(&register("RSP"), Tid::new("main"), BTreeSet::new());
let def = Term { let def = Term {
tid: Tid::new("def"), tid: Tid::new("def"),
...@@ -120,7 +123,7 @@ fn context_problem_implementation() { ...@@ -120,7 +123,7 @@ fn context_problem_implementation() {
state_after_malloc.get_register(&register("RAX")), state_after_malloc.get_register(&register("RAX")),
Data::from_target(new_id("call_malloc", "RAX"), bv(0)) Data::from_target(new_id("call_malloc", "RAX"), bv(0))
); );
assert_eq!(state_after_malloc.memory.get_num_objects(), 2); assert_eq!(state_after_malloc.memory.get_num_objects(), 3);
assert_eq!( assert_eq!(
state_after_malloc.get_register(&register("RSP")), state_after_malloc.get_register(&register("RSP")),
state state
...@@ -142,7 +145,7 @@ fn context_problem_implementation() { ...@@ -142,7 +145,7 @@ fn context_problem_implementation() {
.update_call_stub(&state_after_malloc, &free) .update_call_stub(&state_after_malloc, &free)
.unwrap(); .unwrap();
assert!(state_after_free.get_register(&register("RDX")).is_top()); assert!(state_after_free.get_register(&register("RDX")).is_top());
assert_eq!(state_after_free.memory.get_num_objects(), 2); assert_eq!(state_after_free.memory.get_num_objects(), 3);
assert_eq!( assert_eq!(
state_after_free.get_register(&register("RBP")), state_after_free.get_register(&register("RBP")),
Data::from_target(new_id("call_malloc", "RAX"), bv(0)) Data::from_target(new_id("call_malloc", "RAX"), bv(0))
...@@ -198,7 +201,7 @@ fn update_return() { ...@@ -198,7 +201,7 @@ fn update_return() {
Data::from_target(new_id("callee", "RDI"), bv(0)), Data::from_target(new_id("callee", "RDI"), bv(0)),
); );
let state_before_call = State::new(&register("RSP"), Tid::new("caller")); let state_before_call = State::new(&register("RSP"), Tid::new("caller"), BTreeSet::new());
let mut state_before_call = context let mut state_before_call = context
.update_def( .update_def(
&state_before_call, &state_before_call,
...@@ -252,7 +255,7 @@ fn update_return() { ...@@ -252,7 +255,7 @@ fn update_return() {
state.get_register(&register("RSP")), state.get_register(&register("RSP")),
Data::from_target(new_id("caller", "RSP"), bv(-8).into()) Data::from_target(new_id("caller", "RSP"), bv(-8).into())
); );
assert!(state.memory.get_all_object_ids().len() == 3); assert_eq!(state.memory.get_all_object_ids().len(), 4);
assert!(state assert!(state
.memory .memory
.get_all_object_ids() .get_all_object_ids()
...@@ -277,7 +280,7 @@ fn specialize_conditional() { ...@@ -277,7 +280,7 @@ fn specialize_conditional() {
let analysis_results = AnalysisResults::mock_from_project(&project); let analysis_results = AnalysisResults::mock_from_project(&project);
let context = Context::new(&analysis_results, config, log_sender); let context = Context::new(&analysis_results, config, log_sender);
let mut state = State::new(&register("RSP"), Tid::new("func")); let mut state = State::new(&register("RSP"), Tid::new("func"), BTreeSet::new());
state.set_register(&register("RAX"), IntervalDomain::mock(-10, 20).into()); state.set_register(&register("RAX"), IntervalDomain::mock(-10, 20).into());
let condition = Expression::BinOp { let condition = Expression::BinOp {
...@@ -340,7 +343,11 @@ fn get_unsound_caller_ids() { ...@@ -340,7 +343,11 @@ fn get_unsound_caller_ids() {
#[test] #[test]
fn handle_extern_symbol_stubs() { fn handle_extern_symbol_stubs() {
let context = mock_context(); let context = mock_context();
let mut state = State::new(&context.project.stack_pointer_register, Tid::new("main")); let mut state = State::new(
&context.project.stack_pointer_register,
Tid::new("main"),
BTreeSet::new(),
);
let mut extern_symbol = ExternSymbol::mock_x64("strchr"); let mut extern_symbol = ExternSymbol::mock_x64("strchr");
extern_symbol.parameters = vec![Arg::mock_register("RDI", 8), Arg::mock_register("RSI", 8)]; extern_symbol.parameters = vec![Arg::mock_register("RDI", 8), Arg::mock_register("RSI", 8)];
...@@ -368,3 +375,67 @@ fn handle_extern_symbol_stubs() { ...@@ -368,3 +375,67 @@ fn handle_extern_symbol_stubs() {
.merge(&Bitvector::from_u64(0).into()) .merge(&Bitvector::from_u64(0).into())
); );
} }
/// Check that merging the callee's global memory into the caller's global memory
/// overwrites mutably accessed globals and leaves immutably accessed globals alone.
#[test]
fn test_merge_global_mem_from_callee() {
    let context = mock_context();
    let stack_register = &context.project.stack_pointer_register;
    let global_memory = &context.project.runtime_memory_image;
    let mut caller_state = State::new(
        stack_register,
        Tid::new("caller"),
        BTreeSet::from([0x2000, 0x2002, 0x3000]),
    );
    let mut callee_state = State::new(
        stack_register,
        Tid::new("callee"),
        BTreeSet::from([0x2000, 0x2002]),
    );
    // Helper: write a 2-byte constant to an absolute address.
    let store_u16 = |state: &mut State, address: u64, value: u16| {
        let target = Expression::Const(Bitvector::from_u64(address));
        let data = Data::from(Bitvector::from_u16(value));
        state
            .write_to_address(&target, &data, global_memory)
            .unwrap();
    };
    // Helper: read a 2-byte value from an absolute address.
    let load_u16 = |state: &State, address: u64| -> Data {
        let source = Expression::Const(Bitvector::from_u64(address));
        state
            .load_value(&source, ByteSize::new(2), global_memory)
            .unwrap()
    };

    store_u16(&mut caller_state, 0x2000, 0);
    store_u16(&mut caller_state, 0x2002, 2);
    store_u16(&mut caller_state, 0x3000, 4);
    store_u16(&mut callee_state, 0x2000, 42);

    let callee_fn_sig = FunctionSignature::mock_x64();
    let callee_global_mem_id = callee_state.get_global_mem_id();
    let caller_global_mem_pointer = Data::from_target(
        caller_state.get_global_mem_id(),
        Bitvector::from_u64(0).into(),
    );
    let replacement_map = BTreeMap::from([(callee_global_mem_id, caller_global_mem_pointer)]);
    let callee_global_mem = callee_state
        .memory
        .get_object(&callee_state.get_global_mem_id())
        .unwrap();

    context.merge_global_mem_from_callee(
        &mut caller_state,
        callee_global_mem,
        &replacement_map,
        &callee_fn_sig,
        &Tid::new("call"),
    );

    // The value written by the callee replaces the caller value at 0x2000.
    assert_eq!(
        load_u16(&caller_state, 0x2000),
        Bitvector::from_u16(42).into()
    );
    // 0x2002 lies in the interval starting at 0x2000, so its value may have been overwritten.
    let mut possibly_overwritten = Data::from(Bitvector::from_u16(2));
    possibly_overwritten.set_contains_top_flag();
    assert_eq!(load_u16(&caller_state, 0x2002), possibly_overwritten);
    // The value at 0x3000 is left unchanged by the merge.
    assert_eq!(
        load_u16(&caller_state, 0x3000),
        Bitvector::from_u16(4).into()
    );
}
...@@ -166,6 +166,20 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont ...@@ -166,6 +166,20 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
// The callee stack frame does not exist anymore after return to the caller. // The callee stack frame does not exist anymore after return to the caller.
continue; continue;
} }
if *callee_object_id == state_before_return.get_global_mem_id() {
let callee_fn_sig = self
.fn_signatures
.get(state_before_return.get_fn_tid())
.unwrap();
self.merge_global_mem_from_callee(
&mut state_after_return,
callee_object,
&id_map,
callee_fn_sig,
&call_term.tid,
);
continue;
}
if Some(false) if Some(false)
== callee_id_to_access_pattern_map == callee_id_to_access_pattern_map
.get(callee_object_id) .get(callee_object_id)
......
...@@ -37,19 +37,21 @@ struct Inner { ...@@ -37,19 +37,21 @@ struct Inner {
pointer_targets: BTreeSet<AbstractIdentifier>, pointer_targets: BTreeSet<AbstractIdentifier>,
/// Tracks whether this may represent more than one actual memory object. /// Tracks whether this may represent more than one actual memory object.
is_unique: bool, is_unique: bool,
/// Is the object a stack frame or a heap object /// Is the object a stack frame, a heap object, or a global memory object.
type_: Option<ObjectType>, type_: Option<ObjectType>,
/// The actual content of the memory object /// The actual content of the memory object
memory: MemRegion<Data>, memory: MemRegion<Data>,
} }
/// An object is either a stack or a heap object. /// An object can be a stack, a heap, or a global memory object.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord)]
pub enum ObjectType { pub enum ObjectType {
/// A stack object, i.e. the stack frame of a function. /// A stack object, i.e. the stack frame of a function.
Stack, Stack,
/// A memory object located on the heap. /// A memory object located on the heap.
Heap, Heap,
/// A memory object representing the global memory space.
GlobalMem,
} }
#[allow(clippy::from_over_into)] #[allow(clippy::from_over_into)]
...@@ -149,6 +151,24 @@ impl AbstractObject { ...@@ -149,6 +151,24 @@ impl AbstractObject {
inner.memory = MemRegion::new(inner.memory.get_address_bytesize()); inner.memory = MemRegion::new(inner.memory.get_address_bytesize());
} }
} }
/// Get the memory region abstract domain associated to the memory object.
///
/// Returns a shared reference to the `memory` field of the object, i.e. its actual content.
pub fn get_mem_region(&self) -> &MemRegion<Data> {
&self.inner.memory
}
/// Replace the memory region abstract domain of the memory object with `new_memory_region`.
///
/// Note that the list of known pointer targets is *not* updated by this function!
/// Callers have to adjust the pointer targets themselves if the new region references other IDs.
pub fn overwrite_mem_region(&mut self, new_memory_region: MemRegion<Data>) {
    Arc::make_mut(&mut self.inner).memory = new_memory_region;
}
/// Insert all given IDs into the set of pointer targets of the memory object.
///
/// IDs that are already contained in the set are not duplicated.
pub fn add_ids_to_pointer_targets(&mut self, ids_to_add: BTreeSet<AbstractIdentifier>) {
    Arc::make_mut(&mut self.inner)
        .pointer_targets
        .extend(ids_to_add);
}
} }
impl AbstractDomain for AbstractObject { impl AbstractDomain for AbstractObject {
......
...@@ -20,17 +20,26 @@ pub struct AbstractObjectList { ...@@ -20,17 +20,26 @@ pub struct AbstractObjectList {
} }
impl AbstractObjectList { impl AbstractObjectList {
/// Create a new abstract object list with just one abstract object corresponding to the stack. /// Create a new abstract object list with one abstract object corresponding to the stack
/// and one abstract object corresponding to global memory
/// ///
/// The offset into the stack object and the `upper_index_bound` of the stack object will be both set to zero. /// The offset into the stack object will be set to zero.
/// This corresponds to the generic stack state at the start of a function. /// This corresponds to the generic stack state at the start of a function.
pub fn from_stack_id( pub fn from_stack_id(
stack_id: AbstractIdentifier, stack_id: AbstractIdentifier,
address_bytesize: ByteSize, address_bytesize: ByteSize,
) -> AbstractObjectList { ) -> AbstractObjectList {
let mut objects = BTreeMap::new();
let stack_object = AbstractObject::new(Some(ObjectType::Stack), address_bytesize); let stack_object = AbstractObject::new(Some(ObjectType::Stack), address_bytesize);
objects.insert(stack_id, stack_object); let global_mem_id = AbstractIdentifier::new(
stack_id.get_tid().clone(),
AbstractLocation::GlobalAddress {
address: 0,
size: address_bytesize,
},
);
let global_mem_object = AbstractObject::new(Some(ObjectType::GlobalMem), address_bytesize);
let objects =
BTreeMap::from([(stack_id, stack_object), (global_mem_id, global_mem_object)]);
AbstractObjectList { objects } AbstractObjectList { objects }
} }
......
...@@ -14,23 +14,45 @@ fn new_id(name: &str) -> AbstractIdentifier { ...@@ -14,23 +14,45 @@ fn new_id(name: &str) -> AbstractIdentifier {
) )
} }
/// Create the abstract ID of a global memory object for tests:
/// a global address location with address zero and a size of 8 bytes, owned by the TID `time0`.
fn new_global_id() -> AbstractIdentifier {
    let global_location = AbstractLocation::GlobalAddress {
        address: 0,
        size: ByteSize::new(8),
    };
    AbstractIdentifier::new(Tid::new("time0"), global_location)
}
#[test] #[test]
fn abstract_object_list() { fn abstract_object_list() {
// A new object list has 2 memory objects.
let mut obj_list = AbstractObjectList::from_stack_id(new_id("RSP".into()), ByteSize::new(8)); let mut obj_list = AbstractObjectList::from_stack_id(new_id("RSP".into()), ByteSize::new(8));
assert_eq!(obj_list.objects.len(), 1); assert_eq!(obj_list.objects.len(), 2);
// Test writing to and reading from the stack object
let pointer = DataDomain::from_target(new_id("RSP".into()), bv(8)); let stack_pointer = DataDomain::from_target(new_id("RSP".into()), bv(8));
obj_list.set_value(pointer.clone(), bv(42).into()).unwrap(); obj_list
.set_value(stack_pointer.clone(), bv(42).into())
.unwrap();
assert_eq!( assert_eq!(
obj_list.get_value(&pointer, ByteSize::new(8)), obj_list.get_value(&stack_pointer, ByteSize::new(8)),
bv(42).into() bv(42).into()
); );
// Test writing to and reading from the global memory object
let global_pointer = DataDomain::from_target(new_global_id(), bv(1000));
obj_list
.set_value(global_pointer.clone(), bv(13).into())
.unwrap();
assert_eq!(
obj_list.get_value(&global_pointer, ByteSize::new(8)),
bv(13).into()
);
let mut other_obj_list = let mut other_obj_list =
AbstractObjectList::from_stack_id(new_id("RSP".into()), ByteSize::new(8)); AbstractObjectList::from_stack_id(new_id("RSP".into()), ByteSize::new(8));
let second_pointer = DataDomain::from_target(new_id("RSP".into()), bv(-8)); let second_pointer = DataDomain::from_target(new_id("RSP".into()), bv(-8));
other_obj_list other_obj_list
.set_value(pointer.clone(), bv(42).into()) .set_value(stack_pointer.clone(), bv(42).into())
.unwrap(); .unwrap();
other_obj_list other_obj_list
.set_value(second_pointer.clone(), bv(35).into()) .set_value(second_pointer.clone(), bv(35).into())
...@@ -51,7 +73,10 @@ fn abstract_object_list() { ...@@ -51,7 +73,10 @@ fn abstract_object_list() {
.unwrap(); .unwrap();
let mut merged = obj_list.merge(&other_obj_list); let mut merged = obj_list.merge(&other_obj_list);
assert_eq!(merged.get_value(&pointer, ByteSize::new(8)), bv(42).into()); assert_eq!(
merged.get_value(&stack_pointer, ByteSize::new(8)),
bv(42).into()
);
assert!(merged assert!(merged
.get_value(&second_pointer, ByteSize::new(8)) .get_value(&second_pointer, ByteSize::new(8))
...@@ -60,23 +85,23 @@ fn abstract_object_list() { ...@@ -60,23 +85,23 @@ fn abstract_object_list() {
merged.get_value(&heap_pointer, ByteSize::new(8)), merged.get_value(&heap_pointer, ByteSize::new(8)),
bv(3).into() bv(3).into()
); );
assert_eq!(merged.objects.len(), 2); assert_eq!(merged.objects.len(), 3);
merged merged
.set_value(pointer.merge(&heap_pointer), bv(3).into()) .set_value(stack_pointer.merge(&heap_pointer), bv(3).into())
.unwrap(); .unwrap();
assert_eq!( assert_eq!(
merged.get_value(&pointer, ByteSize::new(8)), merged.get_value(&stack_pointer, ByteSize::new(8)),
IntervalDomain::mock(3, 42).with_stride(39).into() IntervalDomain::mock(3, 42).with_stride(39).into()
); );
assert_eq!( assert_eq!(
merged.get_value(&heap_pointer, ByteSize::new(8)), merged.get_value(&heap_pointer, ByteSize::new(8)),
bv(3).into() bv(3).into()
); );
assert_eq!(merged.objects.len(), 2); assert_eq!(merged.objects.len(), 3);
other_obj_list other_obj_list
.set_value(pointer.clone(), heap_pointer.clone()) .set_value(stack_pointer.clone(), heap_pointer.clone())
.unwrap(); .unwrap();
assert_eq!( assert_eq!(
other_obj_list other_obj_list
......
...@@ -111,6 +111,19 @@ impl State { ...@@ -111,6 +111,19 @@ impl State {
}; };
result = result.merge(&self.memory.get_value(&address, size)); result = result.merge(&self.memory.get_value(&address, size));
if let Ok(offset) = result.try_to_offset() {
if result.bytesize() == self.stack_id.bytesize()
&& self.known_global_addresses.contains(&(offset as u64))
{
// The loaded value is most likely a pointer to a mutable global variable,
// so we replace it with a pointer to the global memory object
result = Data::from_target(
self.get_global_mem_id(),
result.try_to_bitvec().unwrap().into(),
);
}
}
if address.contains_top() { if address.contains_top() {
result.set_contains_top_flag() result.set_contains_top_flag()
} }
...@@ -130,6 +143,7 @@ impl State { ...@@ -130,6 +143,7 @@ impl State {
) -> Result<(), Error> { ) -> Result<(), Error> {
match self.load_value(address, var.size, global_memory) { match self.load_value(address, var.size, global_memory) {
Ok(data) => { Ok(data) => {
let data = self.replace_if_global_pointer(data);
self.set_register(var, data); self.set_register(var, data);
Ok(()) Ok(())
} }
...@@ -140,8 +154,31 @@ impl State { ...@@ -140,8 +154,31 @@ impl State {
} }
} }
/// Evaluate the value of an expression in the current state /// Evaluate the value of an expression in the current state.
pub fn eval(&self, expression: &Expression) -> Data { pub fn eval(&self, expression: &Expression) -> Data {
let result = self.eval_recursive(expression);
self.replace_if_global_pointer(result)
}
/// Turn a constant into a pointer to the global memory object
/// if the constant is the address of a global variable known to the function.
///
/// Values that are not a single constant, or whose constant is not contained
/// in the known global addresses of the state, are returned unchanged.
fn replace_if_global_pointer(&self, value: Data) -> Data {
    let is_known_global_address = match value.try_to_offset() {
        Ok(constant) => self.known_global_addresses.contains(&(constant as u64)),
        Err(_) => false,
    };
    if !is_known_global_address {
        return value;
    }
    // The constant denotes a pointer to global writeable memory.
    // Thus it is replaced by a value relative to the global memory ID.
    Data::from_target(
        self.get_global_mem_id(),
        value.try_to_interval().unwrap().into(),
    )
}
/// Recursively evaluate the value of an expression in the current state.
/// Should only be called by [`State::eval`].
fn eval_recursive(&self, expression: &Expression) -> Data {
use Expression::*; use Expression::*;
match expression { match expression {
Var(variable) => self.get_register(variable), Var(variable) => self.get_register(variable),
...@@ -151,11 +188,11 @@ impl State { ...@@ -151,11 +188,11 @@ impl State {
// the result of `x XOR x` is always zero. // the result of `x XOR x` is always zero.
return Bitvector::zero(apint::BitWidth::from(lhs.bytesize())).into(); return Bitvector::zero(apint::BitWidth::from(lhs.bytesize())).into();
} }
let (left, right) = (self.eval(lhs), self.eval(rhs)); let (left, right) = (self.eval_recursive(lhs), self.eval_recursive(rhs));
left.bin_op(*op, &right) left.bin_op(*op, &right)
} }
UnOp { op, arg } => self.eval(arg).un_op(*op), UnOp { op, arg } => self.eval_recursive(arg).un_op(*op),
Cast { op, size, arg } => self.eval(arg).cast(*op, *size), Cast { op, size, arg } => self.eval_recursive(arg).cast(*op, *size),
Unknown { Unknown {
description: _, description: _,
size, size,
...@@ -164,7 +201,7 @@ impl State { ...@@ -164,7 +201,7 @@ impl State {
low_byte, low_byte,
size, size,
arg, arg,
} => self.eval(arg).subpiece(*low_byte, *size), } => self.eval_recursive(arg).subpiece(*low_byte, *size),
} }
} }
......
...@@ -5,6 +5,7 @@ use crate::analysis::function_signature::FunctionSignature; ...@@ -5,6 +5,7 @@ use crate::analysis::function_signature::FunctionSignature;
use crate::intermediate_representation::*; use crate::intermediate_representation::*;
use crate::prelude::*; use crate::prelude::*;
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use std::sync::Arc;
mod access_handling; mod access_handling;
mod id_manipulation; mod id_manipulation;
...@@ -21,12 +22,21 @@ pub struct State { ...@@ -21,12 +22,21 @@ pub struct State {
/// The abstract identifier of the current stack frame. /// The abstract identifier of the current stack frame.
/// It points to the base of the stack frame, i.e. only negative offsets point into the current stack frame. /// It points to the base of the stack frame, i.e. only negative offsets point into the current stack frame.
pub stack_id: AbstractIdentifier, pub stack_id: AbstractIdentifier,
/// A list of constants that are assumed to be addresses of global variables accessed by this function.
/// Used to replace constants by relative values pointing to the global memory object.
known_global_addresses: Arc<BTreeSet<u64>>,
} }
impl State { impl State {
/// Create a new state that contains only one memory object corresponding to the stack. /// Create a new state that contains one memory object corresponding to the stack
/// and one memory object corresponding to global memory.
///
/// The stack offset will be set to zero. /// The stack offset will be set to zero.
pub fn new(stack_register: &Variable, function_tid: Tid) -> State { pub fn new(
stack_register: &Variable,
function_tid: Tid,
global_addresses: BTreeSet<u64>,
) -> State {
let stack_id = AbstractIdentifier::new( let stack_id = AbstractIdentifier::new(
function_tid, function_tid,
AbstractLocation::from_var(stack_register).unwrap(), AbstractLocation::from_var(stack_register).unwrap(),
...@@ -43,6 +53,7 @@ impl State { ...@@ -43,6 +53,7 @@ impl State {
register, register,
memory: AbstractObjectList::from_stack_id(stack_id.clone(), stack_register.size), memory: AbstractObjectList::from_stack_id(stack_id.clone(), stack_register.size),
stack_id, stack_id,
known_global_addresses: Arc::new(global_addresses),
} }
} }
...@@ -56,8 +67,9 @@ impl State { ...@@ -56,8 +67,9 @@ impl State {
stack_register: &Variable, stack_register: &Variable,
function_tid: Tid, function_tid: Tid,
) -> State { ) -> State {
let global_addresses = fn_sig.global_parameters.keys().cloned().collect();
let mock_global_memory = RuntimeMemoryImage::empty(true); let mock_global_memory = RuntimeMemoryImage::empty(true);
let mut state = State::new(stack_register, function_tid.clone()); let mut state = State::new(stack_register, function_tid.clone(), global_addresses);
// Set parameter values and create parameter memory objects. // Set parameter values and create parameter memory objects.
for (arg, access_pattern) in &fn_sig.parameters { for (arg, access_pattern) in &fn_sig.parameters {
let param_id = AbstractIdentifier::from_arg(&function_tid, arg); let param_id = AbstractIdentifier::from_arg(&function_tid, arg);
...@@ -91,9 +103,10 @@ impl State { ...@@ -91,9 +103,10 @@ impl State {
/// ///
/// According to the System V ABI for MIPS the caller has to save the callee address in register `t9` /// According to the System V ABI for MIPS the caller has to save the callee address in register `t9`
/// on a function call to position-independent code. /// on a function call to position-independent code.
/// This function manually sets `t9` to the correct value /// In MIPS this value is used to compute the addresses of some global variables,
/// to mitigate cases where `t9` could not be correctly computed due to previous analysis errors. /// since MIPS does not use program-counter-relative access instructions like other instruction set architectures do.
/// ///
/// This function sets `t9` to the correct value.
/// Returns an error if the callee address could not be parsed (e.g. for `UNKNOWN` addresses). /// Returns an error if the callee address could not be parsed (e.g. for `UNKNOWN` addresses).
pub fn set_mips_link_register( pub fn set_mips_link_register(
&mut self, &mut self,
...@@ -107,10 +120,6 @@ impl State { ...@@ -107,10 +120,6 @@ impl State {
}; };
let address = Bitvector::from_u64(u64::from_str_radix(&callee_tid.address, 16)?) let address = Bitvector::from_u64(u64::from_str_radix(&callee_tid.address, 16)?)
.into_resize_unsigned(generic_pointer_size); .into_resize_unsigned(generic_pointer_size);
// FIXME: A better way would be to test whether the link register contains the correct value
// and only fix and log cases where it doesn't contain the correct value.
// Right now this is unfortunately the common case,
// so logging every case would generate too many log messages.
self.set_register(&link_register, address.into()); self.set_register(&link_register, address.into());
Ok(()) Ok(())
} }
...@@ -173,6 +182,9 @@ impl State { ...@@ -173,6 +182,9 @@ impl State {
referenced_ids.insert(id); referenced_ids.insert(id);
} }
} }
// get the global memory ID, as it is always reachable
referenced_ids.insert(self.get_global_mem_id());
// Add IDs that are recursively reachable through the known IDs.
referenced_ids = self.add_directly_reachable_ids_to_id_set(referenced_ids); referenced_ids = self.add_directly_reachable_ids_to_id_set(referenced_ids);
// remove unreferenced objects // remove unreferenced objects
self.memory.remove_unused_objects(&referenced_ids); self.memory.remove_unused_objects(&referenced_ids);
...@@ -193,6 +205,17 @@ impl State { ...@@ -193,6 +205,17 @@ impl State {
pub fn get_fn_tid(&self) -> &Tid { pub fn get_fn_tid(&self) -> &Tid {
self.stack_id.get_tid() self.stack_id.get_tid()
} }
/// Build the abstract identifier of the global memory object of the current function.
///
/// The identifier combines the TID stored in `self.stack_id` with a
/// `GlobalAddress` location at address zero whose size equals the byte size
/// of `self.stack_id`. A new identifier value is constructed on every call.
pub fn get_global_mem_id(&self) -> AbstractIdentifier {
    let function_tid = self.stack_id.get_tid().clone();
    let global_base = AbstractLocation::GlobalAddress {
        address: 0,
        size: self.stack_id.bytesize(),
    };
    AbstractIdentifier::new(function_tid, global_base)
}
} }
impl AbstractDomain for State { impl AbstractDomain for State {
...@@ -204,6 +227,7 @@ impl AbstractDomain for State { ...@@ -204,6 +227,7 @@ impl AbstractDomain for State {
register: self.register.merge(&other.register), register: self.register.merge(&other.register),
memory: merged_memory_objects, memory: merged_memory_objects,
stack_id: self.stack_id.clone(), stack_id: self.stack_id.clone(),
known_global_addresses: self.known_global_addresses.clone(),
} }
} }
......
...@@ -37,7 +37,7 @@ fn reg_sub(name: &str, value: i64) -> Expression { ...@@ -37,7 +37,7 @@ fn reg_sub(name: &str, value: i64) -> Expression {
#[test] #[test]
fn state() { fn state() {
let global_memory = RuntimeMemoryImage::mock(); let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("time0")); let mut state = State::new(&register("RSP"), Tid::new("time0"), BTreeSet::new());
let stack_id = new_id("time0", "RSP"); let stack_id = new_id("time0", "RSP");
let stack_addr = Data::from_target(stack_id.clone(), bv(8)); let stack_addr = Data::from_target(stack_id.clone(), bv(8));
state state
...@@ -51,7 +51,7 @@ fn state() { ...@@ -51,7 +51,7 @@ fn state() {
bv(42).into() bv(42).into()
); );
let mut other_state = State::new(&register("RSP"), Tid::new("time0")); let mut other_state = State::new(&register("RSP"), Tid::new("time0"), BTreeSet::new());
state.register.insert(register("RAX"), bv(42).into()); state.register.insert(register("RAX"), bv(42).into());
other_state other_state
.register .register
...@@ -78,15 +78,15 @@ fn state() { ...@@ -78,15 +78,15 @@ fn state() {
ByteSize::new(8), ByteSize::new(8),
Some(ObjectType::Heap), Some(ObjectType::Heap),
); );
assert_eq!(state.memory.get_num_objects(), 2); assert_eq!(state.memory.get_num_objects(), 3);
state.remove_unreferenced_objects(); state.remove_unreferenced_objects();
assert_eq!(state.memory.get_num_objects(), 1); assert_eq!(state.memory.get_num_objects(), 2);
} }
#[test] #[test]
fn handle_store() { fn handle_store() {
let global_memory = RuntimeMemoryImage::mock(); let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("time0")); let mut state = State::new(&register("RSP"), Tid::new("time0"), BTreeSet::new());
let stack_id = new_id("time0", "RSP"); let stack_id = new_id("time0", "RSP");
assert_eq!( assert_eq!(
state.eval(&Var(register("RSP"))), state.eval(&Var(register("RSP"))),
...@@ -150,7 +150,7 @@ fn handle_store() { ...@@ -150,7 +150,7 @@ fn handle_store() {
#[test] #[test]
fn clear_parameters_on_the_stack_on_extern_calls() { fn clear_parameters_on_the_stack_on_extern_calls() {
let global_memory = RuntimeMemoryImage::mock(); let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("time0")); let mut state = State::new(&register("RSP"), Tid::new("time0"), BTreeSet::new());
state.register.insert( state.register.insert(
register("RSP"), register("RSP"),
Data::from_target(new_id("time0", "RSP"), bv(-20)), Data::from_target(new_id("time0", "RSP"), bv(-20)),
...@@ -199,7 +199,7 @@ fn clear_parameters_on_the_stack_on_extern_calls() { ...@@ -199,7 +199,7 @@ fn clear_parameters_on_the_stack_on_extern_calls() {
#[test] #[test]
fn reachable_ids_under_and_overapproximation() { fn reachable_ids_under_and_overapproximation() {
let global_memory = RuntimeMemoryImage::mock(); let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("func_tid")); let mut state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
let stack_id = new_id("func_tid", "RSP"); let stack_id = new_id("func_tid", "RSP");
let heap_id = new_id("heap_obj", "RAX"); let heap_id = new_id("heap_obj", "RAX");
let stack_address: Data = Data::from_target(stack_id.clone(), Bitvector::from_i64(-8).into()); let stack_address: Data = Data::from_target(stack_id.clone(), Bitvector::from_i64(-8).into());
...@@ -248,8 +248,11 @@ fn reachable_ids_under_and_overapproximation() { ...@@ -248,8 +248,11 @@ fn reachable_ids_under_and_overapproximation() {
#[test] #[test]
fn global_mem_access() { fn global_mem_access() {
let global_memory = RuntimeMemoryImage::mock(); let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&register("RSP"), Tid::new("func_tid")); let mut state = State::new(
&register("RSP"),
Tid::new("func_tid"),
BTreeSet::from([0x2000]),
);
// global read-only address // global read-only address
let address_expr = Expression::Const(Bitvector::from_u64(0x1000)); let address_expr = Expression::Const(Bitvector::from_u64(0x1000));
assert_eq!( assert_eq!(
...@@ -265,7 +268,6 @@ fn global_mem_access() { ...@@ -265,7 +268,6 @@ fn global_mem_access() {
&global_memory &global_memory
) )
.is_err()); .is_err());
// global writeable address // global writeable address
let address_expr = Expression::Const(Bitvector::from_u64(0x2000)); let address_expr = Expression::Const(Bitvector::from_u64(0x2000));
assert_eq!( assert_eq!(
...@@ -277,10 +279,16 @@ fn global_mem_access() { ...@@ -277,10 +279,16 @@ fn global_mem_access() {
assert!(state assert!(state
.write_to_address( .write_to_address(
&address_expr, &address_expr,
&DataDomain::new_top(ByteSize::new(4)), &Bitvector::from_u32(21).into(),
&global_memory &global_memory
) )
.is_ok()); .is_ok());
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
Bitvector::from_u32(21).into()
);
// invalid global address // invalid global address
let address_expr = Expression::Const(Bitvector::from_u64(0x3456)); let address_expr = Expression::Const(Bitvector::from_u64(0x3456));
...@@ -299,7 +307,7 @@ fn global_mem_access() { ...@@ -299,7 +307,7 @@ fn global_mem_access() {
/// Test expression specialization except for binary operations. /// Test expression specialization except for binary operations.
#[test] #[test]
fn specialize_by_expression_results() { fn specialize_by_expression_results() {
let mut base_state = State::new(&register("RSP"), Tid::new("func_tid")); let mut base_state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
base_state.set_register( base_state.set_register(
&register("RAX"), &register("RAX"),
IntervalDomain::new(Bitvector::from_i64(5), Bitvector::from_i64(10)).into(), IntervalDomain::new(Bitvector::from_i64(5), Bitvector::from_i64(10)).into(),
...@@ -367,7 +375,7 @@ fn specialize_by_expression_results() { ...@@ -367,7 +375,7 @@ fn specialize_by_expression_results() {
); );
// Expr = IntSExt(Var(EAX)) // Expr = IntSExt(Var(EAX))
let mut state = State::new(&register("RSP"), Tid::new("func_tid")); let mut state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
let eax_register = Variable { let eax_register = Variable {
name: "EAX".to_string(), name: "EAX".to_string(),
size: ByteSize::new(4), size: ByteSize::new(4),
...@@ -388,7 +396,7 @@ fn specialize_by_expression_results() { ...@@ -388,7 +396,7 @@ fn specialize_by_expression_results() {
); );
// Expr = Subpiece(Var(RAX)) // Expr = Subpiece(Var(RAX))
let mut state = State::new(&register("RSP"), Tid::new("func_tid")); let mut state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
let rax_register = Variable { let rax_register = Variable {
name: "RAX".to_string(), name: "RAX".to_string(),
size: ByteSize::new(8), size: ByteSize::new(8),
...@@ -416,7 +424,7 @@ fn specialize_by_expression_results() { ...@@ -416,7 +424,7 @@ fn specialize_by_expression_results() {
/// except equality and inequality operations /// except equality and inequality operations
#[test] #[test]
fn specialize_by_binop() { fn specialize_by_binop() {
let base_state = State::new(&register("RSP"), Tid::new("func_tid")); let base_state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
// Expr = RAX + Const // Expr = RAX + Const
let mut state = base_state.clone(); let mut state = base_state.clone();
...@@ -532,7 +540,7 @@ fn specialize_by_binop() { ...@@ -532,7 +540,7 @@ fn specialize_by_binop() {
/// Test expression specialization for comparison operations `==` and `!=`. /// Test expression specialization for comparison operations `==` and `!=`.
#[test] #[test]
fn specialize_by_equality_comparison() { fn specialize_by_equality_comparison() {
let mut base_state = State::new(&register("RSP"), Tid::new("func_tid")); let mut base_state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
base_state.set_register(&register("RAX"), IntervalDomain::mock(0, 50).into()); base_state.set_register(&register("RAX"), IntervalDomain::mock(0, 50).into());
// Expr = RAX == Const // Expr = RAX == Const
...@@ -596,7 +604,7 @@ fn specialize_by_equality_comparison() { ...@@ -596,7 +604,7 @@ fn specialize_by_equality_comparison() {
/// Test expression specialization for signed comparison operations `<` and `<=`. /// Test expression specialization for signed comparison operations `<` and `<=`.
#[test] #[test]
fn specialize_by_signed_comparison_op() { fn specialize_by_signed_comparison_op() {
let mut base_state = State::new(&register("RSP"), Tid::new("func_tid")); let mut base_state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
let interval = IntervalDomain::mock(5, 10); let interval = IntervalDomain::mock(5, 10);
base_state.set_register(&register("RAX"), interval.into()); base_state.set_register(&register("RAX"), interval.into());
...@@ -716,7 +724,7 @@ fn specialize_by_signed_comparison_op() { ...@@ -716,7 +724,7 @@ fn specialize_by_signed_comparison_op() {
/// Test expression specialization for unsigned comparison operations `<` and `<=`. /// Test expression specialization for unsigned comparison operations `<` and `<=`.
#[test] #[test]
fn specialize_by_unsigned_comparison_op() { fn specialize_by_unsigned_comparison_op() {
let mut base_state = State::new(&register("RSP"), Tid::new("func_tid")); let mut base_state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
let interval = IntervalDomain::mock(-5, 10); let interval = IntervalDomain::mock(-5, 10);
base_state.set_register(&register("RAX"), interval.into()); base_state.set_register(&register("RAX"), interval.into());
...@@ -835,7 +843,7 @@ fn specialize_by_unsigned_comparison_op() { ...@@ -835,7 +843,7 @@ fn specialize_by_unsigned_comparison_op() {
#[test] #[test]
fn specialize_pointer_comparison() { fn specialize_pointer_comparison() {
let mut state = State::new(&register("RSP"), Tid::new("func_tid")); let mut state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
let interval = IntervalDomain::mock(-5, 10); let interval = IntervalDomain::mock(-5, 10);
state.set_register( state.set_register(
&register("RAX"), &register("RAX"),
...@@ -869,7 +877,7 @@ fn specialize_pointer_comparison() { ...@@ -869,7 +877,7 @@ fn specialize_pointer_comparison() {
/// (resulting in two-sided widenings) instead of one-sided bounds. /// (resulting in two-sided widenings) instead of one-sided bounds.
#[test] #[test]
fn test_widening_hints_after_pointer_specialization() { fn test_widening_hints_after_pointer_specialization() {
let mut state = State::new(&register("RSP"), Tid::new("func_tid")); let mut state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
state.set_register( state.set_register(
&register("RAX"), &register("RAX"),
Data::from_target(new_id("func_tid", "RSP"), Bitvector::from_i64(10).into()), Data::from_target(new_id("func_tid", "RSP"), Bitvector::from_i64(10).into()),
...@@ -912,7 +920,7 @@ fn test_widening_hints_after_pointer_specialization() { ...@@ -912,7 +920,7 @@ fn test_widening_hints_after_pointer_specialization() {
#[test] #[test]
fn test_check_def_for_null_dereferences() { fn test_check_def_for_null_dereferences() {
let mut state = State::new(&register("RSP"), Tid::new("func_tid")); let mut state = State::new(&register("RSP"), Tid::new("func_tid"), BTreeSet::new());
let var_rax = Variable::mock("RAX", 8); let var_rax = Variable::mock("RAX", 8);
let def = Def::load( let def = Def::load(
"load_def", "load_def",
...@@ -947,7 +955,7 @@ fn from_fn_sig() { ...@@ -947,7 +955,7 @@ fn from_fn_sig() {
let fn_sig = FunctionSignature::mock_x64(); let fn_sig = FunctionSignature::mock_x64();
let state = State::from_fn_sig(&fn_sig, &Variable::mock("RSP", 8), Tid::new("func")); let state = State::from_fn_sig(&fn_sig, &Variable::mock("RSP", 8), Tid::new("func"));
assert_eq!(state.memory.get_num_objects(), 2); assert_eq!(state.memory.get_num_objects(), 3);
assert_eq!( assert_eq!(
*state.memory.get_object(&new_id("func", "RSI")).unwrap(), *state.memory.get_object(&new_id("func", "RSI")).unwrap(),
AbstractObject::new(None, ByteSize::new(8)) AbstractObject::new(None, ByteSize::new(8))
...@@ -969,7 +977,8 @@ fn from_fn_sig() { ...@@ -969,7 +977,8 @@ fn from_fn_sig() {
#[test] #[test]
fn add_param_object_from_callee() { fn add_param_object_from_callee() {
let global_memory = RuntimeMemoryImage::empty(true); let global_memory = RuntimeMemoryImage::empty(true);
let mut generic_state = State::new(&Variable::mock("RSP", 8), Tid::new("func")); let mut generic_state =
State::new(&Variable::mock("RSP", 8), Tid::new("func"), BTreeSet::new());
generic_state generic_state
.write_to_address( .write_to_address(
&Expression::Var(Variable::mock("RSP", 8)).plus_const(-8), &Expression::Var(Variable::mock("RSP", 8)).plus_const(-8),
......
use super::*; use super::*;
use crate::abstract_domain::TryToBitvec; use crate::abstract_domain::{TryToBitvec, TryToInterval};
use crossbeam_channel::Sender; use crossbeam_channel::Sender;
/// Compute various statistics about how exact memory accesses through `Load` and `Store` instructions are tracked. /// Compute various statistics about how exact memory accesses through `Load` and `Store` instructions are tracked.
...@@ -14,9 +14,15 @@ struct MemAccessStats { ...@@ -14,9 +14,15 @@ struct MemAccessStats {
contains_top_flag: u64, contains_top_flag: u64,
empty_errors: u64, empty_errors: u64,
is_only_top: u64, is_only_top: u64,
global_mem_access: u64, global_mem_access: u64,
global_mem_ro_access: u64,
global_mem_writeable_access: u64,
global_mem_error_write_access: u64,
global_mem_interval_error: u64,
current_stack_access: u64, current_stack_access: u64,
non_current_stack_access: u64, other_mem_object_access: u64,
exact_target_with_exact_offset: u64, exact_target_with_exact_offset: u64,
exact_target_with_top_offset: u64, exact_target_with_top_offset: u64,
} }
...@@ -27,7 +33,7 @@ impl MemAccessStats { ...@@ -27,7 +33,7 @@ impl MemAccessStats {
} }
fn ops_with_exact_target_known(&self) -> u64 { fn ops_with_exact_target_known(&self) -> u64 {
self.global_mem_access + self.current_stack_access + self.non_current_stack_access self.global_mem_access + self.current_stack_access + self.other_mem_object_access
} }
fn print_general_stats(&self, log_collector: Sender<LogThreadMsg>) { fn print_general_stats(&self, log_collector: Sender<LogThreadMsg>) {
...@@ -37,12 +43,14 @@ impl MemAccessStats { ...@@ -37,12 +43,14 @@ impl MemAccessStats {
\t{:.2}% tracked,\n\ \t{:.2}% tracked,\n\
\t{:.2}% partially tracked,\n\ \t{:.2}% partially tracked,\n\
\t{:.2}% untracked,\n\ \t{:.2}% untracked,\n\
\t{:.2}% errors.", \t{:.2}% errors (empty value),\n\
\t{:.2}% errors (invalid global address, e.g. Null pointer dereference),",
self.all_mem_ops, self.all_mem_ops,
self.tracked_mem_ops() as f64 / all_mem_ops * 100., self.tracked_mem_ops() as f64 / all_mem_ops * 100.,
self.contains_top_flag as f64 / all_mem_ops * 100., self.contains_top_flag as f64 / all_mem_ops * 100.,
self.is_only_top as f64 / all_mem_ops * 100., self.is_only_top as f64 / all_mem_ops * 100.,
self.empty_errors as f64 / all_mem_ops * 100., self.empty_errors as f64 / all_mem_ops * 100.,
self.global_mem_interval_error as f64 / all_mem_ops * 100.,
); );
let log_msg = LogMessage::new_info(msg).source("Pointer Inference"); let log_msg = LogMessage::new_info(msg).source("Pointer Inference");
let _ = log_collector.send(LogThreadMsg::Log(log_msg)); let _ = log_collector.send(LogThreadMsg::Log(log_msg));
...@@ -53,15 +61,23 @@ impl MemAccessStats { ...@@ -53,15 +61,23 @@ impl MemAccessStats {
let msg = format!( let msg = format!(
"{} ({:.2}%) memory operations with exactly known target. Of these are\n\ "{} ({:.2}%) memory operations with exactly known target. Of these are\n\
\t{:.2}% global memory access,\n\ \t{:.2}% global memory access,\n\
\t\t{:.2}% global read-only memory access,\n\
\t\t{:.2}% global writeable memory access,\n\
\t\t{:.2}% global writeable memory access (mishandled by analysis),\n\
\t{:.2}% current stack access,\n\ \t{:.2}% current stack access,\n\
\t{:.2}% other (heap or stack) access,\n\ \t{:.2}% access to memory of unknown type,\n\
\t{:.2}% with constant offset,\n\ \t{:.2}% with constant offset,\n\
\t{:.2}% with unknown offset.", \t{:.2}% with unknown offset.",
self.ops_with_exact_target_known(), self.ops_with_exact_target_known(),
self.ops_with_exact_target_known() as f64 / all_mem_ops * 100., self.ops_with_exact_target_known() as f64 / all_mem_ops * 100.,
self.global_mem_access as f64 / self.ops_with_exact_target_known() as f64 * 100., self.global_mem_access as f64 / self.ops_with_exact_target_known() as f64 * 100.,
self.global_mem_ro_access as f64 / self.ops_with_exact_target_known() as f64 * 100.,
self.global_mem_writeable_access as f64 / self.ops_with_exact_target_known() as f64
* 100.,
self.global_mem_error_write_access as f64 / self.ops_with_exact_target_known() as f64
* 100.,
self.current_stack_access as f64 / self.ops_with_exact_target_known() as f64 * 100., self.current_stack_access as f64 / self.ops_with_exact_target_known() as f64 * 100.,
self.non_current_stack_access as f64 / self.ops_with_exact_target_known() as f64 * 100., self.other_mem_object_access as f64 / self.ops_with_exact_target_known() as f64 * 100.,
self.exact_target_with_exact_offset as f64 / self.ops_with_exact_target_known() as f64 self.exact_target_with_exact_offset as f64 / self.ops_with_exact_target_known() as f64
* 100., * 100.,
self.exact_target_with_top_offset as f64 / self.ops_with_exact_target_known() as f64 self.exact_target_with_top_offset as f64 / self.ops_with_exact_target_known() as f64
...@@ -71,7 +87,7 @@ impl MemAccessStats { ...@@ -71,7 +87,7 @@ impl MemAccessStats {
let _ = log_collector.send(LogThreadMsg::Log(log_msg)); let _ = log_collector.send(LogThreadMsg::Log(log_msg));
} }
fn count_for_def(&mut self, state: &State, def: &Term<Def>) { fn count_for_def(&mut self, state: &State, def: &Term<Def>, global_mem: &RuntimeMemoryImage) {
use crate::abstract_domain::AbstractDomain; use crate::abstract_domain::AbstractDomain;
match &def.term { match &def.term {
Def::Load { address, .. } | Def::Store { address, .. } => { Def::Load { address, .. } | Def::Store { address, .. } => {
...@@ -88,16 +104,30 @@ impl MemAccessStats { ...@@ -88,16 +104,30 @@ impl MemAccessStats {
if let Some(offset) = address_val.get_if_absolute_value() { if let Some(offset) = address_val.get_if_absolute_value() {
self.global_mem_access += 1; self.global_mem_access += 1;
if offset.try_to_bitvec().is_ok() { if let Ok((start_address, end_address)) = offset.try_to_offset_interval() {
self.exact_target_with_exact_offset += 1; self.exact_target_with_exact_offset += 1;
if let Ok(true) = global_mem
.is_interval_writeable(start_address as u64, end_address as u64)
{
self.global_mem_error_write_access += 1;
} else if let Ok(true) = global_mem
.is_interval_readable(start_address as u64, end_address as u64)
{
self.global_mem_ro_access += 1;
} else {
self.global_mem_interval_error += 1;
}
} else if offset.is_top() { } else if offset.is_top() {
self.exact_target_with_top_offset += 1; self.exact_target_with_top_offset += 1;
} }
} else if let Some((id, offset)) = address_val.get_if_unique_target() { } else if let Some((id, offset)) = address_val.get_if_unique_target() {
if *id == state.stack_id { if *id == state.stack_id {
self.current_stack_access += 1; self.current_stack_access += 1;
} else if *id == state.get_global_mem_id() {
self.global_mem_access += 1;
self.global_mem_writeable_access += 1;
} else { } else {
self.non_current_stack_access += 1; self.other_mem_object_access += 1;
} }
if offset.try_to_bitvec().is_ok() { if offset.try_to_bitvec().is_ok() {
self.exact_target_with_exact_offset += 1; self.exact_target_with_exact_offset += 1;
...@@ -116,12 +146,13 @@ impl MemAccessStats { ...@@ -116,12 +146,13 @@ impl MemAccessStats {
let mut stats = Self::default(); let mut stats = Self::default();
let graph = pointer_inference.computation.get_graph(); let graph = pointer_inference.computation.get_graph();
let context = pointer_inference.get_context(); let context = pointer_inference.get_context();
let global_memory = &context.project.runtime_memory_image;
for (node_id, node) in graph.node_references() { for (node_id, node) in graph.node_references() {
if let Node::BlkStart(block, _sub) = node { if let Node::BlkStart(block, _sub) = node {
if let Some(state) = pointer_inference.computation.get_node_value(node_id) { if let Some(state) = pointer_inference.computation.get_node_value(node_id) {
let mut state = state.unwrap_value().clone(); let mut state = state.unwrap_value().clone();
for def in &block.term.defs { for def in &block.term.defs {
stats.count_for_def(&state, def); stats.count_for_def(&state, def, global_memory);
state = match context.update_def(&state, def) { state = match context.update_def(&state, def) {
Some(new_state) => new_state, Some(new_state) => new_state,
None => break, None => break,
......
...@@ -6,11 +6,15 @@ use crate::{ ...@@ -6,11 +6,15 @@ use crate::{
string_abstraction::tests::mock_project_with_intraprocedural_control_flow, string_abstraction::tests::mock_project_with_intraprocedural_control_flow,
}, },
}; };
use std::collections::BTreeSet;
impl<T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> State<T> { impl<T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> State<T> {
pub fn mock_with_default_pi_state(current_sub: Term<Sub>) -> Self { pub fn mock_with_default_pi_state(current_sub: Term<Sub>) -> Self {
let pi_state = let pi_state = PointerInferenceState::new(
PointerInferenceState::new(&Variable::mock("sp", 4 as u64), current_sub.tid.clone()); &Variable::mock("sp", 4 as u64),
current_sub.tid.clone(),
BTreeSet::new(),
);
State { State {
unassigned_return_pointer: HashSet::new(), unassigned_return_pointer: HashSet::new(),
variable_to_pointer_map: HashMap::new(), variable_to_pointer_map: HashMap::new(),
......
use super::*; use super::*;
use std::collections::BTreeSet;
impl<'a> Context<'a> { impl<'a> Context<'a> {
/// Create a mock context. /// Create a mock context.
...@@ -27,7 +28,8 @@ fn test_compute_size_value_of_malloc_like_call() { ...@@ -27,7 +28,8 @@ fn test_compute_size_value_of_malloc_like_call() {
use crate::analysis::pointer_inference::State as PiState; use crate::analysis::pointer_inference::State as PiState;
let project = Project::mock_x64(); let project = Project::mock_x64();
let mut pi_results = PointerInference::mock(&project); let mut pi_results = PointerInference::mock(&project);
let mut malloc_state = PiState::new(&Variable::mock("RSP", 8), Tid::new("func")); let mut malloc_state =
PiState::new(&Variable::mock("RSP", 8), Tid::new("func"), BTreeSet::new());
malloc_state.set_register(&Variable::mock("RDI", 8), Bitvector::from_i64(3).into()); malloc_state.set_register(&Variable::mock("RDI", 8), Bitvector::from_i64(3).into());
*pi_results.get_mut_states_at_tids() = HashMap::from([(Tid::new("malloc_call"), malloc_state)]); *pi_results.get_mut_states_at_tids() = HashMap::from([(Tid::new("malloc_call"), malloc_state)]);
let malloc_symbol = ExternSymbol::mock_x64("malloc"); let malloc_symbol = ExternSymbol::mock_x64("malloc");
......
...@@ -168,9 +168,9 @@ impl AbstractDomain for State { ...@@ -168,9 +168,9 @@ impl AbstractDomain for State {
#[cfg(test)] #[cfg(test)]
pub mod tests { pub mod tests {
use crate::intermediate_representation::Variable;
use super::*; use super::*;
use crate::intermediate_representation::Variable;
use std::collections::BTreeSet;
#[test] #[test]
fn test_check_address_for_use_after_free() { fn test_check_address_for_use_after_free() {
...@@ -225,7 +225,7 @@ pub mod tests { ...@@ -225,7 +225,7 @@ pub mod tests {
AbstractIdentifier::mock("obj_id", "RAX", 8), AbstractIdentifier::mock("obj_id", "RAX", 8),
Bitvector::from_i64(0).into(), Bitvector::from_i64(0).into(),
); );
let pi_state = PiState::new(&Variable::mock("RSP", 8), Tid::new("call")); let pi_state = PiState::new(&Variable::mock("RSP", 8), Tid::new("call"), BTreeSet::new());
// Check that the parameter is correctly marked as freed in the state. // Check that the parameter is correctly marked as freed in the state.
assert!(state assert!(state
.handle_param_of_free_call(&Tid::new("free_call"), &param, &pi_state) .handle_param_of_free_call(&Tid::new("free_call"), &param, &pi_state)
...@@ -251,7 +251,7 @@ pub mod tests { ...@@ -251,7 +251,7 @@ pub mod tests {
AbstractIdentifier::mock("callee_obj_tid", "RAX", 8), AbstractIdentifier::mock("callee_obj_tid", "RAX", 8),
ObjectState::Dangling(Tid::new("free_tid")), ObjectState::Dangling(Tid::new("free_tid")),
); );
let pi_state = PiState::new(&Variable::mock("RSP", 8), Tid::new("call")); let pi_state = PiState::new(&Variable::mock("RSP", 8), Tid::new("call"), BTreeSet::new());
let id_replacement_map = BTreeMap::from([( let id_replacement_map = BTreeMap::from([(
AbstractIdentifier::mock("callee_obj_tid", "RAX", 8), AbstractIdentifier::mock("callee_obj_tid", "RAX", 8),
Data::from_target( Data::from_target(
......
...@@ -27,6 +27,7 @@ use crate::prelude::*; ...@@ -27,6 +27,7 @@ use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage}; use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::{get_callsites, get_symbol_map}; use crate::utils::symbol_utils::{get_callsites, get_symbol_map};
use crate::CweModule; use crate::CweModule;
use std::collections::BTreeSet;
/// The module name and version /// The module name and version
pub static CWE_MODULE: CweModule = CweModule { pub static CWE_MODULE: CweModule = CweModule {
...@@ -46,7 +47,7 @@ pub struct Config { ...@@ -46,7 +47,7 @@ pub struct Config {
/// assuming nothing is known about the state at the start of the block. /// assuming nothing is known about the state at the start of the block.
fn compute_block_end_state(project: &Project, block: &Term<Blk>) -> State { fn compute_block_end_state(project: &Project, block: &Term<Blk>) -> State {
let stack_register = &project.stack_pointer_register; let stack_register = &project.stack_pointer_register;
let mut state = State::new(stack_register, block.tid.clone()); let mut state = State::new(stack_register, block.tid.clone(), BTreeSet::new());
for def in block.term.defs.iter() { for def in block.term.defs.iter() {
match &def.term { match &def.term {
......
...@@ -376,6 +376,7 @@ mod tests { ...@@ -376,6 +376,7 @@ mod tests {
use super::*; use super::*;
use crate::abstract_domain::*; use crate::abstract_domain::*;
use crate::analysis::pointer_inference::ValueDomain; use crate::analysis::pointer_inference::ValueDomain;
use std::collections::BTreeSet;
impl State { impl State {
pub fn mock() -> State { pub fn mock() -> State {
...@@ -396,7 +397,8 @@ mod tests { ...@@ -396,7 +397,8 @@ mod tests {
size: ByteSize::new(8), size: ByteSize::new(8),
data_type: None, data_type: None,
}; };
let pi_state = PointerInferenceState::new(&register("RSP"), Tid::new("func")); let pi_state =
PointerInferenceState::new(&register("RSP"), Tid::new("func"), BTreeSet::new());
let symbol = ExternSymbol { let symbol = ExternSymbol {
tid: Tid::new("extern_symbol".to_string()), tid: Tid::new("extern_symbol".to_string()),
addresses: vec![], addresses: vec![],
......
...@@ -29,6 +29,7 @@ use crate::prelude::*; ...@@ -29,6 +29,7 @@ use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage}; use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::{get_callsites, get_symbol_map}; use crate::utils::symbol_utils::{get_callsites, get_symbol_map};
use crate::CweModule; use crate::CweModule;
use std::collections::BTreeSet;
/// The module name and version /// The module name and version
pub static CWE_MODULE: CweModule = CweModule { pub static CWE_MODULE: CweModule = CweModule {
...@@ -51,7 +52,7 @@ fn get_umask_permission_arg( ...@@ -51,7 +52,7 @@ fn get_umask_permission_arg(
project: &Project, project: &Project,
) -> Result<u64, Error> { ) -> Result<u64, Error> {
let stack_register = &project.stack_pointer_register; let stack_register = &project.stack_pointer_register;
let mut state = State::new(stack_register, block.tid.clone()); let mut state = State::new(stack_register, block.tid.clone(), BTreeSet::new());
for def in block.term.defs.iter() { for def in block.term.defs.iter() {
match &def.term { match &def.term {
......
use std::collections::BTreeSet;
use crate::{ use crate::{
abstract_domain::IntervalDomain, abstract_domain::IntervalDomain,
intermediate_representation::{Bitvector, Tid}, intermediate_representation::{Bitvector, Tid},
...@@ -6,7 +8,11 @@ use crate::{ ...@@ -6,7 +8,11 @@ use crate::{
use super::*; use super::*;
fn mock_pi_state() -> PointerInferenceState { fn mock_pi_state() -> PointerInferenceState {
PointerInferenceState::new(&Variable::mock("RSP", 8 as u64), Tid::new("func")) PointerInferenceState::new(
&Variable::mock("RSP", 8 as u64),
Tid::new("func"),
BTreeSet::new(),
)
} }
#[test] #[test]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment