Unverified Commit a16e6589 by Enkelmann Committed by GitHub

add global memory tracking to FunctionSignature analysis (#358)

parent 0359ea16
......@@ -86,6 +86,11 @@ impl AbstractIdentifier {
AbstractIdentifier::new(time.clone(), location)
}
/// Create an abstract identifier for a constant address into global memory.
///
/// The location part of the identifier is built from `address`,
/// the time part is a clone of `time`.
pub fn from_global_address(time: &Tid, address: &Bitvector) -> AbstractIdentifier {
    let global_location = AbstractLocation::from_global_address(address);
    AbstractIdentifier::new(time.clone(), global_location)
}
/// Create a new abstract identifier
/// by pushing the given path hint to the array of path hints of `self`.
/// Returns an error if the path hint is already contained in the path hints of `self`.
......@@ -106,11 +111,13 @@ impl AbstractIdentifier {
}
/// Get the register associated to the abstract location.
///
/// # Panics
///
/// Panics if the abstract location is not a register,
/// i.e. if it is a global address or a (global) memory location.
pub fn unwrap_register(&self) -> &Variable {
    match &self.location {
        AbstractLocation::Register(var) => var,
        // Every non-register variant is handled by one arm with the same panic message.
        AbstractLocation::GlobalAddress { .. }
        | AbstractLocation::GlobalPointer(_, _)
        | AbstractLocation::Pointer(_, _) => panic!("Abstract location is not a register."),
    }
}
......@@ -153,18 +160,36 @@ impl std::fmt::Display for AbstractIdentifier {
pub enum AbstractLocation {
/// The location is given by a register.
Register(Variable),
/// The value itself is a constant address to global memory.
///
/// Note that the `size` is the size of the pointer and not the size
/// of the value residing at the specific address in global memory.
GlobalAddress {
/// The address in global memory.
address: u64,
/// The byte size of the address (not the pointed-to value!).
size: ByteSize,
},
/// The location is in memory and reached through a register.
///
/// One needs to follow the pointer in the given register
/// and then follow the abstract memory location inside the pointed to memory object
/// to find the actual memory location.
Pointer(Variable, AbstractMemoryLocation),
/// The location is in memory and reached through a pointer stored in global memory.
///
/// One needs to follow the pointer located at the given global address
/// and then follow the abstract memory location inside the pointed to memory object
/// to find the actual memory location.
GlobalPointer(u64, AbstractMemoryLocation),
}
impl std::fmt::Display for AbstractLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Register(var) => write!(formatter, "{}", var.name),
Self::GlobalAddress { address, size: _ } => write!(formatter, "0x{:x}", address),
Self::Pointer(var, location) => write!(formatter, "{}->{}", var.name, location),
Self::GlobalPointer(address, location) => {
write!(formatter, "0x{:x}->{}", address, location)
}
}
}
}
......@@ -193,11 +218,23 @@ impl AbstractLocation {
AbstractLocation::Pointer(stack_register.clone(), stack_pos)
}
/// Create an abstract location representing an address pointing to global memory.
///
/// The stored size is the byte size of `address` itself.
///
/// # Panics
///
/// Panics if `address` cannot be converted to a `u64`.
pub fn from_global_address(address: &Bitvector) -> AbstractLocation {
    let pointer_size = address.bytesize();
    let numeric_address = address
        .try_to_u64()
        .expect("Global address larger than 64 bits encountered.");
    AbstractLocation::GlobalAddress {
        address: numeric_address,
        size: pointer_size,
    }
}
/// Get the bytesize of the value represented by the abstract location.
///
/// * For a register this is the size of the register.
/// * For a global address this is the size of the address itself.
/// * For locations reached through a register or a global pointer
///   this is the bytesize reported by the abstract memory location.
pub fn bytesize(&self) -> ByteSize {
    match self {
        Self::Register(var) => var.size,
        Self::GlobalAddress { size, .. } => *size,
        // Both pointer variants share one arm; the base of the pointer is irrelevant here.
        Self::Pointer(_, mem_location) | Self::GlobalPointer(_, mem_location) => {
            mem_location.bytesize()
        }
    }
}
}
......
//! Generate call graphs out of a program term.
use std::collections::HashMap;
use crate::intermediate_representation::*;
use petgraph::graph::DiGraph;
/// The graph type of a call graph.
///
/// Node weights are `Tid`s and edge weights are references to `Term<Jmp>` values
/// that are borrowed for the lifetime `'a`.
pub type CallGraph<'a> = DiGraph<Tid, &'a Term<Jmp>>;
/// Build the call graph of the given program.
///
/// Every function of the program becomes one node whose weight is the TID of the function.
/// Every call whose target is a function of the program becomes one edge
/// from the caller to the callee, with the jump term of the call as edge weight.
///
/// Calls to external symbols are not represented in the graph,
/// i.e. they generate neither nodes nor edges.
/// Indirect calls do not generate edges either,
/// because an analysis for resolving indirect calls is not implemented yet.
pub fn get_program_callgraph(program: &Term<Program>) -> CallGraph {
    let mut callgraph = CallGraph::new();
    // First pass: add one node per function and remember which node belongs to which TID.
    let node_of_tid: HashMap<_, _> = program
        .term
        .subs
        .keys()
        .map(|sub_tid| (sub_tid.clone(), callgraph.add_node(sub_tid.clone())))
        .collect();
    // Second pass: add one edge per call to a function known to the program.
    // This has to happen after all nodes exist, since callees may come later in the iteration order.
    for sub in program.term.subs.values() {
        let caller_node = *node_of_tid.get(&sub.tid).unwrap();
        let jumps_of_sub = sub
            .term
            .blocks
            .iter()
            .flat_map(|block| block.term.jmps.iter());
        for jump in jumps_of_sub {
            if let Jmp::Call { target, .. } = &jump.term {
                // Targets without a node are skipped.
                if let Some(&callee_node) = node_of_tid.get(target) {
                    callgraph.add_edge(caller_node, callee_node, jump);
                }
            }
        }
    }
    callgraph
}
#[cfg(test)]
pub mod tests {
    use super::*;

    /// A program with two functions and one call between them
    /// must yield a call graph with two nodes and one edge from the caller to the callee.
    #[test]
    fn test_get_program_callgraph() {
        // Create a program with 2 functions and one call between them
        let mut project = Project::mock_x64();
        let mut caller = Sub::mock("caller");
        let callee = Sub::mock("callee");
        let call_term = Term {
            tid: Tid::new("call"),
            term: Jmp::Call {
                target: Tid::new("callee"),
                return_: None,
            },
        };
        let mut call_block = Blk::mock();
        call_block.term.jmps.push(call_term);
        caller.term.blocks.push(call_block);
        let subs = &mut project.program.term.subs;
        subs.insert(Tid::new("caller"), caller);
        subs.insert(Tid::new("callee"), callee);

        // Test correctness of the call graph
        let callgraph = get_program_callgraph(&project.program);
        assert_eq!(callgraph.node_indices().len(), 2);
        assert_eq!(callgraph.edge_indices().len(), 1);
        let only_edge = callgraph.edge_indices().next().unwrap();
        let (start, end) = callgraph.edge_endpoints(only_edge).unwrap();
        assert_eq!(callgraph[start], Tid::new("caller"));
        assert_eq!(callgraph[end], Tid::new("callee"));
    }
}
......@@ -112,3 +112,26 @@ fn test_call_stub_handling() {
);
assert_eq!(params.len(), 5);
}
#[test]
fn test_get_global_mem_address() {
    let project = Project::mock_arm32();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let context = Context::new(&project, &graph);

    // A value relative to a global address ID yields the base address plus the offset.
    let base_id =
        AbstractIdentifier::from_global_address(&Tid::new("fn_tid"), &Bitvector::from_i32(0x2000));
    let global_address_id: DataDomain<BitvectorDomain> =
        DataDomain::from_target(base_id, Bitvector::from_i32(0x2).into());
    assert_eq!(
        context.get_global_mem_address(&global_address_id),
        Some(Bitvector::from_i32(0x2002))
    );

    // An absolute value is returned as it is.
    let global_address_const: DataDomain<BitvectorDomain> = Bitvector::from_i32(0x2003).into();
    assert_eq!(
        context.get_global_mem_address(&global_address_const),
        Some(Bitvector::from_i32(0x2003))
    );

    // No address is returned if it may not be unique.
    let ambiguous_value = global_address_id.merge(&global_address_const);
    assert!(context.get_global_mem_address(&ambiguous_value).is_none());
}
......@@ -4,6 +4,7 @@
//! although only stack accesses with known, constant offset are processed.
//! Accesses to potential function parameters are collected together with the type of the access
//! (is the value read, dereferenced for read access or dereferenced for write access).
//! Accesses to constant addresses that may correspond to global variables are also tracked.
//!
//! Known limitations of the analysis:
//! * The analysis is an overapproximation in the sense that it may generate more input parameters
......@@ -13,8 +14,9 @@
//! For functions that use other registers
//! than those in the standard calling convention for parameter passing
//! the results of this analysis will be wrong.
//! * Parameters that are used as input values for variadic functions (e.g. sprintf) may be missed
//! since detection of variadic function parameters is not yet implemented for this analysis.
//! * Parameters that are used as input values for variadic functions may be missed.
//! Some variadic functions are stubbed, i.e. parameter recognition should work for these.
//! But not all variadic functions are stubbed.
//! * If only a part (e.g. a single byte) of a stack parameter is accessed instead of the whole parameter
//! then a duplicate stack parameter may be generated.
//! A proper sanitation for this case is not yet implemented,
......@@ -42,6 +44,8 @@ mod state;
use state::State;
mod access_pattern;
pub use access_pattern::AccessPattern;
mod global_var_propagation;
use global_var_propagation::propagate_globals;
pub mod stubs;
/// Generate the computation object for the fixpoint computation
......@@ -152,6 +156,8 @@ pub fn compute_function_signatures<'a>(
);
}
}
// Propagate globals in bottom-up direction in the call graph
propagate_globals(project, &mut fn_sig_map);
(fn_sig_map, logs)
}
......@@ -162,6 +168,9 @@ pub fn compute_function_signatures<'a>(
pub struct FunctionSignature {
/// The parameters of the function together with their access patterns.
pub parameters: HashMap<Arg, AccessPattern>,
/// Values in writeable global memory accessed by the function.
/// The keys are the addresses in global memory, the values are the corresponding access patterns.
/// Does not contain indirectly accessed values, e.g. values accessed by callees of this function.
// NOTE(review): `compute_function_signatures` calls `propagate_globals` on the signature map
// before returning it; confirm that the statement about indirect accesses still holds afterwards.
pub global_parameters: HashMap<u64, AccessPattern>,
}
impl FunctionSignature {
......@@ -169,6 +178,7 @@ impl FunctionSignature {
pub fn new() -> Self {
    // A fresh signature knows neither regular parameters nor global memory parameters.
    let parameters = HashMap::new();
    let global_parameters = HashMap::new();
    Self {
        parameters,
        global_parameters,
    }
}
......@@ -186,8 +196,12 @@ impl FunctionSignature {
stack_params_total_size
}
/// Merge the parameter list of `self` with the given parameter list.
fn merge_parameter_list(&mut self, params: &[(Arg, AccessPattern)]) {
/// Merge the parameter list and the global parameter list of `self` with the given lists.
fn merge_parameter_lists(
&mut self,
params: &[(Arg, AccessPattern)],
global_params: &[(u64, AccessPattern)],
) {
for (arg, sig_new) in params {
if let Some(sig_self) = self.parameters.get_mut(arg) {
*sig_self = sig_self.merge(sig_new);
......@@ -195,12 +209,20 @@ impl FunctionSignature {
self.parameters.insert(arg.clone(), *sig_new);
}
}
for (address, sig_new) in global_params {
if let Some(sig_self) = self.global_parameters.get_mut(address) {
*sig_self = sig_self.merge(sig_new);
} else {
self.global_parameters.insert(*address, *sig_new);
}
}
}
/// Merge the function signature with the signature extracted from the given state.
///
/// Both the parameters and the global memory parameters
/// that the state reports for the current function are merged into `self`.
fn merge_with_fn_sig_of_state(&mut self, state: &State) {
    let params = state.get_params_of_current_function();
    let global_params = state.get_global_mem_params_of_current_function();
    // Both lists are merged in a single call.
    self.merge_parameter_lists(&params, &global_params);
}
/// Sanitize the function signature:
......@@ -269,7 +291,10 @@ pub mod tests {
write_access_pattern,
),
]);
FunctionSignature { parameters }
FunctionSignature {
parameters,
global_parameters: HashMap::new(),
}
}
}
}
......@@ -195,6 +195,14 @@ impl State {
}
}
/// Start tracking the given abstract ID unless it is tracked already.
///
/// A newly tracked ID starts with a fresh access pattern.
/// The access pattern of an already tracked ID is left untouched.
pub fn add_id_to_tracked_ids(&mut self, id: &AbstractIdentifier) {
    if self.tracked_ids.contains_key(id) {
        return;
    }
    self.tracked_ids.insert(id.clone(), AccessPattern::new());
}
/// Get the value located at a positive stack offset.
///
/// If no corresponding stack parameter ID exists for the value,
......@@ -308,30 +316,75 @@ impl State {
}
}
/// Set the read and dereferenced flag for every tracked ID
/// that may be referenced when computing the value of the expression.
///
/// Each input register of the expression is evaluated in the current state
/// and the flags are set through `set_deref_flag_for_contained_ids`.
pub fn set_deref_flag_for_input_ids_of_expression(&mut self, expression: &Expression) {
    for register in expression.input_vars() {
        // Evaluate first, so that the register value no longer borrows `self`
        // when the flags are updated.
        let register_value = self.get_register(register);
        self.set_deref_flag_for_contained_ids(&register_value);
    }
}
/// Set the read and mutably dereferenced flag for every ID
/// Set the read and mutably dereferenced flag for every tracked ID
/// that may be referenced when computing the value of the expression.
pub fn set_mutable_deref_flag_for_input_ids_of_expression(&mut self, expression: &Expression) {
for register in expression.input_vars() {
for id in self.get_register(register).referenced_ids() {
if let Some(object) = self.tracked_ids.get_mut(id) {
object.set_read_flag();
object.set_mutably_dereferenced_flag();
self.set_deref_mut_flag_for_contained_ids(&self.get_register(register));
}
}
/// Mark every tracked ID referenced by the given value as read and dereferenced.
///
/// IDs referenced by the value that are not tracked by the state are ignored.
pub fn set_deref_flag_for_contained_ids(&mut self, value: &DataDomain<BitvectorDomain>) {
    for id in value.referenced_ids() {
        let access_pattern = match self.tracked_ids.get_mut(id) {
            Some(access_pattern) => access_pattern,
            None => continue,
        };
        access_pattern.set_read_flag();
        access_pattern.set_dereference_flag();
    }
}
/// Mark every tracked ID referenced by the given value as read and mutably dereferenced.
///
/// IDs referenced by the value that are not tracked by the state are ignored.
pub fn set_deref_mut_flag_for_contained_ids(&mut self, value: &DataDomain<BitvectorDomain>) {
    for id in value.referenced_ids() {
        let access_pattern = match self.tracked_ids.get_mut(id) {
            Some(access_pattern) => access_pattern,
            None => continue,
        };
        access_pattern.set_read_flag();
        access_pattern.set_mutably_dereferenced_flag();
    }
}
/// Replace an absolute value that might be an address into writeable global memory
/// by a value relative to a new global memory ID.
///
/// The substitution only happens if
/// * the value has the same bytesize as the stack ID of the state,
/// * the absolute value part of the value can be converted to a bitvector and
/// * the global memory image reports the corresponding address as writeable.
///
/// In all other cases the value is returned unchanged.
///
/// The generated ID is added to the tracked IDs of `self`,
/// but no access flags are set for a newly generated ID.
pub fn substitute_global_mem_address(
    &mut self,
    mut value: DataDomain<BitvectorDomain>,
    global_memory: &RuntimeMemoryImage,
) -> DataDomain<BitvectorDomain> {
    // Only pointer-sized values can represent global addresses.
    if value.bytesize() != self.stack_id.bytesize() {
        return value;
    }
    // Without an absolute value that is convertible to a bitvector there is nothing to substitute.
    let bitvec = match value
        .get_absolute_value()
        .map(|absolute_value| absolute_value.try_to_bitvec())
    {
        Some(Ok(bitvec)) => bitvec,
        _ => return value,
    };
    // Errors of the writeability check are treated like non-writeable addresses.
    if !matches!(global_memory.is_address_writeable(&bitvec), Ok(true)) {
        return value;
    }
    // The absolute value might be a pointer to global memory.
    let global_id =
        AbstractIdentifier::from_global_address(self.get_current_function_tid(), &bitvec);
    // Add the ID to the set of tracked IDs for the state.
    self.add_id_to_tracked_ids(&global_id);
    // Convert the absolute value to a relative value (relative the new global ID).
    value = value.merge(&DataDomain::from_target(
        global_id,
        Bitvector::zero(value.bytesize().into()).into(),
    ));
    value.set_absolute_value(None);
    value
}
}
......
......@@ -10,8 +10,9 @@ impl State {
call_tid: &Tid,
extern_symbol: &ExternSymbol,
calling_convention: &CallingConvention,
global_memory: &RuntimeMemoryImage,
) {
let input_ids = self.collect_input_ids_of_call(&extern_symbol.parameters);
let input_ids = self.collect_input_ids_of_call(&extern_symbol.parameters, global_memory);
self.clear_non_callee_saved_register(&calling_convention.callee_saved_register);
self.generate_return_values_for_call(&input_ids, &extern_symbol.return_values, call_tid);
}
......@@ -26,6 +27,7 @@ impl State {
&mut self,
call: &Term<Jmp>,
calling_convention: &CallingConvention,
global_memory: &RuntimeMemoryImage,
) {
let mut parameters =
generate_args_from_registers(&calling_convention.integer_parameter_register);
......@@ -43,22 +45,24 @@ impl State {
data_type: None,
});
}
let input_ids = self.collect_input_ids_of_call(&parameters);
let input_ids = self.collect_input_ids_of_call(&parameters, global_memory);
self.clear_non_callee_saved_register(&calling_convention.callee_saved_register);
self.generate_return_values_for_call(&input_ids, &return_register, &call.tid);
}
/// Get all input IDs referenced in the parameters of a call.
/// Marks every input ID as accessed (with access flags for unknown access)
/// and generates stack parameter IDs for the current function if necessary.
fn collect_input_ids_of_call(&mut self, parameters: &[Arg]) -> BTreeSet<AbstractIdentifier> {
/// Marks every input ID as accessed (with access flags for unknown access).
/// Also generates stack parameter IDs and global memory IDs for the current function if necessary.
fn collect_input_ids_of_call(
&mut self,
parameters: &[Arg],
global_memory: &RuntimeMemoryImage,
) -> BTreeSet<AbstractIdentifier> {
let mut input_ids = BTreeSet::new();
for input_param in parameters {
for (id, offset) in self
.eval_parameter_arg(input_param)
.get_relative_values()
.iter()
{
let param = self.eval_parameter_arg(input_param);
let param = self.substitute_global_mem_address(param, global_memory);
for (id, offset) in param.get_relative_values() {
input_ids.insert(id.clone());
// If the relative value points to the stack we also have to collect all IDs contained in the pointed-to value.
if *id == self.stack_id {
......@@ -131,29 +135,55 @@ impl State {
let mut params = Vec::new();
for (id, access_pattern) in self.tracked_ids.iter() {
if id.get_tid() == self.get_current_function_tid() {
if access_pattern.is_accessed() {
params.push((generate_arg_from_abstract_id(id), *access_pattern));
} else if matches!(id.get_location(), &AbstractLocation::Pointer { .. }) {
// This is a stack parameter.
// If it was only loaded into a register but otherwise not used, then the read-flag needs to be set.
let mut access_pattern = *access_pattern;
access_pattern.set_read_flag();
params.push((generate_arg_from_abstract_id(id), access_pattern));
if let Ok(param_arg) = generate_param_arg_from_abstract_id(id) {
if access_pattern.is_accessed() {
params.push((param_arg, *access_pattern));
} else if matches!(id.get_location(), &AbstractLocation::Pointer { .. }) {
// This is a stack parameter.
// If it was only loaded into a register but otherwise not used, then the read-flag needs to be set.
let mut access_pattern = *access_pattern;
access_pattern.set_read_flag();
params.push((param_arg, access_pattern));
}
}
}
}
params
}
/// Collect the global memory addresses accessed by the current function.
///
/// The list contains one entry for every tracked ID that
/// belongs to the current function, has an access pattern marked as accessed
/// and whose location is a global address or a global pointer.
/// Each entry consists of the global address and the access pattern of the ID.
pub fn get_global_mem_params_of_current_function(&self) -> Vec<(u64, AccessPattern)> {
    let current_fn_tid = self.get_current_function_tid();
    self.tracked_ids
        .iter()
        .filter(|(id, access_pattern)| {
            id.get_tid() == current_fn_tid && access_pattern.is_accessed()
        })
        .filter_map(|(id, access_pattern)| match id.get_location() {
            AbstractLocation::GlobalPointer(address, _)
            | AbstractLocation::GlobalAddress { address, .. } => {
                Some((*address, *access_pattern))
            }
            // Register and stack based locations are not global memory.
            AbstractLocation::Pointer(_, _) | AbstractLocation::Register(_) => None,
        })
        .collect()
}
/// Merges the access patterns of callee parameters with those of the caller (represented by `self`).
/// The result represents the access patterns after returning to the caller and is written to `self`.
///
/// If a parameter is a pointer to the stack frame of self, it is dereferenced
/// to set the access patterns of the target.
/// Note that this may create new stack parameter objects for self.
pub fn merge_parameter_access(&mut self, params: &[(Arg, AccessPattern)]) {
pub fn merge_parameter_access(
&mut self,
params: &[(Arg, AccessPattern)],
global_memory: &RuntimeMemoryImage,
) {
for (parameter, call_access_pattern) in params {
for (id, offset) in self.eval_parameter_arg(parameter).get_relative_values() {
let param_value = self.eval_parameter_arg(parameter);
let param_value = self.substitute_global_mem_address(param_value, global_memory);
for (id, offset) in param_value.get_relative_values() {
if let Some(object) = self.tracked_ids.get_mut(id) {
*object = object.merge(call_access_pattern);
}
......@@ -188,7 +218,7 @@ impl State {
/// then return an argument object corresponding to the parameter.
pub fn get_arg_corresponding_to_id(&self, id: &AbstractIdentifier) -> Option<Arg> {
if id.get_tid() == self.stack_id.get_tid() {
Some(generate_arg_from_abstract_id(id))
generate_param_arg_from_abstract_id(id).ok()
} else {
None
}
......@@ -205,19 +235,23 @@ fn generate_args_from_registers(registers: &[Variable]) -> Vec<Arg> {
/// Generate an argument representing the location in the given abstract ID.
/// If the location is a pointer, it is assumed that the pointer points to the stack.
/// Panics if the location contains a second level of indirection.
fn generate_arg_from_abstract_id(id: &AbstractIdentifier) -> Arg {
/// Returns an error if the location contains a second level of indirection
/// or if the location is associated to global memory.
fn generate_param_arg_from_abstract_id(id: &AbstractIdentifier) -> Result<Arg, Error> {
match id.get_location() {
AbstractLocation::Register(var) => Arg::from_var(var.clone(), None),
AbstractLocation::Register(var) => Ok(Arg::from_var(var.clone(), None)),
AbstractLocation::Pointer(var, mem_location) => match mem_location {
AbstractMemoryLocation::Location { offset, size } => Arg::Stack {
AbstractMemoryLocation::Location { offset, size } => Ok(Arg::Stack {
address: Expression::Var(var.clone()).plus_const(*offset),
size: *size,
data_type: None,
},
}),
AbstractMemoryLocation::Pointer { .. } => {
panic!("Memory location is not a stack offset.")
Err(anyhow!("Memory location is not a stack offset."))
}
},
AbstractLocation::GlobalAddress { .. } | AbstractLocation::GlobalPointer(_, _) => {
Err(anyhow!("Global values are not parameters."))
}
}
}
......@@ -150,7 +150,12 @@ fn test_extern_symbol_handling() {
let return_val_id =
AbstractIdentifier::from_var(Tid::new("call_tid"), &Variable::mock("r0", 4));
// Test extern symbol handling.
state.handle_generic_extern_symbol(&call_tid, &extern_symbol, &cconv);
state.handle_generic_extern_symbol(
&call_tid,
&extern_symbol,
&cconv,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
state
.tracked_ids
......@@ -177,3 +182,30 @@ fn test_extern_symbol_handling() {
&Bitvector::from_i32(0).into()
);
}
#[test]
fn test_substitute_global_mem_address() {
    let mut state = State::mock_arm32();
    let global_memory = RuntimeMemoryImage::mock();

    // Test that addresses into non-writeable memory do not get substituted.
    let read_only_address: DataDomain<BitvectorDomain> = Bitvector::from_i32(0x1000).into();
    let unchanged_address =
        state.substitute_global_mem_address(read_only_address.clone(), &global_memory);
    assert_eq!(read_only_address, unchanged_address);

    // Test substitution for addresses into writeable global memory.
    let writeable_address: DataDomain<BitvectorDomain> = Bitvector::from_i32(0x2000).into();
    let substituted_address =
        state.substitute_global_mem_address(writeable_address, &global_memory);
    let expected_global_id = AbstractIdentifier::from_global_address(
        state.get_current_function_tid(),
        &Bitvector::from_i32(0x2000),
    );
    // The new ID is tracked, but without any access flags set.
    assert_eq!(
        state.tracked_ids.get(&expected_global_id),
        Some(&AccessPattern::new())
    );
    // The value is now relative to the new ID with offset zero.
    let expected_value =
        DataDomain::from_target(expected_global_id, Bitvector::from_i32(0).into());
    assert_eq!(substituted_address, expected_value);
}
......@@ -2,6 +2,7 @@
//! as well as analyses depending on these modules.
pub mod backward_interprocedural_fixpoint;
pub mod callgraph;
pub mod dead_variable_elimination;
pub mod expression_propagation;
pub mod fixpoint;
......
......@@ -92,3 +92,23 @@ impl fmt::Display for Jmp {
}
}
}
#[cfg(test)]
pub mod tests {
    use super::*;

    impl Jmp {
        /// Create a mock call term.
        ///
        /// `target` and `return_` are used as the names of the target TID and the return TID.
        /// The TID of the call term itself is named `call_<target>`.
        pub fn mock_call(target: &str, return_: Option<&str>) -> Term<Jmp> {
            Term {
                tid: Tid::new(format!("call_{}", target)),
                term: Jmp::Call {
                    target: Tid::new(target),
                    return_: return_.map(Tid::new),
                },
            }
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment