Unverified Commit 99042d01 by Enkelmann Committed by GitHub

Add libC function stubs to function signature analysis (#348)

parent 1e46589f
...@@ -23,6 +23,33 @@ impl AccessPattern { ...@@ -23,6 +23,33 @@ impl AccessPattern {
} }
} }
/// Create an `AccessPattern` in which every access flag is set.
///
/// Since all flags are `true`, this is the pattern to use
/// when nothing is known about how a value is accessed.
pub fn new_unknown_access() -> Self {
    Self {
        read: true,
        dereferenced: true,
        mutably_dereferenced: true,
    }
}
/// Set the access flag for read access and return `self`.
pub fn with_read_flag(mut self) -> Self {
self.read = true;
self
}
/// Set the access flag for immutable pointer dereference and return `self`.
pub fn with_dereference_flag(mut self) -> Self {
self.dereferenced = true;
self
}
/// Set the access flag for pointer dereference with write access to the pointer target and return `self`.
pub fn with_mutably_dereferenced_flag(mut self) -> Self {
self.mutably_dereferenced = true;
self
}
/// Set the access flag for immutable pointer dereference. /// Set the access flag for immutable pointer dereference.
pub fn set_dereference_flag(&mut self) { pub fn set_dereference_flag(&mut self) {
self.dereferenced = true; self.dereferenced = true;
......
use crate::abstract_domain::{AbstractDomain, AbstractIdentifier, BitvectorDomain, DataDomain}; use crate::abstract_domain::{
AbstractDomain, AbstractIdentifier, BitvectorDomain, DataDomain, TryToBitvec,
};
use crate::utils::arguments;
use crate::{ use crate::{
analysis::{forward_interprocedural_fixpoint, graph::Graph}, analysis::{forward_interprocedural_fixpoint, graph::Graph},
intermediate_representation::Project, intermediate_representation::Project,
...@@ -10,12 +13,22 @@ use super::*; ...@@ -10,12 +13,22 @@ use super::*;
pub struct Context<'a> { pub struct Context<'a> {
graph: &'a Graph<'a>, graph: &'a Graph<'a>,
project: &'a Project, project: &'a Project,
/// Parameter access patterns for stubbed extern symbols.
param_access_stubs: BTreeMap<&'static str, Vec<AccessPattern>>,
/// Assigns to the name of a stubbed variadic symbol the index of its format string parameter
/// and the access pattern for all variadic parameters.
stubbed_variadic_symbols: BTreeMap<&'static str, (usize, AccessPattern)>,
} }
impl<'a> Context<'a> { impl<'a> Context<'a> {
/// Generate a new context object. /// Generate a new context object.
pub fn new(project: &'a Project, graph: &'a Graph<'a>) -> Self { pub fn new(project: &'a Project, graph: &'a Graph<'a>) -> Self {
Context { graph, project } Context {
graph,
project,
param_access_stubs: stubs::generate_param_access_stubs(),
stubbed_variadic_symbols: stubs::get_stubbed_variadic_symbols(),
}
} }
/// Compute the return values of a call and return them (without adding them to the caller state). /// Compute the return values of a call and return them (without adding them to the caller state).
...@@ -108,6 +121,133 @@ impl<'a> Context<'a> { ...@@ -108,6 +121,133 @@ impl<'a> Context<'a> {
return_value return_value
} }
/// Apply the effect of a call to the given extern symbol to `state`.
///
/// Symbols without an entry in the parameter access stubs are forwarded
/// to the [generic symbol handler](State::handle_generic_extern_symbol).
/// For stubbed symbols the following happens in order:
/// - the stubbed access patterns are merged into the IDs referenced by the fixed parameters,
/// - for stubbed variadic symbols the variadic parameters are handled,
///   first by trying to parse the format string and, if that fails, by the generic variadic fallback,
/// - the stubbed return value is computed, non-callee-saved registers are cleared
///   and the return value is written to the first integer return register.
fn handle_extern_symbol_call(
    &self,
    state: &mut State,
    extern_symbol: &ExternSymbol,
    call_tid: &Tid,
) {
    let symbol_name = extern_symbol.name.as_str();
    let cconv = self.project.get_calling_convention(extern_symbol);
    let param_access_list = match self.param_access_stubs.get(symbol_name) {
        Some(access_list) => access_list,
        None => {
            // No stub available: fall back to the generic handling.
            state.handle_generic_extern_symbol(call_tid, extern_symbol, cconv);
            return;
        }
    };
    // Set access flags for the fixed parameters.
    // `zip` stops at the shorter of the two lists.
    for (param, access_pattern) in extern_symbol.parameters.iter().zip(param_access_list) {
        let param_value = state.eval_parameter_arg(param);
        for id in param_value.get_relative_values().keys() {
            state.merge_access_pattern_of_id(id, access_pattern);
        }
    }
    // The format string based handling is only attempted for variadic symbols.
    // The generic fallback only runs if that attempt did not succeed.
    let is_stubbed_variadic = self.stubbed_variadic_symbols.contains_key(symbol_name);
    if is_stubbed_variadic
        && self
            .set_access_flags_for_variadic_parameters(state, extern_symbol)
            .is_none()
    {
        self.set_access_flags_for_generic_variadic_parameters(state, extern_symbol);
    }
    // The return value has to be computed before the registers are cleared.
    let return_val = stubs::compute_return_value_for_stubbed_function(
        self.project,
        state,
        extern_symbol,
        call_tid,
    );
    state.clear_non_callee_saved_register(&cconv.callee_saved_register);
    state.set_register(&cconv.integer_return_register[0], return_val);
}
/// Merges the access patterns for all variadic parameters of the given symbol.
///
/// This function can only handle stubbed symbols where the number of variadic parameters
/// can be parsed from a format string.
/// It returns `None` without modifying any access patterns if
/// - the symbol is not contained in the list of stubbed variadic symbols,
/// - the symbol has no parameter at the expected format string index,
/// - the format string address is not a single absolute value,
/// - or reading or parsing the format string failed.
///
/// On success `Some(())` is returned.
///
/// If the variadic access pattern contains the mutable dereference flag
/// then all variadic parameters are assumed to be pointers.
fn set_access_flags_for_variadic_parameters(
    &self,
    state: &mut State,
    extern_symbol: &ExternSymbol,
) -> Option<()> {
    let (format_string_index, variadic_access_pattern) = self
        .stubbed_variadic_symbols
        .get(extern_symbol.name.as_str())?;
    // The parameter list of the symbol may be shorter than expected.
    // Use a checked lookup so that the caller can fall back to the generic handling
    // instead of panicking on an out-of-bounds index.
    let format_string_param = extern_symbol.parameters.get(*format_string_index)?;
    let format_string_address = state
        .eval_parameter_arg(format_string_param)
        .get_if_absolute_value()
        .and_then(|value| value.try_to_bitvec().ok())?;
    let format_string = arguments::parse_format_string_destination_and_return_content(
        format_string_address,
        &self.project.runtime_memory_image,
    )
    .ok()?;
    let mut format_string_params = arguments::parse_format_string_parameters(
        &format_string,
        &self.project.datatype_properties,
    )
    .ok()?;
    if variadic_access_pattern.is_mutably_dereferenced() {
        // All parameters are pointers to where values shall be written.
        // Only the number of parsed parameters is kept.
        format_string_params =
            vec![
                (Datatype::Pointer, self.project.stack_pointer_register.size);
                format_string_params.len()
            ];
    }
    let format_string_args = arguments::calculate_parameter_locations(
        format_string_params,
        extern_symbol,
        self.project,
    );
    for param in format_string_args {
        let param_value = state.eval_parameter_arg(&param);
        for id in param_value.get_relative_values().keys() {
            state.merge_access_pattern_of_id(id, variadic_access_pattern);
        }
    }
    Some(())
}
/// Sets access patterns for variadic parameters
/// of a call to a variadic function with unknown number of variadic parameters.
///
/// This function assumes that all remaining integer parameter registers of the corresponding calling convention
/// are filled with variadic parameters,
/// but no variadic parameters are supplied as stack parameters.
/// The remaining registers are those whose index in the list of integer parameter registers
/// is at least the number of fixed parameters of the symbol.
/// If there are no such registers, no access patterns are modified.
///
/// # Panics
///
/// Panics if the symbol is not contained in the list of stubbed variadic symbols.
fn set_access_flags_for_generic_variadic_parameters(
    &self,
    state: &mut State,
    extern_symbol: &ExternSymbol,
) {
    let (_, variadic_access_pattern) = self
        .stubbed_variadic_symbols
        .get(extern_symbol.name.as_str())
        .expect("must only be called for stubbed variadic symbols");
    let cconv = self.project.get_calling_convention(extern_symbol);
    // Visit every register after the fixed parameters, not just the first and the last one.
    // `skip` yields nothing if the fixed parameters already use up all registers.
    let fixed_param_count = extern_symbol.parameters.len();
    for register in cconv
        .integer_parameter_register
        .iter()
        .skip(fixed_param_count)
    {
        let register_value = state.get_register(register);
        for id in register_value.get_relative_values().keys() {
            state.merge_access_pattern_of_id(id, variadic_access_pattern);
        }
    }
}
} }
impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> { impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
...@@ -130,7 +270,11 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> { ...@@ -130,7 +270,11 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
} }
Def::Load { var, address } => { Def::Load { var, address } => {
new_state.set_deref_flag_for_input_ids_of_expression(address); new_state.set_deref_flag_for_input_ids_of_expression(address);
let value = new_state.load_value(new_state.eval(address), var.size); let value = new_state.load_value(
new_state.eval(address),
var.size,
Some(&self.project.runtime_memory_image),
);
new_state.set_register(var, value); new_state.set_register(var, value);
} }
Def::Store { address, value } => { Def::Store { address, value } => {
...@@ -194,8 +338,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> { ...@@ -194,8 +338,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
} }
Jmp::Call { target, .. } => { Jmp::Call { target, .. } => {
if let Some(extern_symbol) = self.project.program.term.extern_symbols.get(target) { if let Some(extern_symbol) = self.project.program.term.extern_symbols.get(target) {
let cconv = self.project.get_calling_convention(extern_symbol); self.handle_extern_symbol_call(&mut new_state, extern_symbol, &call.tid);
new_state.handle_extern_symbol(call, extern_symbol, cconv);
if !extern_symbol.no_return { if !extern_symbol.no_return {
return Some(new_state); return Some(new_state);
} }
...@@ -206,7 +349,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> { ...@@ -206,7 +349,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
} }
_ => (), _ => (),
} }
// The call could not be properly handled, so we treat it as a dead end in the control flow graph. // The call could not be properly handled or is a non-returning function, so we treat it as a dead end in the control flow graph.
None None
} }
......
...@@ -49,3 +49,66 @@ fn test_compute_return_values_of_call() { ...@@ -49,3 +49,66 @@ fn test_compute_return_values_of_call() {
assert_eq!(return_values.iter().len(), 3); assert_eq!(return_values.iter().len(), 3);
assert_eq!(return_values[0], (&Variable::mock("RAX", 8), expected_val)); assert_eq!(return_values[0], (&Variable::mock("RAX", 8), expected_val));
} }
/// Checks the stub handling for a non-variadic symbol (`malloc`)
/// and for a variadic symbol with a statically known format string (`sprintf`).
#[test]
fn test_call_stub_handling() {
    let project = Project::mock_arm32();
    let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
    let context = Context::new(&project, &graph);
    // Both sub-tests start from a freshly generated state of the same function.
    let fresh_state = || {
        State::new(
            &Tid::new("func"),
            &project.stack_pointer_register,
            project.get_standard_calling_convention().unwrap(),
        )
    };

    // Test handling of malloc call
    let malloc_symbol = ExternSymbol::mock_malloc_symbol_arm();
    let malloc_call_tid = Tid::new("call_malloc");
    let mut malloc_state = fresh_state();
    context.handle_extern_symbol_call(&mut malloc_state, &malloc_symbol, &malloc_call_tid);
    // The size parameter is only read.
    assert_eq!(
        malloc_state.get_params_of_current_function(),
        vec![(
            Arg::from_var(Variable::mock("r0", 4), None),
            AccessPattern::new().with_read_flag()
        )]
    );
    // The return register holds either a pointer to a new object or zero.
    assert_eq!(
        malloc_state.get_register(&Variable::mock("r0", 4)),
        DataDomain::from_target(
            AbstractIdentifier::mock(malloc_call_tid, "r0", 4),
            Bitvector::from_i32(0).into()
        )
        .merge(&Bitvector::zero(ByteSize::new(4).into()).into())
    );

    // Test handling of sprintf call
    let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
    let sprintf_call_tid = Tid::new("call_sprintf");
    let mut sprintf_state = fresh_state();
    // Set the format string param register to a pointer to the string 'cat %s %s %s %s'.
    sprintf_state.set_register(&Variable::mock("r1", 4), Bitvector::from_i32(0x6000).into());
    context.handle_extern_symbol_call(&mut sprintf_state, &sprintf_symbol, &sprintf_call_tid);
    let params = sprintf_state.get_params_of_current_function();
    assert_eq!(
        params[0],
        (
            Arg::from_var(Variable::mock("r0", 4), None),
            AccessPattern::new_unknown_access()
        )
    );
    assert_eq!(
        params[1],
        (
            Arg::from_var(Variable::mock("r2", 4), None),
            AccessPattern::new()
                .with_read_flag()
                .with_dereference_flag()
        )
    );
    assert_eq!(params.len(), 5);
}
...@@ -42,6 +42,7 @@ mod state; ...@@ -42,6 +42,7 @@ mod state;
use state::State; use state::State;
mod access_pattern; mod access_pattern;
pub use access_pattern::AccessPattern; pub use access_pattern::AccessPattern;
mod stubs;
/// Generate the computation object for the fixpoint computation /// Generate the computation object for the fixpoint computation
/// and set the node values for all function entry nodes. /// and set the node values for all function entry nodes.
......
...@@ -99,9 +99,18 @@ impl State { ...@@ -99,9 +99,18 @@ impl State {
&mut self, &mut self,
address: DataDomain<BitvectorDomain>, address: DataDomain<BitvectorDomain>,
size: ByteSize, size: ByteSize,
global_memory: Option<&RuntimeMemoryImage>,
) -> DataDomain<BitvectorDomain> { ) -> DataDomain<BitvectorDomain> {
if let Some(stack_offset) = self.get_offset_if_exact_stack_pointer(&address) { if let Some(stack_offset) = self.get_offset_if_exact_stack_pointer(&address) {
self.load_value_from_stack(stack_offset, size) self.load_value_from_stack(stack_offset, size)
} else if let (Ok(global_address), Some(global_mem)) =
(address.try_to_bitvec(), global_memory)
{
if let Ok(Some(value)) = global_mem.read(&global_address, size) {
value.into()
} else {
DataDomain::new_top(size)
}
} else { } else {
DataDomain::new_top(size) DataDomain::new_top(size)
} }
...@@ -220,6 +229,19 @@ impl State { ...@@ -220,6 +229,19 @@ impl State {
None None
} }
/// Merge the provided access pattern into the access pattern
/// that `self` tracks for the given abstract identifier.
///
/// Identifiers that are not already tracked in `self` are ignored,
/// i.e. this function never adds a new identifier to the list of tracked identifiers.
pub fn merge_access_pattern_of_id(
    &mut self,
    id: &AbstractIdentifier,
    access_pattern: &AccessPattern,
) {
    let tracked_pattern = match self.tracked_ids.get_mut(id) {
        Some(pattern) => pattern,
        // Untracked identifier: nothing to update.
        None => return,
    };
    let merged_pattern = tracked_pattern.merge(access_pattern);
    *tracked_pattern = merged_pattern;
}
/// Evaluate the value of the given expression on the current state. /// Evaluate the value of the given expression on the current state.
pub fn eval(&self, expression: &Expression) -> DataDomain<BitvectorDomain> { pub fn eval(&self, expression: &Expression) -> DataDomain<BitvectorDomain> {
match expression { match expression {
...@@ -255,7 +277,7 @@ impl State { ...@@ -255,7 +277,7 @@ impl State {
} => { } => {
self.set_deref_flag_for_input_ids_of_expression(address); self.set_deref_flag_for_input_ids_of_expression(address);
let address = self.eval(address); let address = self.eval(address);
self.load_value(address, *size) self.load_value(address, *size, None)
} }
} }
} }
......
...@@ -5,15 +5,15 @@ impl State { ...@@ -5,15 +5,15 @@ impl State {
/// ///
/// Marks every possible input ID as accessed and writes to every return register a value /// Marks every possible input ID as accessed and writes to every return register a value
/// that may point to any of the input IDs. /// that may point to any of the input IDs.
pub fn handle_extern_symbol( pub fn handle_generic_extern_symbol(
&mut self, &mut self,
call: &Term<Jmp>, call_tid: &Tid,
extern_symbol: &ExternSymbol, extern_symbol: &ExternSymbol,
calling_convention: &CallingConvention, calling_convention: &CallingConvention,
) { ) {
let input_ids = self.collect_input_ids_of_call(&extern_symbol.parameters); let input_ids = self.collect_input_ids_of_call(&extern_symbol.parameters);
self.clear_non_callee_saved_register(&calling_convention.callee_saved_register); self.clear_non_callee_saved_register(&calling_convention.callee_saved_register);
self.generate_return_values_for_call(&input_ids, &extern_symbol.return_values, &call.tid); self.generate_return_values_for_call(&input_ids, &extern_symbol.return_values, call_tid);
} }
/// Handle a call to a completely unknown function /// Handle a call to a completely unknown function
......
...@@ -78,7 +78,7 @@ fn test_store_and_load_from_stack() { ...@@ -78,7 +78,7 @@ fn test_store_and_load_from_stack() {
state.stack.get(Bitvector::from_i32(-4), ByteSize::new(4)), state.stack.get(Bitvector::from_i32(-4), ByteSize::new(4)),
value.clone() value.clone()
); );
assert_eq!(state.load_value(address, ByteSize::new(4)), value); assert_eq!(state.load_value(address, ByteSize::new(4), None), value);
// Load a parameter register and check that the parameter gets generated // Load a parameter register and check that the parameter gets generated
let address = DataDomain::from_target(mock_stack_id(), Bitvector::from_i32(4).into()); let address = DataDomain::from_target(mock_stack_id(), Bitvector::from_i32(4).into());
let stack_param_id = mock_stack_param_id(4, 4); let stack_param_id = mock_stack_param_id(4, 4);
...@@ -86,7 +86,7 @@ fn test_store_and_load_from_stack() { ...@@ -86,7 +86,7 @@ fn test_store_and_load_from_stack() {
DataDomain::from_target(stack_param_id.clone(), Bitvector::from_i32(0).into()); DataDomain::from_target(stack_param_id.clone(), Bitvector::from_i32(0).into());
assert_eq!(state.tracked_ids.iter().len(), 6); assert_eq!(state.tracked_ids.iter().len(), 6);
assert_eq!( assert_eq!(
state.load_value(address.clone(), ByteSize::new(4)), state.load_value(address.clone(), ByteSize::new(4), None),
stack_param stack_param
); );
assert_eq!(state.tracked_ids.iter().len(), 7); assert_eq!(state.tracked_ids.iter().len(), 7);
...@@ -108,7 +108,7 @@ fn test_load_unsized_from_stack() { ...@@ -108,7 +108,7 @@ fn test_load_unsized_from_stack() {
let stack_param_id = mock_stack_param_id(0, 4); let stack_param_id = mock_stack_param_id(0, 4);
let stack_param = let stack_param =
DataDomain::from_target(stack_param_id.clone(), Bitvector::from_i32(0).into()); DataDomain::from_target(stack_param_id.clone(), Bitvector::from_i32(0).into());
state.load_value(address, ByteSize::new(4)); state.load_value(address, ByteSize::new(4), None);
let unsized_load = state.load_unsized_value_from_stack(Bitvector::from_i32(0)); let unsized_load = state.load_unsized_value_from_stack(Bitvector::from_i32(0));
assert_eq!(unsized_load, stack_param); assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some()); assert!(state.tracked_ids.get(&stack_param_id).is_some());
...@@ -145,18 +145,12 @@ fn test_extern_symbol_handling() { ...@@ -145,18 +145,12 @@ fn test_extern_symbol_handling() {
let mut state = State::mock_arm32(); let mut state = State::mock_arm32();
let extern_symbol = ExternSymbol::mock_arm32("mock_symbol"); let extern_symbol = ExternSymbol::mock_arm32("mock_symbol");
let cconv = CallingConvention::mock_arm32(); let cconv = CallingConvention::mock_arm32();
let call = Term { let call_tid = Tid::new("call_tid");
tid: Tid::new("call_tid"),
term: Jmp::Call {
target: extern_symbol.tid.clone(),
return_: Some(Tid::new("return_tid")),
},
};
let param_id = AbstractIdentifier::from_var(Tid::new("mock_fn"), &Variable::mock("r0", 4)); let param_id = AbstractIdentifier::from_var(Tid::new("mock_fn"), &Variable::mock("r0", 4));
let return_val_id = let return_val_id =
AbstractIdentifier::from_var(Tid::new("call_tid"), &Variable::mock("r0", 4)); AbstractIdentifier::from_var(Tid::new("call_tid"), &Variable::mock("r0", 4));
// Test extern symbol handling. // Test extern symbol handling.
state.handle_extern_symbol(&call, &extern_symbol, &cconv); state.handle_generic_extern_symbol(&call_tid, &extern_symbol, &cconv);
assert_eq!( assert_eq!(
state state
.tracked_ids .tracked_ids
......
use super::State;
use crate::abstract_domain::AbstractDomain;
use crate::abstract_domain::BitvectorDomain;
use crate::abstract_domain::DataDomain;
use crate::abstract_domain::SizedDomain;
use crate::intermediate_representation::Project;
use crate::{
analysis::function_signature::AccessPattern, intermediate_representation::ExternSymbol,
prelude::*,
};
use std::collections::BTreeMap;
/// Returns a map that maps the names of known extern functions to the access patterns for their parameters.
///
/// The access patterns are ordered in the same order as the parameters
/// (i.e. the first access pattern corresponds to the first parameter and so on).
/// For variadic functions only the fixed parameters are listed here.
///
/// Three kinds of access patterns are used in the table:
/// - `read()`: the parameter value is read, but not dereferenced,
/// - `deref()`: the parameter is read and dereferenced for read access,
/// - `deref_mut()`: the parameter is read and dereferenced with write access to the pointer target.
pub fn generate_param_access_stubs() -> BTreeMap<&'static str, Vec<AccessPattern>> {
    let read = || AccessPattern::new().with_read_flag();
    let deref = || {
        AccessPattern::new()
            .with_read_flag()
            .with_dereference_flag()
    };
    let deref_mut = || {
        AccessPattern::new()
            .with_read_flag()
            .with_dereference_flag()
            .with_mutably_dereferenced_flag()
    };

    BTreeMap::from([
        ("abort", vec![]),
        ("atoi", vec![deref()]),
        ("bind", vec![read(), deref(), read()]),
        ("calloc", vec![read(), read()]),
        ("close", vec![read()]),
        ("connect", vec![read(), deref(), read()]),
        ("exit", vec![read()]),
        ("fclose", vec![deref_mut()]),
        ("fflush", vec![deref_mut()]),
        ("fgets", vec![deref_mut(), read(), deref_mut()]),
        ("fopen", vec![deref(), deref()]),
        ("fork", vec![]),
        ("fprintf", vec![deref_mut(), deref()]),
        ("fputc", vec![read(), deref_mut()]),
        ("fputs", vec![deref(), deref_mut()]),
        ("fread", vec![deref_mut(), read(), read(), deref_mut()]),
        ("free", vec![deref_mut()]),
        ("fwrite", vec![deref(), read(), read(), deref_mut()]),
        ("getenv", vec![deref()]), // FIXME: Not exactly allocating, but still returns a pointer to another memory region.
        ("getpid", vec![]),
        ("getppid", vec![]),
        ("gettimeofday", vec![deref_mut(), deref_mut()]),
        ("kill", vec![read(), read()]),
        ("localtime", vec![deref()]), // FIXME: The return value is a pointer to static storage.
        ("malloc", vec![read()]),
        ("memcmp", vec![deref(), deref(), read()]),
        ("memcpy", vec![deref_mut(), deref(), read()]),
        ("memmove", vec![deref_mut(), deref(), read()]),
        ("memset", vec![deref_mut(), read(), read()]),
        ("open", vec![deref(), read(), read()]),
        ("open64", vec![deref(), read(), read()]),
        ("perror", vec![deref()]),
        ("printf", vec![deref()]),
        ("putchar", vec![read()]),
        ("puts", vec![deref()]),
        ("qsort", vec![deref_mut(), read(), read(), deref()]),
        // `raise` takes the signal number as its single (by-value) parameter.
        ("raise", vec![read()]),
        ("read", vec![read(), deref_mut(), read()]),
        ("realloc", vec![deref_mut(), read()]),
        ("recv", vec![read(), deref_mut(), read(), read()]),
        (
            "recvfrom",
            vec![
                read(),
                deref_mut(),
                read(),
                read(),
                deref_mut(),
                deref_mut(),
            ],
        ),
        (
            "select",
            vec![read(), deref_mut(), deref_mut(), deref_mut(), deref()],
        ),
        (
            "sendto",
            vec![read(), deref(), read(), read(), deref(), read()],
        ),
        (
            "setsockopt",
            vec![read(), read(), read(), deref_mut(), read()],
        ), // FIXME: The deref_mut parameter may only be deref?
        ("signal", vec![read(), read()]),
        ("sleep", vec![read()]),
        ("snprintf", vec![deref_mut(), read(), deref()]),
        ("socket", vec![read(), read(), read()]),
        ("sprintf", vec![deref_mut(), deref()]),
        ("sscanf", vec![deref(), deref()]),
        ("strcasecmp", vec![deref(), deref()]),
        ("strcat", vec![deref_mut(), deref()]),
        ("strchr", vec![deref(), read()]),
        ("strcmp", vec![deref(), deref()]),
        ("strcpy", vec![deref_mut(), deref()]),
        ("strdup", vec![deref()]),
        ("strerror", vec![read()]),
        ("strlen", vec![deref()]),
        ("strncasecmp", vec![deref(), deref(), read()]),
        ("strncat", vec![deref_mut(), deref(), read()]),
        ("strncmp", vec![deref(), deref(), read()]),
        ("strncpy", vec![deref_mut(), deref(), read()]),
        ("strrchr", vec![deref(), read()]),
        ("strstr", vec![deref(), deref()]),
        ("strtol", vec![deref(), deref_mut(), read()]), // FIXME: We could specify the value written to the second parameter.
        ("strtoul", vec![deref(), deref_mut(), read()]), // FIXME: We could specify the value written to the second parameter.
        ("system", vec![deref()]),
        ("time", vec![deref_mut()]),
        ("unlink", vec![deref()]),
        ("vfprintf", vec![deref_mut(), deref(), deref()]),
        ("write", vec![read(), deref(), read()]),
    ])
}
/// Build the table of stubbed variadic symbols.
///
/// Each symbol name is mapped to a tuple consisting of:
/// - the index of the format string parameter of the symbol,
/// - the access pattern that the called symbol uses to access its variadic parameters.
///
/// The real access pattern may differ between the variadic parameters of one call,
/// e.g. some parameters of a `printf` call may only be read and not dereferenced.
/// The table approximates all of them by the maximal possible access to these parameters.
pub fn get_stubbed_variadic_symbols() -> BTreeMap<&'static str, (usize, AccessPattern)> {
    // Variadic parameters of the `printf` family are at most read through.
    let read_through = || {
        AccessPattern::new()
            .with_read_flag()
            .with_dereference_flag()
    };
    // Variadic parameters of `sscanf` are additionally written through.
    let written_through = AccessPattern::new()
        .with_read_flag()
        .with_dereference_flag()
        .with_mutably_dereferenced_flag();

    let mut variadic_symbols = BTreeMap::new();
    variadic_symbols.insert("fprintf", (1, read_through()));
    variadic_symbols.insert("printf", (0, read_through()));
    variadic_symbols.insert("snprintf", (2, read_through()));
    variadic_symbols.insert("sprintf", (1, read_through()));
    variadic_symbols.insert("sscanf", (1, written_through));
    variadic_symbols
}
/// Compute the return value of a call to a known extern symbol from the given state.
///
/// Note that this function needs to be called before non-callee-saved registers are cleared from the state,
/// since the return value is usually computed out of the parameter values.
///
/// This function should only be called for symbols contained in the list returned by [generate_param_access_stubs],
/// since it assumes untracked return values (e.g. integers or void) for all not explicitly handled symbols.
pub fn compute_return_value_for_stubbed_function(
project: &Project,
state: &mut State,
extern_symbol: &ExternSymbol,
call_tid: &Tid,
) -> DataDomain<BitvectorDomain> {
use return_value_stubs::*;
match extern_symbol.name.as_str() {
"memcpy" | "memmove" | "memset" | "strcat" | "strcpy" | "strncat" | "strncpy" => {
copy_param(state, extern_symbol, 0)
}
"fgets" => or_null(copy_param(state, extern_symbol, 0)),
"calloc" | "fopen" | "malloc" | "strdup" => {
or_null(new_mem_object_id(call_tid, &extern_symbol.return_values[0]))
}
"realloc" => or_null(
copy_param(state, extern_symbol, 0).merge(&new_mem_object_id(
call_tid,
&extern_symbol.return_values[0],
)),
),
"strchr" | "strrchr" | "strstr" => {
or_null(param_plus_unknown_offset(state, extern_symbol, 0))
}
_ => untracked(project.stack_pointer_register.size),
}
}
/// Building blocks for the return values of calls to stubbed extern symbols.
pub mod return_value_stubs {
    use crate::{abstract_domain::AbstractIdentifier, intermediate_representation::Arg};

    use super::*;

    /// Return value for anything that is not a pointer:
    /// a `Top` value of the given size.
    pub fn untracked(register_size: ByteSize) -> DataDomain<BitvectorDomain> {
        DataDomain::new_top(register_size)
    }

    /// Return value that equals the value of the parameter with the given index,
    /// as evaluated on the given state.
    pub fn copy_param(
        state: &mut State,
        extern_symbol: &ExternSymbol,
        param_index: usize,
    ) -> DataDomain<BitvectorDomain> {
        let param = &extern_symbol.parameters[param_index];
        state.eval_parameter_arg(param)
    }

    /// Return value that points to the start (offset zero) of a new memory object.
    /// The ID of the new object is built from the TID of the call instruction and the return register.
    pub fn new_mem_object_id(call_tid: &Tid, return_arg: &Arg) -> DataDomain<BitvectorDomain> {
        let object_id = AbstractIdentifier::from_arg(call_tid, return_arg);
        let start_offset = Bitvector::zero(return_arg.bytesize().into()).into();
        DataDomain::from_target(object_id, start_offset)
    }

    /// Return value that is the parameter with the given index plus an unknown offset.
    /// E.g. for a pointer to a string this describes a pointer to some position inside the string.
    pub fn param_plus_unknown_offset(
        state: &mut State,
        extern_symbol: &ExternSymbol,
        param_index: usize,
    ) -> DataDomain<BitvectorDomain> {
        let param = state.eval_parameter_arg(&extern_symbol.parameters[param_index]);
        let unknown_offset = BitvectorDomain::new_top(param.bytesize());
        param.add_offset(&unknown_offset)
    }

    /// Extend the given return value by the possibility of being zero.
    pub fn or_null(data: DataDomain<BitvectorDomain>) -> DataDomain<BitvectorDomain> {
        let null_value: DataDomain<BitvectorDomain> =
            Bitvector::zero(data.bytesize().into()).into();
        data.merge(&null_value)
    }
}
...@@ -218,7 +218,7 @@ pub mod tests { ...@@ -218,7 +218,7 @@ pub mod tests {
#[test] #[test]
fn test_locate_format_string() { fn test_locate_format_string() {
let sprintf_symbol = ExternSymbol::mock_string(); let sprintf_symbol = ExternSymbol::mock_sprintf_x64();
let project = mock_project(); let project = mock_project();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new()); let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&project); let mut pi_results = PointerInferenceComputation::mock(&project);
......
...@@ -375,7 +375,7 @@ mod tests { ...@@ -375,7 +375,7 @@ mod tests {
} }
} }
pub fn mock_string() -> Self { pub fn mock_sprintf_x64() -> Self {
ExternSymbol { ExternSymbol {
tid: Tid::new("sprintf"), tid: Tid::new("sprintf"),
addresses: vec!["UNKNOWN".to_string()], addresses: vec!["UNKNOWN".to_string()],
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
use crate::prelude::*; use crate::prelude::*;
use crate::{ use crate::{
abstract_domain::{IntervalDomain, TryToBitvec}, abstract_domain::TryToBitvec, analysis::pointer_inference::State as PointerInferenceState,
analysis::pointer_inference::State as PointerInferenceState,
intermediate_representation::*, intermediate_representation::*,
}; };
use regex::Regex; use regex::Regex;
...@@ -22,8 +21,9 @@ pub fn get_input_format_string( ...@@ -22,8 +21,9 @@ pub fn get_input_format_string(
.as_ref() .as_ref()
.map(|param| param.get_if_absolute_value()) .map(|param| param.get_if_absolute_value())
{ {
let address = address.try_to_bitvec()?;
return parse_format_string_destination_and_return_content( return parse_format_string_destination_and_return_content(
address.clone(), address,
runtime_memory_image, runtime_memory_image,
); );
} }
...@@ -42,19 +42,13 @@ pub fn get_input_format_string( ...@@ -42,19 +42,13 @@ pub fn get_input_format_string(
/// It checks whether the address points to another pointer in memory. /// It checks whether the address points to another pointer in memory.
/// If so, it will use the target address of that pointer read the format string from memory. /// If so, it will use the target address of that pointer read the format string from memory.
pub fn parse_format_string_destination_and_return_content( pub fn parse_format_string_destination_and_return_content(
address: IntervalDomain, address: Bitvector,
runtime_memory_image: &RuntimeMemoryImage, runtime_memory_image: &RuntimeMemoryImage,
) -> Result<String, Error> { ) -> Result<String, Error> {
if let Ok(address_vector) = address.try_to_bitvec() { match runtime_memory_image.read_string_until_null_terminator(&address) {
return match runtime_memory_image.read_string_until_null_terminator(&address_vector) { Ok(format_string) => Ok(format_string.to_string()),
Ok(format_string) => Ok(format_string.to_string()), Err(e) => Err(anyhow!("{}", e)),
Err(e) => Err(anyhow!("{}", e)),
};
} }
Err(anyhow!(
"Could not translate format string address to bitvector."
))
} }
/// Parses the format string parameters using a regex, determines their data types, /// Parses the format string parameters using a regex, determines their data types,
...@@ -124,10 +118,8 @@ pub fn get_variable_parameters( ...@@ -124,10 +118,8 @@ pub fn get_variable_parameters(
Ok(parameters) => { Ok(parameters) => {
return Ok(calculate_parameter_locations( return Ok(calculate_parameter_locations(
parameters, parameters,
project.get_calling_convention(extern_symbol), extern_symbol,
format_string_index, project,
&project.stack_pointer_register,
&project.cpu_architecture,
)); ));
} }
Err(e) => { Err(e) => {
...@@ -145,24 +137,31 @@ pub fn get_variable_parameters( ...@@ -145,24 +137,31 @@ pub fn get_variable_parameters(
/// Calculates the register and stack positions of format string parameters. /// Calculates the register and stack positions of format string parameters.
/// The parameters are then returned as an argument vector for later tainting. /// The parameters are then returned as an argument vector for later tainting.
pub fn calculate_parameter_locations( pub fn calculate_parameter_locations(
parameters: Vec<(Datatype, ByteSize)>, variadic_parameters: Vec<(Datatype, ByteSize)>,
calling_convention: &CallingConvention, extern_symbol: &ExternSymbol,
format_string_index: usize, project: &Project,
stack_register: &Variable,
cpu_arch: &str,
) -> Vec<Arg> { ) -> Vec<Arg> {
let calling_convention = project.get_calling_convention(extern_symbol);
let mut var_args: Vec<Arg> = Vec::new(); let mut var_args: Vec<Arg> = Vec::new();
// The number of the remaining integer argument registers are calculated
// from the format string position since it is the last fixed argument.
let mut integer_arg_register_count =
calling_convention.integer_parameter_register.len() - (format_string_index + 1);
let mut float_arg_register_count = calling_convention.float_parameter_register.len(); let mut float_arg_register_count = calling_convention.float_parameter_register.len();
let mut stack_offset: i64 = match cpu_arch { let mut stack_offset: i64 = match project.cpu_architecture.as_str() {
"x86" | "x86_32" | "x86_64" => u64::from(stack_register.size) as i64, "x86" | "x86_32" | "x86_64" => u64::from(project.stack_pointer_register.size) as i64,
_ => 0, _ => 0,
}; };
let mut integer_arg_register_count =
if calling_convention.integer_parameter_register.len() >= extern_symbol.parameters.len() {
calling_convention.integer_parameter_register.len() - extern_symbol.parameters.len()
} else {
for param in extern_symbol.parameters.iter() {
if let Ok(offset) = param.eval_stack_offset() {
let offset_after = offset.try_to_u64().unwrap() + u64::from(param.bytesize());
stack_offset = std::cmp::max(stack_offset, offset_after as i64);
}
}
0
};
for (data_type, size) in parameters.iter() { for (data_type, size) in variadic_parameters.iter() {
match data_type { match data_type {
Datatype::Integer | Datatype::Pointer | Datatype::Char => { Datatype::Integer | Datatype::Pointer | Datatype::Char => {
if integer_arg_register_count > 0 { if integer_arg_register_count > 0 {
...@@ -183,7 +182,7 @@ pub fn calculate_parameter_locations( ...@@ -183,7 +182,7 @@ pub fn calculate_parameter_locations(
*size, *size,
stack_offset, stack_offset,
data_type.clone(), data_type.clone(),
stack_register, &project.stack_pointer_register,
)); ));
stack_offset += u64::from(*size) as i64 stack_offset += u64::from(*size) as i64
} }
...@@ -204,7 +203,7 @@ pub fn calculate_parameter_locations( ...@@ -204,7 +203,7 @@ pub fn calculate_parameter_locations(
*size, *size,
stack_offset, stack_offset,
data_type.clone(), data_type.clone(),
stack_register, &project.stack_pointer_register,
)); ));
stack_offset += u64::from(*size) as i64 stack_offset += u64::from(*size) as i64
} }
...@@ -217,7 +216,7 @@ pub fn calculate_parameter_locations( ...@@ -217,7 +216,7 @@ pub fn calculate_parameter_locations(
} }
/// Creates a stack parameter given a size, stack offset and data type. /// Creates a stack parameter given a size, stack offset and data type.
pub fn create_stack_arg( fn create_stack_arg(
size: ByteSize, size: ByteSize,
stack_offset: i64, stack_offset: i64,
data_type: Datatype, data_type: Datatype,
...@@ -231,7 +230,7 @@ pub fn create_stack_arg( ...@@ -231,7 +230,7 @@ pub fn create_stack_arg(
} }
/// Creates a register parameter given a size, register name and data type. /// Creates a register parameter given a size, register name and data type.
pub fn create_register_arg(expr: Expression, data_type: Datatype) -> Arg { fn create_register_arg(expr: Expression, data_type: Datatype) -> Arg {
Arg::Register { Arg::Register {
expr, expr,
data_type: Some(data_type), data_type: Some(data_type),
......
use crate::intermediate_representation::{Bitvector, Tid}; use crate::{
abstract_domain::IntervalDomain,
intermediate_representation::{Bitvector, Tid},
};
use super::*; use super::*;
...@@ -10,7 +13,7 @@ fn mock_pi_state() -> PointerInferenceState { ...@@ -10,7 +13,7 @@ fn mock_pi_state() -> PointerInferenceState {
/// Tests extraction of format string parameters '/dev/sd%c%d' and 'cat %s'. /// Tests extraction of format string parameters '/dev/sd%c%d' and 'cat %s'.
fn test_get_variable_parameters() { fn test_get_variable_parameters() {
let mut pi_state = mock_pi_state(); let mut pi_state = mock_pi_state();
let sprintf_symbol = ExternSymbol::mock_string(); let sprintf_symbol = ExternSymbol::mock_sprintf_x64();
let mut format_string_index_map: HashMap<String, usize> = HashMap::new(); let mut format_string_index_map: HashMap<String, usize> = HashMap::new();
format_string_index_map.insert("sprintf".to_string(), 1); format_string_index_map.insert("sprintf".to_string(), 1);
let global_address = Bitvector::from_str_radix(16, "5000").unwrap(); let global_address = Bitvector::from_str_radix(16, "5000").unwrap();
...@@ -68,7 +71,7 @@ fn test_get_variable_parameters() { ...@@ -68,7 +71,7 @@ fn test_get_variable_parameters() {
fn test_get_input_format_string() { fn test_get_input_format_string() {
let mem_image = RuntimeMemoryImage::mock(); let mem_image = RuntimeMemoryImage::mock();
let mut pi_state = mock_pi_state(); let mut pi_state = mock_pi_state();
let sprintf_symbol = ExternSymbol::mock_string(); let sprintf_symbol = ExternSymbol::mock_sprintf_x64();
let global_address = Bitvector::from_str_radix(16, "3002").unwrap(); let global_address = Bitvector::from_str_radix(16, "3002").unwrap();
pi_state.set_register( pi_state.set_register(
...@@ -85,8 +88,7 @@ fn test_get_input_format_string() { ...@@ -85,8 +88,7 @@ fn test_get_input_format_string() {
#[test] #[test]
fn test_parse_format_string_destination_and_return_content() { fn test_parse_format_string_destination_and_return_content() {
let mem_image = RuntimeMemoryImage::mock(); let mem_image = RuntimeMemoryImage::mock();
let string_address_vector = Bitvector::from_str_radix(16, "3002").unwrap(); let string_address = Bitvector::from_str_radix(16, "3002").unwrap();
let string_address = IntervalDomain::new(string_address_vector.clone(), string_address_vector);
assert_eq!( assert_eq!(
"Hello World", "Hello World",
...@@ -154,8 +156,8 @@ fn test_parse_format_string_parameters() { ...@@ -154,8 +156,8 @@ fn test_parse_format_string_parameters() {
#[test] #[test]
/// Tests tracking of parameters according to format string /// Tests tracking of parameters according to format string
fn test_calculate_parameter_locations() { fn test_calculate_parameter_locations() {
let cconv = CallingConvention::mock_x64(); let project = Project::mock_x64();
let format_string_index: usize = 1; let extern_symbol = ExternSymbol::mock_sprintf_x64();
let mut parameters: Vec<(Datatype, ByteSize)> = Vec::new(); let mut parameters: Vec<(Datatype, ByteSize)> = Vec::new();
parameters.push(("d".to_string().into(), ByteSize::new(8))); parameters.push(("d".to_string().into(), ByteSize::new(8)));
parameters.push(("f".to_string().into(), ByteSize::new(16))); parameters.push(("f".to_string().into(), ByteSize::new(16)));
...@@ -183,13 +185,7 @@ fn test_calculate_parameter_locations() { ...@@ -183,13 +185,7 @@ fn test_calculate_parameter_locations() {
// Test Case 1: The string parameter is still written in the RCX register since 'f' is contained in the float register. // Test Case 1: The string parameter is still written in the RCX register since 'f' is contained in the float register.
assert_eq!( assert_eq!(
expected_args, expected_args,
calculate_parameter_locations( calculate_parameter_locations(parameters.clone(), &extern_symbol, &project,)
parameters.clone(),
&cconv,
format_string_index,
&Variable::mock("RSP", 8),
"x86_64"
)
); );
parameters.push(("s".to_string().into(), ByteSize::new(8))); parameters.push(("s".to_string().into(), ByteSize::new(8)));
...@@ -213,13 +209,7 @@ fn test_calculate_parameter_locations() { ...@@ -213,13 +209,7 @@ fn test_calculate_parameter_locations() {
// Test Case 2: Three further string parameter does not fit into the registers anymore and one is written into the stack. // Test Case 2: Three further string parameter does not fit into the registers anymore and one is written into the stack.
assert_eq!( assert_eq!(
expected_args, expected_args,
calculate_parameter_locations( calculate_parameter_locations(parameters, &extern_symbol, &project)
parameters,
&cconv,
format_string_index,
&Variable::mock("RSP", 8),
"x86_64"
)
); );
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment