Unverified commit f6ced95c by Enkelmann, committed by GitHub

Implement tracking of nested parameters (#432)

parent 270b4d4e
use crate::prelude::*;
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
/// or an offset to a pointer to another memory location,
/// where the value can be found by (recursively) following the embedded `target` memory location.
///
/// The offset and size variables are given in bytes.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
/// A location inside the current memory object.
Location {
/// The offset with respect to the zero offset of the memory object where the value can be found.
offset: i64,
/// The size in bytes of the value that the memory location points to.
size: ByteSize,
},
/// A pointer that needs to be followed to get to the actual memory location.
Pointer {
/// The offset inside the current memory object where the pointer can be found.
offset: i64,
/// The memory location inside the target of the pointer that this memory location points to.
target: Box<AbstractMemoryLocation>,
},
}
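// Illustrative sketch (helper name and values are hypothetical): the nested access
// `*(*(obj + 8) + 16)`, read as a 4-byte value, corresponds to following the pointer
// stored at offset 8 and then taking the value at offset 16 inside its target object.
#[allow(dead_code)]
fn example_nested_location() -> AbstractMemoryLocation {
    AbstractMemoryLocation::Pointer {
        offset: 8,
        target: Box::new(AbstractMemoryLocation::Location {
            offset: 16,
            size: ByteSize::new(4),
        }),
    }
}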
impl AbstractMemoryLocation {
/// Get the abstract memory location representing the pointer pointing to the memory object
/// that contains the location represented by `self`
/// together with the offset that one has to add to the pointer to get the location of self.
///
/// If `self` is a location (and not a pointer), return the offset in the location instead.
pub fn get_parent_location(
&self,
generic_pointer_size: ByteSize,
) -> Result<(AbstractMemoryLocation, i64), i64> {
match self {
Self::Location { offset, .. } => Err(*offset),
Self::Pointer { offset, target } => {
match target.get_parent_location(generic_pointer_size) {
Ok((inner_parent, innermost_offset)) => Ok((
Self::Pointer {
offset: *offset,
target: Box::new(inner_parent),
},
innermost_offset,
)),
Err(inner_offset) => Ok((
Self::Location {
offset: *offset,
size: generic_pointer_size,
},
inner_offset,
)),
}
}
}
}
/// Add an offset to a memory location.
pub fn add_offset(&mut self, addendum: i64) {
match self {
Self::Location { offset, .. } => *offset += addendum,
Self::Pointer { target, .. } => target.add_offset(addendum),
}
}
/// Add an offset to the root location of the memory location.
pub fn add_offset_at_root(&mut self, addendum: i64) {
match self {
Self::Location { offset, .. } | Self::Pointer { offset, .. } => *offset += addendum,
}
}
/// Dereference the pointer that `self` is pointing to.
///
/// Panics if the old value of `self` is not pointer-sized.
pub fn dereference(&mut self, new_size: ByteSize, generic_pointer_size: ByteSize) {
match self {
Self::Pointer { target, .. } => target.dereference(new_size, generic_pointer_size),
Self::Location { offset, size } => {
assert_eq!(
*size, generic_pointer_size,
"Cannot dereference an abstract memory location that is not pointer-sized."
);
*self = Self::Pointer {
offset: *offset,
target: Box::new(Self::Location {
offset: 0,
size: new_size,
}),
}
}
};
}
/// Extend the location by adding further dereference operations to it according to the given extension.
pub fn extend(&mut self, extension: AbstractMemoryLocation, generic_pointer_size: ByteSize) {
match self {
Self::Location { offset, size } => {
assert_eq!(*size, generic_pointer_size);
*self = Self::Pointer {
offset: *offset,
target: Box::new(extension),
};
}
Self::Pointer { target, .. } => target.extend(extension, generic_pointer_size),
}
}
/// Get the bytesize of the value represented by the abstract memory location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Location { size, .. } => *size,
Self::Pointer { target, .. } => target.bytesize(),
}
}
/// Get the recursion depth of the abstract memory location,
/// i.e. how many times one has to dereference a pointer until reaching the actual location.
pub fn recursion_depth(&self) -> u64 {
match self {
Self::Location { .. } => 0,
Self::Pointer { target, .. } => 1 + target.recursion_depth(),
}
}
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Location { offset, .. } => write!(formatter, "[0x{offset:x}]"),
Self::Pointer { offset, target } => write!(formatter, "[0x{offset:x}]{target}"),
}
}
}
#[cfg(test)]
pub mod tests {
use super::*;
impl AbstractMemoryLocation {
/// Mock a memory location with a given sequence of offsets.
/// The first element in the sequence is the root offset.
pub fn mock(offsets: &[i64], size: impl Into<ByteSize>) -> AbstractMemoryLocation {
match offsets {
[] => panic!(),
[offset] => AbstractMemoryLocation::Location {
offset: *offset,
size: size.into(),
},
[offset, tail @ ..] => AbstractMemoryLocation::Pointer {
offset: *offset,
target: Box::new(AbstractMemoryLocation::mock(tail, size)),
},
}
}
}
#[test]
fn test_mock() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
assert_eq!(&format!("{loc}"), "[0x1][0x2][0x3]");
}
#[test]
fn test_get_parent_location() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
let (parent_loc, last_offset) = loc.get_parent_location(ByteSize::new(8)).unwrap();
assert_eq!(parent_loc, AbstractMemoryLocation::mock(&[1, 2], 8));
assert_eq!(last_offset, 3);
let loc = AbstractMemoryLocation::mock(&[1], 4);
assert!(loc.get_parent_location(ByteSize::new(8)).is_err());
}
#[test]
fn test_offset_addendums() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
loc.add_offset(6);
assert_eq!(&loc, &AbstractMemoryLocation::mock(&[1, 2, 9], 4));
loc.add_offset_at_root(-5);
assert_eq!(&loc, &AbstractMemoryLocation::mock(&[-4, 2, 9], 4));
}
#[test]
fn test_dereference() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
loc.dereference(ByteSize::new(8), ByteSize::new(4));
assert_eq!(loc, AbstractMemoryLocation::mock(&[1, 2, 3, 0], 8))
}
#[test]
fn test_extend() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
let extension = AbstractMemoryLocation::mock(&[4, 5, 6], 1);
loc.extend(extension, ByteSize::new(4));
assert_eq!(loc, AbstractMemoryLocation::mock(&[1, 2, 3, 4, 5, 6], 1));
}
#[test]
fn test_recursion_depth() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
assert_eq!(loc.recursion_depth(), 2);
let loc = AbstractMemoryLocation::mock(&[1], 4);
assert_eq!(loc.recursion_depth(), 0);
}
}
......@@ -3,12 +3,17 @@ use crate::prelude::*;
use derive_more::Deref;
use std::sync::Arc;
mod location;
pub use location::AbstractLocation;
mod mem_location;
pub use mem_location::AbstractMemoryLocation;
/// An abstract identifier is used to identify an object or a value in an abstract state.
///
/// Since many program states can be represented by the same abstract state in data-flow analysis,
/// one sometimes needs a way to uniquely identify a variable or a memory object in all of the represented program states.
/// Abstract identifiers achieve this by identifying a *time*, i.e. a specific abstract state,
/// and a *location*, i.e. a recipe for computing a concrete value from any concrete state that is represented by the abstract state.
/// The value in question then serves as the identifier.
/// For example, a pointer may uniquely determine the memory object it is pointing to.
/// Or a value may represent the value of a variable at a certain time,
......@@ -20,15 +25,15 @@ use std::sync::Arc;
/// E.g. it may represent the union of all values at the specific *location* for each time the program point is visited during an execution trace
/// or it may only represent the value at the last time the program point was visited.
///
/// Alternatively, one can also add path hints to an identifier to further distinguish points in time in an execution trace.
/// Path hints are given as a possibly empty array of time identifiers.
/// To prevent infinitely long path hints, each time identifier is only allowed to appear at most once in the array.
/// The specific meaning of the path hints depends upon the use case.
///
/// An abstract identifier is given by a time identifier, a location identifier and a path hints array (containing time identifiers).
///
/// For the location identifier see [`AbstractLocation`].
/// The time identifier is given by a [`Tid`].
/// If it is the `Tid` of a basic block, then it describes the point in time *before* execution of the first instruction in the block.
/// If it is the `Tid` of a `Def` or `Jmp`, then it describes the point in time *after* the execution of the `Def` or `Jmp`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Deref)]
......@@ -161,136 +166,6 @@ impl std::fmt::Display for AbstractIdentifier {
}
}
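// Illustrative sketch (TID and register are hypothetical): an identifier for
// "the value of RAX directly after the call with TID `call_tid`", combining
// a *time* (the call TID) with a *location* (the register).
#[allow(dead_code)]
fn example_identifier() -> AbstractIdentifier {
    AbstractIdentifier::new(
        Tid::new("call_tid"),
        AbstractLocation::Register(crate::variable!("RAX:8")),
    )
}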
/// An abstract location describes how to find the value of a variable in memory at a given time.
///
/// It is defined recursively, where the root is always a register.
/// This way only locations that the local state knows about are representable.
/// It is also impossible to accidentally describe circular references.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractLocation {
/// The location is given by a register.
Register(Variable),
/// The value itself is a constant address to global memory.
/// Note that the `size` is the size of the pointer and not the size
/// of the value residing at the specific address in global memory.
GlobalAddress {
/// The address in global memory.
address: u64,
/// The byte size of the address (not the pointed-to value!).
size: ByteSize,
},
/// The location is in memory.
/// One needs to follow the pointer in the given register
/// and then follow the abstract memory location inside the pointed to memory object
/// to find the actual memory location.
Pointer(Variable, AbstractMemoryLocation),
/// The location is in memory.
/// One needs to follow the pointer located at the given global address
/// and then follow the abstract memory location inside the pointed to memory object
/// to find the actual memory location.
GlobalPointer(u64, AbstractMemoryLocation),
}
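// Illustrative sketch (values are hypothetical): the 8-byte stack value at address
// `RSP + 16`, expressed as one dereference of the stack register followed by an
// offset into the pointed-to memory object (cf. `from_stack_position` below).
#[allow(dead_code)]
fn example_stack_location() -> AbstractLocation {
    AbstractLocation::Pointer(
        crate::variable!("RSP:8"),
        AbstractMemoryLocation::Location {
            offset: 16,
            size: ByteSize::new(8),
        },
    )
}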
impl std::fmt::Display for AbstractLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Register(var) => write!(formatter, "{}", var.name),
Self::GlobalAddress { address, size: _ } => write!(formatter, "0x{address:x}"),
Self::Pointer(var, location) => write!(formatter, "{}->{}", var.name, location),
Self::GlobalPointer(address, location) => {
write!(formatter, "0x{address:x}->{location}")
}
}
}
}
impl AbstractLocation {
/// Create an abstract location from a variable corresponding to a register.
/// This function returns an error if the variable is not a physical register.
pub fn from_var(variable: &Variable) -> Result<AbstractLocation, Error> {
if variable.is_temp {
return Err(anyhow!(
"Cannot create abstract location from temporary variables."
));
}
Ok(AbstractLocation::Register(variable.clone()))
}
/// Create an abstract location on the stack.
/// The returned location describes the value of the given `size`
/// at the given `offset` relative to the memory location that the `stack_register` is pointing to.
pub fn from_stack_position(
stack_register: &Variable,
offset: i64,
size: ByteSize,
) -> AbstractLocation {
let stack_pos = AbstractMemoryLocation::Location { offset, size };
AbstractLocation::Pointer(stack_register.clone(), stack_pos)
}
/// Create an abstract location representing an address pointing to global memory.
pub fn from_global_address(address: &Bitvector) -> AbstractLocation {
let size = address.bytesize();
let address = address
.try_to_u64()
.expect("Global address larger than 64 bits encountered.");
AbstractLocation::GlobalAddress { address, size }
}
/// Get the bytesize of the value represented by the abstract location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Register(var) => var.size,
Self::GlobalAddress { size, .. } => *size,
Self::Pointer(_, mem_location) | Self::GlobalPointer(_, mem_location) => {
mem_location.bytesize()
}
}
}
}
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
/// or an offset to a pointer to another memory location,
/// where the value can be found by (recursively) following the embedded `target` memory location.
///
/// The offset and size variables are given in bytes.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
/// A location inside the current memory object.
Location {
/// The offset with respect to the zero offset of the memory object where the value can be found.
offset: i64,
/// The size in bytes of the value that the memory location points to.
size: ByteSize,
},
/// A pointer that needs to be followed to get to the actual memory location.
Pointer {
/// The offset inside the current memory object where the pointer can be found.
offset: i64,
/// The memory location inside the target of the pointer that this memory location points to.
target: Box<AbstractMemoryLocation>,
},
}
impl AbstractMemoryLocation {
/// Get the bytesize of the value represented by the abstract memory location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Location { size, .. } => *size,
Self::Pointer { target, .. } => target.bytesize(),
}
}
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Location { offset, .. } => write!(formatter, "({offset})"),
Self::Pointer { offset, target } => write!(formatter, "({offset})->{target}"),
}
}
}
#[cfg(test)]
pub mod tests {
use super::*;
......@@ -313,6 +188,20 @@ pub mod tests {
.unwrap(),
)
}
/// Mock an abstract identifier with the given TID name
/// and with a nested abstract location starting at the register given by `var`.
pub fn mock_nested(
tid: impl ToString,
var: &str,
offsets: &[i64],
size: impl Into<ByteSize>,
) -> Self {
AbstractIdentifier::new(
Tid::new(tid.to_string()),
AbstractLocation::mock(var, offsets, size),
)
}
}
#[test]
......
use super::*;
use crate::{analysis::forward_interprocedural_fixpoint::Context as _, bitvec, def, variable};
#[test]
fn test_compute_return_values_of_call() {
......@@ -25,21 +25,27 @@ fn test_compute_return_values_of_call() {
&call,
);
let expected_val = DataDomain::from_target(
AbstractIdentifier::from_var(Tid::new("call_tid"), &variable!("RAX:8")),
AbstractIdentifier::mock("call_tid", "RAX", 8),
bitvec!("0x0:8").into(),
);
assert_eq!(return_values.iter().len(), 3);
assert_eq!(return_values[0], (&variable!("RAX:8"), expected_val));
// Test returning a known value.
let param_ref = DataDomain::from_target(
AbstractIdentifier::from_var(Tid::new("callee"), &variable!("RDI:8")),
AbstractIdentifier::mock("callee", "RDI", 8),
bitvec!("0x0:8").into(),
);
callee_state.set_register(&variable!("RAX:8"), param_ref);
let expected_val = DataDomain::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("caller", "RDI", 8),
bitvec!("0x0:8").into(),
),
(
AbstractIdentifier::mock("call_tid", "RAX", 8),
bitvec!("0x0:8").into(),
),
]));
let return_values = context.compute_return_values_of_call(
&mut caller_state,
&callee_state,
......@@ -69,7 +75,7 @@ fn test_call_stub_handling() {
assert_eq!(
state.get_params_of_current_function(),
vec![(
Arg::from_var(variable!("r0:4"), None),
&AbstractLocation::from_var(&variable!("r0:4")).unwrap(),
AccessPattern::new().with_read_flag()
)]
);
......@@ -97,14 +103,14 @@ fn test_call_stub_handling() {
assert_eq!(
params[0],
(
Arg::from_var(variable!("r0:4"), None),
&AbstractLocation::from_var(&variable!("r0:4")).unwrap(),
AccessPattern::new_unknown_access()
)
);
assert_eq!(
params[1],
(
Arg::from_var(variable!("r2:4"), None),
&AbstractLocation::from_var(&variable!("r2:4")).unwrap(),
AccessPattern::new()
.with_read_flag()
.with_dereference_flag()
......@@ -114,6 +120,51 @@ fn test_call_stub_handling() {
}
#[test]
fn test_stack_register_adjustment_after_call() {
let project = Project::mock_x64();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
let context = Context::new(&project, &graph);
let mut state_before_call = State::mock_x64("mock_fn");
let stack_id = AbstractIdentifier::mock("mock_fn", "RSP", 8);
state_before_call.set_register(
&variable!("RSP:8"),
DataDomain::from_target(stack_id.clone(), bitvec!("0x-20:8").into()),
);
let call_term = Term {
tid: Tid::new("call_tid"),
term: Jmp::CallInd {
target: Expression::Var(variable!("R15:8")),
return_: Some(Tid::new("return_")),
},
};
// Test adjustment on extern calls
let state_after_call = context
.update_call_stub(&state_before_call, &call_term)
.unwrap();
let adjusted_sp = state_after_call.get_register(&variable!("RSP:8"));
assert_eq!(
adjusted_sp,
DataDomain::from_target(stack_id.clone(), bitvec!("0x-18:8").into())
);
// Test adjustment on intern calls
let state_before_return = State::mock_x64("callee");
let state_after_call = context
.update_return(
Some(&state_before_return),
Some(&state_before_call),
&call_term,
&call_term,
&None,
)
.unwrap();
let adjusted_sp = state_after_call.get_register(&variable!("RSP:8"));
assert_eq!(
adjusted_sp,
DataDomain::from_target(stack_id.clone(), bitvec!("0x-18:8").into())
);
}
#[test]
fn test_get_global_mem_address() {
let project = Project::mock_arm32();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
......@@ -135,3 +186,82 @@ fn test_get_global_mem_address() {
let result = context.get_global_mem_address(&value);
assert!(result.is_none());
}
#[test]
fn test_generation_of_nested_ids_and_access_patterns_on_load_and_store() {
let project = Project::mock_arm32();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
let context = Context::new(&project, &graph);
let state = State::mock_arm32();
// Load from a tracked pointer value
let def = def!["load_instr: r0:4 := Load from r1:4 + 0x10:4"];
let new_state = context.update_def(&state, &def).unwrap();
let loaded_value = new_state.get_register(&variable!("r0:4"));
assert_eq!(
loaded_value,
DataDomain::from_target(
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("r1:4", &[16], 4)
),
bitvec!("0x0:4").into()
)
);
let params = new_state.get_params_of_current_function();
assert_eq!(params.len(), 1);
assert!(params.contains(&(
&AbstractLocation::mock("r1:4", &[], 4),
AccessPattern::new()
.with_read_flag()
.with_dereference_flag()
)));
// Load from an untracked register value
let def = def!["load_instr: r0:4 := Load from r8:4 + 0x10:4"];
let new_state = context.update_def(&state, &def).unwrap();
let loaded_value = new_state.get_register(&variable!("r0:4"));
assert!(loaded_value.is_top());
assert_eq!(new_state.get_params_of_current_function(), []);
// Store a tracked pointer value
let def = def!["store_instr: Store at r0:4 := r1:4 + 0x10:4"];
let new_state = context.update_def(&state, &def).unwrap();
let params = new_state.get_params_of_current_function();
assert_eq!(params.len(), 2);
assert!(params.contains(&(
&AbstractLocation::mock("r0:4", &[], 4),
AccessPattern::new()
.with_read_flag()
.with_mutably_dereferenced_flag()
)));
assert!(params.contains(&(
&AbstractLocation::mock("r1:4", &[], 4),
AccessPattern::new().with_read_flag()
)));
// Store to an untracked register value
let def = def!["store_instr: Store at r8:4 := r1:4 + 0x10:4"];
let new_state = context.update_def(&state, &def).unwrap();
let params = new_state.get_params_of_current_function();
assert_eq!(params.len(), 1);
assert!(params.contains(&(
&AbstractLocation::mock("r1:4", &[], 4),
AccessPattern::new().with_read_flag()
)));
}
#[test]
fn test_stack_param_loaded_but_not_accessed() {
// Regression test for the case that a stack parameter is loaded into a register but then not directly accessed.
// In such a case the stack parameter must still be proactively marked as read,
// because its later usage might simply be missed by the analysis.
let project = Project::mock_arm32();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
let context = Context::new(&project, &graph);
let state = State::mock_arm32();
let def = def!["r0:4 := Load from sp:4"];
let new_state = context.update_def(&state, &def).unwrap();
let fn_sig = new_state.get_params_of_current_function();
assert!(fn_sig.contains(&(
&AbstractLocation::mock("sp:4", &[0], 4),
AccessPattern::new().with_read_flag()
)));
}
use super::*;
use crate::{bitvec, variable};
#[test]
fn test_generate_return_values_for_call() {
let mut state = State::mock_arm32();
let input_ids = BTreeSet::from([
AbstractIdentifier::mock("mock_fn", "r0", 4),
AbstractIdentifier::mock("mock_fn", "big_register", 16),
]);
let return_args = [Arg::mock_register("r1", 4)];
let call_tid = Tid::new("call");
state.generate_return_values_for_call(&input_ids, &return_args, &call_tid);
assert!(state
.tracked_ids
.get(&AbstractIdentifier::mock("call", "r1", 4))
.is_some());
let expected_return_value = DataDomain::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("mock_fn", "r0", 4),
BitvectorDomain::new_top(ByteSize::new(4)),
),
(
AbstractIdentifier::mock("call", "r1", 4),
bitvec!("0x0:4").into(),
),
]));
assert_eq!(state.register[&variable!("r1:4")], expected_return_value);
}
#[test]
fn test_get_params_of_current_function() {
let mut state = State::mock_arm32();
let param_one = AbstractIdentifier::mock("mock_fn", "param_one", 4);
let param_two = AbstractIdentifier::mock("mock_fn", "param_two", 4);
let not_param = AbstractIdentifier::mock("call_tid", "r0", 4);
let non_param_stack_offset = AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[-8], 4),
);
let global_param = AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::GlobalAddress {
address: 0x1000,
size: ByteSize::new(4),
},
);
state
.tracked_ids
.insert(param_one.clone(), AccessPattern::new().with_read_flag());
state.tracked_ids.insert(
param_two.clone(),
AccessPattern::new().with_dereference_flag(),
);
state
.tracked_ids
.insert(not_param, AccessPattern::new_unknown_access());
state
.tracked_ids
.insert(non_param_stack_offset, AccessPattern::new_unknown_access());
state
.tracked_ids
.insert(global_param.clone(), AccessPattern::new_unknown_access());
let params = state.get_params_of_current_function();
let global_params = state.get_global_mem_params_of_current_function();
assert_eq!(
params,
Vec::from([
(
param_one.get_location(),
AccessPattern::new().with_read_flag()
),
(
param_two.get_location(),
AccessPattern::new().with_dereference_flag()
)
])
);
assert_eq!(
global_params,
Vec::from([(
global_param.get_location(),
AccessPattern::new_unknown_access()
)])
);
}
#[test]
fn test_merge_parameter_access() {
let mut state = State::mock_arm32();
let num_original_tracked_ids = state.tracked_ids.len();
let global_memory = RuntimeMemoryImage::mock();
state.register.insert(
variable!("sp:4"),
DataDomain::from_target(state.stack_id.clone(), bitvec!("0x-20:4").into()),
);
state.register.insert(
variable!("r1:4"),
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r0", 4),
bitvec!("0x2:4").into(),
),
);
let param_loc = AbstractLocation::mock("r0:4", &[], 4);
let stack_param_loc = AbstractLocation::mock("sp:4", &[0], 4);
let high_stack_param_loc = AbstractLocation::mock("sp:4", &[32], 4);
let nested_param_loc = AbstractLocation::mock("r1:4", &[6], 4);
let params = [
(&param_loc, AccessPattern::new_unknown_access()),
(&stack_param_loc, AccessPattern::new_unknown_access()),
(&high_stack_param_loc, AccessPattern::new_unknown_access()),
(&nested_param_loc, AccessPattern::new_unknown_access()),
];
state.merge_parameter_access(&params, &global_memory);
// Merge normal param access
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
param_loc.clone()
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
// Do not merge/track access to local stack variable
assert!(state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[-32], 4)
))
.is_none());
// Generate new stack param if necessary
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[0], 4)
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
// Track new nested parameter (in the right register)
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("r0:4", &[8], 4)
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
assert_eq!(state.tracked_ids.len(), num_original_tracked_ids + 2);
}
#[test]
fn test_eval_param_location() {
let mut state = State::mock_arm32();
let global_memory = RuntimeMemoryImage::mock();
// Param is a register
state
.register
.insert(variable!("r0:4"), bitvec!("0x123:4").into());
let value = state.eval_param_location(&AbstractLocation::mock("r0:4", &[], 4), &global_memory);
assert_eq!(value, bitvec!("0x123:4").into());
// Param is a nested register (and values in nested objects are not tracked)
state.register.insert(
variable!("r0:4"),
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r3", 4),
bitvec!("0x0:4").into(),
),
);
let value = state.eval_param_location(&AbstractLocation::mock("r0:4", &[8], 4), &global_memory);
assert_eq!(
value,
DataDomain::from_target(
AbstractIdentifier::new(Tid::new("mock_fn"), AbstractLocation::mock("r3:4", &[8], 4)),
bitvec!("0x0:4").into()
)
);
// Read the value at a stack offset
state
.stack
.insert_at_byte_index(bitvec!("0x42:4").into(), -8);
let value =
state.eval_param_location(&AbstractLocation::mock("sp:4", &[-8], 4), &global_memory);
assert_eq!(value, bitvec!("0x42:4").into());
// Read a nested pointer from the stack. The read has to remove one level of indirection if the stack value can be read.
state.stack.insert_at_byte_index(
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r0", 4),
bitvec!("0x5:4").into(),
),
-8,
);
let value = state.eval_param_location(
&AbstractLocation::mock("sp:4", &[-8, 2, 6], 4),
&global_memory,
);
assert_eq!(
value,
DataDomain::from_target(
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("r0:4", &[7, 6], 4)
),
bitvec!("0x0:4").into()
)
);
}
......@@ -26,18 +26,6 @@ fn mock_stack_id() -> AbstractIdentifier {
AbstractIdentifier::from_var(Tid::new("mock_fn"), &variable!("sp:4"))
}
/// Mock an abstract ID of a stack parameter
fn mock_stack_param_id(offset: i64, size: u64) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::from_stack_position(
mock_stack_id().unwrap_register(),
offset,
ByteSize::new(size),
),
)
}
#[test]
fn test_new() {
let state = State::mock_arm32();
......@@ -65,61 +53,6 @@ fn test_new() {
}
#[test]
fn test_store_and_load_from_stack() {
let mut state = State::mock_arm32();
let address = DataDomain::from_target(mock_stack_id(), bitvec!("-4:4").into());
let value: DataDomain<BitvectorDomain> = bitvec!("0x0:4").into();
// write and load a value to the current stack frame
state.write_value(address.clone(), value.clone());
assert_eq!(state.stack.iter().len(), 1);
assert_eq!(
state.stack.get(bitvec!("-4:4"), ByteSize::new(4)),
value.clone()
);
assert_eq!(state.load_value(address, ByteSize::new(4), None), value);
// Load a parameter register and check that the parameter gets generated
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x4:4").into());
let stack_param_id = mock_stack_param_id(4, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
assert_eq!(state.tracked_ids.iter().len(), 6);
assert_eq!(
state.load_value(address.clone(), ByteSize::new(4), None),
stack_param
);
assert_eq!(state.tracked_ids.iter().len(), 7);
assert_eq!(
state
.tracked_ids
.get(&stack_param_id)
.unwrap()
.is_accessed(),
false
); // The load method does not set access flags.
}
#[test]
fn test_load_unsized_from_stack() {
let mut state = State::mock_arm32();
// Load an existing stack param (generated by a sized load at the same address)
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x0:4").into());
let stack_param_id = mock_stack_param_id(0, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
state.load_value(address, ByteSize::new(4), None);
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x0:4").into());
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Load a non-existing stack param
let stack_param_id = mock_stack_param_id(4, 1);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:1").into());
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x4:4"));
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Unsized load from the current stack frame
let unsized_load = state.load_unsized_value_from_stack(bitvec!("-4:4"));
assert_eq!(unsized_load, DataDomain::new_top(ByteSize::new(1)));
}
#[test]
fn test_eval() {
let mut state = State::mock_arm32();
// Test the eval method
......
use super::*;
use crate::variable;
/// Mock the abstract location of a global parameter.
fn mock_global_x64(address: u64) -> AbstractLocation {
AbstractLocation::GlobalAddress {
address,
size: ByteSize::new(8),
}
}
impl FunctionSignature {
/// Create a mock x64 function signature with 2 parameters, one of which is accessed mutably,
/// one mutably accessed global variable at address 0x2000
......@@ -7,40 +16,45 @@ impl FunctionSignature {
pub fn mock_x64() -> FunctionSignature {
let mut write_access_pattern = AccessPattern::new();
write_access_pattern.set_unknown_access_flags();
let parameters = BTreeMap::from_iter([
(
Arg::from_var(variable!("RDI:8"), None),
AbstractLocation::from_var(&variable!("RDI:8")).unwrap(),
AccessPattern::new(),
),
(
Arg::from_var(variable!("RSI:8"), None),
AbstractLocation::from_var(&variable!("RSI:8")).unwrap(),
write_access_pattern,
),
]);
FunctionSignature {
parameters,
global_parameters: BTreeMap::from([
(mock_global_x64(0x2000), AccessPattern::new_unknown_access()),
(
mock_global_x64(0x3000),
AccessPattern::new().with_dereference_flag(),
),
]),
}
}
}
fn mock_stack_arg(offset: i64, size: u64) -> AbstractLocation {
AbstractLocation::Pointer(
variable!("RSP:8"),
AbstractMemoryLocation::Location {
offset,
size: ByteSize::new(size),
},
)
}
#[test]
fn test_two_parameter_overlapping_merging() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(0x1000, 8);
let stack_parm_2 = mock_stack_arg(0x1004, 8);
func_sig
.parameters
......@@ -51,13 +65,10 @@ fn test_two_parameter_overlapping_merging() {
assert_eq!(
func_sig.sanitize(&proj),
vec!["Merged a stack parameter, that intersect another but is no subset".to_string()]
);
let mut expected_function_sig = FunctionSignature::mock_x64();
let expected_stack_arg = mock_stack_arg(0x1000, 12);
expected_function_sig
.parameters
......@@ -69,10 +80,10 @@ fn test_two_parameter_overlapping_merging() {
fn test_merging_multiple_parameters() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(0x8, 8);
let stack_parm_2 = mock_stack_arg(0x8, 1);
let stack_parm_3 = mock_stack_arg(0xf, 1);
let stack_parm_4 = mock_stack_arg(0x10, 8);
func_sig.parameters.extend([
(stack_parm_1.clone(), AccessPattern::new()),
......@@ -80,7 +91,8 @@ fn test_merging_multiple_parameters() {
(stack_parm_3, AccessPattern::new()),
(stack_parm_4.clone(), AccessPattern::new()),
]);
let logs = func_sig.sanitize(&proj);
assert_eq!(logs, Vec::<String>::new());
let mut expected_function_sig = FunctionSignature::mock_x64();
expected_function_sig.parameters.extend([
......@@ -93,8 +105,8 @@ fn test_merging_multiple_parameters() {
fn test_log_messages() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(0x1001, 8);
let stack_parm_2 = mock_stack_arg(0x1007, 4);
func_sig.parameters.extend([
(stack_parm_1.clone(), AccessPattern::new()),
......@@ -103,13 +115,10 @@ fn test_log_messages() {
let logs = func_sig.sanitize(&proj);
assert_eq!(
vec![
"Unexpected stack parameter size".to_string(),
"Unexpected stack parameter alignment".to_string()
],
logs
);
}
......@@ -6,50 +6,52 @@ impl<'a> Context<'a> {
/// to the value that represents it in the caller.
///
/// For parameter IDs this is the value of the parameter on function call.
/// For IDs of objects created in the callee it is the ID itself.
/// For other IDs (including the callee stack frame ID) it is a `Top` value,
/// i.e. the value of the ID should be unknown to the caller.
///
/// Note that this function assumes that callee-originating IDs have already been renamed
/// to the name they should represent in the caller beforehand.
pub fn create_callee_id_to_caller_data_map(
&self,
state_before_call: &State,
state_before_return: &State,
) -> BTreeMap<AbstractIdentifier, Data> {
let stack_register = &self.project.stack_pointer_register;
let mut id_map = BTreeMap::new();
let callee_tid = state_before_return.get_fn_tid();
if let Some(callee_fn_sig) = self.fn_signatures.get(callee_tid) {
for param in callee_fn_sig.parameters.keys() {
let param_id = AbstractIdentifier::new(callee_tid.clone(), param.clone());
let param_value = state_before_call
.eval_abstract_location(param, &self.project.runtime_memory_image);
id_map.insert(param_id, param_value);
}
for global_param in callee_fn_sig.global_parameters.keys() {
let global_param_id =
AbstractIdentifier::new(callee_tid.clone(), global_param.clone());
let global_param_value = state_before_call
.eval_abstract_location(global_param, &self.project.runtime_memory_image);
id_map.insert(global_param_id, global_param_value);
}
}
for object_id in state_before_return.memory.get_all_object_ids() {
if object_id.get_tid() != callee_tid || !object_id.get_path_hints().is_empty() {
// Object is neither a parameter object nor the stack frame of the callee.
id_map.insert(
object_id.clone(),
Data::from_target(
object_id,
Bitvector::zero(stack_register.size.into()).into(),
),
);
}
}
id_map.insert(
state_before_return.stack_id.clone(),
Data::new_top(stack_register.size),
);
// Also insert the global memory ID to the map.
id_map.insert(
state_before_return.get_global_mem_id(),
Data::from_target(
......@@ -61,6 +63,55 @@ impl<'a> Context<'a> {
id_map
}
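// Illustrative sketch of the resulting map (names are hypothetical):
//   callee parameter ID (e.g. callee::RDI) -> value of RDI in the caller before the call
//   callee stack frame ID                  -> Top (unknown to the caller)
//   ID of an object created in the callee  -> pointer to the object under its own ID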
/// Create a map that maps callee IDs to the value assigned to it in the caller after a return instruction.
///
/// This is *not* the map used in the internal `update_return` handling.
/// Instead, the created map combines several ID renaming steps used internally into one renaming map.
/// The map is intended for use in other analyses depending on the PointerInference,
/// but not in the PointerInference itself.
pub fn create_full_callee_id_to_caller_data_map(
&self,
state_before_call: &State,
state_before_return: &State,
call_tid: &Tid,
) -> BTreeMap<AbstractIdentifier, Data> {
let cconv = &self.project.program.term.subs[state_before_return.get_fn_tid()]
.term
.calling_convention;
let cconv = match self.project.get_specific_calling_convention(cconv) {
Some(cconv) => cconv,
None => {
return BTreeMap::new();
}
};
let callee_fn_sig = self
.fn_signatures
.get(state_before_return.get_fn_tid())
.unwrap();
let mut minimized_return_state = state_before_return.clone();
minimized_return_state.minimize_before_return_instruction(callee_fn_sig, cconv);
let mut location_to_data_map =
minimized_return_state.map_abstract_locations_to_pointer_data(call_tid);
minimized_return_state.filter_location_to_pointer_data_map(&mut location_to_data_map);
let mut replacement_map =
minimized_return_state.get_id_to_unified_ids_replacement_map(&location_to_data_map);
minimized_return_state.merge_mem_objects_with_unique_abstract_location(call_tid);
let unified_to_caller_replacement_map =
self.create_callee_id_to_caller_data_map(state_before_call, &minimized_return_state);
// In the ID-to-unified-ID map replace parameter IDs with their corresponding values in the caller.
for value in replacement_map.values_mut() {
value.replace_all_ids(&unified_to_caller_replacement_map);
}
// Add all parameter IDs to the map
let callee_tid = state_before_return.get_fn_tid();
for (id, value) in unified_to_caller_replacement_map {
if id.get_tid() == callee_tid && id.get_path_hints().is_empty() {
replacement_map.insert(id, value);
}
}
replacement_map
}
/// Create a map from the parameter IDs (of the function that the given state corresponds to)
/// to the corresponding access patterns.
pub fn create_id_to_access_pattern_map(
......@@ -71,7 +122,11 @@ impl<'a> Context<'a> {
let fn_tid = state.get_fn_tid();
let callee_fn_sig = self.fn_signatures.get(fn_tid).unwrap();
for (param, access_pattern) in &callee_fn_sig.parameters {
let param_id = AbstractIdentifier::new(fn_tid.clone(), param.clone());
id_to_access_pattern_map.insert(param_id.clone(), access_pattern);
}
for (param, access_pattern) in &callee_fn_sig.global_parameters {
let param_id = AbstractIdentifier::new(fn_tid.clone(), param.clone());
id_to_access_pattern_map.insert(param_id.clone(), access_pattern);
}
......
......@@ -97,6 +97,18 @@ impl<'a> Context<'a> {
}
}
/// If `result` is an `Err`, log the error as an error message through the `log_collector` channel.
pub fn log_error(&self, result: Result<(), Error>, location: Option<&Tid>) {
if let Err(err) = result {
let mut log_message =
LogMessage::new_error(format!("{err}")).source("Pointer Inference");
if let Some(loc) = location {
log_message = log_message.location(loc.clone());
};
let _ = self.log_collector.send(LogThreadMsg::Log(log_message));
}
}
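// Hypothetical usage sketch (assuming a surrounding `Def` term `def`):
// self.log_error(
//     state.write_to_address(&address_expr, &value, global_memory),
//     Some(&def.tid),
// );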
/// Detect and log if the stack pointer is not as expected when returning from a function.
fn detect_stack_pointer_information_loss_on_return(
&self,
......@@ -299,7 +311,7 @@ impl<'a> Context<'a> {
/// Merge global memory data from the callee global memory object to the caller global memory object
/// if the corresponding global variable is marked as mutable in both the caller and callee.
fn merge_non_nested_global_mem_from_callee(
&self,
caller_state: &mut State,
callee_global_mem: &AbstractObject,
......@@ -360,23 +372,47 @@ fn compute_call_return_global_var_access_intervals(
caller_fn_sig: &FunctionSignature,
callee_fn_sig: &FunctionSignature,
) -> BTreeMap<u64, AccessPattern> {
let caller_mut_indices: BTreeSet<u64> = caller_fn_sig
.global_parameters
.iter()
.filter_map(|(location, access_pattern)| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
if access_pattern.is_mutably_dereferenced() {
return Some(*address);
}
}
None
})
.collect();
let callee_mut_indices: BTreeSet<u64> = callee_fn_sig
.global_parameters
.iter()
.filter_map(|(location, access_pattern)| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
if access_pattern.is_mutably_dereferenced() {
return Some(*address);
}
}
None
})
.collect();
let mut intervals: BTreeMap<u64, AccessPattern> = caller_fn_sig
.global_parameters
.keys()
.chain(callee_fn_sig.global_parameters.keys())
.filter_map(|location| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
Some((*address, AccessPattern::new()))
} else {
None
}
})
.collect();
for (index, access_pattern) in intervals.iter_mut() {
if caller_mut_indices.contains(index) && callee_mut_indices.contains(index) {
access_pattern.set_mutably_dereferenced_flag();
}
}
intervals
}
......
......@@ -173,6 +173,12 @@ fn update_return() {
&variable!("RDX:8"),
Data::from_target(new_id("callee", "RDI"), bv(0)),
);
state_before_return
.memory
.get_object_mut(&callee_created_heap_id)
.unwrap()
.set_value(bitvec!("0x42:8").into(), &bitvec!("0x6:8").into())
.unwrap();
let state_before_call = State::new(&variable!("RSP:8"), Tid::new("caller"), BTreeSet::new());
let mut state_before_call = context
......@@ -210,10 +216,8 @@ fn update_return() {
assert_eq!(
state.get_register(&variable!("RAX:8")),
Data::from_target(
AbstractIdentifier::mock("call_callee", "RAX", 8),
bv(0).into()
)
);
assert_eq!(
......@@ -234,15 +238,12 @@ fn update_return() {
.get_all_object_ids()
.get(&param_obj_id)
.is_some());
let value = state
.memory
.get_object(&AbstractIdentifier::mock("call_callee", "RAX", 8))
.unwrap()
.get_value(bitvec!("0x-a:8"), ByteSize::new(8));
assert_eq!(value, bitvec!("0x42:8").into());
}
#[test]
......@@ -297,6 +298,20 @@ fn get_unsound_caller_ids() {
new_id("callee", "RSI"),
Data::from_target(new_id("caller", "RAX"), bv(2).into()),
);
callee_id_to_caller_data_map.insert(
AbstractIdentifier::new(
Tid::new("callee"),
AbstractLocation::mock_global(0x2000, &[], 8),
),
bv(42).into(),
);
callee_id_to_caller_data_map.insert(
AbstractIdentifier::new(
Tid::new("callee"),
AbstractLocation::mock_global(0x3000, &[], 8),
),
bv(42).into(),
);
let callee_tid = Tid::new("callee");
let callee_state = State::from_fn_sig(
......@@ -395,7 +410,7 @@ fn test_merge_global_mem_from_callee() {
Data::from_target(caller_state.get_global_mem_id(), bitvec!("0:8").into()),
)]);
context.merge_non_nested_global_mem_from_callee(
&mut caller_state,
callee_global_mem,
&replacement_map,
......
......@@ -124,6 +124,20 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
return None;
}
};
let callee_fn_sig = match self.fn_signatures.get(state_before_return.get_fn_tid()) {
Some(fn_sig) => fn_sig,
None => {
let location = state_before_return.get_fn_tid();
self.log_error(
Err(anyhow!(
"Internal function {} has no function signature.",
location
)),
Some(location),
);
return None;
}
};
// Detect possible information loss on the stack pointer and report it.
if let Err(err) = self.detect_stack_pointer_information_loss_on_return(state_before_return)
......@@ -133,19 +147,19 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
// or a call to a non-returning extern function that was not marked as non-returning.
return None;
}
// Minimize the callee state and replace callee-originating object IDs whenever possible.
let mut state_before_return = state_before_return.clone();
state_before_return.minimize_before_return_instruction(callee_fn_sig, cconv);
state_before_return.merge_mem_objects_with_unique_abstract_location(&call_term.tid);
// Create a mapping of IDs from the callee to IDs that should be used in the caller.
let id_map =
self.create_callee_id_to_caller_data_map(state_before_call, &state_before_return);
let callee_id_to_access_pattern_map =
self.create_id_to_access_pattern_map(&state_before_return);
// Identify caller IDs for which the callee analysis may be unsound for this callsite.
let unsound_caller_ids =
self.get_unsound_caller_ids(&id_map, &callee_id_to_access_pattern_map);
// FIXME: Unsound caller IDs occur too often to log the cases right now.
// We have to investigate the reasons for it (maybe too many parameters on the caller stack?)
// and find better heuristics to prevent them poisoning the analysis soundness.
......@@ -167,11 +181,7 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
continue;
}
if *callee_object_id == state_before_return.get_global_mem_id() {
self.merge_non_nested_global_mem_from_callee(
&mut state_after_return,
callee_object,
&id_map,
......@@ -196,11 +206,9 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
.is_none()
{
// Add a callee object that does not correspond to a parameter to the caller or the stack of the callee.
state_after_return
.memory
.insert(callee_object_id.clone(), callee_object);
} else {
// The callee object is a parameter object.
self.log_debug(
......@@ -217,7 +225,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
state_after_return
.memory
.assume_arbitrary_writes_to_object(id, &BTreeSet::new());
}
// Cleanup
state_after_return.remove_unreferenced_objects();
......
......@@ -49,6 +49,8 @@ pub use state::State;
/// The version number of the analysis.
const VERSION: &str = "0.2";
/// The recursion limit for nested pointers.
const POINTER_RECURSION_DEPTH_LIMIT: u64 = 2;
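// Illustrative reading (assuming the limit bounds the `recursion_depth()` of tracked
// memory locations): a location printed as `[0x8][0x10]` has depth 1 and stays within
// the limit of 2, while `[0x8][0x10][0x0][0x4]` (depth 3) would exceed it.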
/// The name and version number of the "Memory" CWE check.
pub static CWE_MODULE: crate::CweModule = crate::CweModule {
......@@ -315,7 +317,7 @@ impl<'a> PointerInference<'a> {
}) => (state_before_call, state_before_return),
_ => continue,
};
let id_to_data_map = context.create_callee_id_to_caller_data_map(
let id_to_data_map = context.create_full_callee_id_to_caller_data_map(
state_before_call,
state_before_return,
call_tid,
......
......@@ -87,11 +87,23 @@ impl AbstractObject {
inner.is_unique = false;
}
/// Mark the abstract object as unique, i.e. it represents exactly one memory object.
pub fn mark_as_unique(&mut self) {
let inner = Arc::make_mut(&mut self.inner);
inner.is_unique = true;
}
/// Get the type of the memory object.
pub fn get_object_type(&self) -> Option<ObjectType> {
self.inner.type_
}
/// Set the type of the memory object.
pub fn set_object_type(&mut self, object_type: Option<ObjectType>) {
let inner = Arc::make_mut(&mut self.inner);
inner.type_ = object_type;
}
/// Overwrite the values in `self` with those in `other`
/// under the assumption that the zero offset in `other` corresponds to the offset `offset_other` in `self`.
///
......
......@@ -5,7 +5,6 @@ use super::*;
impl AbstractObjectList {
/// Get a reference to the object corresponding to the given ID.
pub fn get_object(&self, id: &AbstractIdentifier) -> Option<&AbstractObject> {
self.objects.get(id)
}
......@@ -64,6 +63,11 @@ impl AbstractObjectList {
self.objects.iter()
}
/// Get an iterator of mutable references over the abstract objects in `self`.
pub fn iter_objects_mut(&mut self) -> impl Iterator<Item = &mut AbstractObject> {
self.objects.values_mut()
}
/// Get the number of objects that are currently tracked.
#[cfg(test)]
pub fn get_num_objects(&self) -> usize {
......
......@@ -137,6 +137,26 @@ impl AbstractObjectList {
None => Err(anyhow!("Object ID not contained in object list.")),
}
}
/// Only retain those memory objects for which the provided predicate returns `true`.
/// All memory objects for which the predicate returns `false` are removed from `self`.
pub fn retain<F>(&mut self, f: F)
where
F: FnMut(&AbstractIdentifier, &mut AbstractObject) -> bool,
{
self.objects.retain(f)
}
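// Hypothetical usage sketch (a predicate keeping only still-referenced objects):
// object_list.retain(|object_id, _object| referenced_ids.contains(object_id));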
/// Remove an object from the object list.
/// Returns the removed object if its ID was indeed contained in the object list.
pub fn remove(&mut self, id: &AbstractIdentifier) -> Option<AbstractObject> {
self.objects.remove(id)
}
/// Return `true` if the object list contains a memory object indexed by the given ID.
pub fn contains(&self, id: &AbstractIdentifier) -> bool {
self.objects.contains_key(id)
}
}
impl AbstractDomain for AbstractObjectList {
......
......@@ -78,7 +78,7 @@ impl State {
self.write_to_address(address, &self.eval(value), global_memory)
}
/// Evaluate the given address expression and return the data read from that address on success.
pub fn load_value(
&self,
address: &Expression,
......@@ -86,6 +86,17 @@ impl State {
global_memory: &RuntimeMemoryImage,
) -> Result<Data, Error> {
let address = self.eval(address);
self.load_value_from_address(&address, size, global_memory)
}
/// Load the value at the given address from the state and return the data read on success.
/// If the address contains more than one possible pointer target, the results for all possible targets are merged.
pub fn load_value_from_address(
&self,
address: &Data,
size: ByteSize,
global_memory: &RuntimeMemoryImage,
) -> Result<Data, Error> {
let mut result = if let Some(global_address) = address.get_absolute_value() {
if let Ok(address_bitvector) = global_address.try_to_bitvec() {
match global_memory.read(&address_bitvector, size) {
......@@ -109,7 +120,7 @@ impl State {
} else {
Data::new_empty(size)
};
result = result.merge(&self.memory.get_value(address, size));
if let Ok(offset) = result.try_to_offset() {
if result.bytesize() == self.stack_id.bytesize()
......@@ -217,6 +228,81 @@ impl State {
}
}
/// Evaluate the value of the given abstract location on the current state.
/// If the actual value cannot be determined (e.g. if an intermediate pointer evaluates to `Top`),
/// then a `Top` value is returned.
pub fn eval_abstract_location(
&self,
location: &AbstractLocation,
global_memory: &RuntimeMemoryImage,
) -> Data {
match location {
AbstractLocation::GlobalAddress { address, size } => {
assert_eq!(*size, self.stack_id.bytesize());
Data::from_target(
self.get_global_mem_id().clone(),
Bitvector::from_u64(*address)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
)
}
AbstractLocation::GlobalPointer(address, nested_location) => {
let pointer = Data::from_target(
self.get_global_mem_id().clone(),
Bitvector::from_u64(*address)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
self.eval_abstract_memory_location(nested_location, pointer, global_memory)
}
AbstractLocation::Register(var) => self.get_register(var),
AbstractLocation::Pointer(var, nested_location) => {
let pointer = self.get_register(var);
self.eval_abstract_memory_location(nested_location, pointer, global_memory)
}
}
}
/// Evaluate the value of the given abstract memory location on the current state
/// with the given `root_pointer` as the start point of the location description.
fn eval_abstract_memory_location(
&self,
location: &AbstractMemoryLocation,
root_pointer: Data,
global_memory: &RuntimeMemoryImage,
) -> Data {
match location {
AbstractMemoryLocation::Location { offset, size } => {
let pointer = root_pointer.add_offset(
&Bitvector::from_i64(*offset)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
self.load_value_from_address(&pointer, *size, global_memory)
.unwrap_or_else(|_| Data::new_top(*size))
}
AbstractMemoryLocation::Pointer { offset, target } => {
let pointer = root_pointer.add_offset(
&Bitvector::from_i64(*offset)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
match self.load_value_from_address(
&pointer,
self.stack_id.bytesize(),
global_memory,
) {
Ok(nested_root_pointer) => self.eval_abstract_memory_location(
target,
nested_root_pointer,
global_memory,
),
Err(_) => Data::new_top(location.bytesize()),
}
}
}
}
/// Check whether the given `def` could result in a memory access through a NULL pointer.
///
/// If no NULL pointer dereference is detected then `Ok(false)` is returned.
......
use super::*;
#[test]
fn handle_store() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&variable!("RSP:8"), Tid::new("time0"), BTreeSet::new());
let stack_id = new_id("time0", "RSP");
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(0))
);
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 - 32:8"));
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(-32))
);
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 + -8:8"));
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(-40))
);
state
.handle_store(&expr!("RSP:8 + 8:8"), &expr!("1:8"), &global_memory)
.unwrap();
state
.handle_store(&expr!("RSP:8 - 8:8"), &expr!("2:8"), &global_memory)
.unwrap();
state
.handle_store(&expr!("RSP:8 + -16:8"), &expr!("3:8"), &global_memory)
.unwrap();
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 - 4:8"));
assert_eq!(
state
.load_value(&expr!("RSP:8 + 12:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(1).into()
);
assert_eq!(
state
.load_value(&expr!("RSP:8 - 4:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(2).into()
);
assert_eq!(
state
.load_value(&expr!("RSP:8 + -12:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(3).into()
);
}
#[test]
fn global_mem_access() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(
&variable!("RSP:8"),
Tid::new("func_tid"),
BTreeSet::from([0x2000]),
);
// global read-only address
let address_expr = expr!("0x1000:8");
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
bitvec!("0xb3b2b1b0:4").into() // note that we read in little-endian byte order
);
assert!(state
.write_to_address(
&address_expr,
&DataDomain::new_top(ByteSize::new(4)),
&global_memory
)
.is_err());
// global writeable address
let address_expr = expr!("0x2000:8");
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
DataDomain::new_top(ByteSize::new(4))
);
assert!(state
.write_to_address(&address_expr, &bitvec!("21:4").into(), &global_memory)
.is_ok());
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
bitvec!("21:4").into()
);
// invalid global address
let address_expr = expr!("0x3456:8");
assert!(state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.is_err());
assert!(state
.write_to_address(
&address_expr,
&DataDomain::new_top(ByteSize::new(4)),
&global_memory
)
.is_err());
}
#[test]
fn test_eval_abstract_location() {
let mut state = State::new(&variable!("RSP:8"), Tid::new("fn_tid"), BTreeSet::new());
let global_memory = RuntimeMemoryImage::mock();
let object_id = AbstractIdentifier::mock("fn_tid", "RSI", 8);
state
.memory
.add_abstract_object(object_id.clone(), ByteSize::new(8), None);
state
.memory
.get_object_mut(&state.stack_id)
.unwrap()
.set_value(
Data::from_target(object_id.clone(), bitvec!("0x0:8").into()),
&bitvec!("0x-20:8").into(),
)
.unwrap();
state
.memory
.get_object_mut(&object_id)
.unwrap()
.set_value(bitvec!("0x42:8").into(), &bitvec!("0x10:8").into())
.unwrap();
let location = AbstractLocation::mock("RSP:8", &[-32], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(
value,
Data::from_target(object_id.clone(), bitvec!("0x0:8").into())
);
let location = AbstractLocation::mock("RSP:8", &[-32, 16], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(value, bitvec!("0x42:8").into());
// Also test evaluation of a global address
state
.memory
.get_object_mut(&state.get_global_mem_id().clone())
.unwrap()
.set_value(bitvec!("0x43:8").into(), &bitvec!("0x2000:8").into())
.unwrap();
let location = AbstractLocation::mock_global(0x2000, &[0], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(value, bitvec!("0x43:8").into());
}
use super::*;
use crate::analysis::vsa_results::VsaResult;
use crate::{abstract_domain::AbstractLocation, analysis::vsa_results::VsaResult};
/// Implementation of the [`VsaResult`] trait, which provides other analyses with an easy-to-use interface
/// to the value set and points-to analysis results of the pointer inference.
......@@ -30,4 +30,15 @@ impl<'a> VsaResult for PointerInference<'a> {
.eval_parameter_arg(parameter, &context.project.runtime_memory_image)
.ok()
}
/// Evaluate the value of the given parameter, identified by its abstract location, at the given jump instruction.
fn eval_parameter_location_at_call(
&self,
jmp_tid: &Tid,
parameter: &AbstractLocation,
) -> Option<Data> {
let state = self.states_at_tids.get(jmp_tid)?;
let context = self.computation.get_context().get_context();
Some(state.eval_abstract_location(parameter, &context.project.runtime_memory_image))
}
}
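// Hypothetical usage from a checker (`pointer_inference`, `call`, and
// `param_location` are assumed to be in scope; compare the use-after-free
// check further below):
//
//     if let Some(param_value) =
//         pointer_inference.eval_parameter_location_at_call(&call.tid, param_location)
//     {
//         // inspect `param_value`, e.g. for freed or out-of-bounds targets
//     }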
......@@ -44,7 +44,7 @@ pub struct Context<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub block_start_node_map: HashMap<(Tid, Tid), NodeIndex>,
/// A set containing a given [`Def`](crate::intermediate_representation::Def) as the first `Def` of the block.
/// A set containing the keys of all [`Def`]s that are the first `Def` of their block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub block_first_def_set: HashSet<(Tid, Tid)>,
......
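// Toy sketch of the keying scheme described above: because a block (and thus
// its `Def`s) can be contained in more than one function, nodes are keyed by
// the pair `(Def-TID, Current-Sub-TID)` rather than by the `Def`-TID alone.
use std::collections::HashSet;

#[test]
fn same_def_is_tracked_per_function() {
    let mut first_defs: HashSet<(&str, &str)> = HashSet::new();
    first_defs.insert(("def_tid", "sub_a"));
    first_defs.insert(("def_tid", "sub_b")); // same Def, shared by a second function
    assert_eq!(first_defs.len(), 2); // two distinct nodes for the same Def
}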
//! This module provides the [`VsaResult`] trait
//! which defines an interface for the results of analyses similar to a value set analysis.
use crate::abstract_domain::AbstractLocation;
use crate::intermediate_representation::{Arg, Expression};
use crate::prelude::*;
......@@ -28,6 +29,13 @@ pub trait VsaResult {
/// Return the value of a parameter at the given jump instruction.
fn eval_parameter_arg_at_call(&self, jmp_tid: &Tid, param: &Arg) -> Option<Self::ValueDomain>;
/// Return the value of a parameter, identified by its abstract location, at the given jump instruction.
fn eval_parameter_location_at_call(
&self,
jmp_tid: &Tid,
param: &AbstractLocation,
) -> Option<Self::ValueDomain>;
/// Evaluate the value of the given expression at the given jump instruction.
fn eval_at_jmp(&self, jmp_tid: &Tid, expression: &Expression) -> Option<Self::ValueDomain>;
}
......@@ -107,7 +107,7 @@ impl<'a> Context<'a> {
.function_signatures
.get(id.get_tid())
.unwrap()
.get_stack_params_total_size();
.get_stack_params_total_size(&self.project.stack_pointer_register);
replace_if_smaller_bound(
&mut upper_bound,
BoundsMetadata::from_source(
......@@ -135,6 +135,8 @@ impl<'a> Context<'a> {
object_id: &AbstractIdentifier,
current_stack_frame_id: &AbstractIdentifier,
) -> (Option<BoundsMetadata>, Option<BoundsMetadata>) {
// FIXME: The malloc-tid-to-object-size-map check no longer works,
// because the PointerInference no longer uses path hints.
if self
.malloc_tid_to_object_size_map
.contains_key(object_id.get_tid())
......@@ -153,7 +155,7 @@ impl<'a> Context<'a> {
.function_signatures
.get(object_id.get_tid())
.unwrap()
.get_stack_params_total_size();
.get_stack_params_total_size(&self.project.stack_pointer_register);
(None, Some(BoundsMetadata::new(stack_frame_upper_bound)))
} else if object_id.get_tid() == current_stack_frame_id.get_tid()
&& object_id.get_path_hints().is_empty()
......
......@@ -89,6 +89,8 @@ impl<'a> Context<'a> {
/// then the absolute value is used and unknown origins of the size value are ignored.
/// If more than one possible absolute value for the size is found then the minimum value for the size is returned.
pub fn compute_size_of_heap_object(&self, object_id: &AbstractIdentifier) -> BitvectorDomain {
// FIXME: We use path hints, which are no longer provided by the PointerInference, to substitute some values.
// We either have to change that or make sure that we provide the path hints ourselves.
if let Some(object_size) = self.malloc_tid_to_object_size_map.get(object_id.get_tid()) {
let fn_tid_at_malloc_call = self.call_to_caller_fn_map[object_id.get_tid()].clone();
let object_size = self.recursively_substitute_param_values_context_sensitive(
......
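// Toy sketch of the "take the minimum candidate value" rule described above
// (simplified: candidate sizes as plain integers instead of bitvector domain values).
fn minimal_candidate_size(candidate_sizes: &[u64]) -> Option<u64> {
    candidate_sizes.iter().min().copied()
}

#[test]
fn picks_minimum_of_candidate_sizes() {
    assert_eq!(minimal_candidate_size(&[64, 32, 128]), Some(32));
}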
......@@ -192,9 +192,10 @@ fn add_param_replacements_for_call(
.get(callee_tid)
{
for param_arg in fn_sig.parameters.keys() {
if let Some(param_value) = vsa_results.eval_parameter_arg_at_call(&call.tid, param_arg)
if let Some(param_value) =
vsa_results.eval_parameter_location_at_call(&call.tid, param_arg)
{
let param_id = AbstractIdentifier::from_arg(&call.tid, param_arg);
let param_id = AbstractIdentifier::new(call.tid.clone(), param_arg.clone());
replacement_map.insert(param_id, param_value);
}
}
......
......@@ -13,10 +13,13 @@
//!
//! The check uses the results of the [Pointer Inference analysis](`crate::analysis::pointer_inference`)
//! to check whether any memory accesses may point outside of the bounds of the corresponding memory objects.
//! For this the results of the Pointer Inference analysis are aggregated interprocedurally.
//! Additionally, the check uses a lightweight intraprocedural dataflow fixpoint computation
//! Additionally, the check uses a lightweight dataflow fixpoint computation
//! to ensure that for each memory object only the first access outside of its bounds is flagged as a CWE.
//!
//! Currently, the check is only partially interprocedural.
//! Bounds of parameter objects can be detected, but bounds of memory objects created in called functions
//! (other than the standard allocation functions) will not be detected.
//!
//! ## False Positives
//!
//! - Any analysis imprecision of the Pointer Inference analysis may lead to false positive results in this check.
......@@ -40,6 +43,20 @@
//! this may still miss buffer overflows occurring in the called function.
//! - Right now the check only considers buffers on the stack or the heap, but not buffers in global memory.
//! Thus corresponding overflows of buffers in global memory are not detected.
//! - Since the check is only partially interprocedural at the moment,
//! it cannot determine the sizes of objects created in called functions.
//! For example, if allocations are wrapped in simple wrapper functions,
//! the analysis will miss overflows of the corresponding objects, because it cannot determine their sizes.
// FIXME: The current implementation uses path hints for memory object IDs to determine object sizes interprocedurally.
// But the number of path hint combinations can grow exponentially
// with the call depth to the actual allocation site of a callee-created object.
// This led to state explosion in the PointerInference, which therefore no longer provides path hints.
// Without the path hints that this analysis depended on, the check can only resolve the sizes of parameter objects,
// but not of objects returned from called functions (other than the standard allocation functions).
// A future implementation needs a better way to determine object sizes interprocedurally,
// probably based on several fixpoint computations to circumvent the state explosion problems
// that the old implementation was vulnerable to.
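// Toy illustration (hypothetical numbers) of the exponential growth described
// above: with `k` possible call sites per function and call depth `d` between
// the use of an object and its allocation site, up to k^d path hint
// combinations may be needed.
fn path_hint_combinations(call_sites_per_fn: u64, call_depth: u32) -> u64 {
    call_sites_per_fn.pow(call_depth)
}

#[test]
fn path_hint_growth_is_exponential() {
    assert_eq!(path_hint_combinations(3, 5), 243);
}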
use crate::analysis::pointer_inference::Data;
use crate::prelude::*;
......
......@@ -37,7 +37,7 @@ impl State {
};
let stack_upper_bound = std::cmp::max(
stack_upper_bound,
function_sig.get_stack_params_total_size(),
function_sig.get_stack_params_total_size(&project.stack_pointer_register),
);
let object_lower_bounds = BTreeMap::from([(
stack_id.clone(),
......
......@@ -112,7 +112,7 @@ impl<'a> Context<'a> {
if access_pattern.is_dereferenced() {
if let Some(arg_value) = self
.pointer_inference
.eval_parameter_arg_at_call(call_tid, arg)
.eval_parameter_location_at_call(call_tid, arg)
{
if let Some(mut warnings) = state.check_address_for_use_after_free(&arg_value) {
warning_causes.append(&mut warnings);
......
......@@ -300,7 +300,7 @@ pub mod tests {
assert_eq!(processed_warnings.len(), 1);
let processed_cwe = processed_warnings.iter().next().unwrap();
assert_eq!(&processed_cwe.other[0], &[
"Accessed ID object_origin_tid(->call_tid) @ RAX may have been freed before at free_tid.".to_string(),
"Accessed ID object_origin_tid(->call_tid) @ RAX:i64 may have been freed before at free_tid.".to_string(),
"Relevant callgraph TIDs: [root_func_tid, call_tid]".to_string(),
]);
......
......@@ -31,6 +31,11 @@ impl Tid {
}
}
/// Returns true if the ID string ends with the provided suffix.
pub fn has_id_suffix(&self, suffix: &str) -> bool {
self.id.ends_with(suffix)
}
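// Usage sketch (hypothetical TID value; would live in a test module):
//
//     let tid = Tid::new("func_tid_param_0");
//     assert!(tid.has_id_suffix("_param_0"));
//     assert!(!tid.has_id_suffix("_param_1"));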
/// Generate the ID of a block starting at the given address.
///
/// Note that the block may not actually exist.
......
......@@ -10,7 +10,7 @@ use crate::utils::log::LogMessage;
use crate::utils::{binary::BareMetalConfig, ghidra::get_project_from_ghidra};
use std::path::Path;
/// Disassemble the given binary and parse it to a [`Project`](crate::intermediate_representation::Project) struct.
/// Disassemble the given binary and parse it to a [`Project`] struct.
///
/// If successful, returns the binary file (as a byte vector), the parsed project struct,
/// and a vector of log messages generated during the process.
......
......@@ -450,6 +450,7 @@ mod tests {
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_skipped(&mut tests, "x86", "mingw32-gcc"); // TODO: Check reason for failure! Probably same as above?
mark_skipped(&mut tests, "x64", "mingw32-gcc"); // We find an additional false positive in unrelated code.
for test_case in tests {
let num_expected_occurences = 1;
......