Commit f6ced95c by Enkelmann (committed via GitHub)

Implement tracking of nested parameters (#432)

parent 270b4d4e
use super::AbstractMemoryLocation;
use crate::intermediate_representation::*;
use crate::prelude::*;
/// An abstract location describes how to find the value of a variable in memory at a given time.
///
/// It is defined recursively, where the root is either a register or a (constant) global address.
/// This way only locations that the local state knows about are representable.
/// It is also impossible to accidentally describe circular references.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractLocation {
/// The location is given by a register.
Register(Variable),
/// The value itself is a constant address to global memory.
/// Note that the `size` is the size of the pointer and not the size
/// of the value residing at the specific address in global memory.
GlobalAddress {
/// The address in global memory.
address: u64,
/// The byte size of the address (not the pointed-to value!).
size: ByteSize,
},
/// The location is in memory.
/// One needs to follow the pointer in the given register
/// and then follow the abstract memory location inside the pointed to memory object
/// to find the actual memory location.
Pointer(Variable, AbstractMemoryLocation),
/// The location is in memory.
/// One needs to follow the pointer located at the given global address
/// and then follow the abstract memory location inside the pointed to memory object
/// to find the actual memory location.
GlobalPointer(u64, AbstractMemoryLocation),
}
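// Editor's illustration (not part of the original commit): the value described
// by the C-like expression `*( *(RSI + 0x10) + 0x8 )`, i.e. the 8-byte field at
// offset 0x8 of the object pointed to by the pointer stored at offset 0x10 of
// the object that RSI points to, would be represented as
// `AbstractLocation::Pointer(RSI, AbstractMemoryLocation::Pointer { offset: 0x10,
// target: Box::new(AbstractMemoryLocation::Location { offset: 0x8, size: ByteSize::new(8) }) })`.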
impl std::fmt::Display for AbstractLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Register(var) => write!(formatter, "{}", var.name)?,
Self::GlobalAddress { address, size: _ } => write!(formatter, "0x{address:x}")?,
Self::Pointer(var, location) => write!(formatter, "{}{}", var.name, location)?,
Self::GlobalPointer(address, location) => write!(formatter, "0x{address:x}{location}")?,
};
write!(formatter, ":i{}", self.bytesize().as_bit_length())
}
}
impl AbstractLocation {
/// Create an abstract location from a variable corresponding to a register.
/// This function returns an error if the variable is not a physical register.
pub fn from_var(variable: &Variable) -> Result<AbstractLocation, Error> {
if variable.is_temp {
return Err(anyhow!(
"Cannot create abstract location from temporary variables."
));
}
Ok(AbstractLocation::Register(variable.clone()))
}
/// Create an abstract location on the stack.
/// The returned location describes the value of the given `size`
/// at the given `offset` relative to the memory location that the `stack_register` is pointing to.
pub fn from_stack_position(
stack_register: &Variable,
offset: i64,
size: ByteSize,
) -> AbstractLocation {
let stack_pos = AbstractMemoryLocation::Location { offset, size };
AbstractLocation::Pointer(stack_register.clone(), stack_pos)
}
/// Create an abstract location representing an address pointing to global memory.
pub fn from_global_address(address: &Bitvector) -> AbstractLocation {
let size = address.bytesize();
let address = address
.try_to_u64()
.expect("Global address larger than 64 bits encountered.");
AbstractLocation::GlobalAddress { address, size }
}
/// Add an offset to the abstract location.
pub fn with_offset_addendum(self, addendum: i64) -> AbstractLocation {
match self {
Self::Register(_) => panic!("Cannot add an offset to a register abstract location"),
Self::GlobalAddress { address, size } => Self::GlobalAddress {
address: address + (addendum as u64),
size,
},
Self::Pointer(var, mut location) => {
location.add_offset(addendum);
Self::Pointer(var, location)
}
Self::GlobalPointer(address, mut location) => {
location.add_offset(addendum);
Self::GlobalPointer(address, location)
}
}
}
/// Return the abstract location that one gets when dereferencing the pointer that `self` is pointing to.
///
/// Panics if `self` is not pointer-sized.
pub fn dereferenced(
self,
new_size: ByteSize,
generic_pointer_size: ByteSize,
) -> AbstractLocation {
match self {
Self::Register(var) => Self::Pointer(
var,
AbstractMemoryLocation::Location {
offset: 0,
size: new_size,
},
),
Self::GlobalAddress { address, size } => {
assert_eq!(
size, generic_pointer_size,
"Cannot dereference an abstract memory location that is not pointer-sized."
);
Self::GlobalPointer(
address,
AbstractMemoryLocation::Location {
offset: 0,
size: new_size,
},
)
}
Self::GlobalPointer(address, mut location) => {
location.dereference(new_size, generic_pointer_size);
Self::GlobalPointer(address, location)
}
Self::Pointer(var, mut location) => {
location.dereference(new_size, generic_pointer_size);
Self::Pointer(var.clone(), location)
}
}
}
/// Get the bytesize of the value represented by the abstract location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Register(var) => var.size,
Self::GlobalAddress { size, .. } => *size,
Self::Pointer(_, mem_location) | Self::GlobalPointer(_, mem_location) => {
mem_location.bytesize()
}
}
}
/// Get the recursion depth of the abstract location,
/// i.e. how many times one has to dereference a pointer until reaching the actual location.
pub fn recursion_depth(&self) -> u64 {
match self {
Self::Register(_) => 0,
Self::GlobalAddress { .. } => 1,
Self::Pointer(_, mem_location) | Self::GlobalPointer(_, mem_location) => {
1 + mem_location.recursion_depth()
}
}
}
/// Extend the location by adding further dereference operations to it according to the given extension.
pub fn extend(&mut self, extension: AbstractMemoryLocation, generic_pointer_size: ByteSize) {
match self {
Self::Pointer(_, location) | Self::GlobalPointer(_, location) => {
location.extend(extension, generic_pointer_size);
}
Self::GlobalAddress { address, size } => {
assert_eq!(*size, generic_pointer_size);
*self = Self::GlobalPointer(*address, extension);
}
Self::Register(var) => {
assert_eq!(var.size, generic_pointer_size);
*self = Self::Pointer(var.clone(), extension);
}
}
}
/// Get the abstract location representing the pointer pointing to the memory object
/// that contains the location represented by `self`
/// together with the offset that one has to add to the pointer to get the location of self.
///
/// Returns an error if the abstract location contains no dereference operation
/// (e.g. if `self` represents a register value).
pub fn get_parent_location(
&self,
generic_pointer_size: ByteSize,
) -> Result<(AbstractLocation, i64), Error> {
match self {
AbstractLocation::GlobalAddress { .. } | AbstractLocation::Register(_) => {
Err(anyhow!("Root location without a parent."))
}
AbstractLocation::GlobalPointer(address, location) => {
match location.get_parent_location(generic_pointer_size) {
Ok((inner_parent_location, innermost_offset)) => Ok((
Self::GlobalPointer(*address, inner_parent_location),
innermost_offset,
)),
Err(innermost_offset) => Ok((
Self::GlobalAddress {
address: *address,
size: generic_pointer_size,
},
innermost_offset,
)),
}
}
AbstractLocation::Pointer(var, location) => {
match location.get_parent_location(generic_pointer_size) {
Ok((inner_parent_location, innermost_offset)) => Ok((
Self::Pointer(var.clone(), inner_parent_location),
innermost_offset,
)),
Err(innermost_offset) => Ok((Self::Register(var.clone()), innermost_offset)),
}
}
}
}
/// Get a list of all (recursive) parent locations.
/// The list is sorted by recursion depth, starting with the root location.
pub fn get_all_parent_locations(
&self,
generic_pointer_size: ByteSize,
) -> Vec<AbstractLocation> {
match self {
AbstractLocation::GlobalAddress { .. } | AbstractLocation::Register(_) => Vec::new(),
AbstractLocation::GlobalPointer(_, _) | AbstractLocation::Pointer(_, _) => {
let (parent, _) = self.get_parent_location(generic_pointer_size).unwrap();
let mut all_parents = parent.get_all_parent_locations(generic_pointer_size);
all_parents.push(parent);
all_parents
}
}
}
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::variable;
impl AbstractLocation {
/// Mock an abstract location with a variable as root.
pub fn mock(
root_var: &str,
offsets: &[i64],
size: impl Into<ByteSize>,
) -> AbstractLocation {
let var = variable!(root_var);
match offsets {
[] => {
assert_eq!(var.size, size.into());
AbstractLocation::Register(var)
}
_ => AbstractLocation::Pointer(var, AbstractMemoryLocation::mock(offsets, size)),
}
}
/// Mock an abstract location with a global address as root.
pub fn mock_global(
root_address: u64,
offsets: &[i64],
size: impl Into<ByteSize>,
) -> AbstractLocation {
match offsets {
[] => AbstractLocation::GlobalAddress {
address: root_address,
size: size.into(),
},
_ => AbstractLocation::GlobalPointer(
root_address,
AbstractMemoryLocation::mock(offsets, size),
),
}
}
}
#[test]
fn test_from_variants() {
let loc = AbstractLocation::from_var(&variable!("RAX:8")).unwrap();
assert_eq!(&format!("{loc}"), "RAX:i64");
let loc = AbstractLocation::from_global_address(&Bitvector::from_u64(32));
assert_eq!(
loc,
AbstractLocation::GlobalAddress {
address: 32,
size: ByteSize::new(8)
}
);
let loc = AbstractLocation::from_stack_position(&variable!("RSP:8"), 16, ByteSize::new(8));
assert_eq!(loc, AbstractLocation::mock("RSP:8", &[16], 8));
}
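// Editor's sketch (not part of the original commit): nested locations are printed
// as the root register name followed by one bracketed hexadecimal offset per
// dereference step, ending with the bit size of the represented value.
#[test]
fn test_display_of_nested_location() {
let loc = AbstractLocation::mock("RAX:8", &[8, 16], 4);
assert_eq!(&format!("{loc}"), "RAX[0x8][0x10]:i32");
}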
#[test]
fn test_with_offset_addendum() {
let loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4).with_offset_addendum(12);
assert_eq!(loc, AbstractLocation::mock("RAX:8", &[1, 2, 15], 4));
}
#[test]
fn test_dereferenced() {
let loc = AbstractLocation::mock("RAX:8", &[], 8)
.dereferenced(ByteSize::new(4), ByteSize::new(8));
assert_eq!(loc, AbstractLocation::mock("RAX:8", &[0], 4));
}
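// Editor's sketch (not part of the original commit): dereferencing a location
// that already contains a dereference appends a new innermost location at offset 0.
#[test]
fn test_dereferenced_nested_location() {
let loc = AbstractLocation::mock("RAX:8", &[8], 8).dereferenced(ByteSize::new(4), ByteSize::new(8));
assert_eq!(loc, AbstractLocation::mock("RAX:8", &[8, 0], 4));
}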
#[test]
fn test_recursion_depth() {
let loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4);
assert_eq!(loc.recursion_depth(), 3);
}
#[test]
fn test_extend() {
let mut loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4);
let extension = AbstractMemoryLocation::mock(&[4, 5, 6], 1);
loc.extend(extension, ByteSize::new(4));
assert_eq!(loc, AbstractLocation::mock("RAX:8", &[1, 2, 3, 4, 5, 6], 1));
}
#[test]
fn test_get_parent_location() {
let loc = AbstractLocation::mock("RAX:8", &[1], 4);
let (parent, last_offset) = loc.get_parent_location(ByteSize::new(8)).unwrap();
assert_eq!(parent, AbstractLocation::mock("RAX:8", &[], 8));
assert_eq!(last_offset, 1);
let loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4);
let (parent, last_offset) = loc.get_parent_location(ByteSize::new(8)).unwrap();
assert_eq!(parent, AbstractLocation::mock("RAX:8", &[1, 2], 8));
assert_eq!(last_offset, 3);
}
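// Editor's sketch (not part of the original commit): the list of parent locations
// starts at the root register location and all intermediate parents are pointer-sized.
#[test]
fn test_get_all_parent_locations() {
let loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4);
let parents = loc.get_all_parent_locations(ByteSize::new(8));
assert_eq!(
parents,
vec![
AbstractLocation::mock("RAX:8", &[], 8),
AbstractLocation::mock("RAX:8", &[1], 8),
AbstractLocation::mock("RAX:8", &[1, 2], 8),
]
);
}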
}
use crate::prelude::*;
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
/// or an offset to a pointer to another memory location,
/// where the value can be found by (recursively) following the embedded `target` memory location.
///
/// The offset and size variables are given in bytes.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
/// A location inside the current memory object.
Location {
/// The offset with respect to the zero offset of the memory object where the value can be found.
offset: i64,
/// The size in bytes of the value that the memory location points to.
size: ByteSize,
},
/// A pointer which needs to be followed to get to the actual memory location
Pointer {
/// The offset inside the current memory object where the pointer can be found.
offset: i64,
/// The memory location inside the target of the pointer that this memory location points to.
target: Box<AbstractMemoryLocation>,
},
}
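// Editor's illustration (not part of the original commit): a chain of offsets such as
// `[0x8][0x10]` corresponds to
// `Pointer { offset: 0x8, target: Box::new(Location { offset: 0x10, size }) }`,
// i.e. follow the pointer found at offset 0x8 of the current memory object
// and read the value at offset 0x10 of the pointed-to object.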
impl AbstractMemoryLocation {
/// Get the abstract memory location representing the pointer pointing to the memory object
/// that contains the location represented by `self`
/// together with the offset that one has to add to the pointer to get the location of self.
///
/// If `self` is a location (and not a pointer), return the offset in the location instead.
pub fn get_parent_location(
&self,
generic_pointer_size: ByteSize,
) -> Result<(AbstractMemoryLocation, i64), i64> {
match self {
Self::Location { offset, .. } => Err(*offset),
Self::Pointer { offset, target } => {
match target.get_parent_location(generic_pointer_size) {
Ok((inner_parent, innermost_offset)) => Ok((
Self::Pointer {
offset: *offset,
target: Box::new(inner_parent),
},
innermost_offset,
)),
Err(inner_offset) => Ok((
Self::Location {
offset: *offset,
size: generic_pointer_size,
},
inner_offset,
)),
}
}
}
}
/// Add an offset to a memory location.
pub fn add_offset(&mut self, addendum: i64) {
match self {
Self::Location { offset, .. } => *offset += addendum,
Self::Pointer { target, .. } => target.add_offset(addendum),
}
}
/// Add an offset to the root location of the memory location.
pub fn add_offset_at_root(&mut self, addendum: i64) {
match self {
Self::Location { offset, .. } | Self::Pointer { offset, .. } => *offset += addendum,
}
}
/// Dereference the pointer that `self` is pointing to.
///
/// Panics if the old value of `self` is not pointer-sized.
pub fn dereference(&mut self, new_size: ByteSize, generic_pointer_size: ByteSize) {
match self {
Self::Pointer { target, .. } => target.dereference(new_size, generic_pointer_size),
Self::Location { offset, size } => {
assert_eq!(
*size, generic_pointer_size,
"Cannot dereference an abstract memory location that is not pointer-sized."
);
*self = Self::Pointer {
offset: *offset,
target: Box::new(Self::Location {
offset: 0,
size: new_size,
}),
}
}
};
}
/// Extend the location by adding further dereference operations to it according to the given extension.
pub fn extend(&mut self, extension: AbstractMemoryLocation, generic_pointer_size: ByteSize) {
match self {
Self::Location { offset, size } => {
assert_eq!(*size, generic_pointer_size);
*self = Self::Pointer {
offset: *offset,
target: Box::new(extension),
};
}
Self::Pointer { target, .. } => target.extend(extension, generic_pointer_size),
}
}
/// Get the bytesize of the value represented by the abstract memory location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Location { size, .. } => *size,
Self::Pointer { target, .. } => target.bytesize(),
}
}
/// Get the recursion depth of the abstract memory location,
/// i.e. how many times one has to dereference a pointer until reaching the actual location.
pub fn recursion_depth(&self) -> u64 {
match self {
Self::Location { .. } => 0,
Self::Pointer { target, .. } => 1 + target.recursion_depth(),
}
}
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Location { offset, .. } => write!(formatter, "[0x{offset:x}]"),
Self::Pointer { offset, target } => write!(formatter, "[0x{offset:x}]{target}"),
}
}
}
#[cfg(test)]
pub mod tests {
use super::*;
impl AbstractMemoryLocation {
/// Mock a memory location with a given sequence of offsets.
/// The first element in the sequence is the root offset.
pub fn mock(offsets: &[i64], size: impl Into<ByteSize>) -> AbstractMemoryLocation {
match offsets {
[] => panic!("Cannot mock an abstract memory location from an empty offset sequence."),
[offset] => AbstractMemoryLocation::Location {
offset: *offset,
size: size.into(),
},
[offset, tail @ ..] => AbstractMemoryLocation::Pointer {
offset: *offset,
target: Box::new(AbstractMemoryLocation::mock(tail, size)),
},
}
}
}
#[test]
fn test_mock() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
assert_eq!(&format!("{loc}"), "[0x1][0x2][0x3]");
}
#[test]
fn test_get_parent_location() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
let (parent_loc, last_offset) = loc.get_parent_location(ByteSize::new(8)).unwrap();
assert_eq!(parent_loc, AbstractMemoryLocation::mock(&[1, 2], 8));
assert_eq!(last_offset, 3);
let loc = AbstractMemoryLocation::mock(&[1], 4);
assert!(loc.get_parent_location(ByteSize::new(8)).is_err());
}
#[test]
fn test_offset_addendums() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
loc.add_offset(6);
assert_eq!(&loc, &AbstractMemoryLocation::mock(&[1, 2, 9], 4));
loc.add_offset_at_root(-5);
assert_eq!(&loc, &AbstractMemoryLocation::mock(&[-4, 2, 9], 4));
}
#[test]
fn test_dereference() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
loc.dereference(ByteSize::new(8), ByteSize::new(4));
assert_eq!(loc, AbstractMemoryLocation::mock(&[1, 2, 3, 0], 8))
}
#[test]
fn test_extend() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
let extension = AbstractMemoryLocation::mock(&[4, 5, 6], 1);
loc.extend(extension, ByteSize::new(4));
assert_eq!(loc, AbstractMemoryLocation::mock(&[1, 2, 3, 4, 5, 6], 1));
}
#[test]
fn test_recursion_depth() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
assert_eq!(loc.recursion_depth(), 2);
let loc = AbstractMemoryLocation::mock(&[1], 4);
assert_eq!(loc.recursion_depth(), 0);
}
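// Editor's sketch (not part of the original commit): the byte size of a nested
// memory location is always the size of its innermost location.
#[test]
fn test_bytesize() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
assert_eq!(loc.bytesize(), ByteSize::new(4));
}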
}
......@@ -3,12 +3,17 @@ use crate::prelude::*;
use derive_more::Deref;
use std::sync::Arc;
mod location;
pub use location::AbstractLocation;
mod mem_location;
pub use mem_location::AbstractMemoryLocation;
/// An abstract identifier is used to identify an object or a value in an abstract state.
///
/// Since many program states can be represented by the same abstract state in data-flow analysis,
/// one sometimes needs a way to uniquely identify a variable or a memory object in all of the represented program states.
/// Abstract identifiers achieve this by identifying a *time*, i.e. a specific abstract state,
/// and a *location*, i.e. a recipe for abstracting a concrete value from any concrete state that is represented by the abstract state.
/// and a *location*, i.e. a recipe for computing a concrete value from any concrete state that is represented by the abstract state.
/// The value in question then serves as the identifier.
/// For example, a pointer may uniquely determine the memory object it is pointing to.
/// Or a value may represent the value of a variable at a certain time,
......@@ -20,15 +25,15 @@ use std::sync::Arc;
/// E.g. it may represent the union of all values at the specific *location* for each time the program point is visited during an execution trace
/// or it may only represent the value at the last time the program point was visited.
///
/// Alternatively one can also add path hints to an identifier to further distinguish points in time in an execution trace.
/// Alternatively, one can also add path hints to an identifier to further distinguish points in time in an execution trace.
/// Path hints are given as a possibly empty array of time identifiers.
/// To prevent infinitely long path hints, each time identifier is only allowed to appear at most once in the array.
/// The specific meaning of the path hints depends upon the use case.
///
/// An abstract identifier is given by a time identifier, a location identifier and a path hints array (containing time identifiers).
///
/// For the location identifier see `AbstractLocation`.
/// The time identifier is given by a `Tid`.
/// For the location identifier see [`AbstractLocation`].
/// The time identifier is given by a [`Tid`].
/// If it is the `Tid` of a basic block, then it describes the point in time *before* execution of the first instruction in the block.
/// If it is the `Tid` of a `Def` or `Jmp`, then it describes the point in time *after* the execution of the `Def` or `Jmp`.
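///
/// For illustration (editor's note, not part of the original commit):
/// an identifier whose time is the `Tid` of a basic block and whose location is `AbstractLocation::Register(RDI)`
/// denotes the value that the register RDI holds directly before the first instruction of that block is executed.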
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Deref)]
......@@ -161,136 +166,6 @@ impl std::fmt::Display for AbstractIdentifier {
}
}
/// An abstract location describes how to find the value of a variable in memory at a given time.
///
/// It is defined recursively, where the root is always a register.
/// This way only locations that the local state knows about are representable.
/// It is also impossible to accidentally describe circular references.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractLocation {
/// The location is given by a register.
Register(Variable),
/// The value itself is a constant address to global memory.
/// Note that the `size` is the size of the pointer and not the size
/// of the value residing at the specific address in global memory.
GlobalAddress {
/// The address in global memory.
address: u64,
/// The byte size of the address (not the pointed-to value!).
size: ByteSize,
},
/// The location is in memory.
/// One needs to follow the pointer in the given register
/// and then follow the abstract memory location inside the pointed to memory object
/// to find the actual memory location.
Pointer(Variable, AbstractMemoryLocation),
/// The location is in memory.
/// One needs to follow the pointer located at the given global address
/// and then follow the abstract memory location inside the pointed to memory object
/// to find the actual memory location.
GlobalPointer(u64, AbstractMemoryLocation),
}
impl std::fmt::Display for AbstractLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Register(var) => write!(formatter, "{}", var.name),
Self::GlobalAddress { address, size: _ } => write!(formatter, "0x{address:x}"),
Self::Pointer(var, location) => write!(formatter, "{}->{}", var.name, location),
Self::GlobalPointer(address, location) => {
write!(formatter, "0x{address:x}->{location}")
}
}
}
}
impl AbstractLocation {
/// Create an abstract location from a variable corresponding to a register.
/// This function returns an error if the variable is not a physical register.
pub fn from_var(variable: &Variable) -> Result<AbstractLocation, Error> {
if variable.is_temp {
return Err(anyhow!(
"Cannot create abstract location from temporary variables."
));
}
Ok(AbstractLocation::Register(variable.clone()))
}
/// Create an abstract location on the stack.
/// The returned location describes the value of the given `size`
/// at the given `offset` relative to the memory location that the `stack_register` is pointing to.
pub fn from_stack_position(
stack_register: &Variable,
offset: i64,
size: ByteSize,
) -> AbstractLocation {
let stack_pos = AbstractMemoryLocation::Location { offset, size };
AbstractLocation::Pointer(stack_register.clone(), stack_pos)
}
/// Create an abstract location representing an address pointing to global memory.
pub fn from_global_address(address: &Bitvector) -> AbstractLocation {
let size = address.bytesize();
let address = address
.try_to_u64()
.expect("Global address larger than 64 bits encountered.");
AbstractLocation::GlobalAddress { address, size }
}
/// Get the bytesize of the value represented by the abstract location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Register(var) => var.size,
Self::GlobalAddress { size, .. } => *size,
Self::Pointer(_, mem_location) | Self::GlobalPointer(_, mem_location) => {
mem_location.bytesize()
}
}
}
}
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
/// or an offset to a pointer to another memory location,
/// where the value can be found by (recursively) following the embedded `target` memory location.
///
/// The offset and size variables are given in bytes.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
/// A location inside the current memory object.
Location {
/// The offset with respect to the zero offset of the memory object where the value can be found.
offset: i64,
/// The size in bytes of the value that the memory location points to.
size: ByteSize,
},
/// A pointer which needs to be followed to get to the actual memory location
Pointer {
/// The offset inside the current memory object where the pointer can be found.
offset: i64,
/// The memory location inside the target of the pointer that this memory location points to.
target: Box<AbstractMemoryLocation>,
},
}
impl AbstractMemoryLocation {
/// Get the bytesize of the value represented by the abstract memory location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Location { size, .. } => *size,
Self::Pointer { target, .. } => target.bytesize(),
}
}
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Location { offset, .. } => write!(formatter, "({offset})"),
Self::Pointer { offset, target } => write!(formatter, "({offset})->{target}"),
}
}
}
#[cfg(test)]
pub mod tests {
use super::*;
......@@ -313,6 +188,20 @@ pub mod tests {
.unwrap(),
)
}
/// Mock an abstract identifier with the given TID name
/// and with a nested abstract location starting at the register given by `var`.
pub fn mock_nested(
tid: impl ToString,
var: &str,
offsets: &[i64],
size: impl Into<ByteSize>,
) -> Self {
AbstractIdentifier::new(
Tid::new(tid.to_string()),
AbstractLocation::mock(var, offsets, size),
)
}
}
#[test]
......
use super::*;
use crate::abstract_domain::{
AbstractDomain, AbstractIdentifier, AbstractLocation, BitvectorDomain, DataDomain, SizedDomain,
TryToBitvec,
AbstractDomain, AbstractIdentifier, AbstractLocation, BitvectorDomain, DataDomain,
RegisterDomain as _, SizedDomain, TryToBitvec,
};
use crate::utils::arguments;
use crate::{
......@@ -8,8 +9,6 @@ use crate::{
intermediate_representation::Project,
};
use super::*;
/// The context struct for the fixpoint algorithm.
pub struct Context<'a> {
graph: &'a Graph<'a>,
......@@ -35,11 +34,9 @@ impl<'a> Context<'a> {
/// Compute the return values of a call and return them (without adding them to the caller state).
///
/// The `callee_state` is the state of the callee at the return site.
/// The return values are expressed in the abstract IDs that are known to the caller.
/// If a return value may contain `Top` values,
/// i.e. values for which the origin is not known or not expressible in the abstract IDs known to the caller,
/// then a call- and register-specific abstract ID is added to the corresponding return value.
/// This ID is not added to the tracked IDs of the caller state.
/// Return values corresponding to callee parameters are expressed in the abstract IDs that are known to the caller.
/// Additionally, each return value also contains one abstract ID specific to the call instruction and return register.
/// This ID is used to track abstract location access patterns to the return value of the call in the caller.
fn compute_return_values_of_call<'cconv>(
&self,
caller_state: &mut State,
......@@ -74,8 +71,9 @@ impl<'a> Context<'a> {
/// Compute the return value for the given register.
///
/// The return value contains the IDs of all possible input IDs of the call that it may reference.
/// If the value may also contain a value not originating from the caller
/// then replace it with a call- and register-specific abstract ID.
/// Additionally, it also contains a call- and register-specific abstract ID,
/// which can be used to track the access patterns of the return value
/// independently of whether the return value only references caller values or not.
fn compute_return_register_value_of_call(
&self,
caller_state: &mut State,
......@@ -86,20 +84,18 @@ impl<'a> Context<'a> {
let callee_value = callee_state.get_register(return_register);
let mut return_value: DataDomain<BitvectorDomain> =
DataDomain::new_empty(return_register.size);
// For absolute or Top-values originating in the callee the Top-flag of the return value is set.
if callee_value.contains_top() || callee_value.get_absolute_value().is_some() {
return_value.set_contains_top_flag();
}
// For every relative value in the callee we check whether it is relative a parameter to the callee.
// If yes, we can compute it relative to the value of the parameter at the callsite and add the result to the return value.
// Else we just set the Top-flag of the return value to indicate some value originating in the callee.
for (callee_id, callee_offset) in callee_value.get_relative_values() {
if callee_id.get_tid() == callee_state.get_current_function_tid()
&& matches!(
callee_id.get_location(),
AbstractLocation::GlobalAddress { .. }
)
{
for (callee_id, callee_offset) in callee_value
.get_relative_values()
.iter()
.filter(|(callee_id, _)| callee_id.get_tid() == callee_state.get_current_function_tid())
{
if matches!(
callee_id.get_location(),
AbstractLocation::GlobalAddress { .. } | AbstractLocation::GlobalPointer(_, _)
) {
// Globals get the same ID as if the global pointer originated in the caller.
let caller_global_id = AbstractIdentifier::new(
caller_state.get_current_function_tid().clone(),
......@@ -109,13 +105,13 @@ impl<'a> Context<'a> {
let caller_global =
DataDomain::from_target(caller_global_id, callee_offset.clone());
return_value = return_value.merge(&caller_global);
} else if let Some(param_arg) = callee_state.get_arg_corresponding_to_id(callee_id) {
let param_value = caller_state.eval_parameter_arg(&param_arg);
} else {
let param_value = caller_state.eval_param_location(
callee_id.get_location(),
&self.project.runtime_memory_image,
);
let param_value = caller_state
.substitute_global_mem_address(param_value, &self.project.runtime_memory_image);
if param_value.contains_top() || param_value.get_absolute_value().is_some() {
return_value.set_contains_top_flag()
}
for (param_id, param_offset) in param_value.get_relative_values() {
let value = DataDomain::from_target(
param_id.clone(),
......@@ -123,19 +119,14 @@ impl<'a> Context<'a> {
);
return_value = return_value.merge(&value);
}
} else {
return_value.set_contains_top_flag();
}
}
// If the Top-flag of the return value was set we replace it with an ID representing the return register
// to indicate where the unknown value originated from.
if return_value.contains_top() {
let id = AbstractIdentifier::from_var(call.tid.clone(), return_register);
let value =
DataDomain::from_target(id, Bitvector::zero(return_register.size.into()).into());
return_value = return_value.merge(&value);
return_value.unset_contains_top_flag();
}
// Also add an ID representing the return register (regardless of what was added before).
// This ID is used to track abstract location access patterns in relation to the return value.
let id = AbstractIdentifier::from_var(call.tid.clone(), return_register);
let value =
DataDomain::from_target(id, Bitvector::zero(return_register.size.into()).into());
return_value = return_value.merge(&value);
return_value
}
......@@ -314,6 +305,34 @@ impl<'a> Context<'a> {
}
None
}
/// Adjust the stack register after a call to a function.
///
/// On x86, this removes the return address from the stack
/// (other architectures pass the return address in a register, not on the stack).
/// On other architectures the stack register retains the value it had before the call.
/// Note that in some calling conventions the callee also clears function parameters from the stack.
/// We do not detect and handle these cases yet.
fn adjust_stack_register_on_return_from_call(
&self,
state_before_call: &State,
new_state: &mut State,
) {
let stack_register = &self.project.stack_pointer_register;
let stack_pointer = state_before_call.get_register(stack_register);
match self.project.cpu_architecture.as_str() {
"x86" | "x86_32" | "x86_64" => {
let offset = Bitvector::from_u64(stack_register.size.into())
.into_truncate(apint::BitWidth::from(stack_register.size))
.unwrap();
new_state.set_register(
stack_register,
stack_pointer.bin_op(BinOpType::IntAdd, &offset.into()),
);
}
_ => new_state.set_register(stack_register, stack_pointer),
}
}
}
impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
......@@ -339,7 +358,8 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
new_state.set_register(var, value);
}
Def::Load { var, address } => {
new_state.set_deref_flag_for_input_ids_of_expression(address);
new_state.set_deref_flag_for_pointer_inputs_of_expression(address);
new_state.set_read_flag_for_input_ids_of_expression(address);
let address = new_state.substitute_global_mem_address(
state.eval(address),
&self.project.runtime_memory_image,
......@@ -352,10 +372,13 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
);
let value = new_state
.substitute_global_mem_address(value, &self.project.runtime_memory_image);
new_state.track_contained_ids(&value);
new_state.set_read_flag_for_contained_ids(&value);
new_state.set_register(var, value);
}
Def::Store { address, value } => {
new_state.set_mutable_deref_flag_for_input_ids_of_expression(address);
new_state.set_mutable_deref_flag_for_pointer_inputs_of_expression(address);
new_state.set_read_flag_for_input_ids_of_expression(address);
let address = new_state.substitute_global_mem_address(
state.eval(address),
&self.project.runtime_memory_image,
......@@ -420,6 +443,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
cconv,
&self.project.runtime_memory_image,
);
self.adjust_stack_register_on_return_from_call(state, &mut new_state);
return Some(new_state);
}
}
......@@ -427,6 +451,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
if let Some(extern_symbol) = self.project.program.term.extern_symbols.get(target) {
self.handle_extern_symbol_call(&mut new_state, extern_symbol, &call.tid);
if !extern_symbol.no_return {
self.adjust_stack_register_on_return_from_call(state, &mut new_state);
return Some(new_state);
}
} else if let Some(cconv) = self.project.get_standard_calling_convention() {
......@@ -435,6 +460,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
cconv,
&self.project.runtime_memory_image,
);
self.adjust_stack_register_on_return_from_call(state, &mut new_state);
return Some(new_state);
}
}
......@@ -462,9 +488,9 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
Some(cconv) => cconv,
None => return None,
};
let old_state = state_before_call.unwrap();
let state_before_call = state_before_call.unwrap();
let callee_state = state.unwrap();
let mut new_state = old_state.clone();
let mut new_state = state_before_call.clone();
// Merge parameter access patterns with the access patterns from the callee.
let parameters = callee_state.get_params_of_current_function();
new_state.merge_parameter_access(&parameters, &self.project.runtime_memory_image);
......@@ -480,8 +506,11 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
new_state.clear_non_callee_saved_register(&calling_convention.callee_saved_register);
// Now we can insert the return values into the state
for (var, value) in return_value_list {
// The return values may contain new IDs that have to be tracked.
new_state.track_contained_ids(&value);
new_state.set_register(var, value);
}
self.adjust_stack_register_on_return_from_call(state_before_call, &mut new_state);
Some(new_state)
}
......
use super::*;
use crate::{bitvec, variable};
use crate::{analysis::forward_interprocedural_fixpoint::Context as _, bitvec, def, variable};
#[test]
fn test_compute_return_values_of_call() {
......@@ -25,21 +25,27 @@ fn test_compute_return_values_of_call() {
&call,
);
let expected_val = DataDomain::from_target(
AbstractIdentifier::from_var(Tid::new("call_tid"), &variable!("RAX:8")),
AbstractIdentifier::mock("call_tid", "RAX", 8),
bitvec!("0x0:8").into(),
);
assert_eq!(return_values.iter().len(), 3);
assert_eq!(return_values[0], (&variable!("RAX:8"), expected_val));
// Test returning a known value.
let param_ref = DataDomain::from_target(
AbstractIdentifier::from_var(Tid::new("callee"), &variable!("RDI:8")),
AbstractIdentifier::mock("callee", "RDI", 8),
bitvec!("0x0:8").into(),
);
callee_state.set_register(&variable!("RAX:8"), param_ref);
let expected_val = DataDomain::from_target(
AbstractIdentifier::from_var(Tid::new("caller"), &variable!("RDI:8")),
bitvec!("0x0:8").into(),
);
let expected_val = DataDomain::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("caller", "RDI", 8),
bitvec!("0x0:8").into(),
),
(
AbstractIdentifier::mock("call_tid", "RAX", 8),
bitvec!("0x0:8").into(),
),
]));
let return_values = context.compute_return_values_of_call(
&mut caller_state,
&callee_state,
......@@ -69,7 +75,7 @@ fn test_call_stub_handling() {
assert_eq!(
state.get_params_of_current_function(),
vec![(
Arg::from_var(variable!("r0:4"), None),
&AbstractLocation::from_var(&variable!("r0:4")).unwrap(),
AccessPattern::new().with_read_flag()
)]
);
......@@ -97,14 +103,14 @@ fn test_call_stub_handling() {
assert_eq!(
params[0],
(
Arg::from_var(variable!("r0:4"), None),
&AbstractLocation::from_var(&variable!("r0:4")).unwrap(),
AccessPattern::new_unknown_access()
)
);
assert_eq!(
params[1],
(
Arg::from_var(variable!("r2:4"), None),
&AbstractLocation::from_var(&variable!("r2:4")).unwrap(),
AccessPattern::new()
.with_read_flag()
.with_dereference_flag()
......@@ -114,6 +120,51 @@ fn test_call_stub_handling() {
}
#[test]
fn test_stack_register_adjustment_after_call() {
let project = Project::mock_x64();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
let context = Context::new(&project, &graph);
let mut state_before_call = State::mock_x64("mock_fn");
let stack_id = AbstractIdentifier::mock("mock_fn", "RSP", 8);
state_before_call.set_register(
&variable!("RSP:8"),
DataDomain::from_target(stack_id.clone(), bitvec!("0x-20:8").into()),
);
let call_term = Term {
tid: Tid::new("call_tid"),
term: Jmp::CallInd {
target: Expression::Var(variable!("R15:8")),
return_: Some(Tid::new("return_")),
},
};
// Test adjustment on extern calls
let state_after_call = context
.update_call_stub(&state_before_call, &call_term)
.unwrap();
let adjusted_sp = state_after_call.get_register(&variable!("RSP:8"));
assert_eq!(
adjusted_sp,
DataDomain::from_target(stack_id.clone(), bitvec!("0x-18:8").into())
);
// Test adjustment on intern calls
let state_before_return = State::mock_x64("callee");
let state_after_call = context
.update_return(
Some(&state_before_return),
Some(&state_before_call),
&call_term,
&call_term,
&None,
)
.unwrap();
let adjusted_sp = state_after_call.get_register(&variable!("RSP:8"));
assert_eq!(
adjusted_sp,
DataDomain::from_target(stack_id.clone(), bitvec!("0x-18:8").into())
);
}
#[test]
fn test_get_global_mem_address() {
let project = Project::mock_arm32();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
......@@ -135,3 +186,82 @@ fn test_get_global_mem_address() {
let result = context.get_global_mem_address(&value);
assert!(result.is_none());
}
#[test]
fn test_generation_of_nested_ids_and_access_patterns_on_load_and_store() {
let project = Project::mock_arm32();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
let context = Context::new(&project, &graph);
let state = State::mock_arm32();
// Load from a tracked pointer value
let def = def!["load_instr: r0:4 := Load from r1:4 + 0x10:4"];
let new_state = context.update_def(&state, &def).unwrap();
let loaded_value = new_state.get_register(&variable!("r0:4"));
assert_eq!(
loaded_value,
DataDomain::from_target(
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("r1:4", &[16], 4)
),
bitvec!("0x0:4").into()
)
);
let params = new_state.get_params_of_current_function();
assert_eq!(params.len(), 1);
assert!(params.contains(&(
&AbstractLocation::mock("r1:4", &[], 4),
AccessPattern::new()
.with_read_flag()
.with_dereference_flag()
)));
// Load from an untracked register value
let def = def!["load_instr: r0:4 := Load from r8:4 + 0x10:4"];
let new_state = context.update_def(&state, &def).unwrap();
let loaded_value = new_state.get_register(&variable!("r0:4"));
assert!(loaded_value.is_top());
assert_eq!(new_state.get_params_of_current_function(), []);
// Store a tracked pointer value
let def = def!["store_instr: Store at r0:4 := r1:4 + 0x10:4"];
let new_state = context.update_def(&state, &def).unwrap();
let params = new_state.get_params_of_current_function();
assert_eq!(params.len(), 2);
assert!(params.contains(&(
&AbstractLocation::mock("r0:4", &[], 4),
AccessPattern::new()
.with_read_flag()
.with_mutably_dereferenced_flag()
)));
assert!(params.contains(&(
&AbstractLocation::mock("r1:4", &[], 4),
AccessPattern::new().with_read_flag()
)));
// Store to an untracked register value
let def = def!["store_instr: Store at r8:4 := r1:4 + 0x10:4"];
let new_state = context.update_def(&state, &def).unwrap();
let params = new_state.get_params_of_current_function();
assert_eq!(params.len(), 1);
assert!(params.contains(&(
&AbstractLocation::mock("r1:4", &[], 4),
AccessPattern::new().with_read_flag()
)));
}
#[test]
fn test_stack_param_loaded_but_not_accessed() {
// Regression test for the case that a stack parameter is loaded into a register but then not directly accessed.
// In such a case the stack parameter must still be proactively marked as read,
// because its later usage might simply be missed by the analysis.
let project = Project::mock_arm32();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
let context = Context::new(&project, &graph);
let state = State::mock_arm32();
let def = def!["r0:4 := Load from sp:4"];
let new_state = context.update_def(&state, &def).unwrap();
let fn_sig = new_state.get_params_of_current_function();
assert!(fn_sig.contains(&(
&AbstractLocation::mock("sp:4", &[0], 4),
AccessPattern::new().with_read_flag()
)));
}
......@@ -4,6 +4,7 @@
use super::AccessPattern;
use super::FunctionSignature;
use crate::abstract_domain::AbstractDomain;
use crate::abstract_domain::AbstractLocation;
use crate::abstract_domain::DomainMap;
use crate::abstract_domain::UnionMergeStrategy;
use crate::analysis::callgraph::get_program_callgraph;
......@@ -12,6 +13,7 @@ use crate::analysis::fixpoint::{Computation, Context};
use crate::intermediate_representation::*;
use crate::utils::log::LogMessage;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashSet;
/// The context object for propagating known global variables top-down in the call graph.
......@@ -31,7 +33,7 @@ impl<'a> Context for KnownGlobalsContext<'a> {
type EdgeLabel = &'a Term<Jmp>;
type NodeLabel = Tid;
/// The values at nodes are the sets of known addresses of global variables for that function.
type NodeValue = HashSet<u64>;
type NodeValue = BTreeSet<AbstractLocation>;
/// Get the call graph corresponding to the context object.
fn get_graph(&self) -> &CallGraph<'a> {
......@@ -39,10 +41,14 @@ impl<'a> Context for KnownGlobalsContext<'a> {
}
/// The merge function returns the union of the two input sets of global addresses.
fn merge(&self, set1: &HashSet<u64>, set2: &HashSet<u64>) -> HashSet<u64> {
fn merge(
&self,
set1: &BTreeSet<AbstractLocation>,
set2: &BTreeSet<AbstractLocation>,
) -> BTreeSet<AbstractLocation> {
let mut result = set1.clone();
for address in set2 {
result.insert(*address);
result.insert(address.clone());
}
result
}
......@@ -50,9 +56,9 @@ impl<'a> Context for KnownGlobalsContext<'a> {
/// We always propagate all known addresses of global variables along the edges of the call graph.
fn update_edge(
&self,
globals: &HashSet<u64>,
globals: &BTreeSet<AbstractLocation>,
_edge: petgraph::stable_graph::EdgeIndex,
) -> Option<HashSet<u64>> {
) -> Option<BTreeSet<AbstractLocation>> {
Some(globals.clone())
}
}
......@@ -66,7 +72,7 @@ impl<'a> Context for KnownGlobalsContext<'a> {
fn propagate_known_globals_top_down(
project: &Project,
fn_sigs: &BTreeMap<Tid, FunctionSignature>,
) -> BTreeMap<Tid, HashSet<u64>> {
) -> BTreeMap<Tid, BTreeSet<AbstractLocation>> {
let graph = get_program_callgraph(&project.program);
let context = KnownGlobalsContext::new(&graph);
let mut computation = Computation::new(context, None);
......@@ -96,12 +102,15 @@ struct GlobalsPropagationContext<'a> {
/// The reversed (!) call graph of the program.
graph: &'a CallGraph<'a>,
/// A map from TIDs of functions to the set of known addresses of global variables for that function.
known_globals: &'a BTreeMap<Tid, HashSet<u64>>,
known_globals: &'a BTreeMap<Tid, BTreeSet<AbstractLocation>>,
}
impl<'a> GlobalsPropagationContext<'a> {
/// Create a new [`GlobalsPropagationContext`] object.
fn new(graph: &'a CallGraph<'a>, known_globals: &'a BTreeMap<Tid, HashSet<u64>>) -> Self {
fn new(
graph: &'a CallGraph<'a>,
known_globals: &'a BTreeMap<Tid, BTreeSet<AbstractLocation>>,
) -> Self {
GlobalsPropagationContext {
graph,
known_globals,
......@@ -113,9 +122,9 @@ impl<'a> Context for GlobalsPropagationContext<'a> {
type EdgeLabel = &'a Term<Jmp>;
type NodeLabel = Tid;
/// The node values for the fixpoint computation
/// are maps from addresses of global variables known to the function represented by the node
/// are maps from locations of (possibly nested) global variables known to the function represented by the node
/// to the corresponding access pattern of the global variable.
type NodeValue = DomainMap<u64, AccessPattern, UnionMergeStrategy>;
type NodeValue = DomainMap<AbstractLocation, AccessPattern, UnionMergeStrategy>;
/// Get the (reversed!) call graph corresponding to the program
fn get_graph(&self) -> &CallGraph<'a> {
......@@ -144,7 +153,7 @@ impl<'a> Context for GlobalsPropagationContext<'a> {
.iter()
.filter_map(|(address, access_pattern)| {
if caller_known_globals.contains(address) {
Some((*address, *access_pattern))
Some((address.clone(), *access_pattern))
} else {
None
}
......@@ -161,7 +170,7 @@ impl<'a> Context for GlobalsPropagationContext<'a> {
/// that are known to the caller anyway (i.e. some function upwards in the call graph accesses the global variable).
fn propagate_globals_bottom_up(
project: &Project,
known_globals: &BTreeMap<Tid, HashSet<u64>>,
known_globals: &BTreeMap<Tid, BTreeSet<AbstractLocation>>,
fn_sigs: &mut BTreeMap<Tid, FunctionSignature>,
logs: &mut Vec<LogMessage>,
) {
......@@ -178,7 +187,7 @@ fn propagate_globals_bottom_up(
let globals = fn_sig
.global_parameters
.iter()
.map(|(address, access_pattern)| (*address, *access_pattern))
.map(|(address, access_pattern)| (address.clone(), *access_pattern))
.collect();
computation.set_node_value(node, globals);
}
......@@ -198,7 +207,7 @@ fn propagate_globals_bottom_up(
let fn_globals = &mut fn_sigs.get_mut(fn_tid).unwrap().global_parameters;
for (address, propagated_access_pattern) in propagated_globals.iter() {
fn_globals
.entry(*address)
.entry(address.clone())
.and_modify(|access_pattern| {
*access_pattern = access_pattern.merge(propagated_access_pattern);
})
......@@ -207,6 +216,48 @@ fn propagate_globals_bottom_up(
}
}
/// For all nested global parameters add the corresponding parent locations to the function signatures.
///
/// This ensures that subsequent analyses can safely assume
/// that for each nested parameter the parent location is also a parameter.
fn add_parents_of_known_nested_globals(
fn_sigs: &mut BTreeMap<Tid, FunctionSignature>,
generic_pointer_size: ByteSize,
) {
for fn_sig in fn_sigs.values_mut() {
let mut parents_to_add = HashSet::new();
for global in fn_sig.global_parameters.keys() {
parents_to_add.extend(get_parents_of_global(global, generic_pointer_size).into_iter());
}
for parent in parents_to_add {
fn_sig
.global_parameters
.entry(parent)
.and_modify(|pattern| pattern.set_dereference_flag())
.or_insert(
AccessPattern::new()
.with_read_flag()
.with_dereference_flag(),
);
}
}
}
/// Get all parent locations for the given, potentially nested, global location.
fn get_parents_of_global(
location: &AbstractLocation,
generic_pointer_size: ByteSize,
) -> Vec<AbstractLocation> {
if let AbstractLocation::GlobalPointer(_, _) = location {
let (parent, _offset) = location.get_parent_location(generic_pointer_size).unwrap();
let mut parents = get_parents_of_global(&parent, generic_pointer_size);
parents.push(parent);
parents
} else {
Vec::new()
}
}
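// Editor's illustration (not part of the original commit): for the nested global location
// `AbstractLocation::mock_global(0x2000, &[8, 16], 8)` and a pointer size of 8,
// the returned parents are `mock_global(0x2000, &[], 8)` and `mock_global(0x2000, &[8], 8)`,
// ordered from the root location downwards.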
/// Propagate the access patterns of global variables along the edges of the call graph of the given project.
///
/// The propagation works as follows:
......@@ -230,14 +281,22 @@ pub fn propagate_globals(
) {
let known_globals = propagate_known_globals_top_down(project, fn_sigs);
propagate_globals_bottom_up(project, &known_globals, fn_sigs, logs);
// Also add parent locations of propagated globals to the function signatures
add_parents_of_known_nested_globals(fn_sigs, project.get_pointer_bytesize());
}
#[cfg(test)]
pub mod tests {
use std::collections::HashMap;
use super::*;
/// Mock the abstract location of a global parameter.
fn mock_global(address: u64) -> AbstractLocation {
AbstractLocation::GlobalAddress {
address,
size: ByteSize::new(4),
}
}
#[test]
fn test_globals_propagation() {
let mut project = Project::mock_arm32();
......@@ -265,15 +324,16 @@ pub mod tests {
let mut sig_main = FunctionSignature::new();
sig_main
.global_parameters
.insert(1000, AccessPattern::new().with_read_flag());
.insert(mock_global(1000), AccessPattern::new().with_read_flag());
let mut sig_callee1 = FunctionSignature::new();
sig_callee1
.global_parameters
.insert(2000, AccessPattern::new().with_dereference_flag());
sig_callee1.global_parameters.insert(
mock_global(2000),
AccessPattern::new().with_dereference_flag(),
);
let mut sig_callee2 = FunctionSignature::new();
sig_callee2
.global_parameters
.insert(1000, AccessPattern::new_unknown_access());
.insert(mock_global(1000), AccessPattern::new_unknown_access());
let mut fn_sigs = BTreeMap::from([
(Tid::new("main"), sig_main),
(Tid::new("callee1"), sig_callee1),
......@@ -285,18 +345,53 @@ pub mod tests {
// Check propagation results
assert_eq!(
&fn_sigs[&Tid::new("main")].global_parameters,
&HashMap::from([(1000, AccessPattern::new_unknown_access())])
&BTreeMap::from([(mock_global(1000), AccessPattern::new_unknown_access())])
);
assert_eq!(
&fn_sigs[&Tid::new("callee1")].global_parameters,
&HashMap::from([
(1000, AccessPattern::new_unknown_access()),
(2000, AccessPattern::new().with_dereference_flag())
&BTreeMap::from([
(mock_global(1000), AccessPattern::new_unknown_access()),
(
mock_global(2000),
AccessPattern::new().with_dereference_flag()
)
])
);
assert_eq!(
&fn_sigs[&Tid::new("callee2")].global_parameters,
&HashMap::from([(1000, AccessPattern::new_unknown_access())])
&BTreeMap::from([(mock_global(1000), AccessPattern::new_unknown_access())])
);
}
#[test]
fn test_add_parent_locations() {
// A known nested global parameter whose parent locations are unknown can occur
// when a callee returns a nested global in a return register.
let location = AbstractLocation::mock_global(0x2000, &[8, 16], 8);
let globals = BTreeMap::from([(location, AccessPattern::new_unknown_access())]);
let fn_sig = FunctionSignature {
parameters: BTreeMap::new(),
global_parameters: globals,
};
let mut fn_sigs = BTreeMap::from([(Tid::new("func"), fn_sig)]);
add_parents_of_known_nested_globals(&mut fn_sigs, ByteSize::new(8));
let fn_sig = &fn_sigs[&Tid::new("func")];
let deref_pattern = AccessPattern::new()
.with_read_flag()
.with_dereference_flag();
assert_eq!(
fn_sig.global_parameters,
BTreeMap::from([
(
AbstractLocation::mock_global(0x2000, &[8, 16], 8),
AccessPattern::new_unknown_access()
),
(
AbstractLocation::mock_global(0x2000, &[8], 8),
deref_pattern
),
(AbstractLocation::mock_global(0x2000, &[], 8), deref_pattern),
])
);
}
}
......@@ -6,6 +6,11 @@
//! (is the value read, dereferenced for read access or dereferenced for write access).
//! Accesses to constant addresses that may correspond to global variables are also tracked.
//!
//! For values that are not directly tracked,
//! the algorithm tracks the abstract location that describes how the pointer to that value was computed.
//! This enables tracking of nested parameter objects
//! without actually tracking the memory objects where these objects are located.
//!
//! Known limitations of the analysis:
//! * The analysis is an overapproximation in the sense that it may generate more input parameters
//! than actually exist in some cases.
......@@ -17,16 +22,19 @@
//! * Parameters that are used as input values for variadic functions may be missed.
//! Some variadic functions are stubbed, i.e. parameter recognition should work for these.
//! But not all variadic functions are stubbed.
//! * If only a part (e.g. a single byte) of a stack parameter is accessed instead of the whole parameter
//! then a duplicate stack parameter may be generated.
//! A proper sanitation for this case is not yet implemented,
//! although error messages are generated if such a case is detected.
//! * For floating point parameter registers the base register is detected as a parameter,
//! although only a smaller sub-register is the actual parameter in many cases.
//! Also, if a function uses sub-registers of floating point registers as local variables,
//! the registers may be incorrectly flagged as input parameters.
//! * Tracking of nested parameters via their abstract locations is an unsound, heuristic approach,
//! as the analysis does not keep track of when such nested pointers might get overwritten.
//! Nevertheless, it should result in an overapproximation of parameters and their access patterns in most cases.
//! * The nesting depth for tracked nested parameters is limited
//! to avoid generating infinitely many parameters for recursive types like linked lists.
use crate::abstract_domain::AbstractDomain;
use crate::abstract_domain::AbstractLocation;
use crate::abstract_domain::AbstractMemoryLocation;
use crate::analysis::fixpoint::Computation;
use crate::analysis::forward_interprocedural_fixpoint::create_computation;
use crate::analysis::forward_interprocedural_fixpoint::GeneralizedContext;
......@@ -36,12 +44,10 @@ use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::log::LogMessage;
use std::collections::BTreeMap;
use std::collections::HashMap;
mod context;
use context::*;
mod state;
use itertools::Itertools;
use state::State;
mod access_pattern;
pub use access_pattern::AccessPattern;
......@@ -49,6 +55,11 @@ mod global_var_propagation;
use global_var_propagation::propagate_globals;
pub mod stubs;
/// The recursion depth limit for abstract locations to be tracked by the function signature analysis,
/// i.e. how many dereference operations an abstract location is allowed to contain
/// before the analysis stops tracking the location.
const POINTER_RECURSION_DEPTH_LIMIT: u64 = 2;
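// Editor's illustration (not part of the original commit): with a limit of 2,
// a location such as `RDI[0x10][0x8]` (recursion depth 2) should still be tracked,
// while adding one further dereference would exceed the limit
// and the resulting location would no longer be tracked.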
/// Generate the computation object for the fixpoint computation
/// and set the node values for all function entry nodes.
fn generate_fixpoint_computation<'a>(
......@@ -147,7 +158,7 @@ pub fn compute_function_signatures<'a>(
// Sanitize the parameters
let mut logs = Vec::new();
for (fn_tid, fn_sig) in fn_sig_map.iter_mut() {
let (info_log, debug_log) = fn_sig.sanitize(project);
let info_log = fn_sig.sanitize(project);
for log in info_log {
logs.push(
LogMessage::new_info(log)
......@@ -155,13 +166,6 @@ pub fn compute_function_signatures<'a>(
.source("Function Signature Analysis"),
)
}
for log in debug_log {
logs.push(
LogMessage::new_debug(log)
.location(fn_tid.clone())
.source("Function Signature Analysis"),
)
}
}
// Propagate globals in bottom-up direction in the call graph
propagate_globals(project, &mut fn_sig_map, &mut logs);
......@@ -174,30 +178,42 @@ pub fn compute_function_signatures<'a>(
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct FunctionSignature {
/// The parameters of the function together with their access patterns.
pub parameters: HashMap<Arg, AccessPattern>,
pub parameters: BTreeMap<AbstractLocation, AccessPattern>,
/// Values in writeable global memory accessed by the function.
/// Does not contain indirectly accessed values, e.g. values accessed by callees of this function.
pub global_parameters: HashMap<u64, AccessPattern>,
pub global_parameters: BTreeMap<AbstractLocation, AccessPattern>,
}
impl FunctionSignature {
/// Generate an empty function signature.
pub fn new() -> Self {
Self {
parameters: HashMap::new(),
global_parameters: HashMap::new(),
parameters: BTreeMap::new(),
global_parameters: BTreeMap::new(),
}
}
/// The returned number is the maximum of stack offset plus parameter size
/// taken over all stack parameters in the function signature.
pub fn get_stack_params_total_size(&self) -> i64 {
pub fn get_stack_params_total_size(&self, stack_register: &Variable) -> i64 {
let mut stack_params_total_size: i64 = 0;
for param in self.parameters.keys() {
if let Ok(param_offset) = param.eval_stack_offset() {
let param_upper_bound =
param_offset.try_to_i64().unwrap() + (u64::from(param.bytesize()) as i64);
stack_params_total_size = std::cmp::max(stack_params_total_size, param_upper_bound);
if let AbstractLocation::Pointer(var, mem_location) = param {
if var == stack_register {
match mem_location {
AbstractMemoryLocation::Location { offset, size } => {
stack_params_total_size = std::cmp::max(
stack_params_total_size,
offset + (u64::from(*size) as i64),
);
}
AbstractMemoryLocation::Pointer { offset, target: _ } => {
stack_params_total_size = std::cmp::max(
stack_params_total_size,
offset + (u64::from(stack_register.size) as i64),
);
}
}
}
}
}
stack_params_total_size
......@@ -206,21 +222,21 @@ impl FunctionSignature {
/// Merge the parameter list and the global parameter list of `self` with the given lists.
fn merge_parameter_lists(
&mut self,
params: &[(Arg, AccessPattern)],
global_params: &[(u64, AccessPattern)],
params: &[(&AbstractLocation, AccessPattern)],
global_params: &[(&AbstractLocation, AccessPattern)],
) {
for (arg, sig_new) in params {
if let Some(sig_self) = self.parameters.get_mut(arg) {
*sig_self = sig_self.merge(sig_new);
} else {
self.parameters.insert(arg.clone(), *sig_new);
self.parameters.insert((*arg).clone(), *sig_new);
}
}
for (address, sig_new) in global_params {
if let Some(sig_self) = self.global_parameters.get_mut(address) {
*sig_self = sig_self.merge(sig_new);
} else {
self.global_parameters.insert(*address, *sig_new);
self.global_parameters.insert((*address).clone(), *sig_new);
}
}
}
......@@ -239,172 +255,161 @@ impl FunctionSignature {
/// This may indicate an error in the analysis
/// as no proper sanitization pass is implemented for such cases yet.
/// * Merge intersecting stack parameters
fn sanitize(&mut self, project: &Project) -> (Vec<String>, Vec<String>) {
fn sanitize(&mut self, project: &Project) -> Vec<String> {
match project.cpu_architecture.as_str() {
"x86" | "x86_32" | "x86_64" => {
let return_addr_expr = Expression::Var(project.stack_pointer_register.clone());
let return_addr_arg = Arg::Stack {
address: return_addr_expr,
size: project.stack_pointer_register.size,
data_type: None,
};
self.parameters.remove(&return_addr_arg);
let return_addr_location = AbstractLocation::from_stack_position(
&project.stack_pointer_register,
0,
project.get_pointer_bytesize(),
);
self.parameters.remove(&return_addr_location);
}
_ => (),
}
let debug_messages = self.merge_intersecting_stack_parameters();
let info_messages = self.check_for_unaligned_stack_params(&project.stack_pointer_register);
(info_messages, debug_messages)
// FIXME: We check for intersecting non-nested stack parameters, but not for intersecting nested parameters.
// We should add a check for these to generate log messages (but probably without trying to merge such parameters).
self.merge_intersecting_stack_parameters(&project.stack_pointer_register);
self.check_for_unaligned_stack_params(&project.stack_pointer_register)
}
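// A sketch of the x86-specific step above, assuming `Project::mock_x64()` as used in the tests
// of this module: the stack slot at offset 0 relative to the stack pointer holds the return
// address and is therefore not reported as a parameter.
//
//     let project = Project::mock_x64();
//     let return_addr_location = AbstractLocation::from_stack_position(
//         &project.stack_pointer_register,
//         0,
//         project.get_pointer_bytesize(),
//     );
//     // After `sanitize`, `parameters` no longer contains `return_addr_location`.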
/// Return a log message for every unaligned stack parameter
/// and for every stack parameter whose size differs from the generic pointer size.
fn check_for_unaligned_stack_params(&self, stack_register: &Variable) -> Vec<String> {
let mut log_messages: Vec<String> = vec![];
for arg in self.parameters.keys() {
if let Arg::Stack { size, .. } = arg {
if *size != stack_register.size {
for param in self.parameters.keys() {
if let Some(offset) = get_offset_if_simple_stack_param(param, stack_register) {
if param.bytesize() != stack_register.size {
log_messages.push("Unexpected stack parameter size".into());
}
if let Ok(offset) = arg.eval_stack_offset() {
if offset.try_to_u64().unwrap_or(0) % u64::from(stack_register.size) != 0 {
log_messages.push("Unexpected stack parameter alignment".into());
}
if offset % u64::from(stack_register.size) as i64 != 0 {
log_messages.push("Unexpected stack parameter alignment".into());
}
}
}
log_messages
}
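// A sketch of a parameter that would trigger both messages above, assuming a 4-byte stack
// register and the `AbstractLocation::mock` test helper:
//
//     let param = AbstractLocation::mock("sp:4", &[6], 2);
//     // size 2 != 4       -> "Unexpected stack parameter size"
//     // offset 6 % 4 != 0 -> "Unexpected stack parameter alignment"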
/// Merges two intersecting stack parameters by joining them into one stack parameter.
/// Merges intersecting stack parameters by joining them into one stack parameter.
///
/// Two [Arg](crate::intermediate_representation::Arg) are merged if *all* of the following applies:
/// * parameters return `Ok` on `Arg::eval_stack_offset()`
/// * parameters intersect
fn merge_intersecting_stack_parameters(&mut self) -> Vec<String> {
let mut stack_parms = self
/// Only non-nested stack parameters are joined by this function.
fn merge_intersecting_stack_parameters(&mut self, stack_register: &Variable) {
let stack_params: BTreeMap<(i64, ByteSize), (AbstractLocation, AccessPattern)> = self
.parameters
.clone()
.into_iter()
.filter(|x| x.0.eval_stack_offset().is_ok())
.sorted_by(|a, b| {
match a
.0
.eval_stack_offset()
.unwrap()
.checked_sgt(&b.0.eval_stack_offset().unwrap())
.unwrap()
{
true => std::cmp::Ordering::Greater,
false => std::cmp::Ordering::Less,
}
.iter()
.filter_map(|(location, access_pattern)| {
get_offset_if_simple_stack_param(location, stack_register).map(|offset| {
(
(offset, location.bytesize()),
(location.clone(), *access_pattern),
)
})
})
.collect_vec();
let mut logs = vec![];
if !stack_parms.is_empty() {
let mut i = 0;
while i < stack_parms.len() - 1 {
if let Ok((merged_arg, log)) =
get_bounds_intersecting_stack_arg(&stack_parms[i].0, &stack_parms[i + 1].0)
{
self.parameters.remove(&stack_parms[i].0);
self.parameters.remove(&stack_parms[i + 1].0);
self.parameters.insert(
merged_arg.clone(),
stack_parms[i].1.merge(&stack_parms[i + 1].1),
);
.collect();
stack_parms.insert(
i,
(merged_arg, stack_parms[i].1.merge(&stack_parms[i + 1].1)),
let mut current_param: Option<(i64, i64, AccessPattern)> = None;
for ((offset, _), (param, access_pattern)) in stack_params.into_iter() {
self.parameters.remove(&param);
if let Some((cur_offset, cur_size, cur_access_pattern)) = current_param {
if offset < cur_offset + cur_size {
let merged_size = std::cmp::max(
cur_size,
offset - cur_offset + u64::from(param.bytesize()) as i64,
);
stack_parms.remove(i + 1);
stack_parms.remove(i + 1);
logs.extend(log);
let merged_access_pattern = cur_access_pattern.merge(&access_pattern);
current_param = Some((cur_offset, merged_size, merged_access_pattern));
} else {
i += 1;
self.parameters.insert(
generate_simple_stack_param(
cur_offset,
ByteSize::new(cur_size as u64),
stack_register,
),
cur_access_pattern,
);
current_param =
Some((offset, u64::from(param.bytesize()) as i64, access_pattern));
}
} else {
current_param = Some((offset, u64::from(param.bytesize()) as i64, access_pattern));
}
}
logs
if let Some((cur_offset, cur_size, cur_access_pattern)) = current_param {
self.parameters.insert(
generate_simple_stack_param(
cur_offset,
ByteSize::new(cur_size as u64),
stack_register,
),
cur_access_pattern,
);
}
}
}
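// A sketch of the merging step, mirroring `test_two_parameter_overlapping_merging` below:
// two overlapping stack parameters at offsets 0x1000 (8 bytes) and 0x1004 (8 bytes) are
// replaced by a single 12-byte parameter starting at offset 0x1000, i.e. by
// `generate_simple_stack_param(0x1000, ByteSize::new(12), stack_register)`.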
/// Merges two stack parameters and returns the merged [Arg](crate::intermediate_representation::Arg).
/// Also returns a message, if one argument is not a subset of the other one.
///
/// Assumes the provided `Arg` are ordered by equal or increasing stack offset.
///
/// Returns `Err` if `first_arg` or `second_arg`:
/// * are not `Arg::Stack` types
/// * return `Err` on `Arg::eval_stack_offset()`
/// * do not intersect
fn get_bounds_intersecting_stack_arg(
first_arg: &Arg,
second_arg: &Arg,
) -> Result<(Arg, Vec<String>), Error> {
if let (
Arg::Stack {
data_type: _,
size: first_size,
address: first_address,
},
Arg::Stack {
data_type: _,
size: second_size,
..
},
) = (first_arg, second_arg)
{
let first_arg_offset = first_arg.eval_stack_offset()?.try_to_u64()?;
let first_arg_size = u64::from(*first_size);
let second_arg_offset = second_arg.eval_stack_offset()?.try_to_u64()?;
let second_arg_size = u64::from(*second_size);
let mut logs = vec![];
let first_arg_upper_bound = first_arg_offset + first_arg_size;
// Check if they intersect
if first_arg_upper_bound > second_arg_offset {
let second_arg_upper_bound = second_arg_offset + second_arg_size;
impl Default for FunctionSignature {
fn default() -> Self {
Self::new()
}
}
// Check if subset
if second_arg_upper_bound <= first_arg_upper_bound
&& second_arg_offset >= first_arg_offset
{
// second arg is a subset, we just keep first_arg
return Ok((first_arg.clone(), logs));
}
if first_arg_upper_bound <= second_arg_upper_bound
&& first_arg_offset >= second_arg_offset
{
// first arg is a subset, we just keep second_arg
return Ok((second_arg.clone(), logs));
}
logs.push(
"Merged a stack parameter, that intersect another but is no subset".to_string(),
impl FunctionSignature {
/// Generate a compact JSON-representation of the function signature for pretty printing.
#[allow(dead_code)]
pub fn to_json_compact(&self) -> serde_json::Value {
let mut json_map = serde_json::Map::new();
let mut param_map = serde_json::Map::new();
for (param, pattern) in self.parameters.iter() {
param_map.insert(
format!("{param}"),
serde_json::Value::String(format!("{pattern}")),
);
}
json_map.insert(
"Parameters".to_string(),
serde_json::Value::Object(param_map),
);
let mut global_param_map = serde_json::Map::new();
for (param, pattern) in self.global_parameters.iter() {
global_param_map.insert(
format!("{param}"),
serde_json::Value::String(format!("{pattern}")),
);
let merged_arg = Arg::Stack {
address: first_address.clone(),
size: (second_arg_upper_bound - first_arg_offset).into(),
data_type: None,
};
return Ok((merged_arg, logs));
} else {
return Err(anyhow!("Args do not intersect"));
}
json_map.insert(
"Globals".to_string(),
serde_json::Value::Object(global_param_map),
);
serde_json::Value::Object(json_map)
}
Err(anyhow!("Args are no stack arguments"))
}
impl Default for FunctionSignature {
fn default() -> Self {
Self::new()
/// If the abstract location is a location on the stack
/// then return its offset relative to the zero position on the stack.
fn get_offset_if_simple_stack_param(
param: &AbstractLocation,
stack_register: &Variable,
) -> Option<i64> {
if let AbstractLocation::Pointer(var, mem_location) = param {
if var == stack_register {
if let AbstractMemoryLocation::Location { offset, .. } = mem_location {
return Some(*offset);
}
}
}
None
}
/// Generate an abstract location of a (non-nested) stack parameter.
fn generate_simple_stack_param(
offset: i64,
size: ByteSize,
stack_register: &Variable,
) -> AbstractLocation {
AbstractLocation::Pointer(
stack_register.clone(),
AbstractMemoryLocation::Location { offset, size },
)
}
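// A sketch showing that the two helpers above round-trip for non-nested stack locations
// (hypothetical values, using the `variable!` macro from the tests):
//
//     let sp = variable!("sp:4");
//     let param = generate_simple_stack_param(8, ByteSize::new(4), &sp);
//     assert_eq!(get_offset_if_simple_stack_param(&param, &sp), Some(8));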
#[cfg(test)]
......
......@@ -91,22 +91,21 @@ impl State {
/// Fill every return register that might be a pointer with a value that may point to any pointer-sized input ID
/// or to an output ID specific to the call and output register.
/// Non-pointer-sized output registers are only filled with an ID specific to the call and output register.
fn generate_return_values_for_call(
&mut self,
input_ids: &BTreeSet<AbstractIdentifier>,
return_args: &[Arg],
call_tid: &Tid,
) {
// Fill every output register with a value that may point to any pointer-sized input ID
// or to an output ID specific to the call and output register.
let generic_pointer_size = self.stack_id.unwrap_register().size;
let generic_pointer_size = self.stack_id.bytesize();
let generic_output_relative_values: BTreeMap<AbstractIdentifier, BitvectorDomain> =
input_ids
.iter()
.filter(|id| id.bytesize() == generic_pointer_size)
.map(|id| (id.clone(), BitvectorDomain::new_top(generic_pointer_size)))
.collect();
let mut generic_output = DataDomain::new_top(generic_pointer_size);
let mut generic_output = DataDomain::new_empty(generic_pointer_size);
generic_output.set_relative_values(generic_output_relative_values);
for output_arg in return_args {
......@@ -115,13 +114,15 @@ impl State {
data_type: _,
} = output_arg
{
let specific_id = AbstractIdentifier::from_var(call_tid.clone(), var);
self.add_id_to_tracked_ids(&specific_id);
let specific_target =
DataDomain::from_target(specific_id, Bitvector::zero(var.size.into()).into());
if var.size == generic_pointer_size {
let specific_target = DataDomain::from_target(
AbstractIdentifier::from_var(call_tid.clone(), var),
Bitvector::zero(var.size.into()).into(),
);
let output = generic_output.merge(&specific_target);
self.set_register(var, output);
} else {
self.set_register(var, specific_target);
}
}
}
......@@ -131,21 +132,21 @@ impl State {
///
/// A register (or stack position with positive offset) is considered a parameter
/// if any access to its value at function start is recorded in the corresponding object signature.
pub fn get_params_of_current_function(&self) -> Vec<(Arg, AccessPattern)> {
/// A nested location is considered a parameter if it was dereferenced during the function execution.
pub fn get_params_of_current_function(&self) -> Vec<(&AbstractLocation, AccessPattern)> {
let mut params = Vec::new();
for (id, access_pattern) in self.tracked_ids.iter() {
if id.get_tid() == self.get_current_function_tid() {
if let Ok(param_arg) = generate_param_arg_from_abstract_id(id) {
if access_pattern.is_accessed() {
params.push((param_arg, *access_pattern));
} else if matches!(id.get_location(), &AbstractLocation::Pointer { .. }) {
// This is a stack parameter.
// If it was only loaded into a register but otherwise not used, then the read-flag needs to be set.
let mut access_pattern = *access_pattern;
access_pattern.set_read_flag();
params.push((param_arg, access_pattern));
}
if self.is_register_based_param_id(id) {
if (id.get_location().recursion_depth() > 0 && access_pattern.is_dereferenced())
|| (id.get_location().recursion_depth() == 0 && access_pattern.is_accessed())
{
params.push((id.get_location(), *access_pattern));
}
} else if self.is_stack_based_param_id(id)
&& ((id.get_location().recursion_depth() > 1 && access_pattern.is_dereferenced())
|| (id.get_location().recursion_depth() == 1 && access_pattern.is_accessed()))
{
params.push((id.get_location(), *access_pattern));
}
}
params
......@@ -153,16 +154,26 @@ impl State {
/// Return a list of all potential global memory addresses
/// for which any type of access has been tracked by the current state.
pub fn get_global_mem_params_of_current_function(&self) -> Vec<(u64, AccessPattern)> {
pub fn get_global_mem_params_of_current_function(
&self,
) -> Vec<(&AbstractLocation, AccessPattern)> {
let mut global_params = Vec::new();
for (id, access_pattern) in self.tracked_ids.iter() {
if id.get_tid() == self.get_current_function_tid() {
match id.get_location() {
AbstractLocation::GlobalPointer(address, _)
| AbstractLocation::GlobalAddress { address, .. } => {
global_params.push((*address, *access_pattern));
let location = id.get_location();
match location {
AbstractLocation::GlobalAddress { .. } => {
if access_pattern.is_accessed() {
global_params.push((location, *access_pattern));
}
}
AbstractLocation::GlobalPointer(_, _) => {
// Nested parameters are only explicitly tracked if they are dereferenced.
if access_pattern.is_dereferenced() {
global_params.push((location, *access_pattern));
}
}
AbstractLocation::Pointer(_, _) | AbstractLocation::Register(_) => (),
_ => (),
}
}
}
......@@ -177,52 +188,155 @@ impl State {
/// Note that this may create new stack parameter objects for self.
pub fn merge_parameter_access(
&mut self,
params: &[(Arg, AccessPattern)],
params: &[(&AbstractLocation, AccessPattern)],
global_memory: &RuntimeMemoryImage,
) {
for (parameter, call_access_pattern) in params {
let param_value = self.eval_parameter_arg(parameter);
let param_value = self.eval_param_location(parameter, global_memory);
let param_value = self.substitute_global_mem_address(param_value, global_memory);
for (id, offset) in param_value.get_relative_values() {
if let Some(object) = self.tracked_ids.get_mut(id) {
*object = object.merge(call_access_pattern);
}
if *id == self.stack_id && call_access_pattern.is_dereferenced() {
if let Ok(offset) = offset.try_to_bitvec() {
// We also have to dereference the stack pointer and set the access flags of the pointed-to value
let value = self.load_unsized_value_from_stack(offset.clone());
for id in value.referenced_ids() {
if let Some(object) = self.tracked_ids.get_mut(id) {
// Since we do not know whether the value itself was also dereferenced in the callee,
// we have to assume some unknown access to the value.
object.set_unknown_access_flags();
} else if *id == self.stack_id {
// Add stack IDs only if they correspond to stack parameters, i.e. the offset is non-negative.
if let Ok(concrete_offset) = offset.try_to_bitvec() {
if !concrete_offset.sign_bit().to_bool() {
if let Some(stack_param) = self.generate_stack_param_id_if_nonexistent(
concrete_offset,
id.bytesize(),
) {
let object = self.tracked_ids.get_mut(&stack_param).unwrap();
*object = object.merge(call_access_pattern);
}
}
}
if call_access_pattern.is_mutably_dereferenced() {
// The stack value may have been overwritten by the call
if let Ok(offset) = offset.try_to_offset() {
self.stack.mark_interval_values_as_top(
offset,
offset,
ByteSize::new(1),
);
}
} else {
self.tracked_ids.insert(id.clone(), *call_access_pattern);
}
if *id == self.stack_id && call_access_pattern.is_mutably_dereferenced() {
// The stack value may have been overwritten by the call
if let Ok(offset) = offset.try_to_offset() {
self.stack
.mark_interval_values_as_top(offset, offset, ByteSize::new(1));
}
}
}
}
}
/// If the given abstract ID represents a possible parameter of the current function
/// then return an argument object corresponding to the parameter.
pub fn get_arg_corresponding_to_id(&self, id: &AbstractIdentifier) -> Option<Arg> {
if id.get_tid() == self.stack_id.get_tid() {
generate_param_arg_from_abstract_id(id).ok()
} else {
None
/// Evaluate the value of a parameter location from a call on the current state.
///
/// This function panics for global parameters.
pub fn eval_param_location(
&mut self,
param_location: &AbstractLocation,
global_memory: &RuntimeMemoryImage,
) -> DataDomain<BitvectorDomain> {
match param_location {
AbstractLocation::GlobalAddress { .. } | AbstractLocation::GlobalPointer(_, _) => {
panic!("Globals are not valid parameter locations.")
}
AbstractLocation::Register(var) => {
let value = self.get_register(var);
self.substitute_global_mem_address(value, global_memory)
}
AbstractLocation::Pointer(var, mem_location) => {
if var == self.stack_id.unwrap_register() {
self.eval_stack_pointer_param_location(mem_location, global_memory)
} else {
let value = self.get_register(var);
let value = self.substitute_global_mem_address(value, global_memory);
self.eval_mem_location_relative_value(value, mem_location)
}
}
}
}
/// Evaluate the value of a parameter location relative to the stack pointer position in the current state.
fn eval_stack_pointer_param_location(
&mut self,
mem_location: &AbstractMemoryLocation,
global_memory: &RuntimeMemoryImage,
) -> DataDomain<BitvectorDomain> {
let stack_register = self.stack_id.unwrap_register();
match mem_location {
AbstractMemoryLocation::Location { offset, size } => {
if let Some(stack_offset) =
self.get_offset_if_exact_stack_pointer(&self.get_register(stack_register))
{
let stack_offset = stack_offset
+ &Bitvector::from_i64(*offset).into_sign_resize(self.stack_id.bytesize());
self.load_value_from_stack(stack_offset, *size)
} else {
DataDomain::new_top(*size)
}
}
AbstractMemoryLocation::Pointer {
offset,
target: inner_mem_location,
} => {
if let Some(stack_offset) =
self.get_offset_if_exact_stack_pointer(&self.get_register(stack_register))
{
let stack_offset = stack_offset
+ &Bitvector::from_i64(*offset).into_sign_resize(self.stack_id.bytesize());
let value = self.load_value_from_stack(stack_offset, self.stack_id.bytesize());
let value = self.substitute_global_mem_address(value, global_memory);
self.eval_mem_location_relative_value(value, inner_mem_location)
} else {
DataDomain::new_top(inner_mem_location.bytesize())
}
}
}
}
/// Return `true` if the given ID is a register-based parameter ID,
/// i.e. neither a global parameter nor a stack-based parameter.
/// This function does not check access patterns for the ID.
fn is_register_based_param_id(&self, id: &AbstractIdentifier) -> bool {
if id.get_tid() != self.get_current_function_tid() || id == &self.stack_id {
return false;
}
// Filter out global IDs
if matches!(
id.get_location(),
AbstractLocation::GlobalAddress { .. } | AbstractLocation::GlobalPointer(_, _)
) {
return false;
}
// Filter out stack based IDs
if let AbstractLocation::Pointer(var, _) = id.get_location() {
if var == self.stack_id.unwrap_register() {
return false;
}
}
true
}
/// Return `true` if the given ID is a stack parameter ID or a nested stack parameter ID.
/// This function does not check access patterns for the ID.
fn is_stack_based_param_id(&self, id: &AbstractIdentifier) -> bool {
if id.get_tid() != self.get_current_function_tid() || id == &self.stack_id {
return false;
}
if let AbstractLocation::Pointer(register, mem_location) = id.get_location() {
if register == self.stack_id.unwrap_register() {
// ID is stack based, we have to filter out negative stack offsets.
match mem_location {
AbstractMemoryLocation::Location { offset, .. }
| AbstractMemoryLocation::Pointer { offset, .. } => {
if *offset < 0 {
return false;
}
}
}
return true;
}
}
false
}
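// A sketch of the classification, assuming a state whose current function TID is `mock_fn`
// (as in `State::mock_arm32()`) and the mock constructors used in the tests of this module:
//
//     // register-based parameter (no dereference):
//     let reg_id = AbstractIdentifier::mock("mock_fn", "r0", 4);
//     // stack-based parameter at a non-negative offset:
//     let stack_id = AbstractIdentifier::mock_nested("mock_fn", "sp:4", &[8], 4);
//     // local stack variable at a negative offset:
//     let local_id = AbstractIdentifier::mock_nested("mock_fn", "sp:4", &[-8], 4);
//
// Here `is_register_based_param_id` accepts `reg_id`, `is_stack_based_param_id` accepts
// `stack_id`, and `local_id` is rejected by both.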
}
/// Generate register arguments from a list of registers.
......@@ -233,25 +347,5 @@ fn generate_args_from_registers(registers: &[Variable]) -> Vec<Arg> {
.collect()
}
/// Generate an argument representing the location in the given abstract ID.
/// If the location is a pointer, it is assumed that the pointer points to the stack.
/// Returns an error if the location contains a second level of indirection
/// or if the location is associated to global memory.
fn generate_param_arg_from_abstract_id(id: &AbstractIdentifier) -> Result<Arg, Error> {
match id.get_location() {
AbstractLocation::Register(var) => Ok(Arg::from_var(var.clone(), None)),
AbstractLocation::Pointer(var, mem_location) => match mem_location {
AbstractMemoryLocation::Location { offset, size } => Ok(Arg::Stack {
address: Expression::Var(var.clone()).plus_const(*offset),
size: *size,
data_type: None,
}),
AbstractMemoryLocation::Pointer { .. } => {
Err(anyhow!("Memory location is not a stack offset."))
}
},
AbstractLocation::GlobalAddress { .. } | AbstractLocation::GlobalPointer(_, _) => {
Err(anyhow!("Global values are not parameters."))
}
}
}
#[cfg(test)]
pub mod tests;
use super::*;
use crate::{bitvec, variable};
#[test]
fn test_generate_return_values_for_call() {
let mut state = State::mock_arm32();
let input_ids = BTreeSet::from([
AbstractIdentifier::mock("mock_fn", "r0", 4),
AbstractIdentifier::mock("mock_fn", "big_register", 16),
]);
let return_args = [Arg::mock_register("r1", 4)];
let call_tid = Tid::new("call");
state.generate_return_values_for_call(&input_ids, &return_args, &call_tid);
assert!(state
.tracked_ids
.get(&AbstractIdentifier::mock("call", "r1", 4))
.is_some());
let expected_return_value = DataDomain::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("mock_fn", "r0", 4),
BitvectorDomain::new_top(ByteSize::new(4)),
),
(
AbstractIdentifier::mock("call", "r1", 4),
bitvec!("0x0:4").into(),
),
]));
assert_eq!(state.register[&variable!("r1:4")], expected_return_value);
}
#[test]
fn test_get_params_of_current_function() {
let mut state = State::mock_arm32();
let param_one = AbstractIdentifier::mock("mock_fn", "param_one", 4);
let param_two = AbstractIdentifier::mock("mock_fn", "param_two", 4);
let not_param = AbstractIdentifier::mock("call_tid", "r0", 4);
let non_param_stack_offset = AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[-8], 4),
);
let global_param = AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::GlobalAddress {
address: 0x1000,
size: ByteSize::new(4),
},
);
state
.tracked_ids
.insert(param_one.clone(), AccessPattern::new().with_read_flag());
state.tracked_ids.insert(
param_two.clone(),
AccessPattern::new().with_dereference_flag(),
);
state
.tracked_ids
.insert(not_param, AccessPattern::new_unknown_access());
state
.tracked_ids
.insert(non_param_stack_offset, AccessPattern::new_unknown_access());
state
.tracked_ids
.insert(global_param.clone(), AccessPattern::new_unknown_access());
let params = state.get_params_of_current_function();
let global_params = state.get_global_mem_params_of_current_function();
assert_eq!(
params,
Vec::from([
(
param_one.get_location(),
AccessPattern::new().with_read_flag()
),
(
param_two.get_location(),
AccessPattern::new().with_dereference_flag()
)
])
);
assert_eq!(
global_params,
Vec::from([(
global_param.get_location(),
AccessPattern::new_unknown_access()
)])
);
}
#[test]
fn test_merge_parameter_access() {
let mut state = State::mock_arm32();
let num_original_tracked_ids = state.tracked_ids.len();
let global_memory = RuntimeMemoryImage::mock();
state.register.insert(
variable!("sp:4"),
DataDomain::from_target(state.stack_id.clone(), bitvec!("0x-20:4").into()),
);
state.register.insert(
variable!("r1:4"),
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r0", 4),
bitvec!("0x2:4").into(),
),
);
let param_loc = AbstractLocation::mock("r0:4", &[], 4);
let stack_param_loc = AbstractLocation::mock("sp:4", &[0], 4);
let high_stack_param_loc = AbstractLocation::mock("sp:4", &[32], 4);
let nested_param_loc = AbstractLocation::mock("r1:4", &[6], 4);
let params = [
(&param_loc, AccessPattern::new_unknown_access()),
(&stack_param_loc, AccessPattern::new_unknown_access()),
(&high_stack_param_loc, AccessPattern::new_unknown_access()),
(&nested_param_loc, AccessPattern::new_unknown_access()),
];
state.merge_parameter_access(&params, &global_memory);
// Merge normal param access
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
param_loc.clone()
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
// Do not merge/track access to local stack variable
assert!(state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[-32], 4)
))
.is_none());
// Generate new stack param if necessary
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[0], 4)
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
// Track new nested parameter (in the right register)
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("r0:4", &[8], 4)
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
assert_eq!(state.tracked_ids.len(), num_original_tracked_ids + 2);
}
#[test]
fn test_eval_param_location() {
let mut state = State::mock_arm32();
let global_memory = RuntimeMemoryImage::mock();
// Param is a register
state
.register
.insert(variable!("r0:4"), bitvec!("0x123:4").into());
let value = state.eval_param_location(&AbstractLocation::mock("r0:4", &[], 4), &global_memory);
assert_eq!(value, bitvec!("0x123:4").into());
// Param is a nested register (and values in nested objects are not tracked)
state.register.insert(
variable!("r0:4"),
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r3", 4),
bitvec!("0x0:4").into(),
),
);
let value = state.eval_param_location(&AbstractLocation::mock("r0:4", &[8], 4), &global_memory);
assert_eq!(
value,
DataDomain::from_target(
AbstractIdentifier::new(Tid::new("mock_fn"), AbstractLocation::mock("r3:4", &[8], 4)),
bitvec!("0x0:4").into()
)
);
// Read the value at a stack offset
state
.stack
.insert_at_byte_index(bitvec!("0x42:4").into(), -8);
let value =
state.eval_param_location(&AbstractLocation::mock("sp:4", &[-8], 4), &global_memory);
assert_eq!(value, bitvec!("0x42:4").into());
// Read a nested pointer from the stack. The read has to remove one level of indirection if the stack value can be read.
state.stack.insert_at_byte_index(
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r0", 4),
bitvec!("0x5:4").into(),
),
-8,
);
let value = state.eval_param_location(
&AbstractLocation::mock("sp:4", &[-8, 2, 6], 4),
&global_memory,
);
assert_eq!(
value,
DataDomain::from_target(
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("r0:4", &[7, 6], 4)
),
bitvec!("0x0:4").into()
)
);
}
use super::State;
use super::POINTER_RECURSION_DEPTH_LIMIT;
use crate::abstract_domain::*;
use crate::intermediate_representation::*;
impl State {
/// Load the value at the given address.
///
/// Only values on the stack and in registers are tracked directly.
/// For all other values abstract location strings are generated
/// that track how the pointer to the value is computed.
///
/// This function does not set any access flags for input IDs in the address value.
pub fn load_value(
&mut self,
address: DataDomain<BitvectorDomain>,
size: ByteSize,
global_memory: Option<&RuntimeMemoryImage>,
) -> DataDomain<BitvectorDomain> {
let mut loaded_value = DataDomain::new_empty(size);
for (id, offset) in address.get_relative_values() {
loaded_value = loaded_value.merge(&self.load_value_via_id_and_offset(id, offset, size));
}
if let Some(global_address) = address.get_absolute_value() {
loaded_value =
loaded_value.merge(&self.load_global_address(global_address, size, global_memory));
}
if address.contains_top() {
loaded_value.set_contains_top_flag();
}
loaded_value
}
/// Load the value whose position is given by dereferencing the given ID and then adding an offset.
///
/// If the ID is the stack then this function actually loads the value at the given stack position.
/// Otherwise it only generates the abstract location of the value and returns it as a relative value.
fn load_value_via_id_and_offset(
&mut self,
id: &AbstractIdentifier,
offset: &BitvectorDomain,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
if *id == self.stack_id {
// Try to load a value from the stack (which may generate a new stack parameter)
match offset.try_to_bitvec() {
Ok(stack_offset) => self.load_value_from_stack(stack_offset, size),
Err(_) => DataDomain::new_top(size),
}
} else if let (true, Ok(constant_offset)) = (
id.get_location().recursion_depth() < POINTER_RECURSION_DEPTH_LIMIT,
offset.try_to_offset(),
) {
// Extend the abstract location string
let new_id = AbstractIdentifier::new(
id.get_tid().clone(),
id.get_location()
.clone()
.dereferenced(size, self.stack_id.bytesize())
.with_offset_addendum(constant_offset),
);
DataDomain::from_target(new_id, Bitvector::zero(size.into()).into())
} else {
// The abstract location string cannot be extended
DataDomain::new_top(size)
}
}
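// A sketch of the location-string extension, mirroring `test_load_nested_pointers` below:
// loading 4 bytes at offset 8 relative to an ID whose location dereferences `r0` at offset 4
// yields a value relative to a new ID whose location additionally dereferences offset 8,
// as long as the recursion depth limit is not exceeded:
//
//     let parent_id = AbstractIdentifier::mock_nested("mock_fn", "r0:4", &[4], 4);
//     let pointer = DataDomain::from_target(parent_id, bitvec!("0x8:4").into());
//     // `load_value(pointer, ByteSize::new(4), Some(&global_memory))` is relative to
//     // `AbstractIdentifier::mock_nested("mock_fn", "r0:4", &[4, 8], 4)`.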
/// Load a value from the global address space.
/// If the address is located in writeable global memory then generate a new abstract ID for the value
/// and return a value relative to the new ID.
fn load_global_address(
&mut self,
global_address: &BitvectorDomain,
size: ByteSize,
global_memory: Option<&RuntimeMemoryImage>,
) -> DataDomain<BitvectorDomain> {
if let (Ok(offset), Some(global_mem)) = (global_address.try_to_bitvec(), global_memory) {
match global_mem.read(&offset, size) {
Ok(Some(value)) => value.into(),
Ok(None) => {
let address = global_address.try_to_offset().unwrap() as u64;
let global_mem_location = AbstractLocation::GlobalAddress { address, size };
let global_mem_id = AbstractIdentifier::new(
self.get_current_function_tid().clone(),
global_mem_location,
);
DataDomain::from_target(global_mem_id, Bitvector::zero(size.into()).into())
}
Err(_) => DataDomain::new_top(size),
}
} else {
DataDomain::new_top(size)
}
}
/// Load the value at the given stack offset.
/// If the offset is non-negative a corresponding stack parameter is generated if necessary.
pub fn load_value_from_stack(
&mut self,
stack_offset: Bitvector,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
if !stack_offset.sign_bit().to_bool() {
// Stack offset is nonnegative, i.e. this is a stack parameter access.
self.get_stack_param(stack_offset, size)
} else {
self.stack.get(stack_offset, size)
}
}
/// Load a value of unknown bytesize at the given stack offset.
/// If the offset is non-negative, a corresponding stack parameter is generated if necessary.
///
/// One must be careful to not rely on the correctness of the bytesize of the returned value!
/// If the size of the value cannot be guessed from the contents of the stack,
/// then a size of 1 byte is assumed, which will be wrong in general!
pub fn load_unsized_value_from_stack(
&mut self,
offset: Bitvector,
) -> DataDomain<BitvectorDomain> {
if !offset.sign_bit().to_bool() {
// This is a pointer to a stack parameter of the current function
self.stack
.get_unsized(offset.clone())
.unwrap_or_else(|| self.get_stack_param(offset, ByteSize::new(1)))
} else {
self.stack
.get_unsized(offset)
.unwrap_or_else(|| DataDomain::new_top(ByteSize::new(1)))
}
}
/// If `address` is a stack offset, then write `value` onto the stack.
///
/// If address points to a stack parameter whose ID does not yet exist,
/// then the ID is generated and added to the tracked IDs.
///
/// This function does not set any access flags for input IDs of the given address or value.
pub fn write_value(
&mut self,
address: DataDomain<BitvectorDomain>,
value: DataDomain<BitvectorDomain>,
) {
if let Some(stack_offset) = self.get_offset_if_exact_stack_pointer(&address) {
if !stack_offset.sign_bit().to_bool() {
// We generate a new stack parameter object, but do not set any access flags,
// since the stack parameter is not accessed but overwritten.
let _ = self
.generate_stack_param_id_if_nonexistent(stack_offset.clone(), value.bytesize());
}
self.stack.add(value, stack_offset);
} else if let Some(stack_offset_domain) = address.get_relative_values().get(&self.stack_id)
{
if let Ok(stack_offset) = stack_offset_domain.try_to_bitvec() {
if !stack_offset.sign_bit().to_bool() {
// We generate a new stack parameter object, but do not set any access flags,
// since the stack parameter is not accessed but overwritten.
let _ = self.generate_stack_param_id_if_nonexistent(
stack_offset.clone(),
value.bytesize(),
);
}
let previous_value = self.stack.get(stack_offset.clone(), value.bytesize());
self.stack.add(previous_value.merge(&value), stack_offset);
} else {
self.stack.mark_all_values_as_top();
}
}
}
/// Get the value located at a positive stack offset.
/// This function panics if the address is a negative offset.
///
/// If no corresponding stack parameter ID exists for the value,
/// generate it and then return it as an unmodified stack parameter.
/// Otherwise just read the value at the given stack address.
fn get_stack_param(
&mut self,
address: Bitvector,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
assert!(!address.sign_bit().to_bool());
if let Some(param_id) = self.generate_stack_param_id_if_nonexistent(address.clone(), size) {
let stack_param =
DataDomain::from_target(param_id, Bitvector::zero(size.into()).into());
self.stack.add(stack_param.clone(), address);
stack_param
} else {
self.stack.get(address, size)
}
}
/// If the address is an exactly known pointer to the stack with a constant offset, then return the offset.
pub fn get_offset_if_exact_stack_pointer(
&self,
address: &DataDomain<BitvectorDomain>,
) -> Option<Bitvector> {
if let Some((target, offset)) = address.get_if_unique_target() {
if *target == self.stack_id {
return offset.try_to_bitvec().ok();
}
}
None
}
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::{bitvec, variable};
/// Mock an abstract ID representing the stack.
fn mock_stack_id() -> AbstractIdentifier {
AbstractIdentifier::from_var(Tid::new("mock_fn"), &variable!("sp:4"))
}
/// Mock an abstract ID of a stack parameter
fn mock_stack_param_id(offset: i64, size: u64) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::from_stack_position(
mock_stack_id().unwrap_register(),
offset,
ByteSize::new(size),
),
)
}
#[test]
fn test_get_offset_if_exact_stack_pointer() {
let state = State::mock_arm32();
let stack_pointer =
DataDomain::from_target(mock_stack_id(), Bitvector::from_i32(-10).into());
assert_eq!(
state.get_offset_if_exact_stack_pointer(&stack_pointer),
Some(Bitvector::from_i32(-10))
);
}
#[test]
fn test_get_stack_param() {
// Reading a previously non-existing stack parameter
let mut state = State::mock_arm32();
let stack_param = state.get_stack_param(bitvec!("0xc:4"), ByteSize::new(8));
let expected_stack_id = AbstractIdentifier::mock_nested("mock_fn", "sp:4", &[12], 8);
let expected_value =
DataDomain::from_target(expected_stack_id.clone(), bitvec!("0x0:8").into());
assert_eq!(&stack_param, &expected_value);
assert!(state.tracked_ids.contains_key(&expected_stack_id));
// Reading the stack parameter again. The position should still contain the stack parameter.
let stack_param = state.get_stack_param(bitvec!("0xc:4"), ByteSize::new(8));
assert_eq!(&stack_param, &expected_value);
// Reading the stack parameter after it has been overwritten with a value.
state
.stack
.insert_at_byte_index(bitvec!("0x2a:8").into(), 12);
let value = state.get_stack_param(bitvec!("0xc:4"), ByteSize::new(8));
assert_eq!(value, bitvec!("0x2a:8").into());
}
#[test]
fn test_store_and_load_from_stack() {
let mut state = State::mock_arm32();
let address = DataDomain::from_target(mock_stack_id(), bitvec!("-4:4").into());
let value: DataDomain<BitvectorDomain> = bitvec!("0x0:4").into();
// write and load a value to the current stack frame
state.write_value(address.clone(), value.clone());
assert_eq!(state.stack.iter().len(), 1);
assert_eq!(
state.stack.get(bitvec!("-4:4"), ByteSize::new(4)),
value.clone()
);
assert_eq!(state.load_value(address, ByteSize::new(4), None), value);
// Load a parameter register and check that the parameter gets generated
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x4:4").into());
let stack_param_id = mock_stack_param_id(4, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
assert_eq!(state.tracked_ids.iter().len(), 6);
assert_eq!(
state.load_value(address.clone(), ByteSize::new(4), None),
stack_param
);
assert_eq!(state.tracked_ids.iter().len(), 7);
assert_eq!(
state
.tracked_ids
.get(&stack_param_id)
.unwrap()
.is_accessed(),
false
); // The load method does not set access flags.
}
#[test]
fn test_load_unsized_from_stack() {
let mut state = State::mock_arm32();
// Load an existing stack param (generated by a sized load at the same address)
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x0:4").into());
let stack_param_id = mock_stack_param_id(0, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
state.load_value(address, ByteSize::new(4), None);
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x0:4").into());
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Load a non-existing stack param
let stack_param_id = mock_stack_param_id(4, 1);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:1").into());
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x4:4"));
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Unsized load from the current stack frame
let unsized_load = state.load_unsized_value_from_stack(bitvec!("-4:4"));
assert_eq!(unsized_load, DataDomain::new_top(ByteSize::new(1)));
}
#[test]
fn test_load_nested_pointers() {
let mut state = State::mock_arm32();
let global_memory = RuntimeMemoryImage::mock();
let parent_id = AbstractIdentifier::mock_nested("mock_fn", "r0:4", &[4], 4);
let pointer = DataDomain::from_target(parent_id.clone(), bitvec!("0x8:4").into());
let loaded_value = state.load_value(pointer, ByteSize::new(4), Some(&global_memory));
let expected_id = AbstractIdentifier::mock_nested("mock_fn", "r0:4", &[4, 8], 4);
let expected_value = DataDomain::from_target(expected_id.clone(), bitvec!("0x0:4").into());
assert_eq!(loaded_value, expected_value);
}
}
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use super::AccessPattern;
use super::POINTER_RECURSION_DEPTH_LIMIT;
use crate::abstract_domain::*;
use crate::intermediate_representation::*;
use crate::prelude::*;
use super::AccessPattern;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
/// Methods of [`State`] related to handling call instructions.
mod call_handling;
/// Methods of [`State`] related to handling load and store instructions.
mod memory_handling;
/// The state tracks knowledge about known register values,
/// known values on the stack, and access patterns of tracked variables.
......@@ -113,90 +114,6 @@ impl State {
self.stack_id.get_tid()
}
/// Load the value at the given address.
///
/// Only constant addresses on the stack are tracked.
/// Thus this function will always return a `Top` domain for any address
/// that may not be a stack address with constant offset.
///
/// This function does not set any access flags for input IDs in the address value.
pub fn load_value(
&mut self,
address: DataDomain<BitvectorDomain>,
size: ByteSize,
global_memory: Option<&RuntimeMemoryImage>,
) -> DataDomain<BitvectorDomain> {
if let Some(stack_offset) = self.get_offset_if_exact_stack_pointer(&address) {
self.load_value_from_stack(stack_offset, size)
} else if let (Ok(global_address), Some(global_mem)) =
(address.try_to_bitvec(), global_memory)
{
if let Ok(Some(value)) = global_mem.read(&global_address, size) {
value.into()
} else {
DataDomain::new_top(size)
}
} else {
DataDomain::new_top(size)
}
}
/// Load the value at the given stack offset.
/// If the offset is non-negative a corresponding stack parameter is generated if necessary.
fn load_value_from_stack(
&mut self,
stack_offset: Bitvector,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
if !stack_offset.sign_bit().to_bool() {
// Stack offset is nonnegative, i.e. this is a stack parameter access.
self.get_stack_param(stack_offset, size)
} else {
self.stack.get(stack_offset, size)
}
}
/// Load a value of unknown bytesize at the given stack offset.
/// If the offset is non-negative, a corresponding stack parameter is generated if necessary.
///
/// One must be careful to not rely on the correctness of the bytesize of the returned value!
/// If the size of the value cannot be guessed from the contents of the stack,
/// then a size of 1 byte is assumed, which will be wrong in general!
fn load_unsized_value_from_stack(&mut self, offset: Bitvector) -> DataDomain<BitvectorDomain> {
if !offset.sign_bit().to_bool() {
// This is a pointer to a stack parameter of the current function
self.stack
.get_unsized(offset.clone())
.unwrap_or_else(|| self.get_stack_param(offset, ByteSize::new(1)))
} else {
self.stack
.get_unsized(offset)
.unwrap_or_else(|| DataDomain::new_top(ByteSize::new(1)))
}
}
/// If `address` is a stack offset, then write `value` onto the stack.
///
/// If address points to a stack parameter whose ID does not yet exist,
/// then the ID is generated and added to the tracked IDs.
///
/// This function does not set any access flags for input IDs of the given address or value.
pub fn write_value(
&mut self,
address: DataDomain<BitvectorDomain>,
value: DataDomain<BitvectorDomain>,
) {
if let Some(stack_offset) = self.get_offset_if_exact_stack_pointer(&address) {
// We generate a new stack parameter object, but do not set any access flags,
// since the stack parameter is not accessed but overwritten.
if !stack_offset.sign_bit().to_bool() {
let _ = self
.generate_stack_param_id_if_nonexistent(stack_offset.clone(), value.bytesize());
}
self.stack.add(value, stack_offset);
}
}
/// If the stack parameter ID corresponding to the given stack offset does not exist
/// then generate it, add it to the list of tracked IDs, and return it.
fn generate_stack_param_id_if_nonexistent(
......@@ -228,40 +145,6 @@ impl State {
}
}
/// Get the value located at a positive stack offset.
///
/// If no corresponding stack parameter ID exists for the value,
/// generate it and then return it as an unmodified stack parameter.
/// Otherwise just read the value at the given stack address.
fn get_stack_param(
&mut self,
address: Bitvector,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
assert!(!address.sign_bit().to_bool());
if let Some(param_id) = self.generate_stack_param_id_if_nonexistent(address.clone(), size) {
let stack_param =
DataDomain::from_target(param_id, Bitvector::zero(size.into()).into());
self.stack.add(stack_param.clone(), address);
stack_param
} else {
self.stack.get(address, size)
}
}
/// If the address is an exactly known pointer to the stack with a constant offset, then return the offset.
pub fn get_offset_if_exact_stack_pointer(
&self,
address: &DataDomain<BitvectorDomain>,
) -> Option<Bitvector> {
if let Some((target, offset)) = address.get_if_unique_target() {
if *target == self.stack_id {
return offset.try_to_bitvec().ok();
}
}
None
}
/// Merges the access pattern of the given abstract identifier in `self` with the provided access pattern.
///
/// Does not add the identifier to the list of tracked identifiers if it is not already tracked in `self`.
......@@ -308,13 +191,56 @@ impl State {
size,
data_type: _,
} => {
self.set_deref_flag_for_input_ids_of_expression(address);
self.set_deref_flag_for_pointer_inputs_of_expression(address);
self.set_read_flag_for_input_ids_of_expression(address);
let address = self.eval(address);
self.load_value(address, *size, None)
}
}
}
/// Evaluate the value at the given memory location
/// where `value` represents the root pointer relative to which the memory location needs to be computed.
fn eval_mem_location_relative_value(
&mut self,
value: DataDomain<BitvectorDomain>,
mem_location: &AbstractMemoryLocation,
) -> DataDomain<BitvectorDomain> {
let target_size = mem_location.bytesize();
let mut eval_result = DataDomain::new_empty(target_size);
for (id, offset) in value.get_relative_values() {
let mut location = id.get_location().clone();
let mut mem_location = mem_location.clone();
match offset.try_to_offset() {
Ok(concrete_offset) => mem_location.add_offset_at_root(concrete_offset),
Err(_) => {
eval_result.set_contains_top_flag();
continue;
}
};
location.extend(mem_location, self.stack_id.bytesize());
if location.recursion_depth() <= POINTER_RECURSION_DEPTH_LIMIT {
eval_result = eval_result.merge(&DataDomain::from_target(
AbstractIdentifier::new(id.get_tid().clone(), location),
Bitvector::zero(target_size.into()).into(),
));
} else {
eval_result.set_contains_top_flag();
}
}
if value.contains_top() || value.get_absolute_value().is_some() {
eval_result.set_contains_top_flag();
}
eval_result
}
/// Add all relative IDs in `data` to the list of tracked IDs.
pub fn track_contained_ids(&mut self, data: &DataDomain<BitvectorDomain>) {
for id in data.referenced_ids() {
self.add_id_to_tracked_ids(id);
}
}
/// If the given expression is not an [`Expression::Var`] set the read flags
/// for all IDs that may be referenced when computing the value of the expression.
///
......@@ -341,22 +267,34 @@ impl State {
}
}
/// Set the read and dereferenced flag for every tracked ID
/// that may be referenced when computing the value of the expression.
pub fn set_deref_flag_for_input_ids_of_expression(&mut self, expression: &Expression) {
for register in expression.input_vars() {
/// Set the read and dereferenced flag for every tracked pointer ID
/// that may be referenced when computing the value of the given address expression.
pub fn set_deref_flag_for_pointer_inputs_of_expression(&mut self, expression: &Expression) {
for register in get_pointer_inputs_vars_of_address_expression(expression) {
self.set_deref_flag_for_contained_ids(&self.get_register(register));
}
}
/// Set the read and mutably dereferenced flag for every tracked ID
/// that may be referenced when computing the value of the expression.
pub fn set_mutable_deref_flag_for_input_ids_of_expression(&mut self, expression: &Expression) {
for register in expression.input_vars() {
/// Set the read and mutably dereferenced flag for every tracked pointer ID
/// that may be referenced when computing the value of the given address expression.
pub fn set_mutable_deref_flag_for_pointer_inputs_of_expression(
&mut self,
expression: &Expression,
) {
for register in get_pointer_inputs_vars_of_address_expression(expression) {
self.set_deref_mut_flag_for_contained_ids(&self.get_register(register));
}
}
/// Set the read flag for every tracked ID contained in the given value.
pub fn set_read_flag_for_contained_ids(&mut self, value: &DataDomain<BitvectorDomain>) {
for id in value.referenced_ids() {
if let Some(object) = self.tracked_ids.get_mut(id) {
object.set_read_flag();
}
}
}
/// Set the read and dereferenced flag for every tracked ID contained in the given value.
pub fn set_deref_flag_for_contained_ids(&mut self, value: &DataDomain<BitvectorDomain>) {
for id in value.referenced_ids() {
......@@ -413,6 +351,34 @@ impl State {
}
}
/// Get a list of possible pointer input variables for the given address expression.
///
/// Only addition, subtraction and bitwise AND, OR, XOR can have pointers as inputs.
/// All other subexpressions are assumed to only compute offsets.
fn get_pointer_inputs_vars_of_address_expression(expr: &Expression) -> Vec<&Variable> {
let mut input_vars = Vec::new();
match expr {
Expression::BinOp { op, lhs, rhs } => {
match op {
BinOpType::IntAdd | BinOpType::IntAnd | BinOpType::IntXOr | BinOpType::IntOr => {
// There could be a pointer on either of the sides
input_vars.extend(get_pointer_inputs_vars_of_address_expression(lhs));
input_vars.extend(get_pointer_inputs_vars_of_address_expression(rhs));
}
BinOpType::IntSub => {
// Only the left side could be a pointer
input_vars.extend(get_pointer_inputs_vars_of_address_expression(lhs));
}
_ => (),
}
}
Expression::Var(var) => input_vars.push(var),
_ => (),
}
input_vars
}
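// A sketch of the intended behaviour: for an address expression of the form
// `RSP:8 + 0x1000:8` (as used in the tests of this analysis) both operands are inspected,
// but only the variable `RSP` is reported as a potential pointer input, since constants and
// all other subexpressions are treated as offsets. For a subtraction `pointer - offset`,
// only the left-hand side is inspected.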
impl AbstractDomain for State {
/// Merge two states
fn merge(&self, other: &Self) -> Self {
......@@ -472,4 +438,4 @@ impl State {
}
#[cfg(test)]
mod tests;
pub mod tests;
......@@ -26,18 +26,6 @@ fn mock_stack_id() -> AbstractIdentifier {
AbstractIdentifier::from_var(Tid::new("mock_fn"), &variable!("sp:4"))
}
/// Mock an abstract ID of a stack parameter
fn mock_stack_param_id(offset: i64, size: u64) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::from_stack_position(
mock_stack_id().unwrap_register(),
offset,
ByteSize::new(size),
),
)
}
#[test]
fn test_new() {
let state = State::mock_arm32();
......@@ -65,61 +53,6 @@ fn test_new() {
}
#[test]
fn test_store_and_load_from_stack() {
let mut state = State::mock_arm32();
let address = DataDomain::from_target(mock_stack_id(), bitvec!("-4:4").into());
let value: DataDomain<BitvectorDomain> = bitvec!("0x0:4").into();
// write and load a value to the current stack frame
state.write_value(address.clone(), value.clone());
assert_eq!(state.stack.iter().len(), 1);
assert_eq!(
state.stack.get(bitvec!("-4:4"), ByteSize::new(4)),
value.clone()
);
assert_eq!(state.load_value(address, ByteSize::new(4), None), value);
// Load a parameter register and check that the parameter gets generated
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x4:4").into());
let stack_param_id = mock_stack_param_id(4, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
assert_eq!(state.tracked_ids.iter().len(), 6);
assert_eq!(
state.load_value(address.clone(), ByteSize::new(4), None),
stack_param
);
assert_eq!(state.tracked_ids.iter().len(), 7);
assert_eq!(
state
.tracked_ids
.get(&stack_param_id)
.unwrap()
.is_accessed(),
false
); // The load method does not set access flags.
}
#[test]
fn test_load_unsized_from_stack() {
let mut state = State::mock_arm32();
// Load an existing stack param (generated by a sized load at the same address)
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x0:4").into());
let stack_param_id = mock_stack_param_id(0, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
state.load_value(address, ByteSize::new(4), None);
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x0:4").into());
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Load a non-existing stack param
let stack_param_id = mock_stack_param_id(4, 1);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:1").into());
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x4:4"));
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Unsized load from the current stack frame
let unsized_load = state.load_unsized_value_from_stack(bitvec!("-4:4"));
assert_eq!(unsized_load, DataDomain::new_top(ByteSize::new(1)));
}
#[test]
fn test_eval() {
let mut state = State::mock_arm32();
// Test the eval method
......
use super::*;
use crate::{expr, variable};
use crate::variable;
/// Mock the abstract location of a global parameter.
fn mock_global_x64(address: u64) -> AbstractLocation {
AbstractLocation::GlobalAddress {
address,
size: ByteSize::new(8),
}
}
impl FunctionSignature {
/// Create a mock x64 function signature with 2 parameters, one of which is accessed mutably,
/// one mutably accessed global variable at address 0x2000
......@@ -7,40 +16,45 @@ impl FunctionSignature {
pub fn mock_x64() -> FunctionSignature {
let mut write_access_pattern = AccessPattern::new();
write_access_pattern.set_unknown_access_flags();
let parameters = HashMap::from_iter([
let parameters = BTreeMap::from_iter([
(
Arg::from_var(variable!("RDI:8"), None),
AbstractLocation::from_var(&variable!("RDI:8")).unwrap(),
AccessPattern::new(),
),
(
Arg::from_var(variable!("RSI:8"), None),
AbstractLocation::from_var(&variable!("RSI:8")).unwrap(),
write_access_pattern,
),
]);
FunctionSignature {
parameters,
global_parameters: HashMap::from([
(0x2000, AccessPattern::new_unknown_access()),
(0x3000, AccessPattern::new().with_dereference_flag()),
global_parameters: BTreeMap::from([
(mock_global_x64(0x2000), AccessPattern::new_unknown_access()),
(
mock_global_x64(0x3000),
AccessPattern::new().with_dereference_flag(),
),
]),
}
}
}
fn mock_stack_arg(address: Expression, size: u64) -> Arg {
Arg::Stack {
address,
size: size.into(),
data_type: None,
}
fn mock_stack_arg(offset: i64, size: u64) -> AbstractLocation {
AbstractLocation::Pointer(
variable!("RSP:8"),
AbstractMemoryLocation::Location {
offset,
size: ByteSize::new(size),
},
)
}
#[test]
fn test_two_parameter_overlapping_merging() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(expr!("RSP:8 + 0x1000:8"), 8);
let stack_parm_2 = mock_stack_arg(expr!("RSP:8 + 0x1004:8"), 8);
let stack_parm_1 = mock_stack_arg(0x1000, 8);
let stack_parm_2 = mock_stack_arg(0x1004, 8);
func_sig
.parameters
......@@ -51,13 +65,10 @@ fn test_two_parameter_overlapping_merging() {
assert_eq!(
func_sig.sanitize(&proj),
(
vec!["Unexpected stack parameter size".to_string()],
vec!["Merged a stack parameter, that intersect another but is no subset".to_string()]
)
vec!["Unexpected stack parameter size".to_string()],
);
let mut expected_function_sig = FunctionSignature::mock_x64();
let expected_stack_arg = mock_stack_arg(expr!("RSP:8 + 0x1000:8"), 12);
let expected_stack_arg = mock_stack_arg(0x1000, 12);
expected_function_sig
.parameters
......@@ -69,10 +80,10 @@ fn test_two_parameter_overlapping_merging() {
fn test_merging_multiple_parameters() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(expr!("RSP:8 + 0x1000:8"), 8);
let stack_parm_2 = mock_stack_arg(expr!("RSP:8 + 0x1000:8"), 1);
let stack_parm_3 = mock_stack_arg(expr!("RSP:8 + 0x1007:8"), 1);
let stack_parm_4 = mock_stack_arg(expr!("RSP:8 + 0x1008:8"), 8);
let stack_parm_1 = mock_stack_arg(0x8, 8);
let stack_parm_2 = mock_stack_arg(0x8, 1);
let stack_parm_3 = mock_stack_arg(0xf, 1);
let stack_parm_4 = mock_stack_arg(0x10, 8);
func_sig.parameters.extend([
(stack_parm_1.clone(), AccessPattern::new()),
......@@ -80,7 +91,8 @@ fn test_merging_multiple_parameters() {
(stack_parm_3, AccessPattern::new()),
(stack_parm_4.clone(), AccessPattern::new()),
]);
assert_eq!((vec![], vec![]), func_sig.sanitize(&proj));
let logs = func_sig.sanitize(&proj);
assert_eq!(logs, Vec::<String>::new());
let mut expected_function_sig = FunctionSignature::mock_x64();
expected_function_sig.parameters.extend([
......@@ -93,8 +105,8 @@ fn test_merging_multiple_parameters() {
fn test_log_messages() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(expr!("RSP:8 + 0x1001:8"), 8);
let stack_parm_2 = mock_stack_arg(expr!("RSP:8 + 0x1007:8"), 4);
let stack_parm_1 = mock_stack_arg(0x1001, 8);
let stack_parm_2 = mock_stack_arg(0x1007, 4);
func_sig.parameters.extend([
(stack_parm_1.clone(), AccessPattern::new()),
......@@ -103,13 +115,10 @@ fn test_log_messages() {
let logs = func_sig.sanitize(&proj);
assert_eq!(
(
vec![
"Unexpected stack parameter size".to_string(),
"Unexpected stack parameter alignment".to_string()
],
vec!["Merged a stack parameter, that intersect another but is no subset".to_string()]
),
vec![
"Unexpected stack parameter size".to_string(),
"Unexpected stack parameter alignment".to_string()
],
logs
);
}
......@@ -6,50 +6,52 @@ impl<'a> Context<'a> {
/// to the value that represents it in the caller.
///
/// For parameter IDs this is the value of the parameter on function call.
/// For IDs of objects created in the callee it is the ID together with a path hint given by the call TID.
/// For IDs of objects created in the callee it is the ID itself.
/// For other IDs (including the callee stack frame ID) it is a `Top` value,
/// i.e. the value of the ID should be unknown to the caller.
///
/// Note that this function assumes that callee-originating IDs have already been renamed
/// to the name they should represent in the caller beforehand.
pub fn create_callee_id_to_caller_data_map(
&self,
state_before_call: &State,
state_before_return: &State,
call_tid: &Tid,
) -> BTreeMap<AbstractIdentifier, Data> {
let stack_register = &self.project.stack_pointer_register;
let mut id_map = BTreeMap::new();
let callee_tid = state_before_return.get_fn_tid();
let callee_fn_sig = self.fn_signatures.get(callee_tid).unwrap();
for param in callee_fn_sig.parameters.keys() {
let param_id = AbstractIdentifier::from_arg(callee_tid, param);
if let Ok(param_value) =
state_before_call.eval_parameter_arg(param, &self.project.runtime_memory_image)
{
if let Some(callee_fn_sig) = self.fn_signatures.get(callee_tid) {
for param in callee_fn_sig.parameters.keys() {
let param_id = AbstractIdentifier::new(callee_tid.clone(), param.clone());
let param_value = state_before_call
.eval_abstract_location(param, &self.project.runtime_memory_image);
id_map.insert(param_id, param_value);
} else {
id_map.insert(param_id, Data::new_top(param.bytesize()));
}
for global_param in callee_fn_sig.global_parameters.keys() {
let global_param_id =
AbstractIdentifier::new(callee_tid.clone(), global_param.clone());
let global_param_value = state_before_call
.eval_abstract_location(global_param, &self.project.runtime_memory_image);
id_map.insert(global_param_id, global_param_value);
}
}
for object_id in state_before_return.memory.get_all_object_ids() {
if object_id.get_tid() != callee_tid || !object_id.get_path_hints().is_empty() {
// Object is neither a parameter object nor the stack frame of the callee.
if let Ok(new_object_id) = object_id.with_path_hint(call_tid.clone()) {
id_map.insert(
id_map.insert(
object_id.clone(),
Data::from_target(
object_id,
Data::from_target(
new_object_id,
Bitvector::zero(stack_register.size.into()).into(),
),
);
} else {
id_map.insert(object_id, Data::new_top(stack_register.size));
}
Bitvector::zero(stack_register.size.into()).into(),
),
);
}
}
id_map.insert(
state_before_return.stack_id.clone(),
Data::new_top(stack_register.size),
);
// Also insert the global memory IDs to the map.
// Also insert the global memory ID to the map.
id_map.insert(
state_before_return.get_global_mem_id(),
Data::from_target(
......@@ -61,6 +63,55 @@ impl<'a> Context<'a> {
id_map
}
/// Create a map that maps callee IDs to the value assigned to it in the caller after a return instruction.
///
/// This is *not* the map used in the internal `update_return` handling.
/// Instead, the created map combines several ID renaming steps used internally into one renaming map.
/// The map is intended for use in other analyses depending on the PointerInference,
/// but not in the PointerInference itself.
pub fn create_full_callee_id_to_caller_data_map(
&self,
state_before_call: &State,
state_before_return: &State,
call_tid: &Tid,
) -> BTreeMap<AbstractIdentifier, Data> {
let cconv = &self.project.program.term.subs[state_before_return.get_fn_tid()]
.term
.calling_convention;
let cconv = match self.project.get_specific_calling_convention(cconv) {
Some(cconv) => cconv,
None => {
return BTreeMap::new();
}
};
let callee_fn_sig = self
.fn_signatures
.get(state_before_return.get_fn_tid())
.unwrap();
let mut minimized_return_state = state_before_return.clone();
minimized_return_state.minimize_before_return_instruction(callee_fn_sig, cconv);
let mut location_to_data_map =
minimized_return_state.map_abstract_locations_to_pointer_data(call_tid);
minimized_return_state.filter_location_to_pointer_data_map(&mut location_to_data_map);
let mut replacement_map =
minimized_return_state.get_id_to_unified_ids_replacement_map(&location_to_data_map);
minimized_return_state.merge_mem_objects_with_unique_abstract_location(call_tid);
let unified_to_caller_replacement_map =
self.create_callee_id_to_caller_data_map(state_before_call, &minimized_return_state);
// In the ID-to-unified-ID map replace parameter IDs with their corresponding values in the caller.
for value in replacement_map.values_mut() {
value.replace_all_ids(&unified_to_caller_replacement_map);
}
// Add all parameter IDs to the map
let callee_tid = state_before_return.get_fn_tid();
for (id, value) in unified_to_caller_replacement_map {
if id.get_tid() == callee_tid && id.get_path_hints().is_empty() {
replacement_map.insert(id, value);
}
}
replacement_map
}
/// Create a map from the parameter IDs (of the function that the given state corresponds to)
/// to the corresponding access patterns.
pub fn create_id_to_access_pattern_map(
......@@ -71,7 +122,11 @@ impl<'a> Context<'a> {
let fn_tid = state.get_fn_tid();
let callee_fn_sig = self.fn_signatures.get(fn_tid).unwrap();
for (param, access_pattern) in &callee_fn_sig.parameters {
let param_id = AbstractIdentifier::from_arg(fn_tid, param);
let param_id = AbstractIdentifier::new(fn_tid.clone(), param.clone());
id_to_access_pattern_map.insert(param_id.clone(), access_pattern);
}
for (param, access_pattern) in &callee_fn_sig.global_parameters {
let param_id = AbstractIdentifier::new(fn_tid.clone(), param.clone());
id_to_access_pattern_map.insert(param_id.clone(), access_pattern);
}
......
......@@ -97,6 +97,18 @@ impl<'a> Context<'a> {
}
}
/// If `result` is an `Err`, log the error message as an error message through the `log_collector` channel.
pub fn log_error(&self, result: Result<(), Error>, location: Option<&Tid>) {
if let Err(err) = result {
let mut log_message =
LogMessage::new_error(format!("{err}")).source("Pointer Inference");
if let Some(loc) = location {
log_message = log_message.location(loc.clone());
};
let _ = self.log_collector.send(LogThreadMsg::Log(log_message));
}
}
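// Usage sketch (illustrative, not part of this commit): forward a failed operation together
// with the TID of the term being processed, e.g.
// `self.log_error(Err(anyhow!("unexpected stack pointer value")), Some(&call_term.tid));`
// where `call_term` stands for whatever term triggered the error.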
/// Detect and log if the stack pointer is not as expected when returning from a function.
fn detect_stack_pointer_information_loss_on_return(
&self,
......@@ -299,7 +311,7 @@ impl<'a> Context<'a> {
/// Merge global memory data from the callee global memory object to the caller global memory object
/// if the corresponding global variable is marked as mutable in both the caller and callee.
fn merge_global_mem_from_callee(
fn merge_non_nested_global_mem_from_callee(
&self,
caller_state: &mut State,
callee_global_mem: &AbstractObject,
......@@ -360,21 +372,45 @@ fn compute_call_return_global_var_access_intervals(
caller_fn_sig: &FunctionSignature,
callee_fn_sig: &FunctionSignature,
) -> BTreeMap<u64, AccessPattern> {
let caller_mut_indices: BTreeSet<u64> = caller_fn_sig
.global_parameters
.iter()
.filter_map(|(location, access_pattern)| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
if access_pattern.is_mutably_dereferenced() {
return Some(*address);
}
}
None
})
.collect();
let callee_mut_indices: BTreeSet<u64> = callee_fn_sig
.global_parameters
.iter()
.filter_map(|(location, access_pattern)| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
if access_pattern.is_mutably_dereferenced() {
return Some(*address);
}
}
None
})
.collect();
let mut intervals: BTreeMap<u64, AccessPattern> = caller_fn_sig
.global_parameters
.keys()
.chain(callee_fn_sig.global_parameters.keys())
.map(|index| (*index, AccessPattern::new()))
.filter_map(|location| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
Some((*address, AccessPattern::new()))
} else {
None
}
})
.collect();
for (index, access_pattern) in intervals.iter_mut() {
if let (Some(caller_pattern), Some(callee_pattern)) = (
caller_fn_sig.global_parameters.get(index),
callee_fn_sig.global_parameters.get(index),
) {
if caller_pattern.is_mutably_dereferenced() && callee_pattern.is_mutably_dereferenced()
{
access_pattern.set_mutably_dereferenced_flag();
}
if caller_mut_indices.contains(index) && callee_mut_indices.contains(index) {
access_pattern.set_mutably_dereferenced_flag();
}
}
......
......@@ -173,6 +173,12 @@ fn update_return() {
&variable!("RDX:8"),
Data::from_target(new_id("callee", "RDI"), bv(0)),
);
state_before_return
.memory
.get_object_mut(&callee_created_heap_id)
.unwrap()
.set_value(bitvec!("0x42:8").into(), &bitvec!("0x6:8").into())
.unwrap();
let state_before_call = State::new(&variable!("RSP:8"), Tid::new("caller"), BTreeSet::new());
let mut state_before_call = context
......@@ -210,10 +216,8 @@ fn update_return() {
assert_eq!(
state.get_register(&variable!("RAX:8")),
Data::from_target(
callee_created_heap_id
.with_path_hint(Tid::new("call_callee"))
.unwrap(),
bv(16).into()
AbstractIdentifier::mock("call_callee", "RAX", 8),
bv(0).into()
)
);
assert_eq!(
......@@ -234,15 +238,12 @@ fn update_return() {
.get_all_object_ids()
.get(&param_obj_id)
.is_some());
assert!(state
let value = state
.memory
.get_all_object_ids()
.get(
&callee_created_heap_id
.with_path_hint(Tid::new("call_callee"))
.unwrap()
)
.is_some());
.get_object(&AbstractIdentifier::mock("call_callee", "RAX", 8))
.unwrap()
.get_value(bitvec!("0x-a:8"), ByteSize::new(8));
assert_eq!(value, bitvec!("0x42:8").into());
}
#[test]
......@@ -297,6 +298,20 @@ fn get_unsound_caller_ids() {
new_id("callee", "RSI"),
Data::from_target(new_id("caller", "RAX"), bv(2).into()),
);
callee_id_to_caller_data_map.insert(
AbstractIdentifier::new(
Tid::new("callee"),
AbstractLocation::mock_global(0x2000, &[], 8),
),
bv(42).into(),
);
callee_id_to_caller_data_map.insert(
AbstractIdentifier::new(
Tid::new("callee"),
AbstractLocation::mock_global(0x3000, &[], 8),
),
bv(42).into(),
);
let callee_tid = Tid::new("callee");
let callee_state = State::from_fn_sig(
......@@ -395,7 +410,7 @@ fn test_merge_global_mem_from_callee() {
Data::from_target(caller_state.get_global_mem_id(), bitvec!("0:8").into()),
)]);
context.merge_global_mem_from_callee(
context.merge_non_nested_global_mem_from_callee(
&mut caller_state,
callee_global_mem,
&replacement_map,
......
......@@ -124,6 +124,20 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
return None;
}
};
let callee_fn_sig = match self.fn_signatures.get(state_before_return.get_fn_tid()) {
Some(fn_sig) => fn_sig,
None => {
let location = state_before_return.get_fn_tid();
self.log_error(
Err(anyhow!(
"Internal function {} has no function signature.",
location
)),
Some(location),
);
return None;
}
};
// Detect possible information loss on the stack pointer and report it.
if let Err(err) = self.detect_stack_pointer_information_loss_on_return(state_before_return)
......@@ -133,19 +147,19 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
// or a call to a non-returning extern function that was not marked as non-returning.
return None;
}
// Minimize the callee state and replace callee-originating object IDs whenever possible.
let mut state_before_return = state_before_return.clone();
state_before_return.minimize_before_return_instruction(callee_fn_sig, cconv);
state_before_return.merge_mem_objects_with_unique_abstract_location(&call_term.tid);
// Create a mapping of IDs from the callee to IDs that should be used in the caller.
let id_map = self.create_callee_id_to_caller_data_map(
state_before_call,
state_before_return,
&call_term.tid,
);
let id_map =
self.create_callee_id_to_caller_data_map(state_before_call, &state_before_return);
let callee_id_to_access_pattern_map =
self.create_id_to_access_pattern_map(state_before_return);
self.create_id_to_access_pattern_map(&state_before_return);
// Identify caller IDs for which the callee analysis may be unsound for this callsite.
let unsound_caller_ids =
self.get_unsound_caller_ids(&id_map, &callee_id_to_access_pattern_map);
// TODO: Unsound caller IDs occur too often to log the cases right now.
// FIXME: Unsound caller IDs occur too often to log the cases right now.
// We have to investigate the reasons for it (maybe too many parameters on the caller stack?)
// and find better heuristics to prevent them poisoning the analysis soundness.
......@@ -167,11 +181,7 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
continue;
}
if *callee_object_id == state_before_return.get_global_mem_id() {
let callee_fn_sig = self
.fn_signatures
.get(state_before_return.get_fn_tid())
.unwrap();
self.merge_global_mem_from_callee(
self.merge_non_nested_global_mem_from_callee(
&mut state_after_return,
callee_object,
&id_map,
......@@ -196,11 +206,9 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
.is_none()
{
// Add a callee object that does not correspond to a parameter to the caller or the stack of the callee.
if let Ok(new_object_id) = callee_object_id.with_path_hint(call_term.tid.clone()) {
state_after_return
.memory
.insert(new_object_id, callee_object);
}
state_after_return
.memory
.insert(callee_object_id.clone(), callee_object);
} else {
// The callee object is a parameter object.
self.log_debug(
......@@ -217,7 +225,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
state_after_return
.memory
.assume_arbitrary_writes_to_object(id, &BTreeSet::new());
// TODO: We should specify more possible reference targets.
}
// Cleanup
state_after_return.remove_unreferenced_objects();
......
......@@ -49,6 +49,8 @@ pub use state::State;
/// The version number of the analysis.
const VERSION: &str = "0.2";
/// The recursion limit for nested pointers.
const POINTER_RECURSION_DEPTH_LIMIT: u64 = 2;
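// For orientation (illustrative, not part of the commit, using the test-only mock helper):
// with a limit of 2, a location such as `AbstractLocation::mock("RSP:8", &[-32, 16], 8)`
// (a pointer dereferenced twice) is still tracked, while deeper nesting is cut off
// by the `recursion_depth()` checks in the state implementation.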
/// The name and version number of the "Memory" CWE check.
pub static CWE_MODULE: crate::CweModule = crate::CweModule {
......@@ -315,7 +317,7 @@ impl<'a> PointerInference<'a> {
}) => (state_before_call, state_before_return),
_ => continue,
};
let id_to_data_map = context.create_callee_id_to_caller_data_map(
let id_to_data_map = context.create_full_callee_id_to_caller_data_map(
state_before_call,
state_before_return,
call_tid,
......
......@@ -87,11 +87,23 @@ impl AbstractObject {
inner.is_unique = false;
}
/// Mark the abstract object as unique, i.e. it represents exactly one memory object.
pub fn mark_as_unique(&mut self) {
let inner = Arc::make_mut(&mut self.inner);
inner.is_unique = true;
}
/// Get the type of the memory object.
pub fn get_object_type(&self) -> Option<ObjectType> {
self.inner.type_
}
/// Set the type of the memory object.
pub fn set_object_type(&mut self, object_type: Option<ObjectType>) {
let inner = Arc::make_mut(&mut self.inner);
inner.type_ = object_type;
}
/// Overwrite the values in `self` with those in `other`
/// under the assumption that the zero offset in `other` corresponds to the offset `offset_other` in `self`.
///
......
......@@ -5,7 +5,6 @@ use super::*;
impl AbstractObjectList {
/// Get a reference to the object corresponding to the given ID.
#[cfg(test)]
pub fn get_object(&self, id: &AbstractIdentifier) -> Option<&AbstractObject> {
self.objects.get(id)
}
......@@ -64,6 +63,11 @@ impl AbstractObjectList {
self.objects.iter()
}
/// Get an iterator of mutable references over the abstract objects in `self`.
pub fn iter_objects_mut(&mut self) -> impl Iterator<Item = &mut AbstractObject> {
self.objects.values_mut()
}
/// Get the number of objects that are currently tracked.
#[cfg(test)]
pub fn get_num_objects(&self) -> usize {
......
......@@ -137,6 +137,26 @@ impl AbstractObjectList {
None => Err(anyhow!("Object ID not contained in object list.")),
}
}
/// Only retain those memory objects for which the provided predicate returns `true`.
/// All memory objects for which the predicate returns `false` are removed from `self`.
pub fn retain<F>(&mut self, f: F)
where
F: FnMut(&AbstractIdentifier, &mut AbstractObject) -> bool,
{
self.objects.retain(f)
}
/// Remove an object from the object list.
/// Returns the removed object if its ID was indeed contained in the object list.
pub fn remove(&mut self, id: &AbstractIdentifier) -> Option<AbstractObject> {
self.objects.remove(id)
}
/// Return `true` if the object list contains a memory object indexed by the given ID.
pub fn contains(&self, id: &AbstractIdentifier) -> bool {
self.objects.contains_key(id)
}
}
impl AbstractDomain for AbstractObjectList {
......
......@@ -78,7 +78,7 @@ impl State {
self.write_to_address(address, &self.eval(value), global_memory)
}
/// Evaluate the given load instruction and return the data read on success.
/// Evaluate the given address expression and return the data read from that address on success.
pub fn load_value(
&self,
address: &Expression,
......@@ -86,6 +86,17 @@ impl State {
global_memory: &RuntimeMemoryImage,
) -> Result<Data, Error> {
let address = self.eval(address);
self.load_value_from_address(&address, size, global_memory)
}
/// Load the value at the given address from the state and return the data read on success.
/// If the address contains more than one possible pointer target, the results for all possible targets are merged.
pub fn load_value_from_address(
&self,
address: &Data,
size: ByteSize,
global_memory: &RuntimeMemoryImage,
) -> Result<Data, Error> {
let mut result = if let Some(global_address) = address.get_absolute_value() {
if let Ok(address_bitvector) = global_address.try_to_bitvec() {
match global_memory.read(&address_bitvector, size) {
......@@ -109,7 +120,7 @@ impl State {
} else {
Data::new_empty(size)
};
result = result.merge(&self.memory.get_value(&address, size));
result = result.merge(&self.memory.get_value(address, size));
if let Ok(offset) = result.try_to_offset() {
if result.bytesize() == self.stack_id.bytesize()
......@@ -217,6 +228,81 @@ impl State {
}
}
/// Evaluate the value of the given abstract location on the current state.
/// If the actual value cannot be determined (e.g. because an intermediate pointer evaluates to `Top`),
/// then a `Top` value is returned.
pub fn eval_abstract_location(
&self,
location: &AbstractLocation,
global_memory: &RuntimeMemoryImage,
) -> Data {
match location {
AbstractLocation::GlobalAddress { address, size } => {
assert_eq!(*size, self.stack_id.bytesize());
Data::from_target(
self.get_global_mem_id().clone(),
Bitvector::from_u64(*address)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
)
}
AbstractLocation::GlobalPointer(address, nested_location) => {
let pointer = Data::from_target(
self.get_global_mem_id().clone(),
Bitvector::from_u64(*address)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
self.eval_abstract_memory_location(nested_location, pointer, global_memory)
}
AbstractLocation::Register(var) => self.get_register(var),
AbstractLocation::Pointer(var, nested_location) => {
let pointer = self.get_register(var);
self.eval_abstract_memory_location(nested_location, pointer, global_memory)
}
}
}
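// Example (illustrative, informal notation): evaluating `AbstractLocation::mock("RSP:8", &[-32, 16], 8)`
// follows the pointer stored at offset -32 of the stack frame and then reads 8 bytes at offset 16
// inside the pointed-to object; see `test_eval_abstract_location` in the tests for a concrete run.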
/// Evaluate the value of the given abstract memory location on the current state
/// with the given `root_pointer` as the start point of the location description.
fn eval_abstract_memory_location(
&self,
location: &AbstractMemoryLocation,
root_pointer: Data,
global_memory: &RuntimeMemoryImage,
) -> Data {
match location {
AbstractMemoryLocation::Location { offset, size } => {
let pointer = root_pointer.add_offset(
&Bitvector::from_i64(*offset)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
self.load_value_from_address(&pointer, *size, global_memory)
.unwrap_or_else(|_| Data::new_top(*size))
}
AbstractMemoryLocation::Pointer { offset, target } => {
let pointer = root_pointer.add_offset(
&Bitvector::from_i64(*offset)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
match self.load_value_from_address(
&pointer,
self.stack_id.bytesize(),
global_memory,
) {
Ok(nested_root_pointer) => self.eval_abstract_memory_location(
target,
nested_root_pointer,
global_memory,
),
Err(_) => Data::new_top(location.bytesize()),
}
}
}
}
/// Check whether the given `def` could result in a memory access through a NULL pointer.
///
/// If no NULL pointer dereference is detected then `Ok(false)` is returned.
......
......@@ -2,6 +2,7 @@
use super::*;
use crate::analysis::pointer_inference::object::AbstractObject;
use crate::analysis::pointer_inference::POINTER_RECURSION_DEPTH_LIMIT;
impl State {
/// Search (recursively) through all memory objects referenced by the given IDs
......@@ -89,4 +90,390 @@ impl State {
}
Ok(())
}
/// Create an ID renaming map that maps IDs in `self` to the values representing them
/// after unifying and renaming non-parameter objects in `self` in preparation for returning to a caller.
pub fn get_id_to_unified_ids_replacement_map(
&self,
location_to_data_map: &BTreeMap<AbstractIdentifier, Data>,
) -> BTreeMap<AbstractIdentifier, Data> {
let mut id_replacement_map = BTreeMap::new();
for (unified_id, value) in location_to_data_map.iter() {
for (old_id, offset) in value.get_relative_values() {
if old_id.get_tid() != self.get_fn_tid() || !old_id.get_path_hints().is_empty() {
let mut pointer_to_unified_id =
Data::from_target(unified_id.clone(), offset.un_op(UnOpType::Int2Comp));
pointer_to_unified_id.set_contains_top_flag();
id_replacement_map.insert(old_id.clone(), pointer_to_unified_id);
}
}
}
for value in self.register.values() {
for id in value.get_relative_values().keys() {
if id.get_tid() == self.get_fn_tid() && id.get_path_hints().is_empty() {
// This is a parameter ID
id_replacement_map.insert(
id.clone(),
Data::from_target(id.clone(), Bitvector::zero(id.bytesize().into()).into()),
);
}
}
}
for object_id in self.memory.get_all_object_ids() {
for id in self.memory.get_referenced_ids_overapproximation(&object_id) {
if id.get_tid() == self.get_fn_tid() && id.get_path_hints().is_empty() {
// This is a parameter ID
id_replacement_map.insert(
id.clone(),
Data::from_target(id.clone(), Bitvector::zero(id.bytesize().into()).into()),
);
}
}
}
id_replacement_map
}
/// Replace all IDs pointing to non-parameter objects.
/// - IDs contained in the values of the location-to-data map are replaced by the corresponding key (with adjusted offset).
///   The `Top` flag is also set, because the pointers may point to other objects.
/// - All other non-parameter IDs are replaced with `Top`.
pub fn replace_ids_to_non_parameter_objects(
&mut self,
location_to_data_map: &BTreeMap<AbstractIdentifier, Data>,
) {
let id_replacement_map = self.get_id_to_unified_ids_replacement_map(location_to_data_map);
// Now use the replacement map to replace IDs
for value in self.register.values_mut() {
value.replace_all_ids(&id_replacement_map);
}
for object in self.memory.iter_objects_mut() {
object.replace_ids(&id_replacement_map);
}
// Clean up registers left as Top after the replacement
self.register.retain(|_var, value| !value.is_top());
}
/// Explicitly insert pointers to unified objects at the locations specified by their abstract location.
///
/// Note that these are the only locations where we (by definition) know
/// that the pointer is unique, i.e. we do not have to set a Top flag.
/// However, we still have to add targets to parameter objects, absolute values, or the `Top` flag
/// to the pointer if the original pointer value contained them,
/// because these targets were not merged into the unified object.
pub fn insert_pointers_to_unified_objects(
&mut self,
location_to_data_map: &BTreeMap<AbstractIdentifier, Data>,
call_tid: &Tid,
) {
for (unified_id, old_value) in location_to_data_map.iter() {
// Compute the pointer (which may also contain pointers to parameter objects and absolute values).
let mut pointer_to_unified_object = Data::from_target(
unified_id.clone(),
Bitvector::zero(unified_id.bytesize().into()).into(),
);
for (old_id, old_offset) in old_value.get_relative_values() {
if old_id.get_tid() == self.get_fn_tid() && old_id.get_path_hints().is_empty() {
pointer_to_unified_object = pointer_to_unified_object
.merge(&Data::from_target(old_id.clone(), old_offset.clone()));
}
}
pointer_to_unified_object.set_absolute_value(old_value.get_absolute_value().cloned());
if old_value.contains_top() {
pointer_to_unified_object.set_contains_top_flag()
}
// Insert the pointer at the corresponding abstract location
match unified_id.get_location() {
AbstractLocation::Register(var) => {
self.set_register(var, pointer_to_unified_object)
}
unified_location => {
let (parent_location, offset_in_parent_object) = unified_location
.get_parent_location(self.stack_id.bytesize())
.unwrap();
let parent_tid = if unified_id.get_tid() == call_tid {
call_tid.clone()
} else {
// We know that the parent is a parameter object, since we cannot track nested pointers in parameter objects.
self.stack_id.get_tid().clone()
};
let parent_object = self
.memory
.get_object_mut(&AbstractIdentifier::new(parent_tid, parent_location))
.unwrap();
parent_object
.set_value(
pointer_to_unified_object,
&Bitvector::from_i64(offset_in_parent_object)
.into_resize_signed(self.stack_id.bytesize())
.into(),
)
.unwrap();
}
}
}
}
/// Merge the target objects that are non-parameter objects for the given location to data mapping.
/// Return the results as a location to memory object map.
///
/// This function is a step in the process of unifying callee-originating memory objects on a return instruction.
/// The memory objects are also marked as unique, because they will represent a unique object in the caller.
pub fn generate_target_objects_for_new_locations(
&self,
location_to_data_map: &BTreeMap<AbstractIdentifier, Data>,
) -> BTreeMap<AbstractIdentifier, AbstractObject> {
let mut location_to_object_map: BTreeMap<AbstractIdentifier, AbstractObject> =
BTreeMap::new();
for (location_id, value) in location_to_data_map {
let mut new_object: Option<AbstractObject> = None;
'target_loop: for (target_id, target_offset) in value.get_relative_values() {
if (target_id.get_tid() == self.get_fn_tid()
&& target_id.get_path_hints().is_empty())
|| !self.memory.contains(target_id)
{
continue 'target_loop;
}
let target_offset = match target_offset.try_to_offset() {
Ok(offset) => offset,
Err(_) => {
match &mut new_object {
Some(object) => object.assume_arbitrary_writes(&BTreeSet::new()),
None => {
new_object =
Some(AbstractObject::new(None, self.stack_id.bytesize()))
}
}
continue 'target_loop;
}
};
let target_object = self.memory.get_object(target_id).unwrap();
let mut target_object = target_object.clone();
target_object
.add_offset_to_all_indices(&Bitvector::from_i64(-target_offset).into());
match &mut new_object {
None => new_object = Some(target_object),
Some(object) => *object = object.merge(&target_object),
}
}
let mut new_object =
new_object.unwrap_or_else(|| AbstractObject::new(None, self.stack_id.bytesize()));
new_object.mark_as_unique();
new_object.set_object_type(None);
location_to_object_map.insert(location_id.clone(), new_object);
}
location_to_object_map
}
/// Filter out those locations from the location to pointer data map
/// whose non-parameter object targets intersect with any of the other locations.
///
/// Note that this does not filter out locations whose targets contain the `Top` flag,
/// despite the fact that these locations theoretically may point to the same non-parameter object.
/// That is, we trade soundness in the general case for exactness in the common case here.
pub fn filter_location_to_pointer_data_map(
&self,
location_to_data_map: &mut BTreeMap<AbstractIdentifier, Data>,
) {
let mut visited_targets = HashSet::new();
let mut non_unique_targets = HashSet::new();
for value in location_to_data_map.values() {
for id in value.get_relative_values().keys() {
if id.get_tid() != self.get_fn_tid() && self.memory.contains(id) {
if visited_targets.contains(id) {
non_unique_targets.insert(id.clone());
} else {
visited_targets.insert(id.clone());
}
}
}
}
let mut filtered_out_ids = HashSet::new();
location_to_data_map.retain(|location_id, value| {
for id in value.get_relative_values().keys() {
if non_unique_targets.contains(id) {
filtered_out_ids.insert(location_id.clone());
return false;
}
}
true
});
// Also filter out those locations whose parent locations were filtered out.
location_to_data_map.retain(|location, _| {
if location.get_tid().has_id_suffix("_param") {
return true;
}
for parent in location
.get_location()
.get_all_parent_locations(self.stack_id.bytesize())
{
let parent_id = AbstractIdentifier::new(location.get_tid().clone(), parent);
if filtered_out_ids.contains(&parent_id) {
return false;
}
}
true
});
}
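// For a concrete instance see `test_filter_location_to_data_map` in the tests:
// the locations for r1 and r2 are dropped because both point to the same callee-originating
// object (`callee_orig_2`), while the location for r0 is kept since its non-parameter targets
// are not referenced by any other location.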
/// Add abstract locations based on register values to the location to pointer data map.
/// The TID for the corresponding abstract IDs is the given `call_tid`.
///
/// This function assumes that `self` has already been minimized
/// and thus all non-parameter register values have been removed from the state.
fn add_register_based_root_locations_to_location_to_pointer_data_map(
&self,
call_tid: &Tid,
location_to_data_map: &mut BTreeMap<AbstractIdentifier, Data>,
) {
for (var, value) in self.register.iter() {
if !var.is_temp && self.contains_non_param_pointer(value) {
let location = AbstractLocation::from_var(var).unwrap();
let id = AbstractIdentifier::new(call_tid.clone(), location);
location_to_data_map.insert(id.clone(), value.clone());
}
}
}
/// Add abstract locations based on parameter objects to the location to pointer data map.
/// The TID for the corresponding abstract IDs is the given `call_tid` with a `_param` suffix.
///
/// The TID suffix is necessary to prevent naming collisions with locations based on return registers.
///
/// This function assumes that the stack memory object of `self` has already been deleted by a call to
/// [`State::minimize_before_return_instruction`](crate::analysis::pointer_inference::State::minimize_before_return_instruction).
fn add_param_based_root_locations_to_location_to_pointer_data_map(
&self,
call_tid: &Tid,
location_to_data_map: &mut BTreeMap<AbstractIdentifier, Data>,
) {
for (object_id, object) in self.memory.iter() {
if object_id.get_tid() == self.get_fn_tid()
&& object_id.get_path_hints().is_empty()
&& object_id.get_location().recursion_depth() < POINTER_RECURSION_DEPTH_LIMIT
{
for (index, value) in object.get_mem_region().iter() {
if self.contains_non_param_pointer(value) {
let location = object_id
.get_location()
.clone()
.dereferenced(value.bytesize(), self.stack_id.bytesize())
.with_offset_addendum(*index);
location_to_data_map.insert(
AbstractIdentifier::new(
call_tid.clone().with_id_suffix("_param"),
location,
),
value.clone(),
);
}
}
}
}
}
/// Derive nested locations from the given list of locations to derive
/// and add them to the location to pointer data map.
fn add_derived_locations_to_location_to_pointer_data_map(
&self,
location_to_data_map: &mut BTreeMap<AbstractIdentifier, Data>,
mut locations_to_derive: BTreeMap<AbstractIdentifier, Data>,
) {
while let Some((location_id, location_data)) = locations_to_derive.pop_first() {
if location_id.get_location().recursion_depth() >= POINTER_RECURSION_DEPTH_LIMIT {
continue;
}
'data_target_loop: for (object_id, object_offset) in location_data.get_relative_values()
{
if object_id.get_tid() == self.get_fn_tid() && object_id.get_path_hints().is_empty()
{
// Ignore parameter objects
continue 'data_target_loop;
}
let object_offset = match object_offset.try_to_offset() {
Ok(offset) => offset,
Err(_) => continue 'data_target_loop,
};
let mem_object = match self.memory.get_object(object_id) {
Some(object) => object,
None => continue 'data_target_loop,
};
for (elem_offset, elem_data) in mem_object.get_mem_region().iter() {
if self.contains_non_param_pointer(elem_data) {
// We want to create a new abstract location for this element.
// But the same abstract location may already exist, so we may have to merge values instead.
let new_location_offset = *elem_offset - object_offset; // TODO: Check correctness of this offset!
let new_location = location_id
.get_location()
.clone()
.dereferenced(elem_data.bytesize(), self.stack_id.bytesize())
.with_offset_addendum(new_location_offset);
let new_location_id =
AbstractIdentifier::new(location_id.get_tid().clone(), new_location);
let new_location_data = elem_data.clone();
location_to_data_map
.entry(new_location_id.clone())
.and_modify(|loc_data| *loc_data = loc_data.merge(&new_location_data))
.or_insert(new_location_data.clone());
locations_to_derive
.entry(new_location_id.clone())
.and_modify(|loc_data| *loc_data = loc_data.merge(&new_location_data))
.or_insert(new_location_data);
}
}
}
}
}
/// Generate a map from abstract locations pointing to non-parameter memory objects
/// to the data represented by the abstract location in the current state.
///
/// The abstract locations get different TIDs depending on the root of the location:
/// - If the root is a return register, then the TID is given by the provided `call_tid`.
/// - If the root is a parameter memory object, then the TID is given by appending the suffix `_param` to the `call_tid`.
///   Since parameter and return registers can overlap, the abstract IDs would collide
///   if the same TID were used in both cases.
///
/// For return-register-based locations this function also generates nested abstract locations.
///
/// This function assumes that
/// [`State::minimize_before_return_instruction`](crate::analysis::pointer_inference::State::minimize_before_return_instruction)
/// has been called on `self` beforehand.
pub fn map_abstract_locations_to_pointer_data(
&self,
call_tid: &Tid,
) -> BTreeMap<AbstractIdentifier, Data> {
let mut location_to_data_map = BTreeMap::new();
self.add_register_based_root_locations_to_location_to_pointer_data_map(
call_tid,
&mut location_to_data_map,
);
let locations_to_derive = location_to_data_map.clone();
self.add_param_based_root_locations_to_location_to_pointer_data_map(
call_tid,
&mut location_to_data_map,
);
// Add derived locations based on return register locations.
// FIXME: We cannot add derived locations based on parameter objects,
// because the location and ID of their parent objects would be ambiguous
// between parameter objects and other derived locations.
self.add_derived_locations_to_location_to_pointer_data_map(
&mut location_to_data_map,
locations_to_derive,
);
location_to_data_map
}
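// Concrete example (informal notation, see `test_map_abstract_locations_to_pointer_data` in the tests):
// a pointer returned in r0 gets the ID `call` @ r0, a pointer found at offset 0 inside the returned
// object gets the derived ID `call` @ r0->[0], and a pointer written to offset 2 of the r0 parameter
// object gets the ID `call_param` @ r0->[2].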
/// Returns `true` if the value contains at least one reference to a non-parameter
/// (and non-stack) memory object tracked by the current state.
fn contains_non_param_pointer(&self, value: &Data) -> bool {
for id in value.referenced_ids() {
if (id.get_tid() != self.get_fn_tid() || !id.get_path_hints().is_empty())
&& self.memory.contains(id)
{
return true;
}
}
false
}
}
use super::object::AbstractObject;
use super::object_list::AbstractObjectList;
use super::Data;
use crate::abstract_domain::*;
use crate::analysis::function_signature::AccessPattern;
use crate::analysis::function_signature::FunctionSignature;
use crate::intermediate_representation::*;
use crate::prelude::*;
use std::collections::HashSet;
use std::collections::{BTreeMap, BTreeSet};
use std::sync::Arc;
......@@ -67,38 +70,120 @@ impl State {
stack_register: &Variable,
function_tid: Tid,
) -> State {
let global_addresses = fn_sig.global_parameters.keys().cloned().collect();
let global_addresses = fn_sig
.global_parameters
.keys()
.map(|location| match location {
AbstractLocation::GlobalAddress { address, .. }
| AbstractLocation::GlobalPointer(address, _) => *address,
_ => panic!("Unexpected non-global parameter"),
})
.collect();
let mock_global_memory = RuntimeMemoryImage::empty(true);
let mut state = State::new(stack_register, function_tid.clone(), global_addresses);
// Set parameter values and create parameter memory objects.
for (arg, access_pattern) in &fn_sig.parameters {
let param_id = AbstractIdentifier::from_arg(&function_tid, arg);
match arg {
Arg::Register {
expr: Expression::Var(var),
..
} => state.set_register(
var,
Data::from_target(param_id.clone(), Bitvector::zero(var.size.into()).into()),
),
Arg::Register { .. } => continue, // Parameters in floating point registers are currently ignored.
Arg::Stack { address, size, .. } => {
let param_data =
Data::from_target(param_id.clone(), Bitvector::zero((*size).into()).into());
state
.write_to_address(address, &param_data, &mock_global_memory)
.unwrap();
}
for params in sort_params_by_recursion_depth(&fn_sig.parameters).values() {
for (param_location, access_pattern) in params {
state.add_param(param_location, access_pattern, &mock_global_memory);
}
if access_pattern.is_dereferenced() {
state
.memory
.add_abstract_object(param_id, stack_register.size, None);
}
for (recursion_depth, params) in sort_params_by_recursion_depth(&fn_sig.global_parameters) {
if recursion_depth > 0 {
for (param_location, access_pattern) in params {
state.add_param(param_location, access_pattern, &mock_global_memory);
}
}
}
state
}
/// Add the given parameter to the function start state represented by `self`:
/// For the given parameter location, add a parameter object if it was dereferenced (according to the access pattern)
/// and write the pointer to the parameter object to the corresponding existing memory object of `self`.
///
/// This function assumes that the parent memory object of `param` already exists if `param` is a nested parameter.
fn add_param(
&mut self,
param: &AbstractLocation,
access_pattern: &AccessPattern,
global_memory: &RuntimeMemoryImage,
) {
let param_id = AbstractIdentifier::new(self.stack_id.get_tid().clone(), param.clone());
if !matches!(param, AbstractLocation::GlobalAddress { .. })
&& access_pattern.is_dereferenced()
{
self.memory
.add_abstract_object(param_id.clone(), self.stack_id.bytesize(), None);
}
match param {
AbstractLocation::Register(var) => {
self.set_register(
var,
Data::from_target(param_id, Bitvector::zero(param.bytesize().into()).into()),
);
}
AbstractLocation::Pointer(_, _) => {
let (parent_location, offset) =
param.get_parent_location(self.stack_id.bytesize()).unwrap();
let parent_id =
AbstractIdentifier::new(self.stack_id.get_tid().clone(), parent_location);
self.store_value(
&Data::from_target(
parent_id,
Bitvector::from_i64(offset)
.into_resize_signed(self.stack_id.bytesize())
.into(),
),
&Data::from_target(
param_id.clone(),
Bitvector::zero(param_id.bytesize().into()).into(),
),
global_memory,
)
.unwrap();
}
AbstractLocation::GlobalAddress { .. } => (),
AbstractLocation::GlobalPointer(_, _) => {
let (parent_location, offset) =
param.get_parent_location(self.stack_id.bytesize()).unwrap();
if let AbstractLocation::GlobalAddress { address, size: _ } = parent_location {
let parent_id = self.get_global_mem_id();
self.store_value(
&Data::from_target(
parent_id,
Bitvector::from_u64(address + offset as u64)
.into_resize_signed(self.stack_id.bytesize())
.into(),
),
&Data::from_target(
param_id.clone(),
Bitvector::zero(param_id.bytesize().into()).into(),
),
global_memory,
)
.unwrap();
} else {
let parent_id =
AbstractIdentifier::new(self.stack_id.get_tid().clone(), parent_location);
self.store_value(
&Data::from_target(
parent_id,
Bitvector::from_i64(offset)
.into_resize_signed(self.stack_id.bytesize())
.into(),
),
&Data::from_target(
param_id.clone(),
Bitvector::zero(param_id.bytesize().into()).into(),
),
global_memory,
)
.unwrap();
}
}
}
}
/// Set the MIPS link register `t9` to the address of the callee TID.
///
/// According to the System V ABI for MIPS the caller has to save the callee address in register `t9`
......@@ -124,6 +209,89 @@ impl State {
Ok(())
}
/// Remove all objects and registers from the state whose contents will not be used after returning to a caller.
///
/// All remaining memory objects after the minimization are reachable in the caller
/// either via a parameter object that may have been mutated in the call
/// or via a return register.
pub fn minimize_before_return_instruction(
&mut self,
fn_sig: &FunctionSignature,
cconv: &CallingConvention,
) {
self.clear_non_return_register(cconv);
self.remove_immutable_parameter_objects(fn_sig);
self.memory.remove(&self.stack_id);
self.remove_unreferenced_objects();
}
/// Remove all parameter objects (including global parameter objects) that are not marked as mutably accessed.
/// Used to minimize state before a return instruction.
fn remove_immutable_parameter_objects(&mut self, fn_sig: &FunctionSignature) {
let current_fn_tid = self.get_fn_tid().clone();
self.memory.retain(|object_id, _object| {
if *object_id.get_tid() == current_fn_tid && object_id.get_path_hints().is_empty() {
if let Some(access_pattern) = fn_sig.parameters.get(object_id.get_location()) {
if !access_pattern.is_mutably_dereferenced() {
return false;
}
}
if let Some(access_pattern) = fn_sig.global_parameters.get(object_id.get_location())
{
if !access_pattern.is_mutably_dereferenced() {
return false;
}
}
}
true
});
}
/// Clear all non-return registers from the state, including all virtual registers.
/// This function is used to minimize the state before a return instruction.
fn clear_non_return_register(&mut self, cconv: &CallingConvention) {
let return_register: HashSet<Variable> = cconv
.get_all_return_register()
.into_iter()
.cloned()
.collect();
self.register
.retain(|var, _value| return_register.contains(var));
}
/// Try to determine unique pointer locations for non-parameter memory objects.
/// When successful, merge all referenced non-parameter objects for that location
/// and replace the pointer with a pointer to the merged object.
///
/// The merged objects get new abstract IDs generated from the call TID and their abstract location in the state.
///
/// This function leaves pointers to parameter objects as-is,
/// while pointers to non-parameter objects that were not merged (e.g. because the pointers were not unique) are replaced with `Top`.
pub fn merge_mem_objects_with_unique_abstract_location(&mut self, call_tid: &Tid) {
let mut location_to_data_map = self.map_abstract_locations_to_pointer_data(call_tid);
self.filter_location_to_pointer_data_map(&mut location_to_data_map);
let location_to_object_map =
self.generate_target_objects_for_new_locations(&location_to_data_map);
self.replace_unified_mem_objects(location_to_object_map);
self.replace_ids_to_non_parameter_objects(&location_to_data_map);
self.insert_pointers_to_unified_objects(&location_to_data_map, call_tid);
}
/// Remove all memory objects corresponding to non-parameter IDs.
/// Afterwards, add the memory objects in the location to object map to the state.
fn replace_unified_mem_objects(
&mut self,
location_to_object_map: BTreeMap<AbstractIdentifier, AbstractObject>,
) {
let current_fn_tid = self.get_fn_tid().clone();
self.memory.retain(|object_id, _| {
*object_id.get_tid() == current_fn_tid && object_id.get_path_hints().is_empty()
});
for (id, object) in location_to_object_map {
self.memory.insert(id, object);
}
}
/// Clear all non-callee-saved registers from the state.
/// This automatically also removes all virtual registers.
/// The parameter is a list of callee-saved register names.
......@@ -260,5 +428,21 @@ impl State {
}
}
/// Sort parameters by recursion depth.
/// Helper function for iterating over parameters in order of their recursion depth.
fn sort_params_by_recursion_depth(
params: &BTreeMap<AbstractLocation, AccessPattern>,
) -> BTreeMap<u64, BTreeMap<&AbstractLocation, &AccessPattern>> {
let mut sorted_params = BTreeMap::new();
for (param, access_pattern) in params {
let recursion_depth = param.recursion_depth();
let bucket = sorted_params
.entry(recursion_depth)
.or_insert(BTreeMap::new());
bucket.insert(param, access_pattern);
}
sorted_params
}
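// A minimal sketch of the bucketing (assumptions: the test-only `AbstractLocation::mock` helper and
// `AccessPattern::new_unknown_access` as used in the tests of this module, and that a plain register
// location has recursion depth 0 while one dereference yields depth 1).
#[test]
fn sort_params_by_recursion_depth_sketch() {
    let full_access = AccessPattern::new_unknown_access();
    let params = BTreeMap::from([
        (AbstractLocation::mock("r0:4", &[], 4), full_access),
        (AbstractLocation::mock("r0:4", &[8], 4), full_access),
    ]);
    let sorted = sort_params_by_recursion_depth(&params);
    // The register parameter lands in bucket 0, the nested parameter in bucket 1,
    // so parents are processed before their nested children in `from_fn_sig`.
    assert_eq!(sorted.keys().copied().collect::<Vec<u64>>(), vec![0u64, 1]);
}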
#[cfg(test)]
mod tests;
use super::*;
#[test]
fn handle_store() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&variable!("RSP:8"), Tid::new("time0"), BTreeSet::new());
let stack_id = new_id("time0", "RSP");
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(0))
);
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 - 32:8"));
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(-32))
);
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 + -8:8"));
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(-40))
);
state
.handle_store(&expr!("RSP:8 + 8:8"), &expr!("1:8"), &global_memory)
.unwrap();
state
.handle_store(&expr!("RSP:8 - 8:8"), &expr!("2:8"), &global_memory)
.unwrap();
state
.handle_store(&expr!("RSP:8 + -16:8"), &expr!("3:8"), &global_memory)
.unwrap();
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 - 4:8"));
assert_eq!(
state
.load_value(&expr!("RSP:8 + 12:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(1).into()
);
assert_eq!(
state
.load_value(&expr!("RSP:8 - 4:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(2).into()
);
assert_eq!(
state
.load_value(&expr!("RSP:8 + -12:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(3).into()
);
}
#[test]
fn global_mem_access() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(
&variable!("RSP:8"),
Tid::new("func_tid"),
BTreeSet::from([0x2000]),
);
// global read-only address
let address_expr = expr!("0x1000:8");
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
bitvec!("0xb3b2b1b0:4").into() // note that we read in little-endian byte order
);
assert!(state
.write_to_address(
&address_expr,
&DataDomain::new_top(ByteSize::new(4)),
&global_memory
)
.is_err());
// global writeable address
let address_expr = expr!("0x2000:8");
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
DataDomain::new_top(ByteSize::new(4))
);
assert!(state
.write_to_address(&address_expr, &bitvec!("21:4").into(), &global_memory)
.is_ok());
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
bitvec!("21:4").into()
);
// invalid global address
let address_expr = expr!("0x3456:8");
assert!(state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.is_err());
assert!(state
.write_to_address(
&address_expr,
&DataDomain::new_top(ByteSize::new(4)),
&global_memory
)
.is_err());
}
#[test]
fn test_eval_abstract_location() {
let mut state = State::new(&variable!("RSP:8"), Tid::new("fn_tid"), BTreeSet::new());
let global_memory = RuntimeMemoryImage::mock();
let object_id = AbstractIdentifier::mock("fn_tid", "RSI", 8);
state
.memory
.add_abstract_object(object_id.clone(), ByteSize::new(8), None);
state
.memory
.get_object_mut(&state.stack_id)
.unwrap()
.set_value(
Data::from_target(object_id.clone(), bitvec!("0x0:8").into()),
&bitvec!("0x-20:8").into(),
)
.unwrap();
state
.memory
.get_object_mut(&object_id)
.unwrap()
.set_value(bitvec!("0x42:8").into(), &bitvec!("0x10:8").into())
.unwrap();
let location = AbstractLocation::mock("RSP:8", &[-32], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(
value,
Data::from_target(object_id.clone(), bitvec!("0x0:8").into())
);
let location = AbstractLocation::mock("RSP:8", &[-32, 16], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(value, bitvec!("0x42:8").into());
// Also test evaluation of a global address
state
.memory
.get_object_mut(&state.get_global_mem_id().clone())
.unwrap()
.set_value(bitvec!("0x43:8").into(), &bitvec!("0x2000:8").into())
.unwrap();
let location = AbstractLocation::mock_global(0x2000, &[0], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(value, bitvec!("0x43:8").into());
}
use super::*;
/// Mock an ARM32 function start state with a function signature that has one mutably dereferenced parameter in r0
/// and a mutably dereferenced global parameter at address 0x2000.
/// The function Tid of the state is named `callee`.
fn mock_arm32_fn_start_state() -> (State, FunctionSignature) {
let full_access = AccessPattern::new_unknown_access();
let fn_sig = FunctionSignature {
parameters: BTreeMap::from([(AbstractLocation::mock("r0:4", &[], 4), full_access)]),
global_parameters: BTreeMap::from([(
AbstractLocation::mock_global(0x2000, &[], 4),
full_access,
)]),
};
let state = State::from_fn_sig(&fn_sig, &variable!("sp:4"), Tid::new("callee"));
(state, fn_sig)
}
#[test]
fn test_map_abstract_locations_to_pointer_data() {
let call_tid = Tid::new("call");
let global_memory = RuntimeMemoryImage::mock();
let (mut state, _) = mock_arm32_fn_start_state();
let param_id =
AbstractIdentifier::new(Tid::new("callee"), AbstractLocation::mock("r0:4", &[], 4));
let param_pointer = Data::from_target(param_id.clone(), bitvec!("0x2:4").into());
let global_param_pointer = Data::from_target(
state.get_global_mem_id().clone(),
bitvec!("0x2000:4").into(),
);
let callee_orig_id = AbstractIdentifier::new(
Tid::new("inside_callee"),
AbstractLocation::mock("r0:4", &[], 4),
);
let callee_orig_pointer = Data::from_target(callee_orig_id.clone(), bitvec!("0x3:4").into());
let nested_callee_orig_id = AbstractIdentifier::new(
Tid::new("inside_callee"),
AbstractLocation::mock("r0:4", &[0x10], 4),
);
let nested_callee_orig_pointer =
Data::from_target(nested_callee_orig_id.clone(), bitvec!("0x0:4").into());
state
.memory
.add_abstract_object(callee_orig_id.clone(), ByteSize::new(4), None);
state
.memory
.add_abstract_object(nested_callee_orig_id.clone(), ByteSize::new(4), None);
state
.store_value(&param_pointer, &nested_callee_orig_pointer, &global_memory)
.unwrap();
state
.store_value(
&global_param_pointer,
&nested_callee_orig_pointer,
&global_memory,
)
.unwrap();
state.set_register(&variable!("r0:4"), callee_orig_pointer.clone());
state
.store_value(
&callee_orig_pointer,
&nested_callee_orig_pointer,
&global_memory,
)
.unwrap();
let location_to_data_map = state.map_abstract_locations_to_pointer_data(&call_tid);
let expected_map = BTreeMap::from([
(
AbstractIdentifier::new(
Tid::new("call_param"),
AbstractLocation::mock("r0:4", &[2], 4),
),
nested_callee_orig_pointer.clone(),
),
(
AbstractIdentifier::new(
Tid::new("call_param"),
AbstractLocation::mock_global(0x0, &[0x2000], 4),
),
nested_callee_orig_pointer.clone(),
),
(
AbstractIdentifier::new(Tid::new("call"), AbstractLocation::mock("r0:4", &[], 4)),
callee_orig_pointer.clone(),
),
(
AbstractIdentifier::new(Tid::new("call"), AbstractLocation::mock("r0:4", &[0], 4)),
nested_callee_orig_pointer.clone(),
),
]);
assert_eq!(location_to_data_map, expected_map);
}
#[test]
fn test_filter_location_to_data_map() {
let (mut state, _) = mock_arm32_fn_start_state();
state.memory.add_abstract_object(
AbstractIdentifier::mock("callee_orig", "r0", 4),
ByteSize::new(4),
None,
);
state.memory.add_abstract_object(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
ByteSize::new(4),
None,
);
state.memory.add_abstract_object(
AbstractIdentifier::mock("callee_orig_3", "r0", 4),
ByteSize::new(4),
None,
);
let mut loc_to_data_map = BTreeMap::from([
(
AbstractIdentifier::mock("call", "r0", 4),
Data::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("callee", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig_3", "r0", 4),
bitvec!("0x0:4").into(),
),
])),
),
(
AbstractIdentifier::mock("call", "r1", 4),
Data::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("callee", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
bitvec!("0x0:4").into(),
),
])),
),
(
AbstractIdentifier::mock("call", "r2", 4),
Data::mock_from_target_map(BTreeMap::from([(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
bitvec!("0x0:4").into(),
)])),
),
]);
state.filter_location_to_pointer_data_map(&mut loc_to_data_map);
let expected_map = BTreeMap::from([(
AbstractIdentifier::mock("call", "r0", 4),
Data::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("callee", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig_3", "r0", 4),
bitvec!("0x0:4").into(),
),
])),
)]);
assert_eq!(loc_to_data_map, expected_map);
}
#[test]
fn test_generate_target_objects_for_new_locations() {
let global_memory = RuntimeMemoryImage::mock();
let (mut state, _) = mock_arm32_fn_start_state();
let param_id = AbstractIdentifier::mock("callee", "r0", 4);
let callee_orig_id = AbstractIdentifier::mock("callee_orig", "r0", 4);
let callee_orig_2_id = AbstractIdentifier::mock("callee_orig_2", "r0", 4);
state
.memory
.add_abstract_object(callee_orig_id.clone(), ByteSize::new(4), None);
state
.memory
.add_abstract_object(callee_orig_2_id.clone(), ByteSize::new(4), None);
state
.store_value(
&Data::from_target(param_id.clone(), bitvec!("0x0:4").into()),
&bitvec!("0x42:4").into(),
&global_memory,
)
.unwrap();
state
.store_value(
&Data::from_target(callee_orig_id.clone(), bitvec!("0x4:4").into()),
&bitvec!("0x24:4").into(),
&global_memory,
)
.unwrap();
let loc_to_data_map = BTreeMap::from([(
AbstractIdentifier::mock("call", "r0", 4),
Data::mock_from_target_map(BTreeMap::from([
(param_id.clone(), bitvec!("0x0:4").into()),
(callee_orig_id.clone(), bitvec!("0x0:4").into()),
(callee_orig_2_id.clone(), bitvec!("0x0:4").into()),
])),
)]);
let loc_to_obj_map = state.generate_target_objects_for_new_locations(&loc_to_data_map);
assert_eq!(loc_to_obj_map.len(), 1);
let object = &loc_to_obj_map[&AbstractIdentifier::mock("call", "r0", 4)];
assert_eq!(
object.get_value(bitvec!("0x0:4"), ByteSize::new(4)),
Data::new_top(ByteSize::new(4))
);
let mut merged_value = Data::new_top(ByteSize::new(4));
merged_value.set_absolute_value(Some(bitvec!("0x24:4").into()));
assert_eq!(
object.get_value(bitvec!("0x4:4"), ByteSize::new(4)),
merged_value
);
}
#[test]
fn test_get_id_to_unified_id_replacement_map() {
let cconv = CallingConvention::mock_arm32();
let (mut state, fn_sig) = mock_arm32_fn_start_state();
state.minimize_before_return_instruction(&fn_sig, &cconv);
let location_to_data_map = BTreeMap::from([(
AbstractIdentifier::mock("call", "r0", 4),
Data::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("callee", "r0", 4),
bitvec!("0x2:4").into(),
),
(
AbstractIdentifier::mock("callee_orig", "r0", 4),
bitvec!("0x3:4").into(),
),
(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
bitvec!("0x4:4").into(),
),
])),
)]);
let id_replacement_map = state.get_id_to_unified_ids_replacement_map(&location_to_data_map);
let merged_id = AbstractIdentifier::mock("call", "r0", 4);
let mut merged_pointer = Data::from_target(merged_id.clone(), bitvec!("0x-3:4").into());
merged_pointer.set_contains_top_flag();
let mut merged_pointer_2 = Data::from_target(merged_id.clone(), bitvec!("0x-4:4").into());
merged_pointer_2.set_contains_top_flag();
let param_id = AbstractIdentifier::mock("callee", "r0", 4);
let expected_map = BTreeMap::from([
(
AbstractIdentifier::mock("callee_orig", "r0", 4),
merged_pointer,
),
(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
merged_pointer_2,
),
(
param_id.clone(),
Data::from_target(param_id, bitvec!("0x0:4").into()),
),
]);
assert_eq!(id_replacement_map, expected_map);
}
#[test]
fn test_insert_pointers_to_unified_objects() {
let call_tid = Tid::new("call");
let (mut state, _) = mock_arm32_fn_start_state();
let param_id = AbstractIdentifier::mock("callee", "r0", 4);
let old_callee_orig_id = AbstractIdentifier::mock("instr", "r0", 4);
let old_callee_orig_id_2 = AbstractIdentifier::mock("instr_2", "r0", 4);
let new_id = AbstractIdentifier::mock("call", "r0", 4);
let new_id_2 = AbstractIdentifier::mock_nested("call", "r0:4", &[0], 4);
state
.memory
.add_abstract_object(new_id.clone(), ByteSize::new(4), None);
state
.memory
.add_abstract_object(new_id_2.clone(), ByteSize::new(4), None);
let location_to_data_map = BTreeMap::from([
(
new_id.clone(),
Data::mock_from_target_map(BTreeMap::from([
(param_id.clone(), bitvec!("0x0:4").into()),
(old_callee_orig_id.clone(), bitvec!("0x0:4").into()),
])),
),
(
new_id_2.clone(),
Data::from_target(old_callee_orig_id_2.clone(), bitvec!("0x0:4").into()),
),
]);
state.insert_pointers_to_unified_objects(&location_to_data_map, &call_tid);
assert_eq!(
state.get_register(&variable!("r0:4")),
Data::mock_from_target_map(BTreeMap::from([
(param_id.clone(), bitvec!("0x0:4").into()),
(new_id.clone(), bitvec!("0x0:4").into()),
]))
);
assert_eq!(
state
.memory
.get_object(&new_id)
.unwrap()
.get_value(bitvec!("0x0:4"), ByteSize::new(4)),
Data::from_target(new_id_2.clone(), bitvec!("0x0:4").into())
);
}
......@@ -3,6 +3,8 @@ use super::*;
use crate::analysis::pointer_inference::object::*;
use crate::{bitvec, def, expr, variable};
mod access_handling;
mod id_manipulation;
mod specialized_expressions;
fn bv(value: i64) -> ValueDomain {
......@@ -81,58 +83,6 @@ fn state() {
}
#[test]
fn handle_store() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&variable!("RSP:8"), Tid::new("time0"), BTreeSet::new());
let stack_id = new_id("time0", "RSP");
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(0))
);
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 - 32:8"));
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(-32))
);
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 + -8:8"));
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(-40))
);
state
.handle_store(&expr!("RSP:8 + 8:8"), &expr!("1:8"), &global_memory)
.unwrap();
state
.handle_store(&expr!("RSP:8 - 8:8"), &expr!("2:8"), &global_memory)
.unwrap();
state
.handle_store(&expr!("RSP:8 + -16:8"), &expr!("3:8"), &global_memory)
.unwrap();
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 - 4:8"));
assert_eq!(
state
.load_value(&expr!("RSP:8 + 12:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(1).into()
);
assert_eq!(
state
.load_value(&expr!("RSP:8 - 4:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(2).into()
);
assert_eq!(
state
.load_value(&expr!("RSP:8 + -12:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(3).into()
);
}
#[test]
fn clear_parameters_on_the_stack_on_extern_calls() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&variable!("RSP:8"), Tid::new("time0"), BTreeSet::new());
......@@ -226,61 +176,6 @@ fn reachable_ids_under_and_overapproximation() {
);
}
#[test]
fn global_mem_access() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(
&variable!("RSP:8"),
Tid::new("func_tid"),
BTreeSet::from([0x2000]),
);
// global read-only address
let address_expr = expr!("0x1000:8");
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
bitvec!("0xb3b2b1b0:4").into() // note that we read in little-endian byte order
);
assert!(state
.write_to_address(
&address_expr,
&DataDomain::new_top(ByteSize::new(4)),
&global_memory
)
.is_err());
// global writeable address
let address_expr = expr!("0x2000:8");
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
DataDomain::new_top(ByteSize::new(4))
);
assert!(state
.write_to_address(&address_expr, &bitvec!("21:4").into(), &global_memory)
.is_ok());
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
bitvec!("21:4").into()
);
// invalid global address
let address_expr = expr!("0x3456:8");
assert!(state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.is_err());
assert!(state
.write_to_address(
&address_expr,
&DataDomain::new_top(ByteSize::new(4)),
&global_memory
)
.is_err());
}
/// Test that value specialization does not introduce unintended widening hints.
/// This is a regression test for cases where pointer comparisons introduced two-sided bounds
/// (resulting in two-sided widenings) instead of one-sided bounds.
......@@ -350,14 +245,38 @@ fn test_check_def_for_null_dereferences() {
#[test]
fn from_fn_sig() {
let fn_sig = FunctionSignature::mock_x64();
let global_memory = RuntimeMemoryImage::mock();
let full_access = AccessPattern::new_unknown_access();
let fn_sig = FunctionSignature {
parameters: BTreeMap::from([
(AbstractLocation::mock("RSI:8", &[], 8), full_access),
(AbstractLocation::mock("RSI:8", &[8], 8), full_access),
(
AbstractLocation::mock("RDI:8", &[], 8),
AccessPattern::new().with_read_flag(),
),
]),
global_parameters: BTreeMap::from([
(AbstractLocation::mock_global(0x2000, &[], 8), full_access),
(AbstractLocation::mock_global(0x2000, &[0], 8), full_access),
]),
};
let state = State::from_fn_sig(&fn_sig, &variable!("RSP:8"), Tid::new("func"));
assert_eq!(state.memory.get_num_objects(), 3);
// The state should have 5 objects: the stack, the global memory space, and 3 parameter objects.
assert_eq!(
*state.memory.get_object(&new_id("func", "RSI")).unwrap(),
AbstractObject::new(None, ByteSize::new(8))
state.memory.get_all_object_ids(),
BTreeSet::from([
AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock("RSP:8", &[], 8)),
AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock("RSI:8", &[], 8)),
AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock("RSI:8", &[8], 8)),
AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock_global(0x0, &[], 8)),
AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock_global(0x2000, &[0], 8)
),
])
);
// Check that pointers have been correctly added to the state.
assert_eq!(
state.get_register(&variable!("RSP:8")),
Data::from_target(new_id("func", "RSP"), bv(0).into())
......@@ -370,6 +289,32 @@ fn from_fn_sig() {
state.get_register(&variable!("RSI:8")),
Data::from_target(new_id("func", "RSI"), bv(0).into())
);
assert_eq!(
state.eval_abstract_location(&AbstractLocation::mock("RSI:8", &[8], 8), &global_memory),
Data::from_target(
AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock("RSI:8", &[8], 8)),
bitvec!("0x0:8").into()
)
);
assert_eq!(
state
.load_value_from_address(
&Data::from_target(
state.get_global_mem_id().clone(),
bitvec!("0x2000:8").into()
),
ByteSize::new(8),
&global_memory
)
.unwrap(),
Data::from_target(
AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock_global(0x2000, &[0], 8)
),
bitvec!("0x0:8").into()
)
);
}
#[test]
......@@ -411,3 +356,143 @@ fn add_param_object_from_callee() {
assert_eq!(value.get_absolute_value().unwrap(), &bv(2).into());
assert!(value.contains_top());
}
#[test]
fn test_minimize_before_return_instruction() {
let cconv = CallingConvention::mock_arm32();
let full_access = AccessPattern::new_unknown_access();
let deref_access = AccessPattern::new().with_dereference_flag();
let fn_sig = FunctionSignature {
parameters: BTreeMap::from([
(AbstractLocation::mock("r0:4", &[], 4), full_access),
(AbstractLocation::mock("r0:4", &[0], 4), deref_access),
(AbstractLocation::mock("r0:4", &[0, 0], 4), full_access),
]),
global_parameters: BTreeMap::from([]),
};
let mut state = State::from_fn_sig(&fn_sig, &variable!("sp:4"), Tid::new("func"));
state.memory.add_abstract_object(
AbstractIdentifier::mock("instr", "r0", 4),
ByteSize::new(4),
None,
);
state.memory.add_abstract_object(
AbstractIdentifier::mock("instr", "r1", 4),
ByteSize::new(4),
None,
);
state.set_register(&variable!("r8:4"), bitvec!("0x42:4").into());
state.set_register(&variable!("r0:4"), bitvec!("0x42:4").into());
state.set_register(
&variable!("r3:4"),
Data::from_target(
AbstractIdentifier::mock("instr", "r0", 4),
bitvec!("0x0:4").into(),
),
);
state.minimize_before_return_instruction(&fn_sig, &cconv);
// non-return registers are cleared, but return registers remain
assert!(state.get_register(&variable!("r8:4")).is_top());
assert!(!state.get_register(&variable!("r3:4")).is_top());
// immutable parameter objects are removed, but mutable parameter objects remain (even if no pointer to them remains)
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock("r0:4", &[], 4)
))
.is_some());
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock("r0:4", &[0], 4)
))
.is_none());
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock("r0:4", &[0, 0], 4)
))
.is_some());
// The stack is removed
assert!(state.memory.get_object(&state.stack_id).is_none());
// Unreferenced callee-originating objects are removed, but referenced ones remain
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("instr"),
AbstractLocation::mock("r0:4", &[], 4)
))
.is_some());
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("instr"),
AbstractLocation::mock("r1:4", &[], 4)
))
.is_none());
}
#[test]
fn test_merge_mem_objects_with_unique_abstract_location() {
let call_tid = Tid::new("call");
let global_memory = RuntimeMemoryImage::mock();
let cconv = CallingConvention::mock_arm32();
let full_access = AccessPattern::new_unknown_access();
let fn_sig = FunctionSignature {
parameters: BTreeMap::from([(AbstractLocation::mock("r0:4", &[], 4), full_access)]),
global_parameters: BTreeMap::from([(
AbstractLocation::mock_global(0x2000, &[], 4),
full_access,
)]),
};
let mut state = State::from_fn_sig(&fn_sig, &variable!("sp:4"), Tid::new("callee"));
let param_id = AbstractIdentifier::mock("callee", "r0", 4);
let old_callee_orig_id = AbstractIdentifier::mock("instr", "r0", 4);
let old_callee_orig_id_2 = AbstractIdentifier::mock("instr_2", "r0", 4);
let new_id = AbstractIdentifier::mock_nested("call_param", "r0:4", &[0], 4);
state
.memory
.add_abstract_object(old_callee_orig_id.clone(), ByteSize::new(4), None);
state
.memory
.add_abstract_object(old_callee_orig_id_2.clone(), ByteSize::new(4), None);
// The pointer locations to callee_orig_id_2 will not be unique and the object will thus be removed from the state.
state.set_register(
&variable!("r1:4"),
Data::from_target(old_callee_orig_id_2.clone(), bitvec!("0x0:4").into()),
);
state.set_register(
&variable!("r2:4"),
Data::from_target(old_callee_orig_id_2.clone(), bitvec!("0x0:4").into()),
);
// This register should be cleared before computing return objects.
state.set_register(
&variable!("r8:4"),
Data::from_target(old_callee_orig_id.clone(), bitvec!("0x0:4").into()),
);
state
.store_value(
&Data::from_target(param_id.clone(), bitvec!("0x0:4").into()),
&Data::from_target(old_callee_orig_id, bitvec!("0x0:4").into()),
&global_memory,
)
.unwrap();
state.minimize_before_return_instruction(&fn_sig, &cconv);
state.merge_mem_objects_with_unique_abstract_location(&call_tid);
let mut expected_state = State::from_fn_sig(&fn_sig, &variable!("sp:4"), Tid::new("callee"));
expected_state.minimize_before_return_instruction(&fn_sig, &cconv);
expected_state
.memory
.add_abstract_object(new_id.clone(), ByteSize::new(4), None);
expected_state
.store_value(
&Data::from_target(param_id.clone(), bitvec!("0x0:4").into()),
&Data::from_target(new_id, bitvec!("0x0:4").into()),
&global_memory,
)
.unwrap();
assert_eq!(state, expected_state);
}
use super::*;
use crate::analysis::vsa_results::VsaResult;
use crate::{abstract_domain::AbstractLocation, analysis::vsa_results::VsaResult};
/// Implementation of the [`VsaResult`] trait for providing other analyses with an easy-to-use interface
/// to use the value set and points-to analysis results of the pointer inference.
......@@ -30,4 +30,15 @@ impl<'a> VsaResult for PointerInference<'a> {
.eval_parameter_arg(parameter, &context.project.runtime_memory_image)
.ok()
}
/// Evaluate the value of the given parameter at the given jump instruction.
fn eval_parameter_location_at_call(
&self,
jmp_tid: &Tid,
parameter: &AbstractLocation,
) -> Option<Data> {
let state = self.states_at_tids.get(jmp_tid)?;
let context = self.computation.get_context().get_context();
Some(state.eval_abstract_location(parameter, &context.project.runtime_memory_image))
}
}
......@@ -44,7 +44,7 @@ pub struct Context<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub block_start_node_map: HashMap<(Tid, Tid), NodeIndex>,
/// A set containing a given [`Def`](crate::intermediate_representation::Def) as the first `Def` of the block.
/// A set containing a given [`Def`] if it is the first `Def` of its block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
pub block_first_def_set: HashSet<(Tid, Tid)>,
......
//! This module provides the [`VsaResult`] trait
//! which defines an interface for the results of analyses similar to a value set analysis.
use crate::abstract_domain::AbstractLocation;
use crate::intermediate_representation::{Arg, Expression};
use crate::prelude::*;
......@@ -28,6 +29,13 @@ pub trait VsaResult {
/// Return the value of a parameter at the given jump instruction.
fn eval_parameter_arg_at_call(&self, jmp_tid: &Tid, param: &Arg) -> Option<Self::ValueDomain>;
/// Return the value of a parameter, given by its abstract location, at the given jump instruction.
fn eval_parameter_location_at_call(
&self,
jmp_tid: &Tid,
param: &AbstractLocation,
) -> Option<Self::ValueDomain>;
/// Evaluate the value of the given expression at the given jump instruction.
fn eval_at_jmp(&self, jmp_tid: &Tid, expression: &Expression) -> Option<Self::ValueDomain>;
}
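// A minimal consumer-side sketch of the new trait method (a sketch, not code from this
// change): it assumes an analysis result `vsa_results` whose `ValueDomain` is the pointer
// inference `Data` domain and a callee `FunctionSignature` `fn_sig` whose parameters are
// keyed by `AbstractLocation`, as in the tests above. This mirrors the call-site usage in
// the CWE checks further below.
fn collect_param_values_at_call<T: VsaResult<ValueDomain = Data>>(
    vsa_results: &T,
    fn_sig: &FunctionSignature,
    call_tid: &Tid,
) -> Vec<(AbstractLocation, Data)> {
    let mut param_values = Vec::new();
    for param_location in fn_sig.parameters.keys() {
        // Nested parameters (e.g. values reached by dereferencing a pointer parameter)
        // are identified by their abstract location and can be queried directly.
        if let Some(value) = vsa_results.eval_parameter_location_at_call(call_tid, param_location) {
            param_values.push((param_location.clone(), value));
        }
    }
    param_values
}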
......@@ -107,7 +107,7 @@ impl<'a> Context<'a> {
.function_signatures
.get(id.get_tid())
.unwrap()
.get_stack_params_total_size();
.get_stack_params_total_size(&self.project.stack_pointer_register);
replace_if_smaller_bound(
&mut upper_bound,
BoundsMetadata::from_source(
......@@ -135,6 +135,8 @@ impl<'a> Context<'a> {
object_id: &AbstractIdentifier,
current_stack_frame_id: &AbstractIdentifier,
) -> (Option<BoundsMetadata>, Option<BoundsMetadata>) {
// FIXME: The malloc-tid-to-object-size-map check does not work anymore,
// because the PointerInference no longer provides path hints.
if self
.malloc_tid_to_object_size_map
.contains_key(object_id.get_tid())
......@@ -153,7 +155,7 @@ impl<'a> Context<'a> {
.function_signatures
.get(object_id.get_tid())
.unwrap()
.get_stack_params_total_size();
.get_stack_params_total_size(&self.project.stack_pointer_register);
(None, Some(BoundsMetadata::new(stack_frame_upper_bound)))
} else if object_id.get_tid() == current_stack_frame_id.get_tid()
&& object_id.get_path_hints().is_empty()
......
......@@ -89,6 +89,8 @@ impl<'a> Context<'a> {
/// then the absolute value is used and unknown origins of the size value are ignored.
/// If more than one possible absolute value for the size is found then the minimum value for the size is returned.
pub fn compute_size_of_heap_object(&self, object_id: &AbstractIdentifier) -> BitvectorDomain {
// FIXME: We use path hints, which are no longer provided by the PointerInference, to substitute some values.
// We either have to change that or make sure that we provide the path hints ourselves.
if let Some(object_size) = self.malloc_tid_to_object_size_map.get(object_id.get_tid()) {
let fn_tid_at_malloc_call = self.call_to_caller_fn_map[object_id.get_tid()].clone();
let object_size = self.recursively_substitute_param_values_context_sensitive(
......
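// The doc comment of `compute_size_of_heap_object` above states that, if several absolute
// values are possible for the size, the minimum of them is returned. A minimal sketch of
// that rule on plain integers (an assumption for illustration; the real implementation
// works on the `BitvectorDomain`):
fn minimal_possible_size(possible_sizes: &[u64]) -> Option<u64> {
    possible_sizes.iter().copied().min()
}
// e.g. `minimal_possible_size(&[16, 8, 32])` yields `Some(8)`.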
......@@ -192,9 +192,10 @@ fn add_param_replacements_for_call(
.get(callee_tid)
{
for param_arg in fn_sig.parameters.keys() {
if let Some(param_value) = vsa_results.eval_parameter_arg_at_call(&call.tid, param_arg)
if let Some(param_value) =
vsa_results.eval_parameter_location_at_call(&call.tid, param_arg)
{
let param_id = AbstractIdentifier::from_arg(&call.tid, param_arg);
let param_id = AbstractIdentifier::new(call.tid.clone(), param_arg.clone());
replacement_map.insert(param_id, param_value);
}
}
......
......@@ -13,10 +13,13 @@
//!
//! The check uses the results of the [Pointer Inference analysis](`crate::analysis::pointer_inference`)
//! to check whether any memory accesses may point outside of the bounds of the corresponding memory objects.
//! For this the results of the Pointer Inference analysis are aggregated interprocedurally.
//! Additionally, the check uses a lightweight intraprocedural dataflow fixpoint computation
//! Additionally, the check uses a lightweight dataflow fixpoint computation
//! to ensure that for each memory object only the first access outside of its bounds is flagged as a CWE.
//!
//! Currently, the check is only partially interprocedural.
//! Bounds of parameter objects can be detected, but bounds of memory objects created in called functions
//! (other than the standard allocation functions) will not be detected.
//!
//! ## False Positives
//!
//! - Any analysis imprecision of the Pointer Inference analysis may lead to false positive results in this check.
......@@ -40,6 +43,20 @@
//! this may still miss buffer overflows occurring in the called function.
//! - Right now the check only considers buffers on the stack or the heap, but not buffers in global memory.
//! Thus corresponding overflows of buffers in global memory are not detected.
//! - Since the check is only partially interprocedural at the moment,
//! it will miss the sizes of objects created in called functions.
//! For example, if allocations are wrapped in simple wrapper functions,
//! the analysis will miss overflows for the corresponding objects,
//! because it cannot determine their sizes (see the sketch below).
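// A sketch illustrating the wrapper-function limitation described above
// (hypothetical example target code, not part of this repository):
use std::alloc::{alloc, Layout};

unsafe fn wrapped_alloc(len: usize) -> *mut u8 {
    // A simple wrapper around the standard allocation function.
    alloc(Layout::from_size_align(len, 1).unwrap())
}

fn caller() {
    unsafe {
        let buf = wrapped_alloc(8);
        // Out-of-bounds write one byte past the end of the 8-byte object.
        // The check cannot flag this, because the allocation happened inside
        // `wrapped_alloc` and the object size is unknown at this call site.
        *buf.add(8) = 42;
    }
}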
// FIXME: The current implementation uses path hints for memory object IDs to determine object sizes interprocedurally.
// But the number of path hint combinations can grow exponentially
// with the call depth between the caller and the actual allocation site of a callee-created object.
// This led to state explosion in the PointerInference, so path hints are no longer provided by the PointerInference.
// Without the path hints that this analysis depended on, the check can only resolve the sizes of parameter objects,
// but not of objects returned from called functions (other than the standard allocation functions).
// A future implementation needs a better way to determine object sizes interprocedurally,
// probably relying on several fixpoint computations to circumvent the state explosion problems
// that the old implementation suffered from.
use crate::analysis::pointer_inference::Data;
use crate::prelude::*;
......
......@@ -37,7 +37,7 @@ impl State {
};
let stack_upper_bound = std::cmp::max(
stack_upper_bound,
function_sig.get_stack_params_total_size(),
function_sig.get_stack_params_total_size(&project.stack_pointer_register),
);
let object_lower_bounds = BTreeMap::from([(
stack_id.clone(),
......
......@@ -112,7 +112,7 @@ impl<'a> Context<'a> {
if access_pattern.is_dereferenced() {
if let Some(arg_value) = self
.pointer_inference
.eval_parameter_arg_at_call(call_tid, arg)
.eval_parameter_location_at_call(call_tid, arg)
{
if let Some(mut warnings) = state.check_address_for_use_after_free(&arg_value) {
warning_causes.append(&mut warnings);
......
......@@ -300,7 +300,7 @@ pub mod tests {
assert_eq!(processed_warnings.len(), 1);
let processed_cwe = processed_warnings.iter().next().unwrap();
assert_eq!(&processed_cwe.other[0], &[
"Accessed ID object_origin_tid(->call_tid) @ RAX may have been freed before at free_tid.".to_string(),
"Accessed ID object_origin_tid(->call_tid) @ RAX:i64 may have been freed before at free_tid.".to_string(),
"Relevant callgraph TIDs: [root_func_tid, call_tid]".to_string(),
]);
......
......@@ -31,6 +31,11 @@ impl Tid {
}
}
/// Returns true if the ID string ends with the provided suffix.
pub fn has_id_suffix(&self, suffix: &str) -> bool {
self.id.ends_with(suffix)
}
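// A minimal usage sketch for the new helper (the TID names here are hypothetical):
fn _has_id_suffix_example() {
    let param_tid = Tid::new("call_param");
    assert!(param_tid.has_id_suffix("_param"));
    assert!(!param_tid.has_id_suffix("_block"));
}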
/// Generate the ID of a block starting at the given address.
///
/// Note that the block may not actually exist.
......
......@@ -10,7 +10,7 @@ use crate::utils::log::LogMessage;
use crate::utils::{binary::BareMetalConfig, ghidra::get_project_from_ghidra};
use std::path::Path;
/// Disassemble the given binary and parse it to a [`Project`](crate::intermediate_representation::Project) struct.
/// Disassemble the given binary and parse it to a [`Project`] struct.
///
/// If successful, returns the binary file (as a byte vector), the parsed project struct,
/// and a vector of log messages generated during the process.
......
......@@ -450,6 +450,7 @@ mod tests {
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_skipped(&mut tests, "x86", "mingw32-gcc"); // TODO: Check reason for failure! Probably same as above?
mark_skipped(&mut tests, "x64", "mingw32-gcc"); // We find an additional false positive in unrelated code.
for test_case in tests {
let num_expected_occurences = 1;
......