Unverified commit f6ced95c by Enkelmann, committed by GitHub

Implement tracking of nested parameters (#432)

parent 270b4d4e
use super::AbstractMemoryLocation;
use crate::intermediate_representation::*;
use crate::prelude::*;
/// An abstract location describes how to find the value of a variable in memory at a given time.
///
/// It is defined recursively, where the root is either a register or a (constant) global address.
/// This way only locations that the local state knows about are representable.
/// It is also impossible to accidentally describe circular references.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractLocation {
/// The location is given by a register.
Register(Variable),
/// The value itself is a constant address to global memory.
/// Note that the `size` is the size of the pointer and not the size
/// of the value residing at the specific address in global memory.
GlobalAddress {
/// The address in global memory.
address: u64,
/// The byte size of the address (not the pointed-to value!).
size: ByteSize,
},
/// The location is in memory.
/// One needs to follow the pointer in the given register
/// and then follow the abstract memory location inside the pointed-to memory object
/// to find the actual memory location.
Pointer(Variable, AbstractMemoryLocation),
/// The location is in memory.
/// One needs to follow the pointer located at the given global address
/// and then follow the abstract memory location inside the pointed-to memory object
/// to find the actual memory location.
GlobalPointer(u64, AbstractMemoryLocation),
}
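// Example (illustrative sketch, using hypothetical register names): the nested parameter
// `*(*(RDI + 8) + 16)` of a function corresponds to the location
// `Pointer(RDI, Pointer { offset: 8, target: Location { offset: 16, size } })`,
// i.e. the root register `RDI` followed by the chain of dereference offsets.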
impl std::fmt::Display for AbstractLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Register(var) => write!(formatter, "{}", var.name)?,
Self::GlobalAddress { address, size: _ } => write!(formatter, "0x{address:x}")?,
Self::Pointer(var, location) => write!(formatter, "{}{}", var.name, location)?,
Self::GlobalPointer(address, location) => write!(formatter, "0x{address:x}{location}")?,
};
write!(formatter, ":i{}", self.bytesize().as_bit_length())
}
}
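// Illustrative examples of the format produced above (assuming 8-byte registers):
// `Register(RAX)` prints as "RAX:i64",
// `GlobalAddress { address: 0x2000, .. }` prints as "0x2000:i64",
// and `Pointer(RSP, Location { offset: 16, size: 4 bytes })` prints as "RSP[0x10]:i32".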
impl AbstractLocation {
/// Create an abstract location from a variable corresponding to a register.
/// This function returns an error if the variable is not a physical register.
pub fn from_var(variable: &Variable) -> Result<AbstractLocation, Error> {
if variable.is_temp {
return Err(anyhow!(
"Cannot create abstract location from temporary variables."
));
}
Ok(AbstractLocation::Register(variable.clone()))
}
/// Create an abstract location on the stack.
/// The returned location describes the value of the given `size`
/// at the given `offset` relative to the memory location that the `stack_register` is pointing to.
pub fn from_stack_position(
stack_register: &Variable,
offset: i64,
size: ByteSize,
) -> AbstractLocation {
let stack_pos = AbstractMemoryLocation::Location { offset, size };
AbstractLocation::Pointer(stack_register.clone(), stack_pos)
}
/// Create an abstract location representing an address pointing to global memory.
pub fn from_global_address(address: &Bitvector) -> AbstractLocation {
let size = address.bytesize();
let address = address
.try_to_u64()
.expect("Global address larger than 64 bits encountered.");
AbstractLocation::GlobalAddress { address, size }
}
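// Sketch of the resulting locations (assuming an 8-byte stack register named RSP):
// `from_stack_position(&rsp, 16, ByteSize::new(8))` yields the location printed as "RSP[0x10]:i64",
// i.e. the 8-byte value at offset 16 relative to the address that `RSP` points to.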
/// Add an offset to the abstract location.
pub fn with_offset_addendum(self, addendum: i64) -> AbstractLocation {
match self {
Self::Register(_) => panic!("Cannot add an offset to a register abstract location"),
Self::GlobalAddress { address, size } => Self::GlobalAddress {
address: address + (addendum as u64),
size,
},
Self::Pointer(var, mut location) => {
location.add_offset(addendum);
Self::Pointer(var, location)
}
Self::GlobalPointer(address, mut location) => {
location.add_offset(addendum);
Self::GlobalPointer(address, location)
}
}
}
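// Note: the addendum is applied at the innermost dereference level.
// For example, adding 12 to the location "RAX[0x1][0x3]" yields "RAX[0x1][0xf]"
// (see `AbstractMemoryLocation::add_offset`).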
/// Return the abstract location that one gets when dereferencing the pointer that `self` is pointing to.
///
/// Panics if `self` is not pointer-sized.
pub fn dereferenced(
self,
new_size: ByteSize,
generic_pointer_size: ByteSize,
) -> AbstractLocation {
match self {
Self::Register(var) => Self::Pointer(
var,
AbstractMemoryLocation::Location {
offset: 0,
size: new_size,
},
),
Self::GlobalAddress { address, size } => {
assert_eq!(
size, generic_pointer_size,
"Cannot dereference an abstract memory location that is not pointer-sized."
);
Self::GlobalPointer(
address,
AbstractMemoryLocation::Location {
offset: 0,
size: new_size,
},
)
}
Self::GlobalPointer(address, mut location) => {
location.dereference(new_size, generic_pointer_size);
Self::GlobalPointer(address, location)
}
Self::Pointer(var, mut location) => {
location.dereference(new_size, generic_pointer_size);
Self::Pointer(var.clone(), location)
}
}
}
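// Example (illustrative): dereferencing the pointer-sized location "RAX:i64" with a
// 4-byte target size yields "RAX[0x0]:i32"; dereferencing the global address "0x2000:i64"
// yields "0x2000[0x0]:i32".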
/// Get the bytesize of the value represented by the abstract location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Register(var) => var.size,
Self::GlobalAddress { size, .. } => *size,
Self::Pointer(_, mem_location) | Self::GlobalPointer(_, mem_location) => {
mem_location.bytesize()
}
}
}
/// Get the recursion depth of the abstract location,
/// i.e. how many times one has to dereference a pointer until reaching the actual location.
pub fn recursion_depth(&self) -> u64 {
match self {
Self::Register(_) => 0,
Self::GlobalAddress { .. } => 1,
Self::Pointer(_, mem_location) | Self::GlobalPointer(_, mem_location) => {
1 + mem_location.recursion_depth()
}
}
}
/// Extend the location by appending further dereference operations to it according to the given extension.
pub fn extend(&mut self, extension: AbstractMemoryLocation, generic_pointer_size: ByteSize) {
match self {
Self::Pointer(_, location) | Self::GlobalPointer(_, location) => {
location.extend(extension, generic_pointer_size);
}
Self::GlobalAddress { address, size } => {
assert_eq!(*size, generic_pointer_size);
*self = Self::GlobalPointer(*address, extension);
}
Self::Register(var) => {
assert_eq!(var.size, generic_pointer_size);
*self = Self::Pointer(var.clone(), extension);
}
}
}
/// Get the abstract location representing the pointer pointing to the memory object
/// that contains the location represented by `self`
/// together with the offset that one has to add to the pointer to get the location of `self`.
///
/// Returns an error if the abstract location contains no dereference operation
/// (e.g. if `self` represents a register value).
pub fn get_parent_location(
&self,
generic_pointer_size: ByteSize,
) -> Result<(AbstractLocation, i64), Error> {
match self {
AbstractLocation::GlobalAddress { .. } | AbstractLocation::Register(_) => {
Err(anyhow!("Root location without a parent."))
}
AbstractLocation::GlobalPointer(address, location) => {
match location.get_parent_location(generic_pointer_size) {
Ok((inner_parent_location, innermost_offset)) => Ok((
Self::GlobalPointer(*address, inner_parent_location),
innermost_offset,
)),
Err(innermost_offset) => Ok((
Self::GlobalAddress {
address: *address,
size: generic_pointer_size,
},
innermost_offset,
)),
}
}
AbstractLocation::Pointer(var, location) => {
match location.get_parent_location(generic_pointer_size) {
Ok((inner_parent_location, innermost_offset)) => Ok((
Self::Pointer(var.clone(), inner_parent_location),
innermost_offset,
)),
Err(innermost_offset) => Ok((Self::Register(var.clone()), innermost_offset)),
}
}
}
}
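// Example (illustrative): for the location "RSP[0x10][0x8]" the parent location is
// "RSP[0x10]" (with pointer-sized `size`) and the returned offset is 8.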
/// Get a list of all (recursive) parent locations.
/// The list is sorted by recursion depth, starting with the root location.
pub fn get_all_parent_locations(
&self,
generic_pointer_size: ByteSize,
) -> Vec<AbstractLocation> {
match self {
AbstractLocation::GlobalAddress { .. } | AbstractLocation::Register(_) => Vec::new(),
AbstractLocation::GlobalPointer(_, _) | AbstractLocation::Pointer(_, _) => {
let (parent, _) = self.get_parent_location(generic_pointer_size).unwrap();
let mut all_parents = parent.get_all_parent_locations(generic_pointer_size);
all_parents.push(parent);
all_parents
}
}
}
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::variable;
impl AbstractLocation {
/// Mock an abstract location with a variable as root.
pub fn mock(
root_var: &str,
offsets: &[i64],
size: impl Into<ByteSize>,
) -> AbstractLocation {
let var = variable!(root_var);
match offsets {
[] => {
assert_eq!(var.size, size.into());
AbstractLocation::Register(var)
}
_ => AbstractLocation::Pointer(var, AbstractMemoryLocation::mock(offsets, size)),
}
}
/// Mock an abstract location with a global address as root.
pub fn mock_global(
root_address: u64,
offsets: &[i64],
size: impl Into<ByteSize>,
) -> AbstractLocation {
match offsets {
[] => AbstractLocation::GlobalAddress {
address: root_address,
size: size.into(),
},
_ => AbstractLocation::GlobalPointer(
root_address,
AbstractMemoryLocation::mock(offsets, size),
),
}
}
}
#[test]
fn test_from_variants() {
let loc = AbstractLocation::from_var(&variable!("RAX:8")).unwrap();
assert_eq!(&format!("{loc}"), "RAX:i64");
let loc = AbstractLocation::from_global_address(&Bitvector::from_u64(32));
assert_eq!(
loc,
AbstractLocation::GlobalAddress {
address: 32,
size: ByteSize::new(8)
}
);
let loc = AbstractLocation::from_stack_position(&variable!("RSP:8"), 16, ByteSize::new(8));
assert_eq!(loc, AbstractLocation::mock("RSP:8", &[16], 8));
}
#[test]
fn test_with_offset_addendum() {
let loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4).with_offset_addendum(12);
assert_eq!(loc, AbstractLocation::mock("RAX:8", &[1, 2, 15], 4));
}
#[test]
fn test_dereferenced() {
let loc = AbstractLocation::mock("RAX:8", &[], 8)
.dereferenced(ByteSize::new(4), ByteSize::new(8));
assert_eq!(loc, AbstractLocation::mock("RAX:8", &[0], 4));
}
#[test]
fn test_recursion_depth() {
let loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4);
assert_eq!(loc.recursion_depth(), 3);
}
#[test]
fn test_extend() {
let mut loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4);
let extension = AbstractMemoryLocation::mock(&[4, 5, 6], 1);
loc.extend(extension, ByteSize::new(4));
assert_eq!(loc, AbstractLocation::mock("RAX:8", &[1, 2, 3, 4, 5, 6], 1));
}
#[test]
fn test_get_parent_location() {
let loc = AbstractLocation::mock("RAX:8", &[1], 4);
let (parent, last_offset) = loc.get_parent_location(ByteSize::new(8)).unwrap();
assert_eq!(parent, AbstractLocation::mock("RAX:8", &[], 8));
assert_eq!(last_offset, 1);
let loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4);
let (parent, last_offset) = loc.get_parent_location(ByteSize::new(8)).unwrap();
assert_eq!(parent, AbstractLocation::mock("RAX:8", &[1, 2], 8));
assert_eq!(last_offset, 3);
}
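// Illustrative test sketch (assuming the `mock` helper above):
// `get_all_parent_locations` lists the parent locations starting from the root.
#[test]
fn test_get_all_parent_locations() {
let loc = AbstractLocation::mock("RAX:8", &[1, 2, 3], 4);
let parents = loc.get_all_parent_locations(ByteSize::new(8));
assert_eq!(
parents,
vec![
AbstractLocation::mock("RAX:8", &[], 8),
AbstractLocation::mock("RAX:8", &[1], 8),
AbstractLocation::mock("RAX:8", &[1, 2], 8),
]
);
}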
}
use crate::prelude::*;
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
/// or an offset to a pointer to another memory location,
/// where the value can be found by (recursively) following the embedded `target` memory location.
///
/// The offset and size variables are given in bytes.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
/// A location inside the current memory object.
Location {
/// The offset with respect to the zero offset of the memory object where the value can be found.
offset: i64,
/// The size in bytes of the value that the memory location points to.
size: ByteSize,
},
/// A pointer which needs to be followed to get to the actual memory location.
Pointer {
/// The offset inside the current memory object where the pointer can be found.
offset: i64,
/// The memory location inside the target of the pointer that this memory location points to.
target: Box<AbstractMemoryLocation>,
},
}
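// Example (illustrative): the offset sequence [1, 2, 3] with a value size of 4 bytes corresponds to
// `Pointer { offset: 1, target: Pointer { offset: 2, target: Location { offset: 3, size: 4 bytes } } }`,
// i.e. two pointers to follow and a final 4-byte read (cf. the `mock` helper in the tests below).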
impl AbstractMemoryLocation {
/// Get the abstract memory location representing the pointer pointing to the memory object
/// that contains the location represented by `self`
/// together with the offset that one has to add to the pointer to get the location of `self`.
///
/// If `self` is a location (and not a pointer), return the offset in the location instead.
pub fn get_parent_location(
&self,
generic_pointer_size: ByteSize,
) -> Result<(AbstractMemoryLocation, i64), i64> {
match self {
Self::Location { offset, .. } => Err(*offset),
Self::Pointer { offset, target } => {
match target.get_parent_location(generic_pointer_size) {
Ok((inner_parent, innermost_offset)) => Ok((
Self::Pointer {
offset: *offset,
target: Box::new(inner_parent),
},
innermost_offset,
)),
Err(inner_offset) => Ok((
Self::Location {
offset: *offset,
size: generic_pointer_size,
},
inner_offset,
)),
}
}
}
}
/// Add an offset to a memory location.
pub fn add_offset(&mut self, addendum: i64) {
match self {
Self::Location { offset, .. } => *offset += addendum,
Self::Pointer { target, .. } => target.add_offset(addendum),
}
}
/// Add an offset to the root location of the memory location.
pub fn add_offset_at_root(&mut self, addendum: i64) {
match self {
Self::Location { offset, .. } | Self::Pointer { offset, .. } => *offset += addendum,
}
}
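// Note the difference between the two methods above (illustrative example):
// starting from "[0x1][0x2][0x3]", `add_offset(6)` yields "[0x1][0x2][0x9]" (innermost offset),
// while `add_offset_at_root(6)` yields "[0x7][0x2][0x3]" (root offset).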
/// Dereference the pointer that `self` is pointing to.
///
/// Panics if the old value of `self` is not pointer-sized.
pub fn dereference(&mut self, new_size: ByteSize, generic_pointer_size: ByteSize) {
match self {
Self::Pointer { target, .. } => target.dereference(new_size, generic_pointer_size),
Self::Location { offset, size } => {
assert_eq!(
*size, generic_pointer_size,
"Cannot dereference an abstract memory location that is not pointer-sized."
);
*self = Self::Pointer {
offset: *offset,
target: Box::new(Self::Location {
offset: 0,
size: new_size,
}),
}
}
};
}
/// Extend the location by appending further dereference operations to it according to the given extension.
pub fn extend(&mut self, extension: AbstractMemoryLocation, generic_pointer_size: ByteSize) {
match self {
Self::Location { offset, size } => {
assert_eq!(*size, generic_pointer_size);
*self = Self::Pointer {
offset: *offset,
target: Box::new(extension),
};
}
Self::Pointer { target, .. } => target.extend(extension, generic_pointer_size),
}
}
/// Get the bytesize of the value represented by the abstract memory location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Location { size, .. } => *size,
Self::Pointer { target, .. } => target.bytesize(),
}
}
/// Get the recursion depth of the abstract memory location,
/// i.e. how many times one has to dereference a pointer until reaching the actual location.
pub fn recursion_depth(&self) -> u64 {
match self {
Self::Location { .. } => 0,
Self::Pointer { target, .. } => 1 + target.recursion_depth(),
}
}
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Location { offset, .. } => write!(formatter, "[0x{offset:x}]"),
Self::Pointer { offset, target } => write!(formatter, "[0x{offset:x}]{target}"),
}
}
}
#[cfg(test)]
pub mod tests {
use super::*;
impl AbstractMemoryLocation {
/// Mock a memory location with a given sequence of offsets.
/// The first element in the sequence is the root offset.
pub fn mock(offsets: &[i64], size: impl Into<ByteSize>) -> AbstractMemoryLocation {
match offsets {
[] => panic!(),
[offset] => AbstractMemoryLocation::Location {
offset: *offset,
size: size.into(),
},
[offset, tail @ ..] => AbstractMemoryLocation::Pointer {
offset: *offset,
target: Box::new(AbstractMemoryLocation::mock(tail, size)),
},
}
}
}
#[test]
fn test_mock() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
assert_eq!(&format!("{loc}"), "[0x1][0x2][0x3]");
}
#[test]
fn test_get_parent_location() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
let (parent_loc, last_offset) = loc.get_parent_location(ByteSize::new(8)).unwrap();
assert_eq!(parent_loc, AbstractMemoryLocation::mock(&[1, 2], 8));
assert_eq!(last_offset, 3);
let loc = AbstractMemoryLocation::mock(&[1], 4);
assert!(loc.get_parent_location(ByteSize::new(8)).is_err());
}
#[test]
fn test_offset_addendums() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
loc.add_offset(6);
assert_eq!(&loc, &AbstractMemoryLocation::mock(&[1, 2, 9], 4));
loc.add_offset_at_root(-5);
assert_eq!(&loc, &AbstractMemoryLocation::mock(&[-4, 2, 9], 4));
}
#[test]
fn test_dereference() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
loc.dereference(ByteSize::new(8), ByteSize::new(4));
assert_eq!(loc, AbstractMemoryLocation::mock(&[1, 2, 3, 0], 8))
}
#[test]
fn test_extend() {
let mut loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
let extension = AbstractMemoryLocation::mock(&[4, 5, 6], 1);
loc.extend(extension, ByteSize::new(4));
assert_eq!(loc, AbstractMemoryLocation::mock(&[1, 2, 3, 4, 5, 6], 1));
}
#[test]
fn test_recursion_depth() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
assert_eq!(loc.recursion_depth(), 2);
let loc = AbstractMemoryLocation::mock(&[1], 4);
assert_eq!(loc.recursion_depth(), 0);
}
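// Illustrative test sketch (assuming the `mock` helper above):
// `bytesize` always returns the size of the innermost target value.
#[test]
fn test_bytesize() {
let loc = AbstractMemoryLocation::mock(&[1, 2, 3], 4);
assert_eq!(loc.bytesize(), ByteSize::new(4));
}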
}
@@ -3,12 +3,17 @@ use crate::prelude::*;
use derive_more::Deref;
use std::sync::Arc;
+ mod location;
+ pub use location::AbstractLocation;
+ mod mem_location;
+ pub use mem_location::AbstractMemoryLocation;
/// An abstract identifier is used to identify an object or a value in an abstract state.
///
/// Since many program states can be represented by the same abstract state in data-flow analysis,
/// one sometimes needs a way to uniquely identify a variable or a memory object in all of the represented program states.
/// Abstract identifiers achieve this by identifying a *time*, i.e. a specific abstract state,
- /// and a *location*, i.e. a recipe for abstracting a concrete value from any concrete state that is represented by the abstract state.
+ /// and a *location*, i.e. a recipe for computing a concrete value from any concrete state that is represented by the abstract state.
/// The value in question then serves as the identifier.
/// For example, a pointer may uniquely determine the memory object it is pointing to.
/// Or a value may represent the value of a variable at a certain time,
@@ -20,15 +25,15 @@ use std::sync::Arc;
/// E.g. it may represent the union of all values at the specific *location* for each time the program point is visited during an execution trace
/// or it may only represent the value at the last time the program point was visited.
///
- /// Alternatively one can also add path hints to an identifier to further distinguish points in time in an execution trace.
+ /// Alternatively, one can also add path hints to an identifier to further distinguish points in time in an execution trace.
/// Path hints are given as a possibly empty array of time identifiers.
/// To prevent infinitely long path hints, each time identifier is only allowed to appear at most once in the array.
/// The specific meaning of the path hints depends upon the use case.
///
/// An abstract identifier is given by a time identifier, a location identifier and a path hints array (containing time identifiers).
///
- /// For the location identifier see `AbstractLocation`.
- /// The time identifier is given by a `Tid`.
+ /// For the location identifier see [`AbstractLocation`].
+ /// The time identifier is given by a [`Tid`].
/// If it is the `Tid` of a basic block, then it describes the point in time *before* execution of the first instruction in the block.
/// If it is the `Tid` of a `Def` or `Jmp`, then it describes the point in time *after* the execution of the `Def` or `Jmp`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Deref)]
@@ -161,136 +166,6 @@ impl std::fmt::Display for AbstractIdentifier {
}
}
- /// An abstract location describes how to find the value of a variable in memory at a given time.
- ///
- /// It is defined recursively, where the root is always a register.
- /// This way only locations that the local state knows about are representable.
- /// It is also impossible to accidentally describe circular references.
- #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
- pub enum AbstractLocation {
- /// The location is given by a register.
- Register(Variable),
- /// The value itself is a constant address to global memory.
- /// Note that the `size` is the size of the pointer and not the size
- /// of the value residing at the specific address in global memory.
- GlobalAddress {
- /// The address in global memory.
- address: u64,
- /// The byte size of the address (not the pointed-to value!).
- size: ByteSize,
- },
- /// The location is in memory.
- /// One needs to follow the pointer in the given register
- /// and then follow the abstract memory location inside the pointed to memory object
- /// to find the actual memory location.
- Pointer(Variable, AbstractMemoryLocation),
- /// The location is in memory.
- /// One needs to follow the pointer located at the given global address
- /// and then follow the abstract memory location inside the pointed to memory object
- /// to find the actual memory location.
- GlobalPointer(u64, AbstractMemoryLocation),
- }
- impl std::fmt::Display for AbstractLocation {
- fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
- match self {
- Self::Register(var) => write!(formatter, "{}", var.name),
- Self::GlobalAddress { address, size: _ } => write!(formatter, "0x{address:x}"),
- Self::Pointer(var, location) => write!(formatter, "{}->{}", var.name, location),
- Self::GlobalPointer(address, location) => {
- write!(formatter, "0x{address:x}->{location}")
- }
- }
- }
- }
- impl AbstractLocation {
- /// Create an abstract location from a variable corresponding to a register.
- /// This function returns an error if the variable is not a physical register.
- pub fn from_var(variable: &Variable) -> Result<AbstractLocation, Error> {
- if variable.is_temp {
- return Err(anyhow!(
- "Cannot create abstract location from temporary variables."
- ));
- }
- Ok(AbstractLocation::Register(variable.clone()))
- }
- /// Create an abstract location on the stack.
- /// The returned location describes the value of the given `size`
- /// at the given `offset` relative to the memory location that the `stack_register` is pointing to.
- pub fn from_stack_position(
- stack_register: &Variable,
- offset: i64,
- size: ByteSize,
- ) -> AbstractLocation {
- let stack_pos = AbstractMemoryLocation::Location { offset, size };
- AbstractLocation::Pointer(stack_register.clone(), stack_pos)
- }
- /// Create an abstract location representing an address pointing to global memory.
- pub fn from_global_address(address: &Bitvector) -> AbstractLocation {
- let size = address.bytesize();
- let address = address
- .try_to_u64()
- .expect("Global address larger than 64 bits encountered.");
- AbstractLocation::GlobalAddress { address, size }
- }
- /// Get the bytesize of the value represented by the abstract location.
- pub fn bytesize(&self) -> ByteSize {
- match self {
- Self::Register(var) => var.size,
- Self::GlobalAddress { size, .. } => *size,
- Self::Pointer(_, mem_location) | Self::GlobalPointer(_, mem_location) => {
- mem_location.bytesize()
- }
- }
- }
- }
- /// An abstract memory location is either an offset from the given location, where the actual value can be found,
- /// or an offset to a pointer to another memory location,
- /// where the value can be found by (recursively) following the embedded `target` memory location.
- ///
- /// The offset and size variables are given in bytes.
- #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
- pub enum AbstractMemoryLocation {
- /// A location inside the current memory object.
- Location {
- /// The offset with respect to the zero offset of the memory object where the value can be found.
- offset: i64,
- /// The size in bytes of the value that the memory location points to.
- size: ByteSize,
- },
- /// A pointer which needs to be followed to get to the actual memory location
- Pointer {
- /// The offset inside the current memory object where the pointer can be found.
- offset: i64,
- /// The memory location inside the target of the pointer that this memory location points to.
- target: Box<AbstractMemoryLocation>,
- },
- }
- impl AbstractMemoryLocation {
- /// Get the bytesize of the value represented by the abstract memory location.
- pub fn bytesize(&self) -> ByteSize {
- match self {
- Self::Location { size, .. } => *size,
- Self::Pointer { target, .. } => target.bytesize(),
- }
- }
- }
- impl std::fmt::Display for AbstractMemoryLocation {
- fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
- match self {
- Self::Location { offset, .. } => write!(formatter, "({offset})"),
- Self::Pointer { offset, target } => write!(formatter, "({offset})->{target}"),
- }
- }
- }
#[cfg(test)]
pub mod tests {
use super::*;
@@ -313,6 +188,20 @@ pub mod tests {
.unwrap(),
)
}
+ /// Mock an abstract identifier with the given TID name
+ /// and with a nested abstract location starting at the register given by `var`.
+ pub fn mock_nested(
+ tid: impl ToString,
+ var: &str,
+ offsets: &[i64],
+ size: impl Into<ByteSize>,
+ ) -> Self {
+ AbstractIdentifier::new(
+ Tid::new(tid.to_string()),
+ AbstractLocation::mock(var, offsets, size),
+ )
+ }
}
#[test]
...
+ use super::*;
use crate::abstract_domain::{
- AbstractDomain, AbstractIdentifier, AbstractLocation, BitvectorDomain, DataDomain, SizedDomain,
- TryToBitvec,
+ AbstractDomain, AbstractIdentifier, AbstractLocation, BitvectorDomain, DataDomain,
+ RegisterDomain as _, SizedDomain, TryToBitvec,
};
use crate::utils::arguments;
use crate::{
@@ -8,8 +9,6 @@ use crate::{
intermediate_representation::Project,
};
- use super::*;
/// The context struct for the fixpoint algorithm.
pub struct Context<'a> {
graph: &'a Graph<'a>,
@@ -35,11 +34,9 @@ impl<'a> Context<'a> {
/// Compute the return values of a call and return them (without adding them to the caller state).
///
/// The `callee_state` is the state of the callee at the return site.
- /// The return values are expressed in the abstract IDs that are known to the caller.
- /// If a return value may contain `Top` values,
- /// i.e. values for which the origin is not known or not expressible in the abstract IDs known to the caller,
- /// then a call- and register-specific abstract ID is added to the corresponding return value.
- /// This ID is not added to the tracked IDs of the caller state.
+ /// Return values corresponding to callee parameters are expressed in the abstract IDs that are known to the caller.
+ /// Additionally, each return value also contains one abstract ID specific to the call instruction and return register.
+ /// This ID is used to track abstract location access patterns to the return value of the call in the caller.
fn compute_return_values_of_call<'cconv>(
&self,
caller_state: &mut State,
@@ -74,8 +71,9 @@ impl<'a> Context<'a> {
/// Compute the return value for the given register.
///
/// The return value contains the IDs of all possible input IDs of the call that it may reference.
- /// If the value may also contain a value not originating from the caller
- /// then replace it with a call- and register-specific abstract ID.
+ /// Additionally, it also contains a call- and register-specific abstract ID,
+ /// which can be used to track the access patterns of the return value
+ /// independently of whether the return value only references caller values or not.
fn compute_return_register_value_of_call(
&self,
caller_state: &mut State,
@@ -86,20 +84,18 @@ impl<'a> Context<'a> {
let callee_value = callee_state.get_register(return_register);
let mut return_value: DataDomain<BitvectorDomain> =
DataDomain::new_empty(return_register.size);
- // For absolute or Top-values originating in the callee the Top-flag of the return value is set.
- if callee_value.contains_top() || callee_value.get_absolute_value().is_some() {
- return_value.set_contains_top_flag();
- }
// For every relative value in the callee we check whether it is relative to a parameter of the callee.
// If yes, we can compute it relative to the value of the parameter at the callsite and add the result to the return value.
- // Else we just set the Top-flag of the return value to indicate some value originating in the callee.
- for (callee_id, callee_offset) in callee_value.get_relative_values() {
- if callee_id.get_tid() == callee_state.get_current_function_tid()
- && matches!(
- callee_id.get_location(),
- AbstractLocation::GlobalAddress { .. }
- )
- {
+ for (callee_id, callee_offset) in callee_value
+ .get_relative_values()
+ .iter()
+ .filter(|(callee_id, _)| callee_id.get_tid() == callee_state.get_current_function_tid())
+ {
+ if matches!(
+ callee_id.get_location(),
+ AbstractLocation::GlobalAddress { .. } | AbstractLocation::GlobalPointer(_, _)
+ ) {
// Globals get the same ID as if the global pointer originated in the caller.
let caller_global_id = AbstractIdentifier::new(
caller_state.get_current_function_tid().clone(),
@@ -109,13 +105,13 @@ impl<'a> Context<'a> {
let caller_global =
DataDomain::from_target(caller_global_id, callee_offset.clone());
return_value = return_value.merge(&caller_global);
- } else if let Some(param_arg) = callee_state.get_arg_corresponding_to_id(callee_id) {
- let param_value = caller_state.eval_parameter_arg(&param_arg);
+ } else {
+ let param_value = caller_state.eval_param_location(
+ callee_id.get_location(),
+ &self.project.runtime_memory_image,
+ );
let param_value = caller_state
.substitute_global_mem_address(param_value, &self.project.runtime_memory_image);
- if param_value.contains_top() || param_value.get_absolute_value().is_some() {
- return_value.set_contains_top_flag()
- }
for (param_id, param_offset) in param_value.get_relative_values() {
let value = DataDomain::from_target(
param_id.clone(),
@@ -123,19 +119,14 @@ impl<'a> Context<'a> {
);
return_value = return_value.merge(&value);
}
- } else {
- return_value.set_contains_top_flag();
- }
}
- // If the Top-flag of the return value was set we replace it with an ID representing the return register
- // to indicate where the unknown value originated from.
- if return_value.contains_top() {
+ // Also add an ID representing the return register (regardless of what was added before).
+ // This ID is used to track abstract location access patterns in relation to the return value.
let id = AbstractIdentifier::from_var(call.tid.clone(), return_register);
let value =
DataDomain::from_target(id, Bitvector::zero(return_register.size.into()).into());
return_value = return_value.merge(&value);
- return_value.unset_contains_top_flag();
- }
return_value
}
@@ -314,6 +305,34 @@ impl<'a> Context<'a> {
}
None
}
+ /// Adjust the stack register after a call to a function.
+ ///
+ /// On x86, this removes the return address from the stack
+ /// (other architectures pass the return address in a register, not on the stack).
+ /// On other architectures the stack register retains the value it had before the call.
+ /// Note that in some calling conventions the callee also clears function parameters from the stack.
+ /// We do not detect and handle these cases yet.
+ fn adjust_stack_register_on_return_from_call(
+ &self,
+ state_before_call: &State,
+ new_state: &mut State,
+ ) {
+ let stack_register = &self.project.stack_pointer_register;
+ let stack_pointer = state_before_call.get_register(stack_register);
+ match self.project.cpu_architecture.as_str() {
+ "x86" | "x86_32" | "x86_64" => {
+ let offset = Bitvector::from_u64(stack_register.size.into())
+ .into_truncate(apint::BitWidth::from(stack_register.size))
+ .unwrap();
+ new_state.set_register(
+ stack_register,
+ stack_pointer.bin_op(BinOpType::IntAdd, &offset.into()),
+ );
+ }
+ _ => new_state.set_register(stack_register, stack_pointer),
+ }
+ }
}
impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
@@ -339,7 +358,8 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
new_state.set_register(var, value);
}
Def::Load { var, address } => {
- new_state.set_deref_flag_for_input_ids_of_expression(address);
+ new_state.set_deref_flag_for_pointer_inputs_of_expression(address);
+ new_state.set_read_flag_for_input_ids_of_expression(address);
let address = new_state.substitute_global_mem_address(
state.eval(address),
&self.project.runtime_memory_image,
@@ -352,10 +372,13 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
);
let value = new_state
.substitute_global_mem_address(value, &self.project.runtime_memory_image);
+ new_state.track_contained_ids(&value);
+ new_state.set_read_flag_for_contained_ids(&value);
new_state.set_register(var, value);
}
Def::Store { address, value } => {
- new_state.set_mutable_deref_flag_for_input_ids_of_expression(address);
+ new_state.set_mutable_deref_flag_for_pointer_inputs_of_expression(address);
+ new_state.set_read_flag_for_input_ids_of_expression(address);
let address = new_state.substitute_global_mem_address(
state.eval(address),
&self.project.runtime_memory_image,
@@ -420,6 +443,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
cconv,
&self.project.runtime_memory_image,
);
+ self.adjust_stack_register_on_return_from_call(state, &mut new_state);
return Some(new_state);
}
}
@@ -427,6 +451,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
if let Some(extern_symbol) = self.project.program.term.extern_symbols.get(target) {
self.handle_extern_symbol_call(&mut new_state, extern_symbol, &call.tid);
if !extern_symbol.no_return {
+ self.adjust_stack_register_on_return_from_call(state, &mut new_state);
return Some(new_state);
}
} else if let Some(cconv) = self.project.get_standard_calling_convention() {
@@ -435,6 +460,7 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
cconv,
&self.project.runtime_memory_image,
);
+ self.adjust_stack_register_on_return_from_call(state, &mut new_state);
return Some(new_state);
}
}
@@ -462,9 +488,9 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
Some(cconv) => cconv,
None => return None,
};
- let old_state = state_before_call.unwrap();
+ let state_before_call = state_before_call.unwrap();
let callee_state = state.unwrap();
- let mut new_state = old_state.clone();
+ let mut new_state = state_before_call.clone();
// Merge parameter access patterns with the access patterns from the callee.
let parameters = callee_state.get_params_of_current_function();
new_state.merge_parameter_access(&parameters, &self.project.runtime_memory_image);
@@ -480,8 +506,11 @@ impl<'a> forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
new_state.clear_non_callee_saved_register(&calling_convention.callee_saved_register);
// Now we can insert the return values into the state
for (var, value) in return_value_list {
+ // The return values may contain new IDs that have to be tracked.
+ new_state.track_contained_ids(&value);
new_state.set_register(var, value);
}
+ self.adjust_stack_register_on_return_from_call(state_before_call, &mut new_state);
Some(new_state)
}
...
use super::*;
- use crate::{bitvec, variable};
+ use crate::{analysis::forward_interprocedural_fixpoint::Context as _, bitvec, def, variable};
#[test]
fn test_compute_return_values_of_call() {
@@ -25,21 +25,27 @@ fn test_compute_return_values_of_call() {
&call,
);
let expected_val = DataDomain::from_target(
- AbstractIdentifier::from_var(Tid::new("call_tid"), &variable!("RAX:8")),
+ AbstractIdentifier::mock("call_tid", "RAX", 8),
bitvec!("0x0:8").into(),
);
assert_eq!(return_values.iter().len(), 3);
assert_eq!(return_values[0], (&variable!("RAX:8"), expected_val));
// Test returning a known value.
let param_ref = DataDomain::from_target(
- AbstractIdentifier::from_var(Tid::new("callee"), &variable!("RDI:8")),
+ AbstractIdentifier::mock("callee", "RDI", 8),
bitvec!("0x0:8").into(),
);
callee_state.set_register(&variable!("RAX:8"), param_ref);
- let expected_val = DataDomain::from_target(
- AbstractIdentifier::from_var(Tid::new("caller"), &variable!("RDI:8")),
- bitvec!("0x0:8").into(),
- );
+ let expected_val = DataDomain::mock_from_target_map(BTreeMap::from([
+ (
+ AbstractIdentifier::mock("caller", "RDI", 8),
+ bitvec!("0x0:8").into(),
+ ),
+ (
+ AbstractIdentifier::mock("call_tid", "RAX", 8),
+ bitvec!("0x0:8").into(),
+ ),
+ ]));
let return_values = context.compute_return_values_of_call(
&mut caller_state,
&callee_state,
@@ -69,7 +75,7 @@ fn test_call_stub_handling() {
assert_eq!(
state.get_params_of_current_function(),
vec![(
- Arg::from_var(variable!("r0:4"), None),
+ &AbstractLocation::from_var(&variable!("r0:4")).unwrap(),
AccessPattern::new().with_read_flag()
)]
);
@@ -97,14 +103,14 @@ fn test_call_stub_handling() {
assert_eq!(
params[0],
(
- Arg::from_var(variable!("r0:4"), None),
+ &AbstractLocation::from_var(&variable!("r0:4")).unwrap(),
AccessPattern::new_unknown_access()
)
);
assert_eq!(
params[1],
(
- Arg::from_var(variable!("r2:4"), None),
+ &AbstractLocation::from_var(&variable!("r2:4")).unwrap(),
AccessPattern::new()
.with_read_flag()
.with_dereference_flag()
@@ -114,6 +120,51 @@ fn test_call_stub_handling() {
}
#[test]
+ fn test_stack_register_adjustment_after_call() {
+ let project = Project::mock_x64();
+ let graph = crate::analysis::graph::get_program_cfg(&project.program);
+ let context = Context::new(&project, &graph);
+ let mut state_before_call = State::mock_x64("mock_fn");
+ let stack_id = AbstractIdentifier::mock("mock_fn", "RSP", 8);
+ state_before_call.set_register(
+ &variable!("RSP:8"),
+ DataDomain::from_target(stack_id.clone(), bitvec!("0x-20:8").into()),
+ );
+ let call_term = Term {
+ tid: Tid::new("call_tid"),
+ term: Jmp::CallInd {
+ target: Expression::Var(variable!("R15:8")),
+ return_: Some(Tid::new("return_")),
+ },
+ };
+ // Test adjustment on extern calls
+ let state_after_call = context
+ .update_call_stub(&state_before_call, &call_term)
+ .unwrap();
+ let adjusted_sp = state_after_call.get_register(&variable!("RSP:8"));
+ assert_eq!(
+ adjusted_sp,
+ DataDomain::from_target(stack_id.clone(), bitvec!("0x-18:8").into())
+ );
+ // Test adjustment on intern calls
+ let state_before_return = State::mock_x64("callee");
+ let state_after_call = context
+ .update_return(
+ Some(&state_before_return),
+ Some(&state_before_call),
+ &call_term,
+ &call_term,
+ &None,
+ )
+ .unwrap();
+ let adjusted_sp = state_after_call.get_register(&variable!("RSP:8"));
+ assert_eq!(
+ adjusted_sp,
+ DataDomain::from_target(stack_id.clone(), bitvec!("0x-18:8").into())
+ );
+ }
#[test]
fn test_get_global_mem_address() { fn test_get_global_mem_address() {
let project = Project::mock_arm32(); let project = Project::mock_arm32();
let graph = crate::analysis::graph::get_program_cfg(&project.program); let graph = crate::analysis::graph::get_program_cfg(&project.program);
...@@ -135,3 +186,82 @@ fn test_get_global_mem_address() { ...@@ -135,3 +186,82 @@ fn test_get_global_mem_address() {
let result = context.get_global_mem_address(&value); let result = context.get_global_mem_address(&value);
assert!(result.is_none()); assert!(result.is_none());
} }
+ #[test]
+ fn test_generation_of_nested_ids_and_access_patterns_on_load_and_store() {
+ let project = Project::mock_arm32();
+ let graph = crate::analysis::graph::get_program_cfg(&project.program);
+ let context = Context::new(&project, &graph);
+ let state = State::mock_arm32();
+ // Load from a tracked pointer value
+ let def = def!["load_instr: r0:4 := Load from r1:4 + 0x10:4"];
+ let new_state = context.update_def(&state, &def).unwrap();
+ let loaded_value = new_state.get_register(&variable!("r0:4"));
+ assert_eq!(
+ loaded_value,
+ DataDomain::from_target(
+ AbstractIdentifier::new(
+ Tid::new("mock_fn"),
+ AbstractLocation::mock("r1:4", &[16], 4)
+ ),
+ bitvec!("0x0:4").into()
+ )
+ );
+ let params = new_state.get_params_of_current_function();
+ assert_eq!(params.len(), 1);
+ assert!(params.contains(&(
+ &AbstractLocation::mock("r1:4", &[], 4),
+ AccessPattern::new()
+ .with_read_flag()
+ .with_dereference_flag()
+ )));
+ // Load from an untracked register value
+ let def = def!["load_instr: r0:4 := Load from r8:4 + 0x10:4"];
+ let new_state = context.update_def(&state, &def).unwrap();
+ let loaded_value = new_state.get_register(&variable!("r0:4"));
+ assert!(loaded_value.is_top());
+ assert_eq!(new_state.get_params_of_current_function(), []);
+ // Store a tracked pointer value
+ let def = def!["store_instr: Store at r0:4 := r1:4 + 0x10:4"];
+ let new_state = context.update_def(&state, &def).unwrap();
+ let params = new_state.get_params_of_current_function();
+ assert_eq!(params.len(), 2);
+ assert!(params.contains(&(
+ &AbstractLocation::mock("r0:4", &[], 4),
+ AccessPattern::new()
+ .with_read_flag()
+ .with_mutably_dereferenced_flag()
+ )));
+ assert!(params.contains(&(
+ &AbstractLocation::mock("r1:4", &[], 4),
+ AccessPattern::new().with_read_flag()
+ )));
+ // Store to an untracked register value
+ let def = def!["store_instr: Store at r8:4 := r1:4 + 0x10:4"];
+ let new_state = context.update_def(&state, &def).unwrap();
+ let params = new_state.get_params_of_current_function();
+ assert_eq!(params.len(), 1);
+ assert!(params.contains(&(
+ &AbstractLocation::mock("r1:4", &[], 4),
+ AccessPattern::new().with_read_flag()
+ )));
+ }
+ #[test]
+ fn test_stack_param_loaded_but_not_accessed() {
+ // Regression test for the case that a stack parameter is loaded into a register but then not directly accessed.
+ // In such a case the stack parameter must still be proactively marked as read,
+ // because its later usage might simply be missed by the analysis.
+ let project = Project::mock_arm32();
+ let graph = crate::analysis::graph::get_program_cfg(&project.program);
+ let context = Context::new(&project, &graph);
+ let state = State::mock_arm32();
+ let def = def!["r0:4 := Load from sp:4"];
+ let new_state = context.update_def(&state, &def).unwrap();
+ let fn_sig = new_state.get_params_of_current_function();
+ assert!(fn_sig.contains(&(
+ &AbstractLocation::mock("sp:4", &[0], 4),
+ AccessPattern::new().with_read_flag()
+ )));
+ }
@@ -4,6 +4,7 @@
use super::AccessPattern;
use super::FunctionSignature;
use crate::abstract_domain::AbstractDomain;
+ use crate::abstract_domain::AbstractLocation;
use crate::abstract_domain::DomainMap;
use crate::abstract_domain::UnionMergeStrategy;
use crate::analysis::callgraph::get_program_callgraph;
@@ -12,6 +13,7 @@ use crate::analysis::fixpoint::{Computation, Context};
use crate::intermediate_representation::*;
use crate::utils::log::LogMessage;
use std::collections::BTreeMap;
+ use std::collections::BTreeSet;
use std::collections::HashSet;
/// The context object for propagating known global variables top-down in the call graph.
@@ -31,7 +33,7 @@ impl<'a> Context for KnownGlobalsContext<'a> {
type EdgeLabel = &'a Term<Jmp>;
type NodeLabel = Tid;
/// The values at nodes are the sets of known addresses of global variables for that function.
- type NodeValue = HashSet<u64>;
+ type NodeValue = BTreeSet<AbstractLocation>;
/// Get the call graph corresponding to the context object.
fn get_graph(&self) -> &CallGraph<'a> {
@@ -39,10 +41,14 @@ impl<'a> Context for KnownGlobalsContext<'a> {
}
/// The merge function returns the union of the two input sets of global addresses.
- fn merge(&self, set1: &HashSet<u64>, set2: &HashSet<u64>) -> HashSet<u64> {
+ fn merge(
+ &self,
+ set1: &BTreeSet<AbstractLocation>,
+ set2: &BTreeSet<AbstractLocation>,
+ ) -> BTreeSet<AbstractLocation> {
let mut result = set1.clone();
for address in set2 {
- result.insert(*address);
+ result.insert(address.clone());
}
result
}
@@ -50,9 +56,9 @@ impl<'a> Context for KnownGlobalsContext<'a> {
/// We always propagate all known addresses of global variables along the edges of the call graph.
fn update_edge(
&self,
- globals: &HashSet<u64>,
+ globals: &BTreeSet<AbstractLocation>,
_edge: petgraph::stable_graph::EdgeIndex,
- ) -> Option<HashSet<u64>> {
+ ) -> Option<BTreeSet<AbstractLocation>> {
Some(globals.clone())
}
}
@@ -66,7 +72,7 @@ impl<'a> Context for KnownGlobalsContext<'a> {
fn propagate_known_globals_top_down(
project: &Project,
fn_sigs: &BTreeMap<Tid, FunctionSignature>,
- ) -> BTreeMap<Tid, HashSet<u64>> {
+ ) -> BTreeMap<Tid, BTreeSet<AbstractLocation>> {
let graph = get_program_callgraph(&project.program);
let context = KnownGlobalsContext::new(&graph);
let mut computation = Computation::new(context, None);
@@ -96,12 +102,15 @@ struct GlobalsPropagationContext<'a> {
/// The reversed (!) call graph of the program.
graph: &'a CallGraph<'a>,
/// A map from TIDs of functions to the set of known addresses of global variables for that function.
- known_globals: &'a BTreeMap<Tid, HashSet<u64>>,
+ known_globals: &'a BTreeMap<Tid, BTreeSet<AbstractLocation>>,
}
impl<'a> GlobalsPropagationContext<'a> {
/// Create a new [`GlobalsPropagationContext`] object.
- fn new(graph: &'a CallGraph<'a>, known_globals: &'a BTreeMap<Tid, HashSet<u64>>) -> Self {
+ fn new(
+ graph: &'a CallGraph<'a>,
+ known_globals: &'a BTreeMap<Tid, BTreeSet<AbstractLocation>>,
+ ) -> Self {
GlobalsPropagationContext {
graph,
known_globals,
@@ -113,9 +122,9 @@ impl<'a> Context for GlobalsPropagationContext<'a> {
type EdgeLabel = &'a Term<Jmp>;
type NodeLabel = Tid;
/// The node values for the fixpoint computation
- /// are maps from addresses of global variables known to the function represented by the node
+ /// are maps from locations of (possibly nested) global variables known to the function represented by the node
/// to the corresponding access pattern of the global variable.
- type NodeValue = DomainMap<u64, AccessPattern, UnionMergeStrategy>;
+ type NodeValue = DomainMap<AbstractLocation, AccessPattern, UnionMergeStrategy>;
/// Get the (reversed!) call graph corresponding to the program
fn get_graph(&self) -> &CallGraph<'a> {
@@ -144,7 +153,7 @@ impl<'a> Context for GlobalsPropagationContext<'a> {
.iter()
.filter_map(|(address, access_pattern)| {
if caller_known_globals.contains(address) {
- Some((*address, *access_pattern))
+ Some((address.clone(), *access_pattern))
} else {
None
}
@@ -161,7 +170,7 @@ impl<'a> Context for GlobalsPropagationContext<'a> {
/// that are known to the caller anyway (i.e. some function upwards in the call graph accesses the global variable).
fn propagate_globals_bottom_up(
project: &Project,
- known_globals: &BTreeMap<Tid, HashSet<u64>>,
+ known_globals: &BTreeMap<Tid, BTreeSet<AbstractLocation>>,
fn_sigs: &mut BTreeMap<Tid, FunctionSignature>,
logs: &mut Vec<LogMessage>,
) {
@@ -178,7 +187,7 @@ fn propagate_globals_bottom_up(
let globals = fn_sig
.global_parameters
.iter()
- .map(|(address, access_pattern)| (*address, *access_pattern))
+ .map(|(address, access_pattern)| (address.clone(), *access_pattern))
.collect();
computation.set_node_value(node, globals);
}
@@ -198,7 +207,7 @@ fn propagate_globals_bottom_up(
let fn_globals = &mut fn_sigs.get_mut(fn_tid).unwrap().global_parameters;
for (address, propagated_access_pattern) in propagated_globals.iter() {
fn_globals
- .entry(*address)
+ .entry(address.clone())
.and_modify(|access_pattern| {
*access_pattern = access_pattern.merge(propagated_access_pattern);
})
@@ -207,6 +216,48 @@ fn propagate_globals_bottom_up(
}
}
/// For all nested global parameters add the corresponding parent locations to the function signatures.
///
/// This ensures that subsequent analyses can safely assume
/// that for each nested parameter the parent location is also a parameter.
fn add_parents_of_known_nested_globals(
fn_sigs: &mut BTreeMap<Tid, FunctionSignature>,
generic_pointer_size: ByteSize,
) {
for fn_sig in fn_sigs.values_mut() {
let mut parents_to_add = HashSet::new();
for global in fn_sig.global_parameters.keys() {
parents_to_add.extend(get_parents_of_global(global, generic_pointer_size).into_iter());
}
for parent in parents_to_add {
fn_sig
.global_parameters
.entry(parent)
.and_modify(|pattern| pattern.set_dereference_flag())
.or_insert(
AccessPattern::new()
.with_read_flag()
.with_dereference_flag(),
);
}
}
}
/// Get all parent locations for the given, potentially nested, global location.
fn get_parents_of_global(
location: &AbstractLocation,
generic_pointer_size: ByteSize,
) -> Vec<AbstractLocation> {
if let AbstractLocation::GlobalPointer(_, _) = location {
let (parent, _offset) = location.get_parent_location(generic_pointer_size).unwrap();
let mut parents = get_parents_of_global(&parent, generic_pointer_size);
parents.push(parent);
parents
} else {
Vec::new()
}
}
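
// Illustrative usage sketch, using the `AbstractLocation::mock_global` helper
// (address, dereference offsets, pointer size) that the tests below also use:
//
//     let nested = AbstractLocation::mock_global(0x2000, &[8, 16], 8);
//     let parents = get_parents_of_global(&nested, ByteSize::new(8));
//     // The parents are the global address 0x2000 itself (no dereference)
//     // and the once-dereferenced location at offset 8.
//     assert_eq!(parents.len(), 2);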
/// Propagate the access patterns of global variables along the edges of the call graph of the given project.
///
/// The propagation works as follows:
...@@ -230,14 +281,22 @@ pub fn propagate_globals(
) {
    let known_globals = propagate_known_globals_top_down(project, fn_sigs);
    propagate_globals_bottom_up(project, &known_globals, fn_sigs, logs);
// Also add parent locations of propagated globals to the function signatures
add_parents_of_known_nested_globals(fn_sigs, project.get_pointer_bytesize());
}
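
// Minimal usage sketch, assuming `project` and the signature map `fn_sigs` come from the
// function signature analysis in `mod.rs`:
//
//     let mut logs: Vec<LogMessage> = Vec::new();
//     propagate_globals(&project, &mut fn_sigs, &mut logs);
//     // Afterwards each signature also lists the globals accessed by its callees
//     // (restricted to globals its callers know about) plus, for every nested
//     // global parameter, all of its parent locations.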
#[cfg(test)]
pub mod tests {
    use super::*;
/// Mock the abstract location of a global parameter.
fn mock_global(address: u64) -> AbstractLocation {
AbstractLocation::GlobalAddress {
            address,
size: ByteSize::new(4),
}
}
    #[test]
    fn test_globals_propagation() {
        let mut project = Project::mock_arm32();
...@@ -265,15 +324,16 @@ pub mod tests {
        let mut sig_main = FunctionSignature::new();
        sig_main
            .global_parameters
            .insert(mock_global(1000), AccessPattern::new().with_read_flag());
        let mut sig_callee1 = FunctionSignature::new();
        sig_callee1.global_parameters.insert(
            mock_global(2000),
            AccessPattern::new().with_dereference_flag(),
        );
        let mut sig_callee2 = FunctionSignature::new();
        sig_callee2
            .global_parameters
            .insert(mock_global(1000), AccessPattern::new_unknown_access());
        let mut fn_sigs = BTreeMap::from([
            (Tid::new("main"), sig_main),
            (Tid::new("callee1"), sig_callee1),
...@@ -285,18 +345,53 @@ pub mod tests {
        // Check propagation results
        assert_eq!(
            &fn_sigs[&Tid::new("main")].global_parameters,
            &BTreeMap::from([(mock_global(1000), AccessPattern::new_unknown_access())])
        );
        assert_eq!(
            &fn_sigs[&Tid::new("callee1")].global_parameters,
            &BTreeMap::from([
                (mock_global(1000), AccessPattern::new_unknown_access()),
                (
                    mock_global(2000),
                    AccessPattern::new().with_dereference_flag()
                )
            ])
        );
        assert_eq!(
            &fn_sigs[&Tid::new("callee2")].global_parameters,
            &BTreeMap::from([(mock_global(1000), AccessPattern::new_unknown_access())])
);
}
#[test]
fn test_add_parent_locations() {
// The case of a known nested global parameter without knowing the parent locations happens
// when a callee returns a nested global in a return register.
let location = AbstractLocation::mock_global(0x2000, &[8, 16], 8);
let globals = BTreeMap::from([(location, AccessPattern::new_unknown_access())]);
let fn_sig = FunctionSignature {
parameters: BTreeMap::new(),
global_parameters: globals,
};
let mut fn_sigs = BTreeMap::from([(Tid::new("func"), fn_sig)]);
add_parents_of_known_nested_globals(&mut fn_sigs, ByteSize::new(8));
let fn_sig = &fn_sigs[&Tid::new("func")];
let deref_pattern = AccessPattern::new()
.with_read_flag()
.with_dereference_flag();
assert_eq!(
fn_sig.global_parameters,
BTreeMap::from([
(
AbstractLocation::mock_global(0x2000, &[8, 16], 8),
AccessPattern::new_unknown_access()
),
(
AbstractLocation::mock_global(0x2000, &[8], 8),
deref_pattern
),
(AbstractLocation::mock_global(0x2000, &[], 8), deref_pattern),
])
        );
    }
}
...@@ -6,6 +6,11 @@
//! (is the value read, dereferenced for read access or dereferenced for write access).
//! Accesses to constant addresses that may correspond to global variables are also tracked.
//!
//! For values that are not directly tracked,
//! the algorithm tracks the abstract location that describes how the pointer to that value was computed.
//! This enables tracking of nested parameter objects
//! without actually tracking the memory objects where these objects are located.
//!
//! Known limitations of the analysis:
//! * The analysis is an overapproximation in the sense that it may generate more input parameters
//! than actually exist in some cases.
...@@ -17,16 +22,19 @@
//! * Parameters that are used as input values for variadic functions may be missed.
//! Some variadic functions are stubbed, i.e. parameter recognition should work for these.
//! But not all variadic functions are stubbed.
//! * For floating point parameter registers the base register is detected as a parameter,
//! although only a smaller sub-register is the actual parameter in many cases.
//! Also, if a function uses sub-registers of floating point registers as local variables,
//! the registers may be incorrectly flagged as input parameters.
//! * Tracking of nested parameters via their abstract locations is an unsound, heuristic approach,
//! as the analysis does not keep track of when such nested pointers might get overwritten.
//! Nevertheless, it should result in an overapproximation of parameters and their access patterns in most cases.
//! * The nesting depth for tracked nested parameters is limited
//! to avoid generating infinitely many parameters for recursive types like linked lists.
use crate::abstract_domain::AbstractDomain;
use crate::abstract_domain::AbstractLocation;
use crate::abstract_domain::AbstractMemoryLocation;
use crate::analysis::fixpoint::Computation;
use crate::analysis::forward_interprocedural_fixpoint::create_computation;
use crate::analysis::forward_interprocedural_fixpoint::GeneralizedContext;
...@@ -36,12 +44,10 @@ use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::log::LogMessage;
use std::collections::BTreeMap;

mod context;
use context::*;
mod state;
use state::State;

mod access_pattern;
pub use access_pattern::AccessPattern;
...@@ -49,6 +55,11 @@ mod global_var_propagation;
use global_var_propagation::propagate_globals;
pub mod stubs;
/// The recursion depth limit for abstract locations to be tracked by the function signature analysis,
/// i.e. how many dereference operations an abstract location is allowed to contain
/// before the analysis stops tracking the location.
const POINTER_RECURSION_DEPTH_LIMIT: u64 = 2;
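
// Illustrative note: with a limit of 2, a location with fewer than two dereferences
// (e.g. a plain register or a once-dereferenced pointer) can still be extended by a load,
// while dereferencing a location that already contains two dereferences yields `Top`
// instead (see `load_value_via_id_and_offset` in the memory handling code of the state
// module). Without such a limit, recursive data structures like linked lists would
// generate unboundedly many tracked parameter locations.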
/// Generate the computation object for the fixpoint computation
/// and set the node values for all function entry nodes.
fn generate_fixpoint_computation<'a>(
...@@ -147,7 +158,7 @@ pub fn compute_function_signatures<'a>(
    // Sanitize the parameters
    let mut logs = Vec::new();
    for (fn_tid, fn_sig) in fn_sig_map.iter_mut() {
        let info_log = fn_sig.sanitize(project);
        for log in info_log {
            logs.push(
                LogMessage::new_info(log)
...@@ -155,13 +166,6 @@ pub fn compute_function_signatures<'a>(
                    .source("Function Signature Analysis"),
            )
        }
    }
    // Propagate globals in bottom-up direction in the call graph
    propagate_globals(project, &mut fn_sig_map, &mut logs);
...@@ -174,30 +178,42 @@ pub fn compute_function_signatures<'a>(
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct FunctionSignature {
    /// The parameters of the function together with their access patterns.
    pub parameters: BTreeMap<AbstractLocation, AccessPattern>,
    /// Values in writeable global memory accessed by the function.
    pub global_parameters: BTreeMap<AbstractLocation, AccessPattern>,
}
impl FunctionSignature {
    /// Generate an empty function signature.
    pub fn new() -> Self {
        Self {
            parameters: BTreeMap::new(),
            global_parameters: BTreeMap::new(),
        }
    }
    /// The returned number is the maximum of stack offset plus parameter size
    /// taken over all stack parameters in the function signature.
    pub fn get_stack_params_total_size(&self, stack_register: &Variable) -> i64 {
        let mut stack_params_total_size: i64 = 0;
        for param in self.parameters.keys() {
            if let AbstractLocation::Pointer(var, mem_location) = param {
                if var == stack_register {
                    match mem_location {
                        AbstractMemoryLocation::Location { offset, size } => {
                            stack_params_total_size = std::cmp::max(
                                stack_params_total_size,
                                offset + (u64::from(*size) as i64),
                            );
                        }
                        AbstractMemoryLocation::Pointer { offset, target: _ } => {
                            stack_params_total_size = std::cmp::max(
                                stack_params_total_size,
                                offset + (u64::from(stack_register.size) as i64),
                            );
                        }
                    }
                }
            }
        }
        stack_params_total_size
...@@ -206,21 +222,21 @@ impl FunctionSignature {
    /// Merge the parameter list and the global parameter list of `self` with the given lists.
    fn merge_parameter_lists(
        &mut self,
        params: &[(&AbstractLocation, AccessPattern)],
        global_params: &[(&AbstractLocation, AccessPattern)],
    ) {
        for (arg, sig_new) in params {
            if let Some(sig_self) = self.parameters.get_mut(arg) {
                *sig_self = sig_self.merge(sig_new);
            } else {
                self.parameters.insert((*arg).clone(), *sig_new);
            }
        }
        for (address, sig_new) in global_params {
            if let Some(sig_self) = self.global_parameters.get_mut(address) {
                *sig_self = sig_self.merge(sig_new);
            } else {
                self.global_parameters.insert((*address).clone(), *sig_new);
            }
        }
    }
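
    // Illustrative sketch for `get_stack_params_total_size` above, assuming
    // `stack_register` is the stack pointer `Variable` of the project: a single 4-byte
    // stack parameter at offset 4 yields a total size of 8 bytes (offset plus size),
    // using the `generate_simple_stack_param` helper defined further below:
    //
    //     let mut fn_sig = FunctionSignature::new();
    //     fn_sig.parameters.insert(
    //         generate_simple_stack_param(4, ByteSize::new(4), &stack_register),
    //         AccessPattern::new().with_read_flag(),
    //     );
    //     assert_eq!(fn_sig.get_stack_params_total_size(&stack_register), 8);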
...@@ -239,172 +255,161 @@ impl FunctionSignature {
    /// This may indicate an error in the analysis
    /// as no proper sanitation pass is implemented for such cases yet.
    /// * Merge intersecting stack parameters
    fn sanitize(&mut self, project: &Project) -> Vec<String> {
        match project.cpu_architecture.as_str() {
            "x86" | "x86_32" | "x86_64" => {
                let return_addr_location = AbstractLocation::from_stack_position(
                    &project.stack_pointer_register,
                    0,
                    project.get_pointer_bytesize(),
                );
                self.parameters.remove(&return_addr_location);
            }
            _ => (),
        }
        // FIXME: We check for intersecting stack parameters, but not for intersecting nested parameters.
        // We should add a check for these to generate log messages (but probably without trying to merge such parameters).
        self.merge_intersecting_stack_parameters(&project.stack_pointer_register);
        self.check_for_unaligned_stack_params(&project.stack_pointer_register)
    }
    /// Return a log message for every unaligned stack parameter
    /// and for every stack parameter whose size differs from the generic pointer size.
    fn check_for_unaligned_stack_params(&self, stack_register: &Variable) -> Vec<String> {
        let mut log_messages: Vec<String> = vec![];
        for param in self.parameters.keys() {
            if let Some(offset) = get_offset_if_simple_stack_param(param, stack_register) {
                if param.bytesize() != stack_register.size {
                    log_messages.push("Unexpected stack parameter size".into());
                }
                if offset % u64::from(stack_register.size) as i64 != 0 {
                    log_messages.push("Unexpected stack parameter alignment".into());
                }
            }
        }
        log_messages
    }
    /// Merges intersecting stack parameters by joining them into one stack parameter.
    ///
    /// Only non-nested stack parameters are joined by this function.
    fn merge_intersecting_stack_parameters(&mut self, stack_register: &Variable) {
        let stack_params: BTreeMap<(i64, ByteSize), (AbstractLocation, AccessPattern)> = self
            .parameters
            .iter()
            .filter_map(|(location, access_pattern)| {
                get_offset_if_simple_stack_param(location, stack_register).map(|offset| {
                    (
                        (offset, location.bytesize()),
                        (location.clone(), *access_pattern),
                    )
                })
            })
            .collect();
        let mut current_param: Option<(i64, i64, AccessPattern)> = None;
        for ((offset, _), (param, access_pattern)) in stack_params.into_iter() {
            self.parameters.remove(&param);
            if let Some((cur_offset, cur_size, cur_access_pattern)) = current_param {
                if offset < cur_offset + cur_size {
                    let merged_size = std::cmp::max(
                        cur_size,
                        offset - cur_offset + u64::from(param.bytesize()) as i64,
                    );
                    let merged_access_pattern = cur_access_pattern.merge(&access_pattern);
                    current_param = Some((cur_offset, merged_size, merged_access_pattern));
                } else {
                    self.parameters.insert(
                        generate_simple_stack_param(
                            cur_offset,
                            ByteSize::new(cur_size as u64),
                            stack_register,
                        ),
                        cur_access_pattern,
                    );
                    current_param =
                        Some((offset, u64::from(param.bytesize()) as i64, access_pattern));
                }
            } else {
                current_param = Some((offset, u64::from(param.bytesize()) as i64, access_pattern));
            }
        }
        if let Some((cur_offset, cur_size, cur_access_pattern)) = current_param {
            self.parameters.insert(
                generate_simple_stack_param(
                    cur_offset,
                    ByteSize::new(cur_size as u64),
                    stack_register,
                ),
                cur_access_pattern,
            );
        }
    }
}
impl Default for FunctionSignature {
    fn default() -> Self {
        Self::new()
    }
}

impl FunctionSignature {
    /// Generate a compact JSON-representation of the function signature for pretty printing.
    #[allow(dead_code)]
    pub fn to_json_compact(&self) -> serde_json::Value {
        let mut json_map = serde_json::Map::new();
        let mut param_map = serde_json::Map::new();
        for (param, pattern) in self.parameters.iter() {
            param_map.insert(
                format!("{param}"),
                serde_json::Value::String(format!("{pattern}")),
            );
        }
        json_map.insert(
            "Parameters".to_string(),
            serde_json::Value::Object(param_map),
        );
        let mut global_param_map = serde_json::Map::new();
        for (param, pattern) in self.global_parameters.iter() {
            global_param_map.insert(
                format!("{param}"),
                serde_json::Value::String(format!("{pattern}")),
            );
        }
        json_map.insert(
            "Globals".to_string(),
            serde_json::Value::Object(global_param_map),
        );
        serde_json::Value::Object(json_map)
    }
}

/// If the abstract location is a location on the stack
/// then return its offset relative to the zero position on the stack.
fn get_offset_if_simple_stack_param(
    param: &AbstractLocation,
    stack_register: &Variable,
) -> Option<i64> {
    if let AbstractLocation::Pointer(var, mem_location) = param {
        if var == stack_register {
            if let AbstractMemoryLocation::Location { offset, .. } = mem_location {
                return Some(*offset);
            }
        }
    }
    None
}

/// Generate an abstract location of a (non-nested) stack parameter.
fn generate_simple_stack_param(
    offset: i64,
    size: ByteSize,
    stack_register: &Variable,
) -> AbstractLocation {
    AbstractLocation::Pointer(
        stack_register.clone(),
        AbstractMemoryLocation::Location { offset, size },
    )
}
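
// Illustrative sketch of `merge_intersecting_stack_parameters`: two simple stack
// parameters at offsets 0 (size 8) and 4 (size 4) overlap, since 4 < 0 + 8, so they are
// joined into one parameter at offset 0 of size 8 whose access pattern is the merge of
// both original patterns. A third parameter at offset 8 would not intersect the merged
// one and would be kept as a separate parameter.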
#[cfg(test)]
...
...@@ -91,22 +91,21 @@ impl State {
    /// Fill every return register that might be a pointer with a value that may point to any pointer-sized input ID
    /// or to an output ID specific to the call and output register.
/// Non-pointer-sized output registers are only filled with an ID specific to the call and output register.
    fn generate_return_values_for_call(
        &mut self,
        input_ids: &BTreeSet<AbstractIdentifier>,
        return_args: &[Arg],
        call_tid: &Tid,
    ) {
        let generic_pointer_size = self.stack_id.bytesize();
        let generic_output_relative_values: BTreeMap<AbstractIdentifier, BitvectorDomain> =
            input_ids
                .iter()
                .filter(|id| id.bytesize() == generic_pointer_size)
                .map(|id| (id.clone(), BitvectorDomain::new_top(generic_pointer_size)))
                .collect();
        let mut generic_output = DataDomain::new_empty(generic_pointer_size);
        generic_output.set_relative_values(generic_output_relative_values);

        for output_arg in return_args {
...@@ -115,13 +114,15 @@ impl State {
                data_type: _,
            } = output_arg
            {
                let specific_id = AbstractIdentifier::from_var(call_tid.clone(), var);
                self.add_id_to_tracked_ids(&specific_id);
                let specific_target =
                    DataDomain::from_target(specific_id, Bitvector::zero(var.size.into()).into());
                if var.size == generic_pointer_size {
                    let output = generic_output.merge(&specific_target);
                    self.set_register(var, output);
                } else {
                    self.set_register(var, specific_target);
                }
            }
        }
...@@ -131,21 +132,21 @@ impl State {
    ///
    /// A register (or stack position with positive offset) is considered a parameter
    /// if any access to its value at function start is recorded in the corresponding object signature.
    /// A nested location is considered a parameter if it was dereferenced during the function execution.
    pub fn get_params_of_current_function(&self) -> Vec<(&AbstractLocation, AccessPattern)> {
        let mut params = Vec::new();
        for (id, access_pattern) in self.tracked_ids.iter() {
            if self.is_register_based_param_id(id) {
                if (id.get_location().recursion_depth() > 0 && access_pattern.is_dereferenced())
                    || (id.get_location().recursion_depth() == 0 && access_pattern.is_accessed())
                {
                    params.push((id.get_location(), *access_pattern));
                }
            } else if self.is_stack_based_param_id(id)
                && ((id.get_location().recursion_depth() > 1 && access_pattern.is_dereferenced())
                    || (id.get_location().recursion_depth() == 1 && access_pattern.is_accessed()))
            {
                params.push((id.get_location(), *access_pattern));
            }
        }
        params
...@@ -153,16 +154,26 @@ impl State {
    /// Return a list of all potential global memory addresses
    /// for which any type of access has been tracked by the current state.
    pub fn get_global_mem_params_of_current_function(
        &self,
    ) -> Vec<(&AbstractLocation, AccessPattern)> {
        let mut global_params = Vec::new();
        for (id, access_pattern) in self.tracked_ids.iter() {
            if id.get_tid() == self.get_current_function_tid() {
                let location = id.get_location();
                match location {
                    AbstractLocation::GlobalAddress { .. } => {
                        if access_pattern.is_accessed() {
                            global_params.push((location, *access_pattern));
                        }
                    }
                    AbstractLocation::GlobalPointer(_, _) => {
                        // Nested parameters are only explicitly tracked if they are dereferenced.
                        if access_pattern.is_dereferenced() {
                            global_params.push((location, *access_pattern));
                        }
                    }
                    _ => (),
                }
            }
        }
...@@ -177,52 +188,155 @@ impl State {
    /// Note that this may create new stack parameter objects for self.
    pub fn merge_parameter_access(
        &mut self,
        params: &[(&AbstractLocation, AccessPattern)],
        global_memory: &RuntimeMemoryImage,
    ) {
        for (parameter, call_access_pattern) in params {
            let param_value = self.eval_param_location(parameter, global_memory);
            let param_value = self.substitute_global_mem_address(param_value, global_memory);

            for (id, offset) in param_value.get_relative_values() {
                if let Some(object) = self.tracked_ids.get_mut(id) {
                    *object = object.merge(call_access_pattern);
                }
                if *id == self.stack_id && call_access_pattern.is_dereferenced() {
                    if let Ok(offset) = offset.try_to_bitvec() {
                        // We also have to dereference the stack pointer and set the access flags of the pointed-to value
                        let value = self.load_unsized_value_from_stack(offset.clone());
                        for id in value.referenced_ids() {
                            if let Some(object) = self.tracked_ids.get_mut(id) {
                                // Since we do not know whether the value itself was also dereferenced in the callee,
                                // we have to assume some unknown access to the value.
                                object.set_unknown_access_flags();
                            }
                        }
                    }
                }
                if *id == self.stack_id && call_access_pattern.is_mutably_dereferenced() {
                    // The stack value may have been overwritten by the call
                    if let Ok(offset) = offset.try_to_offset() {
                        self.stack
                            .mark_interval_values_as_top(offset, offset, ByteSize::new(1));
                    }
                }
            }
        }
    }
/// Evaluate the value of a parameter location from a call on the current state.
///
/// This function panics for global parameters.
pub fn eval_param_location(
&mut self,
param_location: &AbstractLocation,
global_memory: &RuntimeMemoryImage,
) -> DataDomain<BitvectorDomain> {
match param_location {
AbstractLocation::GlobalAddress { .. } | AbstractLocation::GlobalPointer(_, _) => {
panic!("Globals are not valid parameter locations.")
}
AbstractLocation::Register(var) => {
let value = self.get_register(var);
self.substitute_global_mem_address(value, global_memory)
}
AbstractLocation::Pointer(var, mem_location) => {
if var == self.stack_id.unwrap_register() {
self.eval_stack_pointer_param_location(mem_location, global_memory)
} else {
let value = self.get_register(var);
let value = self.substitute_global_mem_address(value, global_memory);
self.eval_mem_location_relative_value(value, mem_location)
}
}
}
    }

    /// Evaluate the value of a parameter location relative to the stack pointer position in the current state.
    fn eval_stack_pointer_param_location(
        &mut self,
        mem_location: &AbstractMemoryLocation,
        global_memory: &RuntimeMemoryImage,
) -> DataDomain<BitvectorDomain> {
let stack_register = self.stack_id.unwrap_register();
match mem_location {
AbstractMemoryLocation::Location { offset, size } => {
if let Some(stack_offset) =
self.get_offset_if_exact_stack_pointer(&self.get_register(stack_register))
{
let stack_offset = stack_offset
+ &Bitvector::from_i64(*offset).into_sign_resize(self.stack_id.bytesize());
self.load_value_from_stack(stack_offset, *size)
} else {
DataDomain::new_top(*size)
}
}
AbstractMemoryLocation::Pointer {
offset,
target: inner_mem_location,
} => {
if let Some(stack_offset) =
self.get_offset_if_exact_stack_pointer(&self.get_register(stack_register))
{
let stack_offset = stack_offset
+ &Bitvector::from_i64(*offset).into_sign_resize(self.stack_id.bytesize());
let value = self.load_value_from_stack(stack_offset, self.stack_id.bytesize());
let value = self.substitute_global_mem_address(value, global_memory);
self.eval_mem_location_relative_value(value, inner_mem_location)
                } else {
                    DataDomain::new_top(inner_mem_location.bytesize())
}
}
}
}
/// Return `true` if the given ID is a parameter ID,
/// but not a global parameter.
/// This function does not check access patterns for the ID.
fn is_register_based_param_id(&self, id: &AbstractIdentifier) -> bool {
if id.get_tid() != self.get_current_function_tid() || id == &self.stack_id {
return false;
}
// Filter out global IDs
if matches!(
id.get_location(),
AbstractLocation::GlobalAddress { .. } | AbstractLocation::GlobalPointer(_, _)
) {
return false;
}
// Filter out stack based IDs
if let AbstractLocation::Pointer(var, _) = id.get_location() {
if var == self.stack_id.unwrap_register() {
return false;
} }
} }
true
}
/// Return `true` if the given ID is a stack parameter ID or a nested stack parameter ID.
/// This function does not check access patterns for the ID.
fn is_stack_based_param_id(&self, id: &AbstractIdentifier) -> bool {
if id.get_tid() != self.get_current_function_tid() || id == &self.stack_id {
return false;
}
if let AbstractLocation::Pointer(register, mem_location) = id.get_location() {
if register == self.stack_id.unwrap_register() {
// ID is stack based, we have to filter out negative stack offsets.
match mem_location {
AbstractMemoryLocation::Location { offset, .. }
| AbstractMemoryLocation::Pointer { offset, .. } => {
if *offset < 0 {
return false;
}
}
}
return true;
}
}
false
}
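
    // Illustrative note: an ID of the current function whose location dereferences the
    // stack register at a non-negative offset (e.g. a parameter at stack offset 4) counts
    // as a stack-based parameter ID, while a negative offset such as -8 is filtered out
    // above, because negative offsets point into the local stack frame instead of the
    // caller-provided parameter region (compare `test_get_params_of_current_function`).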
}

/// Generate register arguments from a list of registers.
...@@ -233,25 +347,5 @@ fn generate_args_from_registers(registers: &[Variable]) -> Vec<Arg> {
        .collect()
}
#[cfg(test)]
pub mod tests;
use super::*;
use crate::{bitvec, variable};
#[test]
fn test_generate_return_values_for_call() {
let mut state = State::mock_arm32();
let input_ids = BTreeSet::from([
AbstractIdentifier::mock("mock_fn", "r0", 4),
AbstractIdentifier::mock("mock_fn", "big_register", 16),
]);
let return_args = [Arg::mock_register("r1", 4)];
let call_tid = Tid::new("call");
state.generate_return_values_for_call(&input_ids, &return_args, &call_tid);
assert!(state
.tracked_ids
.get(&AbstractIdentifier::mock("call", "r1", 4))
.is_some());
let expected_return_value = DataDomain::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("mock_fn", "r0", 4),
BitvectorDomain::new_top(ByteSize::new(4)),
),
(
AbstractIdentifier::mock("call", "r1", 4),
bitvec!("0x0:4").into(),
),
]));
assert_eq!(state.register[&variable!("r1:4")], expected_return_value);
}
#[test]
fn test_get_params_of_current_function() {
let mut state = State::mock_arm32();
let param_one = AbstractIdentifier::mock("mock_fn", "param_one", 4);
let param_two = AbstractIdentifier::mock("mock_fn", "param_two", 4);
let not_param = AbstractIdentifier::mock("call_tid", "r0", 4);
let non_param_stack_offset = AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[-8], 4),
);
let global_param = AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::GlobalAddress {
address: 0x1000,
size: ByteSize::new(4),
},
);
state
.tracked_ids
.insert(param_one.clone(), AccessPattern::new().with_read_flag());
state.tracked_ids.insert(
param_two.clone(),
AccessPattern::new().with_dereference_flag(),
);
state
.tracked_ids
.insert(not_param, AccessPattern::new_unknown_access());
state
.tracked_ids
.insert(non_param_stack_offset, AccessPattern::new_unknown_access());
state
.tracked_ids
.insert(global_param.clone(), AccessPattern::new_unknown_access());
let params = state.get_params_of_current_function();
let global_params = state.get_global_mem_params_of_current_function();
assert_eq!(
params,
Vec::from([
(
param_one.get_location(),
AccessPattern::new().with_read_flag()
),
(
param_two.get_location(),
AccessPattern::new().with_dereference_flag()
)
])
);
assert_eq!(
global_params,
Vec::from([(
global_param.get_location(),
AccessPattern::new_unknown_access()
)])
);
}
#[test]
fn test_merge_parameter_access() {
let mut state = State::mock_arm32();
let num_original_tracked_ids = state.tracked_ids.len();
let global_memory = RuntimeMemoryImage::mock();
state.register.insert(
variable!("sp:4"),
DataDomain::from_target(state.stack_id.clone(), bitvec!("0x-20:4").into()),
);
state.register.insert(
variable!("r1:4"),
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r0", 4),
bitvec!("0x2:4").into(),
),
);
let param_loc = AbstractLocation::mock("r0:4", &[], 4);
let stack_param_loc = AbstractLocation::mock("sp:4", &[0], 4);
let high_stack_param_loc = AbstractLocation::mock("sp:4", &[32], 4);
let nested_param_loc = AbstractLocation::mock("r1:4", &[6], 4);
let params = [
(&param_loc, AccessPattern::new_unknown_access()),
(&stack_param_loc, AccessPattern::new_unknown_access()),
(&high_stack_param_loc, AccessPattern::new_unknown_access()),
(&nested_param_loc, AccessPattern::new_unknown_access()),
];
state.merge_parameter_access(&params, &global_memory);
// Merge normal param access
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
param_loc.clone()
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
// Do not merge/track access to local stack variable
assert!(state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[-32], 4)
))
.is_none());
// Generate new stack param if necessary
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("sp:4", &[0], 4)
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
// Track new nested parameter (in the right register)
assert_eq!(
state
.tracked_ids
.get(&AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("r0:4", &[8], 4)
))
.unwrap(),
&AccessPattern::new_unknown_access()
);
assert_eq!(state.tracked_ids.len(), num_original_tracked_ids + 2);
}
#[test]
fn test_eval_param_location() {
let mut state = State::mock_arm32();
let global_memory = RuntimeMemoryImage::mock();
// Param is a register
state
.register
.insert(variable!("r0:4"), bitvec!("0x123:4").into());
let value = state.eval_param_location(&AbstractLocation::mock("r0:4", &[], 4), &global_memory);
assert_eq!(value, bitvec!("0x123:4").into());
// Param is a nested register (and values in nested objects are not tracked)
state.register.insert(
variable!("r0:4"),
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r3", 4),
bitvec!("0x0:4").into(),
),
);
let value = state.eval_param_location(&AbstractLocation::mock("r0:4", &[8], 4), &global_memory);
assert_eq!(
value,
DataDomain::from_target(
AbstractIdentifier::new(Tid::new("mock_fn"), AbstractLocation::mock("r3:4", &[8], 4)),
bitvec!("0x0:4").into()
)
);
// Read the value at a stack offset
state
.stack
.insert_at_byte_index(bitvec!("0x42:4").into(), -8);
let value =
state.eval_param_location(&AbstractLocation::mock("sp:4", &[-8], 4), &global_memory);
assert_eq!(value, bitvec!("0x42:4").into());
// Read a nested pointer from the stack. The read has to remove one level of indirection if the stack value can be read.
state.stack.insert_at_byte_index(
DataDomain::from_target(
AbstractIdentifier::mock("mock_fn", "r0", 4),
bitvec!("0x5:4").into(),
),
-8,
);
let value = state.eval_param_location(
&AbstractLocation::mock("sp:4", &[-8, 2, 6], 4),
&global_memory,
);
assert_eq!(
value,
DataDomain::from_target(
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::mock("r0:4", &[7, 6], 4)
),
bitvec!("0x0:4").into()
)
);
}
use super::State;
use super::POINTER_RECURSION_DEPTH_LIMIT;
use crate::abstract_domain::*;
use crate::intermediate_representation::*;
impl State {
/// Load the value at the given address.
///
/// Only values on the stack and in registers are tracked directly.
/// For all other values abstract location strings are generated
/// that track how the pointer to the value is computed.
///
/// This function does not set any access flags for input IDs in the address value.
pub fn load_value(
&mut self,
address: DataDomain<BitvectorDomain>,
size: ByteSize,
global_memory: Option<&RuntimeMemoryImage>,
) -> DataDomain<BitvectorDomain> {
let mut loaded_value = DataDomain::new_empty(size);
for (id, offset) in address.get_relative_values() {
loaded_value = loaded_value.merge(&self.load_value_via_id_and_offset(id, offset, size));
}
if let Some(global_address) = address.get_absolute_value() {
loaded_value =
loaded_value.merge(&self.load_global_address(global_address, size, global_memory));
}
if address.contains_top() {
loaded_value.set_contains_top_flag();
}
loaded_value
}
    /// Load the value whose position is given by dereferencing the given ID and then adding an offset.
///
/// If the ID is the stack then this function actually loads the value at the given stack position.
/// Otherwise it only generates the abstract location of the value and returns it as a relative value.
fn load_value_via_id_and_offset(
&mut self,
id: &AbstractIdentifier,
offset: &BitvectorDomain,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
if *id == self.stack_id {
// Try to load a value from the stack (which may generate a new stack parameter)
match offset.try_to_bitvec() {
Ok(stack_offset) => self.load_value_from_stack(stack_offset, size),
Err(_) => DataDomain::new_top(size),
}
} else if let (true, Ok(constant_offset)) = (
id.get_location().recursion_depth() < POINTER_RECURSION_DEPTH_LIMIT,
offset.try_to_offset(),
) {
// Extend the abstract location string
let new_id = AbstractIdentifier::new(
id.get_tid().clone(),
id.get_location()
.clone()
.dereferenced(size, self.stack_id.bytesize())
.with_offset_addendum(constant_offset),
);
DataDomain::from_target(new_id, Bitvector::zero(size.into()).into())
} else {
// The abstract location string cannot be extended
DataDomain::new_top(size)
}
}
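
    // Illustrative sketch: if `id` stands for the location `r0` dereferenced at offset 4
    // in function `mock_fn` and the load reads 4 bytes at constant offset 8, the result is
    // a value relative to the extended location `r0` dereferenced at 4, then at 8
    // (compare `test_load_nested_pointers` in the tests below). Once the recursion depth
    // limit is hit, the function falls back to `Top` and the nested value is no longer
    // tracked.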
/// Load a value from the global address space.
/// If the address is located in writeable global memory then generate a new abstract ID for the value
/// and return a value relative to the new ID.
fn load_global_address(
&mut self,
global_address: &BitvectorDomain,
size: ByteSize,
global_memory: Option<&RuntimeMemoryImage>,
) -> DataDomain<BitvectorDomain> {
if let (Ok(offset), Some(global_mem)) = (global_address.try_to_bitvec(), global_memory) {
match global_mem.read(&offset, size) {
Ok(Some(value)) => value.into(),
Ok(None) => {
let address = global_address.try_to_offset().unwrap() as u64;
let global_mem_location = AbstractLocation::GlobalAddress { address, size };
let global_mem_id = AbstractIdentifier::new(
self.get_current_function_tid().clone(),
global_mem_location,
);
DataDomain::from_target(global_mem_id, Bitvector::zero(size.into()).into())
}
Err(_) => DataDomain::new_top(size),
}
} else {
DataDomain::new_top(size)
}
}
/// Load the value at the given stack offset.
/// If the offset is non-negative a corresponding stack parameter is generated if necessary.
pub fn load_value_from_stack(
&mut self,
stack_offset: Bitvector,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
if !stack_offset.sign_bit().to_bool() {
// Stack offset is nonnegative, i.e. this is a stack parameter access.
self.get_stack_param(stack_offset, size)
} else {
self.stack.get(stack_offset, size)
}
}
/// Load a value of unknown bytesize at the given stack offset.
/// If the offset is non-negative, a corresponding stack parameter is generated if necessary.
///
/// One must be careful to not rely on the correctness of the bytesize of the returned value!
/// If the size of the value cannot be guessed from the contents of the stack,
/// then a size of 1 byte is assumed, which will be wrong in general!
pub fn load_unsized_value_from_stack(
&mut self,
offset: Bitvector,
) -> DataDomain<BitvectorDomain> {
if !offset.sign_bit().to_bool() {
// This is a pointer to a stack parameter of the current function
self.stack
.get_unsized(offset.clone())
.unwrap_or_else(|| self.get_stack_param(offset, ByteSize::new(1)))
} else {
self.stack
.get_unsized(offset)
.unwrap_or_else(|| DataDomain::new_top(ByteSize::new(1)))
}
}
/// If `address` is a stack offset, then write `value` onto the stack.
///
    /// If the address points to a stack parameter whose ID does not yet exist,
/// then the ID is generated and added to the tracked IDs.
///
/// This function does not set any access flags for input IDs of the given address or value.
pub fn write_value(
&mut self,
address: DataDomain<BitvectorDomain>,
value: DataDomain<BitvectorDomain>,
) {
if let Some(stack_offset) = self.get_offset_if_exact_stack_pointer(&address) {
if !stack_offset.sign_bit().to_bool() {
// We generate a new stack parameter object, but do not set any access flags,
// since the stack parameter is not accessed but overwritten.
let _ = self
.generate_stack_param_id_if_nonexistent(stack_offset.clone(), value.bytesize());
}
self.stack.add(value, stack_offset);
} else if let Some(stack_offset_domain) = address.get_relative_values().get(&self.stack_id)
{
if let Ok(stack_offset) = stack_offset_domain.try_to_bitvec() {
if !stack_offset.sign_bit().to_bool() {
// We generate a new stack parameter object, but do not set any access flags,
// since the stack parameter is not accessed but overwritten.
let _ = self.generate_stack_param_id_if_nonexistent(
stack_offset.clone(),
value.bytesize(),
);
}
let previous_value = self.stack.get(stack_offset.clone(), value.bytesize());
self.stack.add(previous_value.merge(&value), stack_offset);
} else {
self.stack.mark_all_values_as_top();
}
}
}
/// Get the value located at a positive stack offset.
/// This function panics if the address is a negative offset.
///
/// If no corresponding stack parameter ID exists for the value,
/// generate it and then return it as an unmodified stack parameter.
/// Otherwise just read the value at the given stack address.
fn get_stack_param(
&mut self,
address: Bitvector,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
assert!(!address.sign_bit().to_bool());
if let Some(param_id) = self.generate_stack_param_id_if_nonexistent(address.clone(), size) {
let stack_param =
DataDomain::from_target(param_id, Bitvector::zero(size.into()).into());
self.stack.add(stack_param.clone(), address);
stack_param
} else {
self.stack.get(address, size)
}
}
/// If the address is an exactly known pointer to the stack with a constant offset, then return the offset.
pub fn get_offset_if_exact_stack_pointer(
&self,
address: &DataDomain<BitvectorDomain>,
) -> Option<Bitvector> {
if let Some((target, offset)) = address.get_if_unique_target() {
if *target == self.stack_id {
return offset.try_to_bitvec().ok();
}
}
None
}
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::{bitvec, variable};
/// Mock an abstract ID representing the stack.
fn mock_stack_id() -> AbstractIdentifier {
AbstractIdentifier::from_var(Tid::new("mock_fn"), &variable!("sp:4"))
}
/// Mock an abstract ID of a stack parameter
fn mock_stack_param_id(offset: i64, size: u64) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::from_stack_position(
mock_stack_id().unwrap_register(),
offset,
ByteSize::new(size),
),
)
}
#[test]
fn test_get_offset_if_exact_stack_pointer() {
let state = State::mock_arm32();
let stack_pointer =
DataDomain::from_target(mock_stack_id(), Bitvector::from_i32(-10).into());
assert_eq!(
state.get_offset_if_exact_stack_pointer(&stack_pointer),
Some(Bitvector::from_i32(-10))
);
}
#[test]
fn test_get_stack_param() {
// Reading a previously non-existing stack parameter
let mut state = State::mock_arm32();
let stack_param = state.get_stack_param(bitvec!("0xc:4"), ByteSize::new(8));
let expected_stack_id = AbstractIdentifier::mock_nested("mock_fn", "sp:4", &[12], 8);
let expected_value =
DataDomain::from_target(expected_stack_id.clone(), bitvec!("0x0:8").into());
assert_eq!(&stack_param, &expected_value);
assert!(state.tracked_ids.contains_key(&expected_stack_id));
// Reading the stack parameter again. The position should still contain the stack parameter.
let stack_param = state.get_stack_param(bitvec!("0xc:4"), ByteSize::new(8));
assert_eq!(&stack_param, &expected_value);
// Reading the stack parameter after it has been overwritten with a value.
state
.stack
.insert_at_byte_index(bitvec!("0x2a:8").into(), 12);
let value = state.get_stack_param(bitvec!("0xc:4"), ByteSize::new(8));
assert_eq!(value, bitvec!("0x2a:8").into());
}
#[test]
fn test_store_and_load_from_stack() {
let mut state = State::mock_arm32();
let address = DataDomain::from_target(mock_stack_id(), bitvec!("-4:4").into());
let value: DataDomain<BitvectorDomain> = bitvec!("0x0:4").into();
// write and load a value to the current stack frame
state.write_value(address.clone(), value.clone());
assert_eq!(state.stack.iter().len(), 1);
assert_eq!(
state.stack.get(bitvec!("-4:4"), ByteSize::new(4)),
value.clone()
);
assert_eq!(state.load_value(address, ByteSize::new(4), None), value);
// Load a parameter register and check that the parameter gets generated
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x4:4").into());
let stack_param_id = mock_stack_param_id(4, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
assert_eq!(state.tracked_ids.iter().len(), 6);
assert_eq!(
state.load_value(address.clone(), ByteSize::new(4), None),
stack_param
);
assert_eq!(state.tracked_ids.iter().len(), 7);
assert_eq!(
state
.tracked_ids
.get(&stack_param_id)
.unwrap()
.is_accessed(),
false
); // The load method does not set access flags.
}
#[test]
fn test_load_unsized_from_stack() {
let mut state = State::mock_arm32();
// Load an existing stack param (generated by a sized load at the same address)
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x0:4").into());
let stack_param_id = mock_stack_param_id(0, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
state.load_value(address, ByteSize::new(4), None);
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x0:4").into());
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Load a non-existing stack param
let stack_param_id = mock_stack_param_id(4, 1);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:1").into());
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x4:4"));
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Unsized load from the current stack frame
let unsized_load = state.load_unsized_value_from_stack(bitvec!("-4:4"));
assert_eq!(unsized_load, DataDomain::new_top(ByteSize::new(1)));
}
#[test]
fn test_load_nested_pointers() {
let mut state = State::mock_arm32();
let global_memory = RuntimeMemoryImage::mock();
let parent_id = AbstractIdentifier::mock_nested("mock_fn", "r0:4", &[4], 4);
let pointer = DataDomain::from_target(parent_id.clone(), bitvec!("0x8:4").into());
let loaded_value = state.load_value(pointer, ByteSize::new(4), Some(&global_memory));
let expected_id = AbstractIdentifier::mock_nested("mock_fn", "r0:4", &[4, 8], 4);
let expected_value = DataDomain::from_target(expected_id.clone(), bitvec!("0x0:4").into());
assert_eq!(loaded_value, expected_value);
}
}
use super::AccessPattern;
use super::POINTER_RECURSION_DEPTH_LIMIT;
use crate::abstract_domain::*;
use crate::intermediate_representation::*;
use crate::prelude::*;
use std::collections::BTreeMap;
use std::collections::BTreeSet;

/// Methods of [`State`] related to handling call instructions.
mod call_handling;
/// Methods of [`State`] related to handling load and store instructions.
mod memory_handling;
/// The state tracks knowledge about known register values, /// The state tracks knowledge about known register values,
/// known values on the stack, and access patterns of tracked variables. /// known values on the stack, and access patterns of tracked variables.
...@@ -113,90 +114,6 @@ impl State { ...@@ -113,90 +114,6 @@ impl State {
self.stack_id.get_tid() self.stack_id.get_tid()
} }
/// Load the value at the given address.
///
/// Only constant addresses on the stack are tracked.
/// Thus this function will always return a `Top` domain for any address
/// that may not be a stack address with constant offset.
///
/// This function does not set any access flags for input IDs in the address value.
pub fn load_value(
&mut self,
address: DataDomain<BitvectorDomain>,
size: ByteSize,
global_memory: Option<&RuntimeMemoryImage>,
) -> DataDomain<BitvectorDomain> {
if let Some(stack_offset) = self.get_offset_if_exact_stack_pointer(&address) {
self.load_value_from_stack(stack_offset, size)
} else if let (Ok(global_address), Some(global_mem)) =
(address.try_to_bitvec(), global_memory)
{
if let Ok(Some(value)) = global_mem.read(&global_address, size) {
value.into()
} else {
DataDomain::new_top(size)
}
} else {
DataDomain::new_top(size)
}
}
/// Load the value at the given stack offset.
/// If the offset is non-negative a corresponding stack parameter is generated if necessary.
fn load_value_from_stack(
&mut self,
stack_offset: Bitvector,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
if !stack_offset.sign_bit().to_bool() {
// Stack offset is nonnegative, i.e. this is a stack parameter access.
self.get_stack_param(stack_offset, size)
} else {
self.stack.get(stack_offset, size)
}
}
/// Load a value of unknown bytesize at the given stack offset.
/// If the offset is non-negative, a corresponding stack parameter is generated if necessary.
///
/// One must be careful to not rely on the correctness of the bytesize of the returned value!
/// If the size of the value cannot be guessed from the contents of the stack,
/// then a size of 1 byte is assumed, which will be wrong in general!
fn load_unsized_value_from_stack(&mut self, offset: Bitvector) -> DataDomain<BitvectorDomain> {
if !offset.sign_bit().to_bool() {
// This is a pointer to a stack parameter of the current function
self.stack
.get_unsized(offset.clone())
.unwrap_or_else(|| self.get_stack_param(offset, ByteSize::new(1)))
} else {
self.stack
.get_unsized(offset)
.unwrap_or_else(|| DataDomain::new_top(ByteSize::new(1)))
}
}
/// If `address` is a stack offset, then write `value` onto the stack.
///
/// If the address points to a stack parameter whose ID does not yet exist,
/// then the ID is generated and added to the tracked IDs.
///
/// This function does not set any access flags for input IDs of the given address or value.
pub fn write_value(
&mut self,
address: DataDomain<BitvectorDomain>,
value: DataDomain<BitvectorDomain>,
) {
if let Some(stack_offset) = self.get_offset_if_exact_stack_pointer(&address) {
// We generate a new stack parameter object, but do not set any access flags,
// since the stack parameter is not accessed but overwritten.
if !stack_offset.sign_bit().to_bool() {
let _ = self
.generate_stack_param_id_if_nonexistent(stack_offset.clone(), value.bytesize());
}
self.stack.add(value, stack_offset);
}
}
/// If the stack parameter ID corresponding to the given stack offset does not exist
/// then generate it, add it to the list of tracked IDs, and return it.
fn generate_stack_param_id_if_nonexistent(
@@ -228,40 +145,6 @@ impl State {
}
}
/// Get the value located at a positive stack offset.
///
/// If no corresponding stack parameter ID exists for the value,
/// generate it and then return it as an unmodified stack parameter.
/// Otherwise just read the value at the given stack address.
fn get_stack_param(
&mut self,
address: Bitvector,
size: ByteSize,
) -> DataDomain<BitvectorDomain> {
assert!(!address.sign_bit().to_bool());
if let Some(param_id) = self.generate_stack_param_id_if_nonexistent(address.clone(), size) {
let stack_param =
DataDomain::from_target(param_id, Bitvector::zero(size.into()).into());
self.stack.add(stack_param.clone(), address);
stack_param
} else {
self.stack.get(address, size)
}
}
/// If the address is an exactly known pointer to the stack with a constant offset, then return the offset.
pub fn get_offset_if_exact_stack_pointer(
&self,
address: &DataDomain<BitvectorDomain>,
) -> Option<Bitvector> {
if let Some((target, offset)) = address.get_if_unique_target() {
if *target == self.stack_id {
return offset.try_to_bitvec().ok();
}
}
None
}
/// Merges the access pattern of the given abstract identifier in `self` with the provided access pattern.
///
/// Does not add the identifier to the list of tracked identifiers if it is not already tracked in `self`.
@@ -308,13 +191,56 @@ impl State {
size,
data_type: _,
} => {
self.set_deref_flag_for_pointer_inputs_of_expression(address);
self.set_read_flag_for_input_ids_of_expression(address);
let address = self.eval(address);
self.load_value(address, *size, None)
}
}
}
/// Evaluate the value at the given memory location
/// where `value` represents the root pointer relative to which the memory location needs to be computed.
fn eval_mem_location_relative_value(
&mut self,
value: DataDomain<BitvectorDomain>,
mem_location: &AbstractMemoryLocation,
) -> DataDomain<BitvectorDomain> {
let target_size = mem_location.bytesize();
let mut eval_result = DataDomain::new_empty(target_size);
for (id, offset) in value.get_relative_values() {
let mut location = id.get_location().clone();
let mut mem_location = mem_location.clone();
match offset.try_to_offset() {
Ok(concrete_offset) => mem_location.add_offset_at_root(concrete_offset),
Err(_) => {
eval_result.set_contains_top_flag();
continue;
}
};
location.extend(mem_location, self.stack_id.bytesize());
if location.recursion_depth() <= POINTER_RECURSION_DEPTH_LIMIT {
eval_result = eval_result.merge(&DataDomain::from_target(
AbstractIdentifier::new(id.get_tid().clone(), location),
Bitvector::zero(target_size.into()).into(),
));
} else {
eval_result.set_contains_top_flag();
}
}
if value.contains_top() || value.get_absolute_value().is_some() {
eval_result.set_contains_top_flag();
}
eval_result
}
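// A worked illustration of the method above (illustrative values, not part of
// this commit): if `value` contains the relative value `mock_fn::r0 + 0x8` and
// `mem_location` is `Location { offset: 0, size: 4 }`, then the concrete offset
// 0x8 is folded into the memory location and the result is a relative value for
// the nested location `r0->0x8`. If the offset is not exactly known, or if the
// combined location would exceed `POINTER_RECURSION_DEPTH_LIMIT`, the result is
// approximated by setting the `Top` flag instead.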
/// Add all relative IDs in `data` to the list of tracked IDs.
pub fn track_contained_ids(&mut self, data: &DataDomain<BitvectorDomain>) {
for id in data.referenced_ids() {
self.add_id_to_tracked_ids(id);
}
}
/// If the given expression is not an [`Expression::Var`] set the read flags
/// for all IDs that may be referenced when computing the value of the expression.
///
@@ -341,22 +267,34 @@ impl State {
}
}
/// Set the read and dereferenced flag for every tracked pointer ID
/// that may be referenced when computing the value of the given address expression.
pub fn set_deref_flag_for_pointer_inputs_of_expression(&mut self, expression: &Expression) {
for register in get_pointer_inputs_vars_of_address_expression(expression) {
self.set_deref_flag_for_contained_ids(&self.get_register(register));
}
}
/// Set the read and mutably dereferenced flag for every tracked pointer ID
/// that may be referenced when computing the value of the given address expression.
pub fn set_mutable_deref_flag_for_pointer_inputs_of_expression(
&mut self,
expression: &Expression,
) {
for register in get_pointer_inputs_vars_of_address_expression(expression) {
self.set_deref_mut_flag_for_contained_ids(&self.get_register(register));
}
}
/// Set the read flag for every tracked ID contained in the given value.
pub fn set_read_flag_for_contained_ids(&mut self, value: &DataDomain<BitvectorDomain>) {
for id in value.referenced_ids() {
if let Some(object) = self.tracked_ids.get_mut(id) {
object.set_read_flag();
}
}
}
/// Set the read and dereferenced flag for every tracked ID contained in the given value.
pub fn set_deref_flag_for_contained_ids(&mut self, value: &DataDomain<BitvectorDomain>) {
for id in value.referenced_ids() {
@@ -413,6 +351,34 @@ impl State {
}
}
/// Get a list of possible pointer input variables for the given address expression.
///
/// Only addition, subtraction and bitwise AND, OR, XOR can have pointers as inputs.
/// All other subexpressions are assumed to only compute offsets.
fn get_pointer_inputs_vars_of_address_expression(expr: &Expression) -> Vec<&Variable> {
let mut input_vars = Vec::new();
match expr {
Expression::BinOp { op, lhs, rhs } => {
match op {
BinOpType::IntAdd | BinOpType::IntAnd | BinOpType::IntXOr | BinOpType::IntOr => {
// There could be a pointer on either of the sides
input_vars.extend(get_pointer_inputs_vars_of_address_expression(lhs));
input_vars.extend(get_pointer_inputs_vars_of_address_expression(rhs));
}
BinOpType::IntSub => {
// Only the left side could be a pointer
input_vars.extend(get_pointer_inputs_vars_of_address_expression(lhs));
}
_ => (),
}
}
Expression::Var(var) => input_vars.push(var),
_ => (),
}
input_vars
}
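// A minimal sketch (hypothetical test, not part of this commit) of the intended
// behavior: in a subtraction only the left-hand side is a pointer candidate,
// since subtracting a pointer from a value does not yield a pointer.
#[test]
fn test_pointer_inputs_of_subtraction_sketch() {
    let expr = Expression::BinOp {
        op: BinOpType::IntSub,
        lhs: Box::new(Expression::Var(variable!("RAX:8"))),
        rhs: Box::new(Expression::Var(variable!("RBX:8"))),
    };
    // RBX is assumed to be an offset, so only RAX is returned.
    assert_eq!(
        get_pointer_inputs_vars_of_address_expression(&expr),
        vec![&variable!("RAX:8")]
    );
}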
impl AbstractDomain for State {
/// Merge two states
fn merge(&self, other: &Self) -> Self {
@@ -472,4 +438,4 @@ impl State {
}
#[cfg(test)]
pub mod tests;
@@ -26,18 +26,6 @@ fn mock_stack_id() -> AbstractIdentifier {
AbstractIdentifier::from_var(Tid::new("mock_fn"), &variable!("sp:4"))
}
/// Mock an abstract ID of a stack parameter
fn mock_stack_param_id(offset: i64, size: u64) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new("mock_fn"),
AbstractLocation::from_stack_position(
mock_stack_id().unwrap_register(),
offset,
ByteSize::new(size),
),
)
}
#[test]
fn test_new() {
let state = State::mock_arm32();
@@ -65,61 +53,6 @@ fn test_new() {
}
#[test]
fn test_store_and_load_from_stack() {
let mut state = State::mock_arm32();
let address = DataDomain::from_target(mock_stack_id(), bitvec!("-4:4").into());
let value: DataDomain<BitvectorDomain> = bitvec!("0x0:4").into();
// write and load a value to the current stack frame
state.write_value(address.clone(), value.clone());
assert_eq!(state.stack.iter().len(), 1);
assert_eq!(
state.stack.get(bitvec!("-4:4"), ByteSize::new(4)),
value.clone()
);
assert_eq!(state.load_value(address, ByteSize::new(4), None), value);
// Load a parameter register and check that the parameter gets generated
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x4:4").into());
let stack_param_id = mock_stack_param_id(4, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
assert_eq!(state.tracked_ids.iter().len(), 6);
assert_eq!(
state.load_value(address.clone(), ByteSize::new(4), None),
stack_param
);
assert_eq!(state.tracked_ids.iter().len(), 7);
assert_eq!(
state
.tracked_ids
.get(&stack_param_id)
.unwrap()
.is_accessed(),
false
); // The load method does not set access flags.
}
#[test]
fn test_load_unsized_from_stack() {
let mut state = State::mock_arm32();
// Load an existing stack param (generated by a sized load at the same address)
let address = DataDomain::from_target(mock_stack_id(), bitvec!("0x0:4").into());
let stack_param_id = mock_stack_param_id(0, 4);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:4").into());
state.load_value(address, ByteSize::new(4), None);
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x0:4").into());
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Load a non-existing stack param
let stack_param_id = mock_stack_param_id(4, 1);
let stack_param = DataDomain::from_target(stack_param_id.clone(), bitvec!("0x0:1").into());
let unsized_load = state.load_unsized_value_from_stack(bitvec!("0x4:4"));
assert_eq!(unsized_load, stack_param);
assert!(state.tracked_ids.get(&stack_param_id).is_some());
// Unsized load from the current stack frame
let unsized_load = state.load_unsized_value_from_stack(bitvec!("-4:4"));
assert_eq!(unsized_load, DataDomain::new_top(ByteSize::new(1)));
}
#[test]
fn test_eval() {
let mut state = State::mock_arm32();
// Test the eval method
...
use super::*;
use crate::variable;
/// Mock the abstract location of a global parameter.
fn mock_global_x64(address: u64) -> AbstractLocation {
AbstractLocation::GlobalAddress {
address: address,
size: ByteSize::new(8),
}
}
impl FunctionSignature {
/// Create a mock x64 function signature with 2 parameters, one of which is accessed mutably,
/// one mutably accessed global variable at address 0x2000
@@ -7,40 +16,45 @@ impl FunctionSignature {
pub fn mock_x64() -> FunctionSignature {
let mut write_access_pattern = AccessPattern::new();
write_access_pattern.set_unknown_access_flags();
let parameters = BTreeMap::from_iter([
(
AbstractLocation::from_var(&variable!("RDI:8")).unwrap(),
AccessPattern::new(),
),
(
AbstractLocation::from_var(&variable!("RSI:8")).unwrap(),
write_access_pattern,
),
]);
FunctionSignature {
parameters,
global_parameters: BTreeMap::from([
(mock_global_x64(0x2000), AccessPattern::new_unknown_access()),
(
mock_global_x64(0x3000),
AccessPattern::new().with_dereference_flag(),
),
]),
}
}
}
fn mock_stack_arg(offset: i64, size: u64) -> AbstractLocation {
AbstractLocation::Pointer(
variable!("RSP:8"),
AbstractMemoryLocation::Location {
offset: offset,
size: ByteSize::new(size),
},
)
}
#[test]
fn test_two_parameter_overlapping_merging() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(0x1000, 8);
let stack_parm_2 = mock_stack_arg(0x1004, 8);
func_sig
.parameters
@@ -51,13 +65,10 @@ fn test_two_parameter_overlapping_merging() {
assert_eq!(
func_sig.sanitize(&proj),
vec!["Unexpected stack parameter size".to_string()],
);
let mut expected_function_sig = FunctionSignature::mock_x64();
let expected_stack_arg = mock_stack_arg(0x1000, 12);
expected_function_sig
.parameters
@@ -69,10 +80,10 @@ fn test_merging_multiple_parameters() {
fn test_merging_multiple_parameters() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(0x8, 8);
let stack_parm_2 = mock_stack_arg(0x8, 1);
let stack_parm_3 = mock_stack_arg(0xf, 1);
let stack_parm_4 = mock_stack_arg(0x10, 8);
func_sig.parameters.extend([
(stack_parm_1.clone(), AccessPattern::new()),
@@ -80,7 +91,8 @@ fn test_merging_multiple_parameters() {
(stack_parm_3, AccessPattern::new()),
(stack_parm_4.clone(), AccessPattern::new()),
]);
let logs = func_sig.sanitize(&proj);
assert_eq!(logs, Vec::<String>::new());
let mut expected_function_sig = FunctionSignature::mock_x64();
expected_function_sig.parameters.extend([
@@ -93,8 +105,8 @@ fn test_log_messages() {
fn test_log_messages() {
let proj = Project::mock_x64();
let mut func_sig = FunctionSignature::mock_x64();
let stack_parm_1 = mock_stack_arg(0x1001, 8);
let stack_parm_2 = mock_stack_arg(0x1007, 4);
func_sig.parameters.extend([
(stack_parm_1.clone(), AccessPattern::new()),
@@ -103,13 +115,10 @@ fn test_log_messages() {
let logs = func_sig.sanitize(&proj);
assert_eq!(
vec![
"Unexpected stack parameter size".to_string(),
"Unexpected stack parameter alignment".to_string()
],
logs
);
}
@@ -6,50 +6,52 @@ impl<'a> Context<'a> {
/// to the value that represents it in the caller.
///
/// For parameter IDs this is the value of the parameter on function call.
/// For IDs of objects created in the callee it is the ID itself.
/// For other IDs (including the callee stack frame ID) it is a `Top` value,
/// i.e. the value of the ID should be unknown to the caller.
///
/// Note that this function assumes that callee-originating IDs have already been renamed
/// to the name they should represent in the caller beforehand.
pub fn create_callee_id_to_caller_data_map(
&self,
state_before_call: &State,
state_before_return: &State,
) -> BTreeMap<AbstractIdentifier, Data> {
let stack_register = &self.project.stack_pointer_register;
let mut id_map = BTreeMap::new();
let callee_tid = state_before_return.get_fn_tid();
if let Some(callee_fn_sig) = self.fn_signatures.get(callee_tid) {
for param in callee_fn_sig.parameters.keys() {
let param_id = AbstractIdentifier::new(callee_tid.clone(), param.clone());
let param_value = state_before_call
.eval_abstract_location(param, &self.project.runtime_memory_image);
id_map.insert(param_id, param_value);
}
for global_param in callee_fn_sig.global_parameters.keys() {
let global_param_id =
AbstractIdentifier::new(callee_tid.clone(), global_param.clone());
let global_param_value = state_before_call
.eval_abstract_location(global_param, &self.project.runtime_memory_image);
id_map.insert(global_param_id, global_param_value);
}
}
for object_id in state_before_return.memory.get_all_object_ids() {
if object_id.get_tid() != callee_tid || !object_id.get_path_hints().is_empty() {
// Object is neither a parameter object nor the stack frame of the callee.
id_map.insert(
object_id.clone(),
Data::from_target(
object_id,
Bitvector::zero(stack_register.size.into()).into(),
),
);
}
}
id_map.insert(
state_before_return.stack_id.clone(),
Data::new_top(stack_register.size),
);
// Also insert the global memory ID to the map.
id_map.insert(
state_before_return.get_global_mem_id(),
Data::from_target(
@@ -61,6 +63,55 @@ impl<'a> Context<'a> {
id_map
}
/// Create a map that maps callee IDs to the value assigned to it in the caller after a return instruction.
///
/// This is *not* the map used in the internal `update_return` handling.
/// Instead, the created map combines several ID renaming steps used internally into one renaming map.
/// The map is intended for use in other analyses depending on the PointerInference,
/// but not in the PointerInference itself.
pub fn create_full_callee_id_to_caller_data_map(
&self,
state_before_call: &State,
state_before_return: &State,
call_tid: &Tid,
) -> BTreeMap<AbstractIdentifier, Data> {
let cconv = &self.project.program.term.subs[state_before_return.get_fn_tid()]
.term
.calling_convention;
let cconv = match self.project.get_specific_calling_convention(cconv) {
Some(cconv) => cconv,
None => {
return BTreeMap::new();
}
};
let callee_fn_sig = self
.fn_signatures
.get(state_before_return.get_fn_tid())
.unwrap();
let mut minimized_return_state = state_before_return.clone();
minimized_return_state.minimize_before_return_instruction(callee_fn_sig, cconv);
let mut location_to_data_map =
minimized_return_state.map_abstract_locations_to_pointer_data(call_tid);
minimized_return_state.filter_location_to_pointer_data_map(&mut location_to_data_map);
let mut replacement_map =
minimized_return_state.get_id_to_unified_ids_replacement_map(&location_to_data_map);
minimized_return_state.merge_mem_objects_with_unique_abstract_location(call_tid);
let unified_to_caller_replacement_map =
self.create_callee_id_to_caller_data_map(state_before_call, &minimized_return_state);
// In the ID-to-unified-ID map replace parameter IDs with their corresponding values in the caller.
for value in replacement_map.values_mut() {
value.replace_all_ids(&unified_to_caller_replacement_map);
}
// Add all parameter IDs to the map
let callee_tid = state_before_return.get_fn_tid();
for (id, value) in unified_to_caller_replacement_map {
if id.get_tid() == callee_tid && id.get_path_hints().is_empty() {
replacement_map.insert(id, value);
}
}
replacement_map
}
/// Create a map from the parameter IDs (of the function that the given state corresponds to)
/// to the corresponding access patterns.
pub fn create_id_to_access_pattern_map(
@@ -71,7 +122,11 @@ impl<'a> Context<'a> {
let fn_tid = state.get_fn_tid();
let callee_fn_sig = self.fn_signatures.get(fn_tid).unwrap();
for (param, access_pattern) in &callee_fn_sig.parameters {
let param_id = AbstractIdentifier::new(fn_tid.clone(), param.clone());
id_to_access_pattern_map.insert(param_id.clone(), access_pattern);
}
for (param, access_pattern) in &callee_fn_sig.global_parameters {
let param_id = AbstractIdentifier::new(fn_tid.clone(), param.clone());
id_to_access_pattern_map.insert(param_id.clone(), access_pattern);
}
...
@@ -97,6 +97,18 @@ impl<'a> Context<'a> {
}
}
/// If `result` is an `Err`, log the error message as an error message through the `log_collector` channel.
pub fn log_error(&self, result: Result<(), Error>, location: Option<&Tid>) {
if let Err(err) = result {
let mut log_message =
LogMessage::new_error(format!("{err}")).source("Pointer Inference");
if let Some(loc) = location {
log_message = log_message.location(loc.clone());
};
let _ = self.log_collector.send(LogThreadMsg::Log(log_message));
}
}
/// Detect and log if the stack pointer is not as expected when returning from a function.
fn detect_stack_pointer_information_loss_on_return(
&self,
@@ -299,7 +311,7 @@ impl<'a> Context<'a> {
/// Merge global memory data from the callee global memory object to the caller global memory object
/// if the corresponding global variable is marked as mutable in both the caller and callee.
fn merge_non_nested_global_mem_from_callee(
&self,
caller_state: &mut State,
callee_global_mem: &AbstractObject,
@@ -360,23 +372,47 @@ fn compute_call_return_global_var_access_intervals(
caller_fn_sig: &FunctionSignature,
callee_fn_sig: &FunctionSignature,
) -> BTreeMap<u64, AccessPattern> {
let caller_mut_indices: BTreeSet<u64> = caller_fn_sig
.global_parameters
.iter()
.filter_map(|(location, access_pattern)| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
if access_pattern.is_mutably_dereferenced() {
return Some(*address);
}
}
None
})
.collect();
let callee_mut_indices: BTreeSet<u64> = callee_fn_sig
.global_parameters
.iter()
.filter_map(|(location, access_pattern)| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
if access_pattern.is_mutably_dereferenced() {
return Some(*address);
}
}
None
})
.collect();
let mut intervals: BTreeMap<u64, AccessPattern> = caller_fn_sig
.global_parameters
.keys()
.chain(callee_fn_sig.global_parameters.keys())
.filter_map(|location| {
if let AbstractLocation::GlobalAddress { address, .. } = location {
Some((*address, AccessPattern::new()))
} else {
None
}
})
.collect();
for (index, access_pattern) in intervals.iter_mut() {
if caller_mut_indices.contains(index) && callee_mut_indices.contains(index) {
access_pattern.set_mutably_dereferenced_flag();
}
}
intervals
}
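// Worked example for the function above (illustrative addresses, not part of
// this commit): if the caller mutably dereferences the global variable at
// 0x2000 while the callee mutably dereferences the globals at 0x2000 and
// 0x3000, the returned map contains 0x2000 with the mutably-dereferenced flag
// set and 0x3000 with an empty access pattern, since a global address is only
// marked as mutable here when both caller and callee write through it.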
...
@@ -173,6 +173,12 @@ fn update_return() {
&variable!("RDX:8"),
Data::from_target(new_id("callee", "RDI"), bv(0)),
);
state_before_return
.memory
.get_object_mut(&callee_created_heap_id)
.unwrap()
.set_value(bitvec!("0x42:8").into(), &bitvec!("0x6:8").into())
.unwrap();
let state_before_call = State::new(&variable!("RSP:8"), Tid::new("caller"), BTreeSet::new());
let mut state_before_call = context
@@ -210,10 +216,8 @@ fn update_return() {
assert_eq!(
state.get_register(&variable!("RAX:8")),
Data::from_target(
AbstractIdentifier::mock("call_callee", "RAX", 8),
bv(0).into()
)
);
assert_eq!(
@@ -234,15 +238,12 @@ fn update_return() {
.get_all_object_ids()
.get(&param_obj_id)
.is_some());
let value = state
.memory
.get_object(&AbstractIdentifier::mock("call_callee", "RAX", 8))
.unwrap()
.get_value(bitvec!("0x-a:8"), ByteSize::new(8));
assert_eq!(value, bitvec!("0x42:8").into());
}
#[test]
@@ -297,6 +298,20 @@ fn get_unsound_caller_ids() {
new_id("callee", "RSI"),
Data::from_target(new_id("caller", "RAX"), bv(2).into()),
);
callee_id_to_caller_data_map.insert(
AbstractIdentifier::new(
Tid::new("callee"),
AbstractLocation::mock_global(0x2000, &[], 8),
),
bv(42).into(),
);
callee_id_to_caller_data_map.insert(
AbstractIdentifier::new(
Tid::new("callee"),
AbstractLocation::mock_global(0x3000, &[], 8),
),
bv(42).into(),
);
let callee_tid = Tid::new("callee");
let callee_state = State::from_fn_sig(
@@ -395,7 +410,7 @@ fn test_merge_global_mem_from_callee() {
Data::from_target(caller_state.get_global_mem_id(), bitvec!("0:8").into()),
)]);
context.merge_non_nested_global_mem_from_callee(
&mut caller_state,
callee_global_mem,
&replacement_map,
...
@@ -124,6 +124,20 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
return None;
}
};
let callee_fn_sig = match self.fn_signatures.get(state_before_return.get_fn_tid()) {
Some(fn_sig) => fn_sig,
None => {
let location = state_before_return.get_fn_tid();
self.log_error(
Err(anyhow!(
"Internal function {} has no function signature.",
location
)),
Some(location),
);
return None;
}
};
// Detect possible information loss on the stack pointer and report it.
if let Err(err) = self.detect_stack_pointer_information_loss_on_return(state_before_return)
@@ -133,19 +147,19 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
// or a call to a non-returning extern function that was not marked as non-returning.
return None;
}
// Minimize the callee state and replace callee-originating object IDs whenever possible.
let mut state_before_return = state_before_return.clone();
state_before_return.minimize_before_return_instruction(callee_fn_sig, cconv);
state_before_return.merge_mem_objects_with_unique_abstract_location(&call_term.tid);
// Create a mapping of IDs from the callee to IDs that should be used in the caller.
let id_map =
self.create_callee_id_to_caller_data_map(state_before_call, &state_before_return);
let callee_id_to_access_pattern_map =
self.create_id_to_access_pattern_map(&state_before_return);
// Identify caller IDs for which the callee analysis may be unsound for this callsite.
let unsound_caller_ids =
self.get_unsound_caller_ids(&id_map, &callee_id_to_access_pattern_map);
// FIXME: Unsound caller IDs occur too often to log the cases right now.
// We have to investigate the reasons for it (maybe too many parameters on the caller stack?)
// and find better heuristics to prevent them poisoning the analysis soundness.
@@ -167,11 +181,7 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
continue;
}
if *callee_object_id == state_before_return.get_global_mem_id() {
self.merge_non_nested_global_mem_from_callee(
&mut state_after_return,
callee_object,
&id_map,
@@ -196,11 +206,9 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
.is_none()
{
// Add a callee object that does not correspond to a parameter to the caller or the stack of the callee.
state_after_return
.memory
.insert(callee_object_id.clone(), callee_object);
} else {
// The callee object is a parameter object.
self.log_debug(
@@ -217,7 +225,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
state_after_return
.memory
.assume_arbitrary_writes_to_object(id, &BTreeSet::new());
}
// Cleanup
state_after_return.remove_unreferenced_objects();
...
@@ -49,6 +49,8 @@ pub use state::State;
/// The version number of the analysis.
const VERSION: &str = "0.2";
/// The recursion limit for nested pointers.
const POINTER_RECURSION_DEPTH_LIMIT: u64 = 2;
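// Illustration (not part of this commit): with a limit of 2, a location with
// two pointer dereferences such as `RDI->0x0->0x8` is still representable,
// while anything behind a third level of indirection is approximated by `Top`.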
/// The name and version number of the "Memory" CWE check.
pub static CWE_MODULE: crate::CweModule = crate::CweModule {
@@ -315,7 +317,7 @@ impl<'a> PointerInference<'a> {
}) => (state_before_call, state_before_return),
_ => continue,
};
let id_to_data_map = context.create_full_callee_id_to_caller_data_map(
state_before_call,
state_before_return,
call_tid,
...
@@ -87,11 +87,23 @@ impl AbstractObject {
inner.is_unique = false;
}
/// Mark the abstract object as unique, i.e. it represents exactly one memory object.
pub fn mark_as_unique(&mut self) {
let inner = Arc::make_mut(&mut self.inner);
inner.is_unique = true;
}
/// Get the type of the memory object.
pub fn get_object_type(&self) -> Option<ObjectType> {
self.inner.type_
}
/// Set the type of the memory object.
pub fn set_object_type(&mut self, object_type: Option<ObjectType>) {
let inner = Arc::make_mut(&mut self.inner);
inner.type_ = object_type;
}
/// Overwrite the values in `self` with those in `other`
/// under the assumption that the zero offset in `other` corresponds to the offset `offset_other` in `self`.
///
...
@@ -5,7 +5,6 @@ use super::*;
impl AbstractObjectList {
/// Get a reference to the object corresponding to the given ID.
pub fn get_object(&self, id: &AbstractIdentifier) -> Option<&AbstractObject> {
self.objects.get(id)
}
@@ -64,6 +63,11 @@ impl AbstractObjectList {
self.objects.iter()
}
/// Get an iterator of mutable references over the abstract objects in `self`.
pub fn iter_objects_mut(&mut self) -> impl Iterator<Item = &mut AbstractObject> {
self.objects.values_mut()
}
/// Get the number of objects that are currently tracked.
#[cfg(test)]
pub fn get_num_objects(&self) -> usize {
...
@@ -137,6 +137,26 @@ impl AbstractObjectList {
None => Err(anyhow!("Object ID not contained in object list.")),
}
}
/// Only retain those memory objects for which the provided predicate returns `true`.
/// All memory objects for which the predicate returns `false` are removed from `self`.
pub fn retain<F>(&mut self, f: F)
where
F: FnMut(&AbstractIdentifier, &mut AbstractObject) -> bool,
{
self.objects.retain(f)
}
/// Remove an object from the object list.
/// Returns the removed object if its ID was indeed contained in the object list.
pub fn remove(&mut self, id: &AbstractIdentifier) -> Option<AbstractObject> {
self.objects.remove(id)
}
/// Return `true` if the object list contains a memory object indexed by the given ID.
pub fn contains(&self, id: &AbstractIdentifier) -> bool {
self.objects.contains_key(id)
}
}
impl AbstractDomain for AbstractObjectList {
...
@@ -78,7 +78,7 @@ impl State {
self.write_to_address(address, &self.eval(value), global_memory)
}
/// Evaluate the given address expression and return the data read from that address on success.
pub fn load_value(
&self,
address: &Expression,
@@ -86,6 +86,17 @@ impl State {
global_memory: &RuntimeMemoryImage,
) -> Result<Data, Error> {
let address = self.eval(address);
self.load_value_from_address(&address, size, global_memory)
}
/// Load the value at the given address from the state and return the data read on success.
/// If the address contains more than one possible pointer target the results are merged for all possible pointer targets.
pub fn load_value_from_address(
&self,
address: &Data,
size: ByteSize,
global_memory: &RuntimeMemoryImage,
) -> Result<Data, Error> {
let mut result = if let Some(global_address) = address.get_absolute_value() {
if let Ok(address_bitvector) = global_address.try_to_bitvec() {
match global_memory.read(&address_bitvector, size) {
@@ -109,7 +120,7 @@ impl State {
} else {
Data::new_empty(size)
};
result = result.merge(&self.memory.get_value(address, size));
if let Ok(offset) = result.try_to_offset() {
if result.bytesize() == self.stack_id.bytesize()
@@ -217,6 +228,81 @@ impl State {
}
}
/// Evaluate the value of the given abstract location on the current state.
/// If the actual value cannot be determined (e.g. if an intermediate pointer returns `Top`)
/// then a `Top` value is returned.
pub fn eval_abstract_location(
&self,
location: &AbstractLocation,
global_memory: &RuntimeMemoryImage,
) -> Data {
match location {
AbstractLocation::GlobalAddress { address, size } => {
assert_eq!(*size, self.stack_id.bytesize());
Data::from_target(
self.get_global_mem_id().clone(),
Bitvector::from_u64(*address)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
)
}
AbstractLocation::GlobalPointer(address, nested_location) => {
let pointer = Data::from_target(
self.get_global_mem_id().clone(),
Bitvector::from_u64(*address)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
self.eval_abstract_memory_location(nested_location, pointer, global_memory)
}
AbstractLocation::Register(var) => self.get_register(var),
AbstractLocation::Pointer(var, nested_location) => {
let pointer = self.get_register(var);
self.eval_abstract_memory_location(nested_location, pointer, global_memory)
}
}
}
/// Evaluate the value of the given abstract memory location on the current state
/// with the given `root_pointer` as the start point of the location description.
fn eval_abstract_memory_location(
&self,
location: &AbstractMemoryLocation,
root_pointer: Data,
global_memory: &RuntimeMemoryImage,
) -> Data {
match location {
AbstractMemoryLocation::Location { offset, size } => {
let pointer = root_pointer.add_offset(
&Bitvector::from_i64(*offset)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
self.load_value_from_address(&pointer, *size, global_memory)
.unwrap_or_else(|_| Data::new_top(*size))
}
AbstractMemoryLocation::Pointer { offset, target } => {
let pointer = root_pointer.add_offset(
&Bitvector::from_i64(*offset)
.into_resize_unsigned(self.stack_id.bytesize())
.into(),
);
match self.load_value_from_address(
&pointer,
self.stack_id.bytesize(),
global_memory,
) {
Ok(nested_root_pointer) => self.eval_abstract_memory_location(
target,
nested_root_pointer,
global_memory,
),
Err(_) => Data::new_top(location.bytesize()),
}
}
}
}
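// A minimal sketch of the recursion above (illustrative location, not part of
// this commit): evaluating `RSP->0x8->0x10` first loads the pointer stored at
// `*(RSP + 0x8)` and then loads the value at offset 0x10 relative to that
// pointer. If an intermediate load fails, the result degrades to `Top` of the
// corresponding byte size.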
/// Check whether the given `def` could result in a memory access through a NULL pointer.
///
/// If no NULL pointer dereference is detected then `Ok(false)` is returned.
...
@@ -2,6 +2,7 @@
use super::*;
use crate::analysis::pointer_inference::object::AbstractObject;
use crate::analysis::pointer_inference::POINTER_RECURSION_DEPTH_LIMIT;
impl State {
/// Search (recursively) through all memory objects referenced by the given IDs
@@ -89,4 +90,390 @@ impl State {
}
Ok(())
}
/// Create an ID renaming map that maps IDs in `self` to the values representing them
/// after unifying and renaming non-parameter objects in `self` in preparation of returning to a caller.
pub fn get_id_to_unified_ids_replacement_map(
&self,
location_to_data_map: &BTreeMap<AbstractIdentifier, Data>,
) -> BTreeMap<AbstractIdentifier, Data> {
let mut id_replacement_map = BTreeMap::new();
for (unified_id, value) in location_to_data_map.iter() {
for (old_id, offset) in value.get_relative_values() {
if old_id.get_tid() != self.get_fn_tid() || !old_id.get_path_hints().is_empty() {
let mut pointer_to_unified_id =
Data::from_target(unified_id.clone(), offset.un_op(UnOpType::Int2Comp));
pointer_to_unified_id.set_contains_top_flag();
id_replacement_map.insert(old_id.clone(), pointer_to_unified_id);
}
}
}
for value in self.register.values() {
for id in value.get_relative_values().keys() {
if id.get_tid() == self.get_fn_tid() && id.get_path_hints().is_empty() {
// This is a parameter ID
id_replacement_map.insert(
id.clone(),
Data::from_target(id.clone(), Bitvector::zero(id.bytesize().into()).into()),
);
}
}
}
for object_id in self.memory.get_all_object_ids() {
for id in self.memory.get_referenced_ids_overapproximation(&object_id) {
if id.get_tid() == self.get_fn_tid() && id.get_path_hints().is_empty() {
// This is a parameter ID
id_replacement_map.insert(
id.clone(),
Data::from_target(id.clone(), Bitvector::zero(id.bytesize().into()).into()),
);
}
}
}
id_replacement_map
}
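// Sketch of the resulting map (illustrative IDs, not part of this commit): if
// the value of the unified location `call::RAX` contains a callee-internal
// heap ID at offset 0x8, that heap ID is mapped to `call::RAX + (-0x8)` with
// the `Top` flag set, while parameter IDs of the callee are mapped to
// themselves with offset 0.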
/// Replace all IDs pointing to non-parameter objects.
/// - IDs contained in the values of the location to data map are replaced by the corresponding key (with adjusted offset).
/// But the Top flag is also set, because the pointers may point to other objects.
/// - All other non-parameter IDs are replaced with Top.
pub fn replace_ids_to_non_parameter_objects(
&mut self,
location_to_data_map: &BTreeMap<AbstractIdentifier, Data>,
) {
let id_replacement_map = self.get_id_to_unified_ids_replacement_map(location_to_data_map);
// Now use the replacement map to replace IDs
for value in self.register.values_mut() {
value.replace_all_ids(&id_replacement_map);
}
for object in self.memory.iter_objects_mut() {
object.replace_ids(&id_replacement_map);
}
// Clean up registers left as Top after the replacement
self.register.retain(|_var, value| !value.is_top());
}
/// Explicitly insert pointers to unified objects at the locations specified by their abstract location.
///
/// Note that these are the only locations where we (by definition) know
/// that the pointer is unique, i.e. we do not have to set a Top flag.
/// However, we still have to add targets to parameter objects, absolute values or the `Top` flag
/// to the pointer if the original pointer value contained them,
/// because these targets were not merged to the unified object.
pub fn insert_pointers_to_unified_objects(
&mut self,
location_to_data_map: &BTreeMap<AbstractIdentifier, Data>,
call_tid: &Tid,
) {
for (unified_id, old_value) in location_to_data_map.iter() {
// Compute the pointer (which may also contain pointers to parameter objects and absolute values).
let mut pointer_to_unified_object = Data::from_target(
unified_id.clone(),
Bitvector::zero(unified_id.bytesize().into()).into(),
);
for (old_id, old_offset) in old_value.get_relative_values() {
if old_id.get_tid() == self.get_fn_tid() && old_id.get_path_hints().is_empty() {
pointer_to_unified_object = pointer_to_unified_object
.merge(&Data::from_target(old_id.clone(), old_offset.clone()));
}
}
pointer_to_unified_object.set_absolute_value(old_value.get_absolute_value().cloned());
if old_value.contains_top() {
pointer_to_unified_object.set_contains_top_flag()
}
// Insert the pointer at the corresponding abstract location
match unified_id.get_location() {
AbstractLocation::Register(var) => {
self.set_register(var, pointer_to_unified_object)
}
unified_location => {
let (parent_location, offset_in_parent_object) = unified_location
.get_parent_location(self.stack_id.bytesize())
.unwrap();
let parent_tid = if unified_id.get_tid() == call_tid {
call_tid.clone()
} else {
// We know that the parent is a parameter object, since we cannot track nested pointers in parameter objects.
self.stack_id.get_tid().clone()
};
let parent_object = self
.memory
.get_object_mut(&AbstractIdentifier::new(parent_tid, parent_location))
.unwrap();
parent_object
.set_value(
pointer_to_unified_object,
&Bitvector::from_i64(offset_in_parent_object)
.into_resize_signed(self.stack_id.bytesize())
.into(),
)
.unwrap();
}
}
}
}
/// Merge the target objects that are non-parameter objects for the given location to data mapping.
/// Return the results as a location to memory object map.
///
/// This function is a step in the process of unifying callee-originating memory objects on a return instruction.
/// The memory objects are also marked as unique, because they will represent a unique object in the caller.
pub fn generate_target_objects_for_new_locations(
&self,
location_to_data_map: &BTreeMap<AbstractIdentifier, Data>,
) -> BTreeMap<AbstractIdentifier, AbstractObject> {
let mut location_to_object_map: BTreeMap<AbstractIdentifier, AbstractObject> =
BTreeMap::new();
for (location_id, value) in location_to_data_map {
let mut new_object: Option<AbstractObject> = None;
'target_loop: for (target_id, target_offset) in value.get_relative_values() {
if (target_id.get_tid() == self.get_fn_tid()
&& target_id.get_path_hints().is_empty())
|| !self.memory.contains(target_id)
{
continue 'target_loop;
}
let target_offset = match target_offset.try_to_offset() {
Ok(offset) => offset,
Err(_) => {
match &mut new_object {
Some(object) => object.assume_arbitrary_writes(&BTreeSet::new()),
None => {
new_object =
Some(AbstractObject::new(None, self.stack_id.bytesize()))
}
}
continue 'target_loop;
}
};
let target_object = self.memory.get_object(target_id).unwrap();
let mut target_object = target_object.clone();
target_object
.add_offset_to_all_indices(&Bitvector::from_i64(-target_offset).into());
match &mut new_object {
None => new_object = Some(target_object),
Some(object) => *object = object.merge(&target_object),
}
}
let mut new_object =
new_object.unwrap_or_else(|| AbstractObject::new(None, self.stack_id.bytesize()));
new_object.mark_as_unique();
new_object.set_object_type(None);
location_to_object_map.insert(location_id.clone(), new_object);
}
location_to_object_map
}
/// Filter out those locations from the location to pointer data map
/// whose non-parameter object targets intersect with any of the other locations.
///
/// Note that this does not filter out locations whose targets contain the `Top` flag,
/// despite the fact that these locations theoretically may point to the same non-parameter object.
/// I.e. we trade soundness in the general case for exactness in the common case here.
pub fn filter_location_to_pointer_data_map(
&self,
location_to_data_map: &mut BTreeMap<AbstractIdentifier, Data>,
) {
let mut visited_targets = HashSet::new();
let mut non_unique_targets = HashSet::new();
for value in location_to_data_map.values() {
for id in value.get_relative_values().keys() {
if id.get_tid() != self.get_fn_tid() && self.memory.contains(id) {
if visited_targets.contains(id) {
non_unique_targets.insert(id.clone());
} else {
visited_targets.insert(id.clone());
}
}
}
}
let mut filtered_out_ids = HashSet::new();
location_to_data_map.retain(|location_id, value| {
for id in value.get_relative_values().keys() {
if non_unique_targets.contains(id) {
filtered_out_ids.insert(location_id.clone());
return false;
}
}
true
});
// Also filter out those locations whose parent locations were filtered out.
location_to_data_map.retain(|location, _| {
if location.get_tid().has_id_suffix("_param") {
return true;
}
for parent in location
.get_location()
.get_all_parent_locations(self.stack_id.bytesize())
{
let parent_id = AbstractIdentifier::new(location.get_tid().clone(), parent);
if filtered_out_ids.contains(&parent_id) {
return false;
}
}
true
});
}
/// Add abstract locations based on register values to the location to pointer data map.
/// The TID for the corresponding abstract IDs is the given `call_tid`.
///
/// This function assumes that `self` has already been minimized
/// and thus all non-parameter register values have been removed from the state.
fn add_register_based_root_locations_to_location_to_pointer_data_map(
&self,
call_tid: &Tid,
location_to_data_map: &mut BTreeMap<AbstractIdentifier, Data>,
) {
for (var, value) in self.register.iter() {
if !var.is_temp && self.contains_non_param_pointer(value) {
let location = AbstractLocation::from_var(var).unwrap();
let id = AbstractIdentifier::new(call_tid.clone(), location);
location_to_data_map.insert(id.clone(), value.clone());
}
}
}
/// Add abstract locations based on parameter objects to the location to pointer data map.
/// The TID for the corresponding abstract IDs is the given `call_tid` with a `_param` suffix.
///
/// The TID suffix is necessary to prevent naming collisions with locations based on return registers.
///
/// This function assumes that the stack memory object of `self` has already been deleted by a call to
/// [`State::minimize_before_return_instruction`](crate::analysis::pointer_inference::State::minimize_before_return_instruction).
fn add_param_based_root_locations_to_location_to_pointer_data_map(
&self,
call_tid: &Tid,
location_to_data_map: &mut BTreeMap<AbstractIdentifier, Data>,
) {
for (object_id, object) in self.memory.iter() {
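            // Only consider parameter objects: their ID consists of the function TID
            // without path hints. The recursion depth check ensures that dereferencing
            // the location below stays within the pointer recursion depth limit.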
if object_id.get_tid() == self.get_fn_tid()
&& object_id.get_path_hints().is_empty()
&& object_id.get_location().recursion_depth() < POINTER_RECURSION_DEPTH_LIMIT
{
for (index, value) in object.get_mem_region().iter() {
if self.contains_non_param_pointer(value) {
let location = object_id
.get_location()
.clone()
.dereferenced(value.bytesize(), self.stack_id.bytesize())
.with_offset_addendum(*index);
location_to_data_map.insert(
AbstractIdentifier::new(
call_tid.clone().with_id_suffix("_param"),
location,
),
value.clone(),
);
}
}
}
}
}
/// Derive nested locations from the given list of locations to derive
/// and add them to the location to pointer data map.
fn add_derived_locations_to_location_to_pointer_data_map(
&self,
location_to_data_map: &mut BTreeMap<AbstractIdentifier, Data>,
mut locations_to_derive: BTreeMap<AbstractIdentifier, Data>,
) {
while let Some((location_id, location_data)) = locations_to_derive.pop_first() {
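            // Do not derive nested locations beyond the pointer recursion depth limit.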
if location_id.get_location().recursion_depth() >= POINTER_RECURSION_DEPTH_LIMIT {
continue;
}
'data_target_loop: for (object_id, object_offset) in location_data.get_relative_values()
{
if object_id.get_tid() == self.get_fn_tid() && object_id.get_path_hints().is_empty()
{
// Ignore parameter objects
continue 'data_target_loop;
}
let object_offset = match object_offset.try_to_offset() {
Ok(offset) => offset,
Err(_) => continue 'data_target_loop,
};
let mem_object = match self.memory.get_object(object_id) {
Some(object) => object,
None => continue 'data_target_loop,
};
for (elem_offset, elem_data) in mem_object.get_mem_region().iter() {
if self.contains_non_param_pointer(elem_data) {
// We want to create a new abstract location for this element.
// But the same abstract location may already exist, so we may have to merge values instead.
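                        // The offset of the element relative to the dereferenced parent location
                        // is the difference between the element's offset in the memory object
                        // and the offset that the parent pointer targets inside that object.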
let new_location_offset = *elem_offset - object_offset; // TODO: Check correctness of this offset!
let new_location = location_id
.get_location()
.clone()
.dereferenced(elem_data.bytesize(), self.stack_id.bytesize())
.with_offset_addendum(new_location_offset);
let new_location_id =
AbstractIdentifier::new(location_id.get_tid().clone(), new_location);
let new_location_data = elem_data.clone();
location_to_data_map
.entry(new_location_id.clone())
.and_modify(|loc_data| *loc_data = loc_data.merge(&new_location_data))
.or_insert(new_location_data.clone());
locations_to_derive
.entry(new_location_id.clone())
.and_modify(|loc_data| *loc_data = loc_data.merge(&new_location_data))
.or_insert(new_location_data);
}
}
}
}
}
/// Generate a map from abstract locations pointing to non-parameter memory objects
/// to the data represented by the abstract location in the current state.
///
/// The abstract locations get different TIDs depending on the root of the location:
/// - If the root is a return register, then the TID is given by the provided `call_tid`.
/// - If the root is a parameter memory object, then the TID is given by appending the suffix `_param` to the `call_tid`.
    /// Since parameter and return registers can overlap, the abstract IDs would collide
    /// if the same TID were used in both cases.
    ///
    /// For return-register-based locations this function also generates nested abstract locations.
///
/// This function assumes that
/// [`State::minimize_before_return_instruction`](crate::analysis::pointer_inference::State::minimize_before_return_instruction)
/// has been called on `self` beforehand.
pub fn map_abstract_locations_to_pointer_data(
&self,
call_tid: &Tid,
) -> BTreeMap<AbstractIdentifier, Data> {
let mut location_to_data_map = BTreeMap::new();
self.add_register_based_root_locations_to_location_to_pointer_data_map(
call_tid,
&mut location_to_data_map,
);
let locations_to_derive = location_to_data_map.clone();
self.add_param_based_root_locations_to_location_to_pointer_data_map(
call_tid,
&mut location_to_data_map,
);
// Add derived locations based on return register locations.
// FIXME: We cannot add derived locations based on parameter objects,
// because the location and ID of their parent objects would be ambiguous
// between parameter objects and other derived locations.
self.add_derived_locations_to_location_to_pointer_data_map(
&mut location_to_data_map,
locations_to_derive,
);
location_to_data_map
}
/// Returns `true` if the value contains at least one reference to a non-parameter
/// (and non-stack) memory object tracked by the current state.
fn contains_non_param_pointer(&self, value: &Data) -> bool {
for id in value.referenced_ids() {
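            // Parameter objects and the stack object share the function TID and carry no
            // path hints, so every other ID tracked in memory is a non-parameter object.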
if (id.get_tid() != self.get_fn_tid() || !id.get_path_hints().is_empty())
&& self.memory.contains(id)
{
return true;
}
}
false
}
}
use super::object::AbstractObject;
use super::object_list::AbstractObjectList;
use super::Data;
use crate::abstract_domain::*;
use crate::analysis::function_signature::AccessPattern;
use crate::analysis::function_signature::FunctionSignature;
use crate::intermediate_representation::*;
use crate::prelude::*;
use std::collections::HashSet;
use std::collections::{BTreeMap, BTreeSet};
use std::sync::Arc;
...@@ -67,36 +70,118 @@ impl State {
        stack_register: &Variable,
        function_tid: Tid,
    ) -> State {
        let global_addresses = fn_sig
            .global_parameters
            .keys()
            .map(|location| match location {
                AbstractLocation::GlobalAddress { address, .. }
                | AbstractLocation::GlobalPointer(address, _) => *address,
                _ => panic!("Unexpected non-global parameter"),
            })
            .collect();
        let mock_global_memory = RuntimeMemoryImage::empty(true);
        let mut state = State::new(stack_register, function_tid.clone(), global_addresses);
        // Set parameter values and create parameter memory objects.
        for params in sort_params_by_recursion_depth(&fn_sig.parameters).values() {
            for (param_location, access_pattern) in params {
                state.add_param(param_location, access_pattern, &mock_global_memory);
            }
        }
        for (recursion_depth, params) in sort_params_by_recursion_depth(&fn_sig.global_parameters) {
            if recursion_depth > 0 {
                for (param_location, access_pattern) in params {
                    state.add_param(param_location, access_pattern, &mock_global_memory);
                }
            }
        }
        state
    }

    /// Add the given parameter to the function start state represented by `self`:
    /// For the given parameter location, add a parameter object if it was dereferenced (according to the access pattern)
    /// and write the pointer to the parameter object to the corresponding existing memory object of `self`.
    ///
    /// This function assumes that the parent memory object of `param` already exists if `param` is a nested parameter.
    fn add_param(
        &mut self,
        param: &AbstractLocation,
        access_pattern: &AccessPattern,
        global_memory: &RuntimeMemoryImage,
    ) {
        let param_id = AbstractIdentifier::new(self.stack_id.get_tid().clone(), param.clone());
        if !matches!(param, AbstractLocation::GlobalAddress { .. })
            && access_pattern.is_dereferenced()
        {
            self.memory
                .add_abstract_object(param_id.clone(), self.stack_id.bytesize(), None);
        }
        match param {
            AbstractLocation::Register(var) => {
                self.set_register(
                    var,
                    Data::from_target(param_id, Bitvector::zero(param.bytesize().into()).into()),
                );
            }
            AbstractLocation::Pointer(_, _) => {
                let (parent_location, offset) =
                    param.get_parent_location(self.stack_id.bytesize()).unwrap();
                let parent_id =
                    AbstractIdentifier::new(self.stack_id.get_tid().clone(), parent_location);
                self.store_value(
                    &Data::from_target(
                        parent_id,
                        Bitvector::from_i64(offset)
                            .into_resize_signed(self.stack_id.bytesize())
                            .into(),
                    ),
                    &Data::from_target(
                        param_id.clone(),
                        Bitvector::zero(param_id.bytesize().into()).into(),
                    ),
                    global_memory,
                )
                .unwrap();
            }
            AbstractLocation::GlobalAddress { .. } => (),
            AbstractLocation::GlobalPointer(_, _) => {
                let (parent_location, offset) =
                    param.get_parent_location(self.stack_id.bytesize()).unwrap();
                if let AbstractLocation::GlobalAddress { address, size: _ } = parent_location {
                    let parent_id = self.get_global_mem_id();
                    self.store_value(
                        &Data::from_target(
                            parent_id,
                            Bitvector::from_u64(address + offset as u64)
                                .into_resize_signed(self.stack_id.bytesize())
                                .into(),
                        ),
                        &Data::from_target(
                            param_id.clone(),
                            Bitvector::zero(param_id.bytesize().into()).into(),
                        ),
                        global_memory,
                    )
                    .unwrap();
                } else {
                    let parent_id =
                        AbstractIdentifier::new(self.stack_id.get_tid().clone(), parent_location);
                    self.store_value(
                        &Data::from_target(
                            parent_id,
                            Bitvector::from_i64(offset)
                                .into_resize_signed(self.stack_id.bytesize())
                                .into(),
                        ),
                        &Data::from_target(
                            param_id.clone(),
                            Bitvector::zero(param_id.bytesize().into()).into(),
                        ),
                        global_memory,
                    )
                    .unwrap();
                }
            }
        }
    }
    /// Set the MIPS link register `t9` to the address of the callee TID.
...@@ -124,6 +209,89 @@ impl State {
        Ok(())
    }
/// Remove all objects and registers from the state whose contents will not be used after returning to a caller.
///
/// All remaining memory objects after the minimization are reachable in the caller
/// either via a parameter object that may have been mutated in the call
/// or via a return register.
pub fn minimize_before_return_instruction(
&mut self,
fn_sig: &FunctionSignature,
cconv: &CallingConvention,
) {
self.clear_non_return_register(cconv);
self.remove_immutable_parameter_objects(fn_sig);
self.memory.remove(&self.stack_id);
self.remove_unreferenced_objects();
}
/// Remove all parameter objects (including global parameter objects) that are not marked as mutably accessed.
/// Used to minimize state before a return instruction.
fn remove_immutable_parameter_objects(&mut self, fn_sig: &FunctionSignature) {
let current_fn_tid = self.get_fn_tid().clone();
self.memory.retain(|object_id, _object| {
if *object_id.get_tid() == current_fn_tid && object_id.get_path_hints().is_empty() {
if let Some(access_pattern) = fn_sig.parameters.get(object_id.get_location()) {
if !access_pattern.is_mutably_dereferenced() {
return false;
}
}
if let Some(access_pattern) = fn_sig.global_parameters.get(object_id.get_location())
{
if !access_pattern.is_mutably_dereferenced() {
return false;
}
}
}
true
});
}
/// Clear all non-return registers from the state, including all virtual registers.
/// This function is used to minimize the state before a return instruction.
fn clear_non_return_register(&mut self, cconv: &CallingConvention) {
let return_register: HashSet<Variable> = cconv
.get_all_return_register()
.into_iter()
.cloned()
.collect();
self.register
.retain(|var, _value| return_register.contains(var));
}
/// Try to determine unique pointer locations for non-parameter memory objects.
/// When successful, merge all referenced non-parameter objects for that location
/// and replace the pointer with a pointer to the merged object.
///
/// The merged objects get new abstract IDs generated from the call TID and their abstract location in the state.
///
/// This function leaves pointers to parameter objects as is,
    /// while pointers to non-parameter objects that were not merged (e.g. because the pointers were not unique) are replaced with `Top`.
pub fn merge_mem_objects_with_unique_abstract_location(&mut self, call_tid: &Tid) {
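        // The unification proceeds in steps: map abstract locations to pointer data,
        // filter out locations with ambiguous targets, generate merged objects for the
        // remaining locations, swap them into the state, and finally rewrite old IDs
        // and insert the pointers to the unified objects.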
let mut location_to_data_map = self.map_abstract_locations_to_pointer_data(call_tid);
self.filter_location_to_pointer_data_map(&mut location_to_data_map);
let location_to_object_map =
self.generate_target_objects_for_new_locations(&location_to_data_map);
self.replace_unified_mem_objects(location_to_object_map);
self.replace_ids_to_non_parameter_objects(&location_to_data_map);
self.insert_pointers_to_unified_objects(&location_to_data_map, call_tid);
}
/// Remove all memory objects corresponding to non-parameter IDs.
/// Afterwards, add the memory objects in the location to object map to the state.
fn replace_unified_mem_objects(
&mut self,
location_to_object_map: BTreeMap<AbstractIdentifier, AbstractObject>,
) {
let current_fn_tid = self.get_fn_tid().clone();
self.memory.retain(|object_id, _| {
*object_id.get_tid() == current_fn_tid && object_id.get_path_hints().is_empty()
});
for (id, object) in location_to_object_map {
self.memory.insert(id, object);
}
}
    /// Clear all non-callee-saved registers from the state.
    /// This automatically also removes all virtual registers.
    /// The parameter is a list of callee-saved register names.
...@@ -260,5 +428,21 @@ impl State {
    }
}
/// Sort parameters by recursion depth.
/// Helper function for iterating over parameters in order of their recursion depth,
/// e.g. to process parent locations before the nested parameter locations contained in them.
fn sort_params_by_recursion_depth(
params: &BTreeMap<AbstractLocation, AccessPattern>,
) -> BTreeMap<u64, BTreeMap<&AbstractLocation, &AccessPattern>> {
let mut sorted_params = BTreeMap::new();
for (param, access_pattern) in params {
let recursion_depth = param.recursion_depth();
let bucket = sorted_params
.entry(recursion_depth)
.or_insert(BTreeMap::new());
bucket.insert(param, access_pattern);
}
sorted_params
}
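// Example (with hypothetical parameter locations): for the locations r0, [r0] and [[r0]],
// the returned map contains the buckets {0: {r0}, 1: {[r0]}, 2: {[[r0]]}},
// so iterating over it in order handles parent locations before the parameters nested inside them.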
#[cfg(test)]
mod tests;
use super::*;
#[test]
fn handle_store() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(&variable!("RSP:8"), Tid::new("time0"), BTreeSet::new());
let stack_id = new_id("time0", "RSP");
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(0))
);
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 - 32:8"));
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(-32))
);
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 + -8:8"));
assert_eq!(
state.eval(&expr!("RSP:8")),
Data::from_target(stack_id.clone(), bv(-40))
);
state
.handle_store(&expr!("RSP:8 + 8:8"), &expr!("1:8"), &global_memory)
.unwrap();
state
.handle_store(&expr!("RSP:8 - 8:8"), &expr!("2:8"), &global_memory)
.unwrap();
state
.handle_store(&expr!("RSP:8 + -16:8"), &expr!("3:8"), &global_memory)
.unwrap();
state.handle_register_assign(&variable!("RSP:8"), &expr!("RSP:8 - 4:8"));
assert_eq!(
state
.load_value(&expr!("RSP:8 + 12:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(1).into()
);
assert_eq!(
state
.load_value(&expr!("RSP:8 - 4:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(2).into()
);
assert_eq!(
state
.load_value(&expr!("RSP:8 + -12:8"), ByteSize::new(8), &global_memory)
.unwrap(),
bv(3).into()
);
}
#[test]
fn global_mem_access() {
let global_memory = RuntimeMemoryImage::mock();
let mut state = State::new(
&variable!("RSP:8"),
Tid::new("func_tid"),
BTreeSet::from([0x2000]),
);
// global read-only address
let address_expr = expr!("0x1000:8");
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
bitvec!("0xb3b2b1b0:4").into() // note that we read in little-endian byte order
);
assert!(state
.write_to_address(
&address_expr,
&DataDomain::new_top(ByteSize::new(4)),
&global_memory
)
.is_err());
// global writeable address
let address_expr = expr!("0x2000:8");
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
DataDomain::new_top(ByteSize::new(4))
);
assert!(state
.write_to_address(&address_expr, &bitvec!("21:4").into(), &global_memory)
.is_ok());
assert_eq!(
state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.unwrap(),
bitvec!("21:4").into()
);
// invalid global address
let address_expr = expr!("0x3456:8");
assert!(state
.load_value(&address_expr, ByteSize::new(4), &global_memory)
.is_err());
assert!(state
.write_to_address(
&address_expr,
&DataDomain::new_top(ByteSize::new(4)),
&global_memory
)
.is_err());
}
#[test]
fn test_eval_abstract_location() {
let mut state = State::new(&variable!("RSP:8"), Tid::new("fn_tid"), BTreeSet::new());
let global_memory = RuntimeMemoryImage::mock();
let object_id = AbstractIdentifier::mock("fn_tid", "RSI", 8);
state
.memory
.add_abstract_object(object_id.clone(), ByteSize::new(8), None);
state
.memory
.get_object_mut(&state.stack_id)
.unwrap()
.set_value(
Data::from_target(object_id.clone(), bitvec!("0x0:8").into()),
&bitvec!("0x-20:8").into(),
)
.unwrap();
state
.memory
.get_object_mut(&object_id)
.unwrap()
.set_value(bitvec!("0x42:8").into(), &bitvec!("0x10:8").into())
.unwrap();
let location = AbstractLocation::mock("RSP:8", &[-32], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(
value,
Data::from_target(object_id.clone(), bitvec!("0x0:8").into())
);
let location = AbstractLocation::mock("RSP:8", &[-32, 16], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(value, bitvec!("0x42:8").into());
// Also test evaluation of a global address
state
.memory
.get_object_mut(&state.get_global_mem_id().clone())
.unwrap()
.set_value(bitvec!("0x43:8").into(), &bitvec!("0x2000:8").into())
.unwrap();
let location = AbstractLocation::mock_global(0x2000, &[0], 8);
let value = state.eval_abstract_location(&location, &global_memory);
assert_eq!(value, bitvec!("0x43:8").into());
}
use super::*;
/// Mock an ARM32 function start state with a function signature that has one mutably dereferenced parameter in r0
/// and a mutably dereferenced global parameter at address 0x2000.
/// The function Tid of the state is named `callee`.
fn mock_arm32_fn_start_state() -> (State, FunctionSignature) {
let full_access = AccessPattern::new_unknown_access();
let fn_sig = FunctionSignature {
parameters: BTreeMap::from([(AbstractLocation::mock("r0:4", &[], 4), full_access)]),
global_parameters: BTreeMap::from([(
AbstractLocation::mock_global(0x2000, &[], 4),
full_access,
)]),
};
let state = State::from_fn_sig(&fn_sig, &variable!("sp:4"), Tid::new("callee"));
(state, fn_sig)
}
#[test]
fn test_map_abstract_locations_to_pointer_data() {
let call_tid = Tid::new("call");
let global_memory = RuntimeMemoryImage::mock();
let (mut state, _) = mock_arm32_fn_start_state();
let param_id =
AbstractIdentifier::new(Tid::new("callee"), AbstractLocation::mock("r0:4", &[], 4));
let param_pointer = Data::from_target(param_id.clone(), bitvec!("0x2:4").into());
let global_param_pointer = Data::from_target(
state.get_global_mem_id().clone(),
bitvec!("0x2000:4").into(),
);
let callee_orig_id = AbstractIdentifier::new(
Tid::new("inside_callee"),
AbstractLocation::mock("r0:4", &[], 4),
);
let callee_orig_pointer = Data::from_target(callee_orig_id.clone(), bitvec!("0x3:4").into());
let nested_callee_orig_id = AbstractIdentifier::new(
Tid::new("inside_callee"),
AbstractLocation::mock("r0:4", &[0x10], 4),
);
let nested_callee_orig_pointer =
Data::from_target(nested_callee_orig_id.clone(), bitvec!("0x0:4").into());
state
.memory
.add_abstract_object(callee_orig_id.clone(), ByteSize::new(4), None);
state
.memory
.add_abstract_object(nested_callee_orig_id.clone(), ByteSize::new(4), None);
state
.store_value(&param_pointer, &nested_callee_orig_pointer, &global_memory)
.unwrap();
state
.store_value(
&global_param_pointer,
&nested_callee_orig_pointer,
&global_memory,
)
.unwrap();
state.set_register(&variable!("r0:4"), callee_orig_pointer.clone());
state
.store_value(
&callee_orig_pointer,
&nested_callee_orig_pointer,
&global_memory,
)
.unwrap();
let location_to_data_map = state.map_abstract_locations_to_pointer_data(&call_tid);
let expected_map = BTreeMap::from([
(
AbstractIdentifier::new(
Tid::new("call_param"),
AbstractLocation::mock("r0:4", &[2], 4),
),
nested_callee_orig_pointer.clone(),
),
(
AbstractIdentifier::new(
Tid::new("call_param"),
AbstractLocation::mock_global(0x0, &[0x2000], 4),
),
nested_callee_orig_pointer.clone(),
),
(
AbstractIdentifier::new(Tid::new("call"), AbstractLocation::mock("r0:4", &[], 4)),
callee_orig_pointer.clone(),
),
(
AbstractIdentifier::new(Tid::new("call"), AbstractLocation::mock("r0:4", &[0], 4)),
nested_callee_orig_pointer.clone(),
),
]);
assert_eq!(location_to_data_map, expected_map);
}
#[test]
fn test_filter_location_to_data_map() {
let (mut state, _) = mock_arm32_fn_start_state();
state.memory.add_abstract_object(
AbstractIdentifier::mock("callee_orig", "r0", 4),
ByteSize::new(4),
None,
);
state.memory.add_abstract_object(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
ByteSize::new(4),
None,
);
state.memory.add_abstract_object(
AbstractIdentifier::mock("callee_orig_3", "r0", 4),
ByteSize::new(4),
None,
);
let mut loc_to_data_map = BTreeMap::from([
(
AbstractIdentifier::mock("call", "r0", 4),
Data::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("callee", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig_3", "r0", 4),
bitvec!("0x0:4").into(),
),
])),
),
(
AbstractIdentifier::mock("call", "r1", 4),
Data::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("callee", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
bitvec!("0x0:4").into(),
),
])),
),
(
AbstractIdentifier::mock("call", "r2", 4),
Data::mock_from_target_map(BTreeMap::from([(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
bitvec!("0x0:4").into(),
)])),
),
]);
state.filter_location_to_pointer_data_map(&mut loc_to_data_map);
let expected_map = BTreeMap::from([(
AbstractIdentifier::mock("call", "r0", 4),
Data::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("callee", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig", "r0", 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock("callee_orig_3", "r0", 4),
bitvec!("0x0:4").into(),
),
])),
)]);
assert_eq!(loc_to_data_map, expected_map);
}
#[test]
fn test_generate_target_objects_for_new_locations() {
let global_memory = RuntimeMemoryImage::mock();
let (mut state, _) = mock_arm32_fn_start_state();
let param_id = AbstractIdentifier::mock("callee", "r0", 4);
let callee_orig_id = AbstractIdentifier::mock("callee_orig", "r0", 4);
let callee_orig_2_id = AbstractIdentifier::mock("callee_orig_2", "r0", 4);
state
.memory
.add_abstract_object(callee_orig_id.clone(), ByteSize::new(4), None);
state
.memory
.add_abstract_object(callee_orig_2_id.clone(), ByteSize::new(4), None);
state
.store_value(
&Data::from_target(param_id.clone(), bitvec!("0x0:4").into()),
&bitvec!("0x42:4").into(),
&global_memory,
)
.unwrap();
state
.store_value(
&Data::from_target(callee_orig_id.clone(), bitvec!("0x4:4").into()),
&bitvec!("0x24:4").into(),
&global_memory,
)
.unwrap();
let loc_to_data_map = BTreeMap::from([(
AbstractIdentifier::mock("call", "r0", 4),
Data::mock_from_target_map(BTreeMap::from([
(param_id.clone(), bitvec!("0x0:4").into()),
(callee_orig_id.clone(), bitvec!("0x0:4").into()),
(callee_orig_2_id.clone(), bitvec!("0x0:4").into()),
])),
)]);
let loc_to_obj_map = state.generate_target_objects_for_new_locations(&loc_to_data_map);
assert_eq!(loc_to_obj_map.len(), 1);
let object = &loc_to_obj_map[&AbstractIdentifier::mock("call", "r0", 4)];
assert_eq!(
object.get_value(bitvec!("0x0:4"), ByteSize::new(4)),
Data::new_top(ByteSize::new(4))
);
let mut merged_value = Data::new_top(ByteSize::new(4));
merged_value.set_absolute_value(Some(bitvec!("0x24:4").into()));
assert_eq!(
object.get_value(bitvec!("0x4:4"), ByteSize::new(4)),
merged_value
);
}
#[test]
fn test_get_id_to_unified_id_replacement_map() {
let cconv = CallingConvention::mock_arm32();
let (mut state, fn_sig) = mock_arm32_fn_start_state();
state.minimize_before_return_instruction(&fn_sig, &cconv);
let location_to_data_map = BTreeMap::from([(
AbstractIdentifier::mock("call", "r0", 4),
Data::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock("callee", "r0", 4),
bitvec!("0x2:4").into(),
),
(
AbstractIdentifier::mock("callee_orig", "r0", 4),
bitvec!("0x3:4").into(),
),
(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
bitvec!("0x4:4").into(),
),
])),
)]);
let id_replacement_map = state.get_id_to_unified_ids_replacement_map(&location_to_data_map);
let merged_id = AbstractIdentifier::mock("call", "r0", 4);
let mut merged_pointer = Data::from_target(merged_id.clone(), bitvec!("0x-3:4").into());
merged_pointer.set_contains_top_flag();
let mut merged_pointer_2 = Data::from_target(merged_id.clone(), bitvec!("0x-4:4").into());
merged_pointer_2.set_contains_top_flag();
let param_id = AbstractIdentifier::mock("callee", "r0", 4);
let expected_map = BTreeMap::from([
(
AbstractIdentifier::mock("callee_orig", "r0", 4),
merged_pointer,
),
(
AbstractIdentifier::mock("callee_orig_2", "r0", 4),
merged_pointer_2,
),
(
param_id.clone(),
Data::from_target(param_id, bitvec!("0x0:4").into()),
),
]);
assert_eq!(id_replacement_map, expected_map);
}
#[test]
fn test_insert_pointers_to_unified_objects() {
let call_tid = Tid::new("call");
let (mut state, _) = mock_arm32_fn_start_state();
let param_id = AbstractIdentifier::mock("callee", "r0", 4);
let old_callee_orig_id = AbstractIdentifier::mock("instr", "r0", 4);
let old_callee_orig_id_2 = AbstractIdentifier::mock("instr_2", "r0", 4);
let new_id = AbstractIdentifier::mock("call", "r0", 4);
let new_id_2 = AbstractIdentifier::mock_nested("call", "r0:4", &[0], 4);
state
.memory
.add_abstract_object(new_id.clone(), ByteSize::new(4), None);
state
.memory
.add_abstract_object(new_id_2.clone(), ByteSize::new(4), None);
let location_to_data_map = BTreeMap::from([
(
new_id.clone(),
Data::mock_from_target_map(BTreeMap::from([
(param_id.clone(), bitvec!("0x0:4").into()),
(old_callee_orig_id.clone(), bitvec!("0x0:4").into()),
])),
),
(
new_id_2.clone(),
Data::from_target(old_callee_orig_id_2.clone(), bitvec!("0x0:4").into()),
),
]);
state.insert_pointers_to_unified_objects(&location_to_data_map, &call_tid);
assert_eq!(
state.get_register(&variable!("r0:4")),
Data::mock_from_target_map(BTreeMap::from([
(param_id.clone(), bitvec!("0x0:4").into()),
(new_id.clone(), bitvec!("0x0:4").into()),
]))
);
assert_eq!(
state
.memory
.get_object(&new_id)
.unwrap()
.get_value(bitvec!("0x0:4"), ByteSize::new(4)),
Data::from_target(new_id_2.clone(), bitvec!("0x0:4").into())
);
}
...@@ -3,6 +3,8 @@ use super::*;
use crate::analysis::pointer_inference::object::*;
use crate::{bitvec, def, expr, variable};
mod access_handling;
mod id_manipulation;
mod specialized_expressions;
fn bv(value: i64) -> ValueDomain {
...@@ -81,58 +83,6 @@ fn state() {
}
#[test]
fn clear_parameters_on_the_stack_on_extern_calls() {
    let global_memory = RuntimeMemoryImage::mock();
    let mut state = State::new(&variable!("RSP:8"), Tid::new("time0"), BTreeSet::new());
...@@ -226,61 +176,6 @@ fn reachable_ids_under_and_overapproximation() {
    );
}
/// Test that value specialization does not introduce unintended widening hints.
/// This is a regression test for cases where pointer comparisons introduced two-sided bounds
/// (resulting in two-sided widenings) instead of one-sided bounds.
...@@ -350,14 +245,38 @@ fn test_check_def_for_null_dereferences() {
#[test]
fn from_fn_sig() {
    let global_memory = RuntimeMemoryImage::mock();
    let full_access = AccessPattern::new_unknown_access();
    let fn_sig = FunctionSignature {
        parameters: BTreeMap::from([
            (AbstractLocation::mock("RSI:8", &[], 8), full_access),
            (AbstractLocation::mock("RSI:8", &[8], 8), full_access),
            (
                AbstractLocation::mock("RDI:8", &[], 8),
                AccessPattern::new().with_read_flag(),
            ),
        ]),
        global_parameters: BTreeMap::from([
            (AbstractLocation::mock_global(0x2000, &[], 8), full_access),
            (AbstractLocation::mock_global(0x2000, &[0], 8), full_access),
        ]),
    };
    let state = State::from_fn_sig(&fn_sig, &variable!("RSP:8"), Tid::new("func"));
    // The state should have 5 objects: The stack, the global memory space and 3 parameter objects.
    assert_eq!(
        state.memory.get_all_object_ids(),
        BTreeSet::from([
            AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock("RSP:8", &[], 8)),
            AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock("RSI:8", &[], 8)),
            AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock("RSI:8", &[8], 8)),
            AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock_global(0x0, &[], 8)),
            AbstractIdentifier::new(
                Tid::new("func"),
                AbstractLocation::mock_global(0x2000, &[0], 8)
            ),
        ])
    );
    // Check that pointers have been correctly added to the state.
    assert_eq!(
        state.get_register(&variable!("RSP:8")),
        Data::from_target(new_id("func", "RSP"), bv(0).into())
...@@ -370,6 +289,32 @@ fn from_fn_sig() {
        state.get_register(&variable!("RSI:8")),
        Data::from_target(new_id("func", "RSI"), bv(0).into())
    );
assert_eq!(
state.eval_abstract_location(&AbstractLocation::mock("RSI:8", &[8], 8), &global_memory),
Data::from_target(
AbstractIdentifier::new(Tid::new("func"), AbstractLocation::mock("RSI:8", &[8], 8)),
bitvec!("0x0:8").into()
)
);
assert_eq!(
state
.load_value_from_address(
&Data::from_target(
state.get_global_mem_id().clone(),
bitvec!("0x2000:8").into()
),
ByteSize::new(8),
&global_memory
)
.unwrap(),
Data::from_target(
AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock_global(0x2000, &[0], 8)
),
bitvec!("0x0:8").into()
)
);
}
#[test]
...@@ -411,3 +356,143 @@ fn add_param_object_from_callee() {
    assert_eq!(value.get_absolute_value().unwrap(), &bv(2).into());
    assert!(value.contains_top());
}
#[test]
fn test_minimize_before_return_instruction() {
let cconv = CallingConvention::mock_arm32();
let full_access = AccessPattern::new_unknown_access();
let deref_access = AccessPattern::new().with_dereference_flag();
let fn_sig = FunctionSignature {
parameters: BTreeMap::from([
(AbstractLocation::mock("r0:4", &[], 4), full_access),
(AbstractLocation::mock("r0:4", &[0], 4), deref_access),
(AbstractLocation::mock("r0:4", &[0, 0], 4), full_access),
]),
global_parameters: BTreeMap::from([]),
};
let mut state = State::from_fn_sig(&fn_sig, &variable!("sp:4"), Tid::new("func"));
state.memory.add_abstract_object(
AbstractIdentifier::mock("instr", "r0", 4),
ByteSize::new(4),
None,
);
state.memory.add_abstract_object(
AbstractIdentifier::mock("instr", "r1", 4),
ByteSize::new(4),
None,
);
state.set_register(&variable!("r8:4"), bitvec!("0x42:4").into());
state.set_register(&variable!("r0:4"), bitvec!("0x42:4").into());
state.set_register(
&variable!("r3:4"),
Data::from_target(
AbstractIdentifier::mock("instr", "r0", 4),
bitvec!("0x0:4").into(),
),
);
state.minimize_before_return_instruction(&fn_sig, &cconv);
// non-return registers are cleared, but return registers remain
assert!(state.get_register(&variable!("r8:4")).is_top());
assert!(!state.get_register(&variable!("r3:4")).is_top());
// immutable parameter objects are removed, but mutable parameter objects remain (even if no pointer to them remains)
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock("r0:4", &[], 4)
))
.is_some());
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock("r0:4", &[0], 4)
))
.is_none());
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("func"),
AbstractLocation::mock("r0:4", &[0, 0], 4)
))
.is_some());
// The stack is removed
assert!(state.memory.get_object(&state.stack_id).is_none());
// Unreferenced callee-originating objects are removed, but referenced ones remain
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("instr"),
AbstractLocation::mock("r0:4", &[], 4)
))
.is_some());
assert!(state
.memory
.get_object(&AbstractIdentifier::new(
Tid::new("instr"),
AbstractLocation::mock("r1:4", &[], 4)
))
.is_none());
}
#[test]
fn test_merge_mem_objects_with_unique_abstract_location() {
let call_tid = Tid::new("call");
let global_memory = RuntimeMemoryImage::mock();
let cconv = CallingConvention::mock_arm32();
let full_access = AccessPattern::new_unknown_access();
let fn_sig = FunctionSignature {
parameters: BTreeMap::from([(AbstractLocation::mock("r0:4", &[], 4), full_access)]),
global_parameters: BTreeMap::from([(
AbstractLocation::mock_global(0x2000, &[], 4),
full_access,
)]),
};
let mut state = State::from_fn_sig(&fn_sig, &variable!("sp:4"), Tid::new("callee"));
let param_id = AbstractIdentifier::mock("callee", "r0", 4);
let old_callee_orig_id = AbstractIdentifier::mock("instr", "r0", 4);
let old_callee_orig_id_2 = AbstractIdentifier::mock("instr_2", "r0", 4);
let new_id = AbstractIdentifier::mock_nested("call_param", "r0:4", &[0], 4);
state
.memory
.add_abstract_object(old_callee_orig_id.clone(), ByteSize::new(4), None);
state
.memory
.add_abstract_object(old_callee_orig_id_2.clone(), ByteSize::new(4), None);
// The pointer locations to callee_orig_id_2 will not be unique and thus removed from the state.
state.set_register(
&variable!("r1:4"),
Data::from_target(old_callee_orig_id_2.clone(), bitvec!("0x0:4").into()),
);
state.set_register(
&variable!("r2:4"),
Data::from_target(old_callee_orig_id_2.clone(), bitvec!("0x0:4").into()),
);
// This register should be cleared before computing return objects.
state.set_register(
&variable!("r8:4"),
Data::from_target(old_callee_orig_id.clone(), bitvec!("0x0:4").into()),
);
state
.store_value(
&Data::from_target(param_id.clone(), bitvec!("0x0:4").into()),
&Data::from_target(old_callee_orig_id, bitvec!("0x0:4").into()),
&global_memory,
)
.unwrap();
state.minimize_before_return_instruction(&fn_sig, &cconv);
state.merge_mem_objects_with_unique_abstract_location(&call_tid);
let mut expected_state = State::from_fn_sig(&fn_sig, &variable!("sp:4"), Tid::new("callee"));
expected_state.minimize_before_return_instruction(&fn_sig, &cconv);
expected_state
.memory
.add_abstract_object(new_id.clone(), ByteSize::new(4), None);
expected_state
.store_value(
&Data::from_target(param_id.clone(), bitvec!("0x0:4").into()),
&Data::from_target(new_id, bitvec!("0x0:4").into()),
&global_memory,
)
.unwrap();
assert_eq!(state, expected_state);
}
use super::*;
use crate::{abstract_domain::AbstractLocation, analysis::vsa_results::VsaResult};

/// Implementation of the [`VsaResult`] trait for providing other analyses with an easy-to-use interface
/// to use the value set and points-to analysis results of the pointer inference.
...@@ -30,4 +30,15 @@ impl<'a> VsaResult for PointerInference<'a> {
            .eval_parameter_arg(parameter, &context.project.runtime_memory_image)
            .ok()
    }
/// Evaluate the value of the given parameter at the given jump instruction.
fn eval_parameter_location_at_call(
&self,
jmp_tid: &Tid,
parameter: &AbstractLocation,
) -> Option<Data> {
let state = self.states_at_tids.get(jmp_tid)?;
let context = self.computation.get_context().get_context();
Some(state.eval_abstract_location(parameter, &context.project.runtime_memory_image))
}
}
...@@ -44,7 +44,7 @@ pub struct Context<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<
    /// The keys are of the form `(Def-TID, Current-Sub-TID)`
    /// to distinguish the nodes for blocks contained in more than one function.
    pub block_start_node_map: HashMap<(Tid, Tid), NodeIndex>,
    /// A set containing a given [`Def`] as the first `Def` of the block.
    /// The keys are of the form `(Def-TID, Current-Sub-TID)`
    /// to distinguish the nodes for blocks contained in more than one function.
    pub block_first_def_set: HashSet<(Tid, Tid)>,
...
//! This module provides the [`VsaResult`] trait
//! which defines an interface for the results of analyses similar to a value set analysis.
use crate::abstract_domain::AbstractLocation;
use crate::intermediate_representation::{Arg, Expression};
use crate::prelude::*;
...@@ -28,6 +29,13 @@ pub trait VsaResult {
    /// Return the value of a parameter at the given jump instruction.
    fn eval_parameter_arg_at_call(&self, jmp_tid: &Tid, param: &Arg) -> Option<Self::ValueDomain>;
    /// Return the value of a parameter, given as an abstract location, at the given jump instruction.
fn eval_parameter_location_at_call(
&self,
jmp_tid: &Tid,
param: &AbstractLocation,
) -> Option<Self::ValueDomain>;
    /// Evaluate the value of the given expression at the given jump instruction.
    fn eval_at_jmp(&self, jmp_tid: &Tid, expression: &Expression) -> Option<Self::ValueDomain>;
}
...@@ -107,7 +107,7 @@ impl<'a> Context<'a> {
            .function_signatures
            .get(id.get_tid())
            .unwrap()
            .get_stack_params_total_size(&self.project.stack_pointer_register);
        replace_if_smaller_bound(
            &mut upper_bound,
            BoundsMetadata::from_source(
...@@ -135,6 +135,8 @@ impl<'a> Context<'a> {
        object_id: &AbstractIdentifier,
        current_stack_frame_id: &AbstractIdentifier,
    ) -> (Option<BoundsMetadata>, Option<BoundsMetadata>) {
// FIXME: The malloc-tid-to-object-size-map check does not work anymore,
// because we do not use path hints in the PointerInference anymore.
        if self
            .malloc_tid_to_object_size_map
            .contains_key(object_id.get_tid())
...@@ -153,7 +155,7 @@ impl<'a> Context<'a> {
                .function_signatures
                .get(object_id.get_tid())
                .unwrap()
                .get_stack_params_total_size(&self.project.stack_pointer_register);
            (None, Some(BoundsMetadata::new(stack_frame_upper_bound)))
        } else if object_id.get_tid() == current_stack_frame_id.get_tid()
            && object_id.get_path_hints().is_empty()
...
...@@ -89,6 +89,8 @@ impl<'a> Context<'a> {
    /// then the absolute value is used and unknown origins of the size value are ignored.
    /// If more than one possible absolute value for the size is found then the minimum value for the size is returned.
    pub fn compute_size_of_heap_object(&self, object_id: &AbstractIdentifier) -> BitvectorDomain {
        // FIXME: We use path hints, which are no longer provided by the PointerInference, to substitute some values.
        // We either have to change that or make sure that we provide the path hints ourselves.
        if let Some(object_size) = self.malloc_tid_to_object_size_map.get(object_id.get_tid()) {
            let fn_tid_at_malloc_call = self.call_to_caller_fn_map[object_id.get_tid()].clone();
            let object_size = self.recursively_substitute_param_values_context_sensitive(
...
...@@ -192,9 +192,10 @@ fn add_param_replacements_for_call(
        .get(callee_tid)
    {
        for param_arg in fn_sig.parameters.keys() {
            if let Some(param_value) =
                vsa_results.eval_parameter_location_at_call(&call.tid, param_arg)
            {
                let param_id = AbstractIdentifier::new(call.tid.clone(), param_arg.clone());
                replacement_map.insert(param_id, param_value);
            }
        }
...
...@@ -13,10 +13,13 @@
//!
//! The check uses the results of the [Pointer Inference analysis](`crate::analysis::pointer_inference`)
//! to check whether any memory accesses may point outside of the bounds of the corresponding memory objects.
//! Additionally, the check uses a lightweight dataflow fixpoint computation
//! to ensure that for each memory object only the first access outside of its bounds is flagged as a CWE.
//!
//! Currently, the check is only partially interprocedural.
//! Bounds of parameter objects can be detected, but bounds of memory objects created in called functions
//! (other than the standard allocation functions) will not be detected.
//!
//! ## False Positives
//!
//! - Any analysis imprecision of the Pointer Inference analysis may lead to false positive results in this check.
...@@ -40,6 +43,20 @@
//!   this still may miss buffer overflows occurring in the called function.
//! - Right now the check only considers buffers on the stack or the heap, but not buffers in global memory.
//!   Thus corresponding overflows of buffers in global memory are not detected.
//! - Since the check is only partially interprocedural at the moment,
//! it will miss object sizes of objects created in called functions.
//! For example, if allocations are wrapped in simple wrapper functions,
//! the analysis will miss overflows for corresponding objects, because it cannot determine their object sizes.
// FIXME: The current implementation uses path hints for memory object IDs to determine object sizes interprocedurally.
// But the number of path hint combinations can grow exponentially
// with the call depth to the actual allocation site of a callee-created object.
// This led to state explosion in the PointerInference and thus path hints are no longer provided by the PointerInference.
// But without the path hints that this analysis depended on, the check can only resolve sizes of parameter objects,
// but not of objects returned from called functions (other than the standard allocation functions).
// A future implementation needs a better way to determine object sizes interprocedurally,
// probably depending on several fixpoint computations to circumvent the state explosion problems
// that the old implementation is vulnerable to.
use crate::analysis::pointer_inference::Data;
use crate::prelude::*;
...
...@@ -37,7 +37,7 @@ impl State {
        };
        let stack_upper_bound = std::cmp::max(
            stack_upper_bound,
            function_sig.get_stack_params_total_size(&project.stack_pointer_register),
        );
        let object_lower_bounds = BTreeMap::from([(
            stack_id.clone(),
...
...@@ -112,7 +112,7 @@ impl<'a> Context<'a> {
        if access_pattern.is_dereferenced() {
            if let Some(arg_value) = self
                .pointer_inference
                .eval_parameter_location_at_call(call_tid, arg)
            {
                if let Some(mut warnings) = state.check_address_for_use_after_free(&arg_value) {
                    warning_causes.append(&mut warnings);
...
...@@ -300,7 +300,7 @@ pub mod tests {
        assert_eq!(processed_warnings.len(), 1);
        let processed_cwe = processed_warnings.iter().next().unwrap();
        assert_eq!(&processed_cwe.other[0], &[
            "Accessed ID object_origin_tid(->call_tid) @ RAX:i64 may have been freed before at free_tid.".to_string(),
            "Relevant callgraph TIDs: [root_func_tid, call_tid]".to_string(),
        ]);
...
...@@ -31,6 +31,11 @@ impl Tid {
        }
    }
/// Returns true if the ID string ends with the provided suffix.
pub fn has_id_suffix(&self, suffix: &str) -> bool {
self.id.ends_with(suffix)
}
    /// Generate the ID of a block starting at the given address.
    ///
    /// Note that the block may not actually exist.
...
...@@ -10,7 +10,7 @@ use crate::utils::log::LogMessage;
use crate::utils::{binary::BareMetalConfig, ghidra::get_project_from_ghidra};
use std::path::Path;

/// Disassemble the given binary and parse it to a [`Project`] struct.
///
/// If successful, returns the binary file (as a byte vector), the parsed project struct,
/// and a vector of log messages generated during the process.
...
...@@ -450,6 +450,7 @@ mod tests {
    mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
    mark_skipped(&mut tests, "x86", "mingw32-gcc"); // TODO: Check reason for failure! Probably same as above?
    mark_skipped(&mut tests, "x64", "mingw32-gcc"); // We find an additional false positive in unrelated code.
    for test_case in tests {
        let num_expected_occurences = 1;
...