Unverified Commit 281a0207 by Enkelmann Committed by GitHub

Domain refactor (#76)

Code and documentation improvements for all abstract domains
parent 0d2777b0
use super::{AbstractDomain, HasBitSize, HasTop, RegisterDomain};
use crate::bil::*;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
/// The main trait describing an abstract domain.
/// The `BitvectorDomain` is a simple abstract domain describing a bitvector of known length.
///
/// Each abstract domain is partially ordered and has a maximal element (which can be generated by `top()`).
/// Abstract domains of the same type can be merged.
///
/// TODO: Decide if and how to represent intersects and bottom values!
pub trait AbstractDomain: Sized + Eq + Clone {
/// The maximal value of a domain.
/// Usually it indicates a value for which nothing is known.
fn top(&self) -> Self;
/// As values it can only assume a known bitvector or *Top(bitsize)*.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum BitvectorDomain {
Top(BitSize),
Value(Bitvector),
}
impl AbstractDomain for BitvectorDomain {
/// merge two values. Returns *Top* if the values are not equal.
fn merge(&self, other: &Self) -> Self {
if self == other {
self.clone()
......@@ -21,48 +21,21 @@ pub trait AbstractDomain: Sized + Eq + Clone {
}
}
/// Returns whether the element represents the top element or not.
/// Check if the value is *Top*.
fn is_top(&self) -> bool {
*self == self.top()
matches!(self, Self::Top(_))
}
}
/// A trait for abstract domains that represent values that can be loaded into register or written onto the stack.
/// Every value has a determined and immutable length (in bits).
pub trait ValueDomain: AbstractDomain {
/// Returns the size of the value in bits
fn bitsize(&self) -> BitSize;
/// Return a new top element with the given bitsize
fn new_top(bitsize: BitSize) -> Self;
/// Compute the (abstract) result of a binary operation
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self;
/// Compute the (abstract) result of a unary operation
fn un_op(&self, op: UnOpType) -> Self;
/// extract a sub-bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
Self::new_top(high_bit - low_bit) // TODO: This needs a unit test whether the result has the correct bitwidth!
}
/// Extend a bitvector using the given cast type
fn cast(&self, kind: CastType, width: BitSize) -> Self;
/// Concatenate two bitvectors
fn concat(&self, other: &Self) -> Self {
Self::new_top(self.bitsize() + other.bitsize())
impl HasTop for BitvectorDomain {
/// Return a *Top* value with the same bitsize as `self`.
fn top(&self) -> BitvectorDomain {
BitvectorDomain::Top(self.bitsize())
}
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum BitvectorDomain {
Top(BitSize),
Value(Bitvector),
}
impl ValueDomain for BitvectorDomain {
impl HasBitSize for BitvectorDomain {
/// Return the bitsize of `self`.
fn bitsize(&self) -> BitSize {
use BitvectorDomain::*;
match self {
......@@ -70,14 +43,17 @@ impl ValueDomain for BitvectorDomain {
Value(bitvec) => bitvec.width().to_usize() as u16,
}
}
}
impl RegisterDomain for BitvectorDomain {
/// Get a *Top* element with the given bitsize.
fn new_top(bitsize: BitSize) -> BitvectorDomain {
BitvectorDomain::Top(bitsize)
}
/// Evaluate the given binary operation.
/// Note that this function assumes that both values have the same bitsize.
/// If not, this function should panic.
///
/// For non-shift operations, this function will panic if the operands have different bitsizes.
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self {
use BinOpType::*;
match op {
......@@ -186,7 +162,6 @@ impl ValueDomain for BitvectorDomain {
/// Extract a sub-bitvector out of a bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
if let BitvectorDomain::Value(bitvec) = self {
// TODO: Check whether this is correct on a real world example and then write a unit test for it
BitvectorDomain::Value(
bitvec
.clone()
......@@ -200,6 +175,7 @@ impl ValueDomain for BitvectorDomain {
}
}
/// Perform a size-changing cast on a bitvector.
fn cast(&self, kind: CastType, width: BitSize) -> Self {
if let BitvectorDomain::Value(bitvec) = self {
use CastType::*;
......@@ -227,6 +203,7 @@ impl ValueDomain for BitvectorDomain {
}
}
/// Concatenate two bitvectors.
fn concat(&self, other: &Self) -> Self {
match (self, other) {
(BitvectorDomain::Value(left_bitvec), BitvectorDomain::Value(right_bitvec)) => {
......@@ -245,17 +222,10 @@ impl ValueDomain for BitvectorDomain {
}
}
impl AbstractDomain for BitvectorDomain {
fn top(&self) -> BitvectorDomain {
BitvectorDomain::Top(self.bitsize())
}
}
impl std::ops::Add for BitvectorDomain {
type Output = BitvectorDomain;
fn add(self, rhs: Self) -> Self {
assert_eq!(self.bitsize(), rhs.bitsize());
self.bin_op(crate::bil::BinOpType::PLUS, &rhs)
}
}
......@@ -264,7 +234,6 @@ impl std::ops::Sub for BitvectorDomain {
type Output = BitvectorDomain;
fn sub(self, rhs: Self) -> Self {
assert_eq!(self.bitsize(), rhs.bitsize());
self.bin_op(crate::bil::BinOpType::MINUS, &rhs)
}
}
......@@ -283,6 +252,16 @@ impl std::convert::From<Bitvector> for BitvectorDomain {
}
}
/// Try to get the concrete bitvector out of a `BitvectorDomain`.
///
/// The conversion yields a clone of the contained bitvector for `Value`
/// and fails with `Err(())` for `Top`, since *Top* holds no concrete value.
impl std::convert::TryFrom<&BitvectorDomain> for Bitvector {
    type Error = ();

    fn try_from(bitvec_domain: &BitvectorDomain) -> Result<Bitvector, ()> {
        if let BitvectorDomain::Value(concrete) = bitvec_domain {
            Ok(concrete.clone())
        } else {
            Err(())
        }
    }
}
impl std::fmt::Display for BitvectorDomain {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
......
use super::{
AbstractDomain, AbstractIdentifier, HasBitSize, HasTop, PointerDomain, RegisterDomain,
};
use crate::bil::*;
use crate::prelude::*;
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Display;
/// An abstract domain representing either a pointer or a non-pointer value.
/// Both non-pointer values and offsets of pointers are represented by the same abstract domain `T`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub enum DataDomain<T: RegisterDomain> {
/// The *Top* element. Only the bitsize of the value is stored.
Top(BitSize),
/// A pointer value, i.e. a collection of abstract targets with offsets of type `T`.
Pointer(PointerDomain<T>),
/// A non-pointer value represented by the domain `T`.
Value(T),
}
impl<T: RegisterDomain> DataDomain<T> {
    /// If `self` is a pointer, replace the abstract identifier `old_id` with `new_id`
    /// and add `offset_adjustment` to the corresponding pointer offset.
    /// Non-pointer values are left unchanged.
    ///
    /// This is needed to adjust stack pointer on call and return instructions.
    pub fn replace_abstract_id(
        &mut self,
        old_id: &AbstractIdentifier,
        new_id: &AbstractIdentifier,
        offset_adjustment: &T,
    ) {
        match self {
            Self::Pointer(pointer) => {
                pointer.replace_abstract_id(old_id, new_id, offset_adjustment)
            }
            Self::Top(_) | Self::Value(_) => (),
        }
    }

    /// Collect all abstract IDs that `self` refers to.
    /// Only pointers reference IDs, so the set is empty for every other value.
    pub fn referenced_ids(&self) -> BTreeSet<AbstractIdentifier> {
        match self {
            Self::Pointer(pointer) => pointer.ids().cloned().collect(),
            Self::Top(_) | Self::Value(_) => BTreeSet::new(),
        }
    }

    /// If `self` is a pointer, drop every target whose ID is contained in `ids_to_remove`.
    /// A pointer that would end up without any target is replaced by *Top* (of the same bitsize).
    /// Non-pointer values are left unchanged.
    pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
        let kept_targets: BTreeMap<AbstractIdentifier, T> = match self {
            Self::Pointer(pointer) => {
                let mut kept = BTreeMap::new();
                for (id, offset) in pointer.iter_targets() {
                    if !ids_to_remove.contains(id) {
                        kept.insert(id.clone(), offset.clone());
                    }
                }
                kept
            }
            Self::Top(_) | Self::Value(_) => return,
        };
        // The bitsize has to be read while `self` still holds the old pointer.
        *self = if kept_targets.is_empty() {
            Self::new_top(self.bitsize())
        } else {
            Self::Pointer(PointerDomain::with_targets(kept_targets))
        };
    }
}
impl<T: RegisterDomain> HasBitSize for DataDomain<T> {
// Return the bitsize of `self`.
fn bitsize(&self) -> BitSize {
use DataDomain::*;
match self {
Top(size) => *size,
Pointer(pointer) => pointer.bitsize(),
Value(bitvec) => bitvec.bitsize(),
}
}
}
impl<T: RegisterDomain> HasTop for DataDomain<T> {
    /// Generate a new *Top* element with the same bitsize as `self`.
    fn top(&self) -> Self {
        let bitsize = self.bitsize();
        Self::new_top(bitsize)
    }
}
impl<T: RegisterDomain> RegisterDomain for DataDomain<T> {
// Return a new *Top* element with the given bitsize
fn new_top(bitsize: BitSize) -> Self {
Self::Top(bitsize)
}
/// Compute the (abstract) result of a binary operation
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self {
use BinOpType::*;
use DataDomain::*;
match (self, op, rhs) {
(Value(left), _, Value(right)) => Value(left.bin_op(op, right)),
(Pointer(pointer), PLUS, Value(value)) | (Value(value), PLUS, Pointer(pointer)) => {
Pointer(pointer.add_to_offset(value))
}
(Pointer(pointer), MINUS, Value(value)) => Pointer(pointer.sub_from_offset(value)),
(Pointer(pointer_lhs), MINUS, Pointer(pointer_rhs)) => {
if pointer_lhs.ids().len() == 1 && pointer_rhs.ids().len() == 1 {
let (id_lhs, offset_lhs) = pointer_lhs.iter_targets().next().unwrap();
let (id_rhs, offset_rhs) = pointer_rhs.iter_targets().next().unwrap();
if id_lhs == id_rhs {
Self::Value(offset_lhs.bin_op(MINUS, offset_rhs))
} else {
Self::Top(self.bitsize())
}
} else {
// We cannot be sure that both pointers point to the same target
Self::Top(self.bitsize())
}
}
(_, EQ, _) | (_, NEQ, _) | (_, LT, _) | (_, LE, _) | (_, SLT, _) | (_, SLE, _) => {
T::new_top(1).into()
}
(_, PLUS, _)
| (_, MINUS, _)
| (_, TIMES, _)
| (_, DIVIDE, _)
| (_, SDIVIDE, _)
| (_, MOD, _)
| (_, SMOD, _)
| (_, LSHIFT, _)
| (_, RSHIFT, _)
| (_, ARSHIFT, _)
| (_, AND, _)
| (_, OR, _)
| (_, XOR, _) => Self::new_top(self.bitsize()),
}
}
/// Compute the (abstract) result of a unary operation
fn un_op(&self, op: UnOpType) -> Self {
if let Self::Value(value) = self {
Self::Value(value.un_op(op))
} else {
Self::new_top(self.bitsize())
}
}
/// extract a sub-bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
if let Self::Value(value) = self {
Self::Value(value.extract(low_bit, high_bit))
} else {
Self::new_top(high_bit - low_bit + 1)
}
}
/// Extend or shrink a bitvector using the given cast type
fn cast(&self, kind: CastType, width: BitSize) -> Self {
if self.bitsize() == width {
// The cast is a no-op.
return self.clone();
}
if let Self::Value(value) = self {
Self::Value(value.cast(kind, width))
} else {
// The result of extending or shrinking pointers is undefined.
Self::new_top(width)
}
}
/// Concatenate two bitvectors.
fn concat(&self, other: &Self) -> Self {
if let (Self::Value(upper_bits), Self::Value(lower_bits)) = (self, other) {
Self::Value(upper_bits.concat(lower_bits))
} else {
Self::new_top(self.bitsize() + other.bitsize())
}
}
}
impl<T: RegisterDomain> AbstractDomain for DataDomain<T> {
// Merge `self` with `other`.
fn merge(&self, other: &Self) -> Self {
use DataDomain::*;
match (self, other) {
(Top(bitsize), _) | (_, Top(bitsize)) => Top(*bitsize),
(Pointer(pointer1), Pointer(pointer2)) => Pointer(pointer1.merge(pointer2)),
(Value(val1), Value(val2)) => Value(val1.merge(val2)),
(Pointer(_), Value(_)) | (Value(_), Pointer(_)) => Top(self.bitsize()),
}
}
/// Return whether the element represents a top element or not.
fn is_top(&self) -> bool {
matches!(self, Self::Top(_))
}
}
impl<T: RegisterDomain> From<PointerDomain<T>> for DataDomain<T> {
fn from(val: PointerDomain<T>) -> Self {
Self::Pointer(val)
}
}
impl<T: RegisterDomain> From<T> for DataDomain<T> {
fn from(value: T) -> Self {
Self::Value(value)
}
}
impl<T: RegisterDomain + From<Bitvector>> From<Bitvector> for DataDomain<T> {
fn from(bitvector: Bitvector) -> Self {
Self::Value(bitvector.into())
}
}
impl<T: RegisterDomain + Display> DataDomain<T> {
/// Get a more compact json-representation of the data domain.
/// Intended for pretty printing, not useable for serialization/deserialization.
pub fn to_json_compact(&self) -> serde_json::Value {
match self {
Self::Top(bitsize) => serde_json::Value::String(format!("Top:{}", bitsize)),
Self::Pointer(pointer) => {
let target_iter = pointer.iter_targets().map(|(id, offset)| {
(
format!("{}", id),
serde_json::Value::String(format!("{}", offset)),
)
});
let targets = serde_json::Value::Object(target_iter.collect());
let mut obj_map = serde_json::Map::new();
obj_map.insert("Pointer".to_string(), targets);
serde_json::Value::Object(obj_map)
}
Self::Value(bitvector) => serde_json::Value::String(format!("Value: {}", bitvector)),
}
}
}
#[cfg(test)]
mod tests {
use super::super::*;
use super::*;
// All tests use `BitvectorDomain` as the underlying value/offset domain.
type Data = DataDomain<BitvectorDomain>;
/// Build a concrete bitvector value from an `i64`.
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
/// Build an abstract identifier for the register `name`, using a fixed mock time ID.
fn new_id(name: &str) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new("time0"),
AbstractLocation::Register(name.into(), 64),
)
}
/// Build a pointer domain with a single target (identified by `location`) and the given offset.
fn new_pointer_domain(location: &str, offset: i64) -> PointerDomain<BitvectorDomain> {
let id = new_id(location);
PointerDomain::new(id, bv(offset))
}
/// Build a `Data::Pointer` with a single target and the given offset.
fn new_pointer(location: &str, offset: i64) -> Data {
Data::Pointer(new_pointer_domain(location, offset))
}
/// Build a `Data::Value` holding the given concrete value.
fn new_value(value: i64) -> Data {
Data::Value(bv(value))
}
#[test]
fn data_abstract_domain() {
let pointer = new_pointer("Rax".into(), 0);
let data = new_value(42);
// Merging a value with itself is a no-op.
assert_eq!(pointer.merge(&pointer), pointer);
// Merging a pointer with a non-pointer value yields *Top*.
assert_eq!(pointer.merge(&data), Data::new_top(64));
// Merging two different values stays a `Value`, but its content becomes *Top* of the inner domain.
assert_eq!(
data.merge(&new_value(41)),
Data::Value(BitvectorDomain::new_top(64))
);
// Merging pointers with different targets still yields a pointer.
let other_pointer = new_pointer("Rbx".into(), 0);
match pointer.merge(&other_pointer) {
Data::Pointer(_) => (),
_ => panic!(),
}
}
#[test]
fn data_register_domain() {
use crate::bil::BinOpType::*;
let data = new_value(42);
assert_eq!(data.bitsize(), 64);
let three = new_value(3);
let pointer = new_pointer("Rax".into(), 0);
// Value arithmetic is delegated to the inner domain; pointer arithmetic adjusts the offset.
assert_eq!(data.bin_op(PLUS, &three), new_value(45));
assert_eq!(pointer.bin_op(PLUS, &three), new_pointer("Rax".into(), 3));
assert_eq!(three.un_op(crate::bil::UnOpType::NEG), new_value(-3));
// Extracting the bits 0 to 31 is expected to yield a 32-bit value.
assert_eq!(
three.extract(0, 31),
Data::Value(BitvectorDomain::Value(Bitvector::from_i32(3)))
);
assert_eq!(data.cast(crate::bil::CastType::SIGNED, 128).bitsize(), 128);
// On concatenation `self` provides the upper bits and the argument the lower bits.
let one = Data::Value(BitvectorDomain::Value(Bitvector::from_i32(1)));
let two = Data::Value(BitvectorDomain::Value(Bitvector::from_i32(2)));
let concat = new_value((1 << 32) + 2);
assert_eq!(one.concat(&two), concat);
}
#[test]
fn remove_ids() {
let mut targets = BTreeMap::new();
targets.insert(new_id("Rax"), bv(1));
targets.insert(new_id("Rbx"), bv(2));
let mut data: Data = PointerDomain::with_targets(targets).into();
// "Rcx" is not a target of the pointer, so removing it must not have any effect.
let mut ids_to_remove = BTreeSet::new();
ids_to_remove.insert(new_id("Rbx"));
ids_to_remove.insert(new_id("Rcx"));
data.remove_ids(&ids_to_remove);
assert_eq!(
data.referenced_ids(),
vec![new_id("Rax")].into_iter().collect()
);
// Non-pointer values are not changed by `remove_ids`.
data = bv(42).into();
data.remove_ids(&ids_to_remove);
assert_eq!(data, bv(42).into());
}
}
......@@ -3,18 +3,29 @@ use crate::prelude::*;
use derive_more::Deref;
use std::sync::Arc;
// TODO: Right now abstract locations are used as giving the location where a pointer to an object is located.
// But it could also be used to point into the object (at offset 0).
// Can I solve this possible ambivalence in intended usage in a way such that accidentally wrong usage is prevented?
// If not, I have to document the intended usage with a big warning sign.
/// An abstract identifier is used to identify an object or a value in an abstract state.
///
/// Since many program states can be represented by the same abstract state in data-flow analysis,
/// one sometimes needs a way to uniquely identify a variable or a memory object in all of the represented program states.
/// Abstract identifiers achieve this by identifying a *time*, i.e. a specific abstract state,
/// and a *location*, i.e. a recipe for abstracting a concrete value from any concrete state that is represented by the abstract state.
/// The value in question then serves as the identifier.
/// For example, a pointer may uniquely determine the memory object it is pointing to.
/// Or a value may represent the value of a variable at a certain time,
/// whereas the value of the variable in the current state is given as an offset to the value at the identified time.
///
/// Since program points may be visited several times during an execution trace (e.g. in loops),
/// the *time* component of an abstract identifier may not actually determine a unique point in time of an execution trace.
/// In this case the meaning of an abstract identifier depends upon its use case.
/// E.g. it may represent the union of all values at the specific *location* for each time the program point is visited during an execution trace
/// or it may only represent the value at the last time the program point was visited.
///
/// An abstract identifier is given by a time identifier and a location identifier.
///
/// For the location identifier see `AbstractLocation`.
/// The time identifier is given by a `Tid`.
/// If it is the `Tid` of a basic block, then it describes the point in time *before* execution of the first instruction in the block.
/// If it is the `Tid` of a `Def` or `Jmp`, then it describes the point in time *after* the execution of the `Def` or `Jmp`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Deref)]
#[deref(forward)]
pub struct AbstractIdentifier(Arc<AbstractIdentifierData>);
......@@ -75,6 +86,11 @@ impl AbstractLocation {
}
}
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
/// or an offset to a pointer to another memory location,
/// where the value can be found by (recursively) following the embedded `target` memory location.
///
/// The offset and size variables are given in bytes.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation {
Location {
......
/*!
A memory region is an abstract domain representing a continuous region of memory.
For example, a stack domain containing values written to the stack can be represented with a memory region.
Design notes:
- The values do not need a fixed size.
Instead you need to provide the size of an element when adding it to the memory region.
- Whenever you try to read from an address that is not assigned to a value, the `Value::top()` element gets returned.
The reason behind this is that the value could be anything.
- Whenever adding an element intersects existing elements, the existing ones get removed from the memory region.
The reason is that reading the old positions afterwards could yield anything.
- Whenever a read from a correct position but with an incorrect size occurs, `Value::top()` gets returned.
That is because the value could be anything if the size read is too big and reading of partial values is not implemented for this type.
- An empty memory region could yield anything (in the sense of `Value::top`) at a read at any position.
In that regard, an empty memory region is actually the `top()` element of the domain.
- TODO: Implement the abstract domain trait for MemRegion.
- TODO: Remove the implicit saving of element sizes, as ValueDomains have now an intrinsic size.
Implementation needs is_top() to be a member function of the ValueDomain trait.
*/
use super::abstract_domain::*;
use super::{AbstractDomain, HasBitSize, HasTop, RegisterDomain};
use crate::bil::{BitSize, Bitvector};
use apint::{Int, Width};
use derive_more::Deref;
......@@ -27,33 +7,35 @@ use std::collections::BTreeMap;
use std::ops::DerefMut;
use std::sync::Arc;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
struct Element<T> {
size: i64,
value: T,
}
/// A memory region is an abstract domain representing a continuous region of memory, e.g. the stack frame of a function.
///
/// This implementation can only save values of one `RegisterDomain` type
/// and it can only track values with a known offset, i.e. it cannot handle arrays of any kind.
/// Offsets are internally saved as signed integers, which allows negative offsets,
/// e.g. for downward growing stack frames.
///
/// An empty memory region means that nothing is known about the values at any offset inside the region.
/// Thus an empty memory region actually represents the *Top* element of its abstract domain.
///
/// To allow cheap cloning of a `MemRegion`, the actual data is wrapped inside an `Arc`.
#[derive(Serialize, Deserialize, Debug, Hash, Clone, PartialEq, Eq, Deref)]
#[deref(forward)]
pub struct MemRegion<T: AbstractDomain + ValueDomain + std::fmt::Debug>(Arc<MemRegionData<T>>);
pub struct MemRegion<T: AbstractDomain + HasBitSize + RegisterDomain + std::fmt::Debug>(
Arc<MemRegionData<T>>,
);
impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> DerefMut for MemRegion<T> {
impl<T: AbstractDomain + HasBitSize + RegisterDomain + std::fmt::Debug> DerefMut for MemRegion<T> {
fn deref_mut(&mut self) -> &mut MemRegionData<T> {
Arc::make_mut(&mut self.0)
}
}
// TODO: most of the functions in this impl block should be moved to MemRegionData (or removed, if they are only thin wrappers).
impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> MemRegion<T> {
pub fn new(address_bitsize: BitSize) -> Self {
MemRegion(Arc::new(MemRegionData::new(address_bitsize)))
}
pub fn get_address_bitsize(&self) -> BitSize {
self.0.get_address_bitsize()
}
pub fn merge(&self, other: &Self) -> Self {
impl<T: AbstractDomain + HasBitSize + RegisterDomain + std::fmt::Debug> AbstractDomain
for MemRegion<T>
{
/// Short-circuiting the `MemRegionData::merge` function if `self==other`,
/// to prevent unnecessary cloning.
fn merge(&self, other: &Self) -> Self {
if self == other {
self.clone()
} else {
......@@ -61,39 +43,34 @@ impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> MemRegion<T> {
}
}
pub fn add(&mut self, value: T, position: Bitvector) {
Arc::make_mut(&mut self.0).add(value, position)
}
pub fn get(&self, position: Bitvector, size_in_bytes: u64) -> T {
self.0.get(position, size_in_bytes)
}
pub fn remove(&mut self, position: Bitvector, size_in_bytes: Bitvector) {
Arc::make_mut(&mut self.0).remove(position, size_in_bytes)
}
pub fn iter_values(&self) -> std::collections::btree_map::Values<'_, i64, T> {
self.0.values.values()
/// The *Top* element is represented by an empty memory region.
fn is_top(&self) -> bool {
self.values.is_empty()
}
}
pub fn iter_values_mut(&mut self) -> std::collections::btree_map::ValuesMut<'_, i64, T> {
Arc::make_mut(&mut self.0).values.values_mut()
impl<T: AbstractDomain + HasBitSize + RegisterDomain + std::fmt::Debug> HasTop for MemRegion<T> {
/// Return a new, empty memory region with the same address bitsize as `self`, representing the *Top* element of the abstract domain.
fn top(&self) -> Self {
Self::new(self.get_address_bitsize())
}
}
pub fn iter(&self) -> std::collections::btree_map::Iter<i64, T> {
self.0.values.iter()
impl<T: AbstractDomain + HasBitSize + RegisterDomain + std::fmt::Debug> MemRegion<T> {
// Create a new, empty memory region.
pub fn new(address_bitsize: BitSize) -> Self {
MemRegion(Arc::new(MemRegionData::new(address_bitsize)))
}
}
/// An abstract domain representing a continuous region of memory. See the module level description for more.
/// The internal data of a memory region. See the description of `MemRegion` for more.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct MemRegionData<T: AbstractDomain + ValueDomain + std::fmt::Debug> {
pub struct MemRegionData<T: AbstractDomain + HasBitSize + RegisterDomain + std::fmt::Debug> {
address_bitsize: BitSize,
values: BTreeMap<i64, T>,
}
impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> MemRegionData<T> {
impl<T: AbstractDomain + HasBitSize + RegisterDomain + std::fmt::Debug> MemRegionData<T> {
/// create a new, empty MemRegion
pub fn new(address_bitsize: BitSize) -> MemRegionData<T> {
MemRegionData {
......@@ -102,6 +79,7 @@ impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> MemRegionData<T> {
}
}
/// Get the bitsize of pointers for the address space that the memory region belongs to.
pub fn get_address_bitsize(&self) -> BitSize {
self.address_bitsize
}
......@@ -147,7 +125,7 @@ impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> MemRegionData<T> {
}
/// Get the value at the given position.
/// If there is no value at the position or the size of the element is not the same as the provided size, return `T::top()`.
/// If there is no value at the position or the size of the element is not the same as the provided size, return `T::new_top()`.
pub fn get(&self, position: Bitvector, size_in_bytes: u64) -> T {
assert_eq!(position.width().to_usize(), self.address_bitsize as usize);
let position = Int::from(position).try_to_i64().unwrap();
......@@ -200,6 +178,44 @@ impl<T: AbstractDomain + ValueDomain + std::fmt::Debug> MemRegionData<T> {
values: merged_values,
}
}
/// Get an iterator over all elements together with their offset into the memory region.
///
/// The iterator yields `(offset, value)` pairs borrowed from the internal `BTreeMap`,
/// so the elements are visited in ascending order of their offsets.
pub fn iter(&self) -> std::collections::btree_map::Iter<i64, T> {
self.values.iter()
}
/// Get an iterator over all values in the memory region.
///
/// Only the values are yielded. Use `iter()` to also get the corresponding offsets.
pub fn values(&self) -> std::collections::btree_map::Values<i64, T> {
self.values.values()
}
/// Get an iterator over all values in the memory region for in-place manipulation.
///
/// Note that one can change values to *Top* using the iterator.
/// These values should be removed from the memory region using `clear_top_values()`.
pub fn values_mut(&mut self) -> std::collections::btree_map::ValuesMut<i64, T> {
self.values.values_mut()
}
/// Remove all values representing the *Top* element from the internal value store,
/// as these should not be saved in the internal representation.
pub fn clear_top_values(&mut self) {
    // Collect the offsets first, since the map cannot be modified while iterating over it.
    let top_offsets: Vec<i64> = self
        .values
        .iter()
        .filter(|(_, value)| value.is_top())
        .map(|(offset, _)| *offset)
        .collect();
    for offset in &top_offsets {
        self.values.remove(offset);
    }
}
}
#[cfg(test)]
......@@ -210,16 +226,33 @@ mod tests {
struct MockDomain(i64, BitSize);
impl AbstractDomain for MockDomain {
fn top(&self) -> MockDomain {
MockDomain::new_top(self.1)
fn merge(&self, other: &Self) -> Self {
assert_eq!(self.1, other.1);
if self == other {
self.clone()
} else {
self.top()
}
}
fn is_top(&self) -> bool {
self == &self.top()
}
}
impl ValueDomain for MockDomain {
impl HasBitSize for MockDomain {
fn bitsize(&self) -> BitSize {
self.1
}
}
impl HasTop for MockDomain {
fn top(&self) -> Self {
Self::new_top(self.1)
}
}
impl RegisterDomain for MockDomain {
fn new_top(bitsize: BitSize) -> MockDomain {
MockDomain(0, bitsize)
}
......@@ -235,6 +268,14 @@ mod tests {
fn cast(&self, _kind: crate::bil::CastType, width: BitSize) -> Self {
Self::new_top(width)
}
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
Self::new_top(high_bit - low_bit + 1)
}
fn concat(&self, other: &Self) -> Self {
Self::new_top(self.bitsize() + other.bitsize())
}
}
fn mock(val: i64, bitsize: BitSize) -> MockDomain {
......@@ -287,4 +328,36 @@ mod tests {
assert_eq!(other_region.values.len(), 1);
assert_eq!(merged_region.values.len(), 0);
}
// Checks that `remove`, `clear_interval` and `clear_top_values` delete the expected number of elements.
#[test]
fn value_removals() {
let mut region: MemRegionData<MockDomain> = MemRegionData::new(64);
// Fill the region with five 64-bit mock values.
// NOTE(review): `bv` is defined outside this view; presumably it builds a position bitvector, so the values sit at offsets 0, 8, 16, 24, 32 - confirm.
region.add(mock(1, 64), bv(0));
region.add(mock(2, 64), bv(8));
region.add(mock(3, 64), bv(16));
region.add(mock(4, 64), bv(24));
region.add(mock(5, 64), bv(32));
assert_eq!(region.values.len(), 5);
// Expected: exactly one element is deleted by this removal.
region.remove(bv(2), bv(3));
assert_eq!(region.values.len(), 4);
// Expected: no element is deleted here ...
region.remove(bv(7), bv(1));
assert_eq!(region.values.len(), 4);
// ... but extending the removed range by one deletes another element.
region.remove(bv(7), bv(2));
assert_eq!(region.values.len(), 3);
// Same pattern for `clear_interval`: the first call is expected to delete nothing, the second one element.
region.clear_interval(15, 1);
assert_eq!(region.values.len(), 3);
region.clear_interval(15, 3);
assert_eq!(region.values.len(), 2);
// Overwrite one of the remaining values with *Top* in place and let `clear_top_values` delete it.
for val in region.values_mut() {
if *val == mock(5, 64) {
*val = mock(0, 64); // This is a *Top* element
}
}
region.clear_top_values();
assert_eq!(region.values.len(), 1);
assert_eq!(region.get(bv(24), 8), mock(4, 64));
}
}
//! This module defines traits describing general properties of abstract domains
//! as well as several abstract domain types implementing these traits.
use crate::bil::*;
mod bitvector;
pub use bitvector::*;
mod identifier;
pub use identifier::*;
mod pointer;
pub use pointer::*;
mod data;
pub use data::*;
mod mem_region;
pub use mem_region::*;
/// The main trait describing an abstract domain.
///
/// Each abstract domain is partially ordered.
/// Abstract domains of the same type can be merged.
/// Whether a value is the maximal (*Top*) element can be checked with `is_top()`.
/// Generating a *Top* element is not part of this trait, see the `HasTop` trait for that.
pub trait AbstractDomain: Sized + Eq + Clone {
/// Merge `self` with `other` and return the merged value.
fn merge(&self, other: &Self) -> Self;
/// Returns whether the element represents the top element or not.
fn is_top(&self) -> bool;
}
/// A trait for types representing values with a fixed size (in bits).
///
/// For abstract domains, the bitsize is a parameter of the domain itself,
/// i.e. you cannot merge values of different bitsizes,
/// since they lie in different posets (one for each bitsize).
///
/// The size is expressed as a `BitSize`.
pub trait HasBitSize {
/// Return the size of the represented value in bits.
fn bitsize(&self) -> BitSize;
}
/// An abstract domain implementing this trait has a global maximum, i.e. a *Top* element.
///
/// Not every abstract domain needs to implement this trait, as a domain may have no *Top* element.
pub trait HasTop {
/// Return an instance of the *Top* element.
///
/// Since an abstract domain type may represent a whole family of abstract domains,
/// this function takes an instance of the domain as a parameter,
/// so it can return the *Top* element of the same family member that the provided instance belongs to.
fn top(&self) -> Self;
}
/// A trait for abstract domains that can represent values loaded into CPU registers.
///
/// The domain implements all general operations used to manipulate register values.
/// The domain is parametrized by its bitsize (which represents the size of the register).
/// It has a *Top* element, which is only characterized by its bitsize.
pub trait RegisterDomain: AbstractDomain + HasBitSize + HasTop {
/// Return a new top element with the given bitsize.
fn new_top(bitsize: BitSize) -> Self;
/// Compute the (abstract) result of a binary operation with `self` as the left and `rhs` as the right operand.
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self;
/// Compute the (abstract) result of a unary operation.
fn un_op(&self, op: UnOpType) -> Self;
/// Extract the sub-bitvector ranging from `low_bit` to `high_bit`.
/// Both bounds are inclusive, i.e. the result has a bitsize of `high_bit - low_bit + 1`.
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self;
/// Change the size of a bitvector to `width` bits using the given cast type.
fn cast(&self, kind: CastType, width: BitSize) -> Self;
/// Concatenate two bitvectors, where `self` provides the upper bits and `other` the lower bits of the result.
fn concat(&self, other: &Self) -> Self;
}
use super::{AbstractDomain, AbstractIdentifier, HasBitSize, RegisterDomain};
use crate::bil::BinOpType;
use crate::prelude::*;
use std::collections::BTreeMap;
use std::fmt::Display;
/// An abstract value representing a pointer given as a map from an abstract identifier
/// to the offset in the pointed to object. The offset itself is also a member of an abstract domain.
///
/// If the map contains more than one key,
/// it indicates that the pointer may point to any of the contained objects.
///
/// A `PointerDomain` value always has at least one target.
/// Trying to create a pointer without targets should always lead to panics.
/// The bitsize of a pointer is read from one of its targets, so it cannot be determined for an empty target map.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct PointerDomain<T: RegisterDomain>(BTreeMap<AbstractIdentifier, T>);
impl<T: RegisterDomain> AbstractDomain for PointerDomain<T> {
    /// Merge two pointers.
    ///
    /// The merged pointer contains all targets of `self` and `other`.
    /// For targets, that are contained in both, the offsets are merged.
    fn merge(&self, other: &Self) -> Self {
        let mut merged_targets = self.0.clone();
        for (id, other_offset) in other.0.iter() {
            // The entry API needs only one map lookup per target
            // to either merge with the existing offset or insert the new one.
            merged_targets
                .entry(id.clone())
                .and_modify(|own_offset| *own_offset = own_offset.merge(other_offset))
                .or_insert_with(|| other_offset.clone());
        }
        PointerDomain(merged_targets)
    }

    /// Returns false, as PointerDomain has no *Top* element.
    fn is_top(&self) -> bool {
        false
    }
}
impl<T: RegisterDomain> HasBitSize for PointerDomain<T> {
    /// Return the bitsize of the pointer.
    /// Should always equal the pointer size of the CPU architecture.
    ///
    /// The size is taken from the offset of the first target.
    ///
    /// # Panics
    ///
    /// Panics if the pointer has no targets.
    fn bitsize(&self) -> BitSize {
        let first_offset = self
            .0
            .values()
            .next()
            .expect("Pointer without targets encountered");
        first_offset.bitsize()
    }
}
impl<T: RegisterDomain> PointerDomain<T> {
    /// Create a new pointer with exactly one target.
    pub fn new(target: AbstractIdentifier, offset: T) -> PointerDomain<T> {
        let mut map = BTreeMap::new();
        map.insert(target, offset);
        PointerDomain(map)
    }

    /// Create a new pointer with a set of targets.
    ///
    /// # Panics
    ///
    /// Panics if `targets` is empty, since a pointer must always have at least one target.
    pub fn with_targets(targets: BTreeMap<AbstractIdentifier, T>) -> PointerDomain<T> {
        assert!(
            !targets.is_empty(),
            "Tried to create a pointer without targets"
        );
        PointerDomain(targets)
    }

    /// Add a new target to the pointer.
    /// If the pointer already contains a target with the same abstract identifier,
    /// the offsets of both targets get merged.
    pub fn add_target(&mut self, target: AbstractIdentifier, offset: T) {
        let merged_offset = match self.0.get(&target) {
            Some(old_offset) => old_offset.merge(&offset),
            None => offset,
        };
        self.0.insert(target, merged_offset);
    }

    /// Replace an abstract identifier with another one and add the offset_adjustment to the pointer offset.
    /// This is needed to adjust stack pointer on call and return instructions.
    ///
    /// Does nothing if `old_id` is not a target of the pointer.
    /// If `new_id` already is a target of the pointer,
    /// the adjusted offset gets merged with the existing offset of `new_id`
    /// instead of silently overwriting it, so that no possible offset is lost.
    pub fn replace_abstract_id(
        &mut self,
        old_id: &AbstractIdentifier,
        new_id: &AbstractIdentifier,
        offset_adjustment: &T,
    ) {
        // Removing first also handles the case `old_id == new_id` correctly:
        // the adjusted offset is then simply re-inserted under the same identifier.
        if let Some(old_offset) = self.0.remove(old_id) {
            let new_offset = old_offset.bin_op(BinOpType::PLUS, offset_adjustment);
            self.add_target(new_id.clone(), new_offset);
        }
    }

    /// Return a copy of the pointer where `value` was added to the offset of every target.
    pub fn add_to_offset(&self, value: &T) -> PointerDomain<T> {
        PointerDomain(
            self.0
                .iter()
                .map(|(id, offset)| (id.clone(), offset.bin_op(BinOpType::PLUS, value)))
                .collect(),
        )
    }

    /// Return a copy of the pointer where `value` was subtracted from the offset of every target.
    pub fn sub_from_offset(&self, value: &T) -> PointerDomain<T> {
        PointerDomain(
            self.0
                .iter()
                .map(|(id, offset)| (id.clone(), offset.bin_op(BinOpType::MINUS, value)))
                .collect(),
        )
    }

    /// Get an iterator over all possible abstract targets (together with the offset in the target) the pointer may point to.
    pub fn iter_targets(&self) -> std::collections::btree_map::Iter<AbstractIdentifier, T> {
        self.0.iter()
    }

    /// Get an iterator over all abstract IDs that the pointer may target.
    pub fn ids(&self) -> std::collections::btree_map::Keys<AbstractIdentifier, T> {
        self.0.keys()
    }
}
impl<T: RegisterDomain + Display> PointerDomain<T> {
    /// Build a compact JSON object mapping each target identifier to its offset,
    /// both rendered through their `Display` implementations.
    ///
    /// Intended for pretty printing only; the output cannot be deserialized back into a pointer.
    pub fn to_json_compact(&self) -> serde_json::Value {
        let mut targets = serde_json::Map::new();
        for (id, offset) in &self.0 {
            targets.insert(
                id.to_string(),
                serde_json::Value::String(offset.to_string()),
            );
        }
        serde_json::Value::Object(targets)
    }
}
#[cfg(test)]
mod tests {
    use super::super::{AbstractLocation, BitvectorDomain};
    use super::*;

    /// Wrap an `i64` constant into a concrete `BitvectorDomain` value.
    fn bv(value: i64) -> BitvectorDomain {
        BitvectorDomain::Value(Bitvector::from_i64(value))
    }

    /// Create an abstract identifier for the register with the given name.
    fn new_id(name: &str) -> AbstractIdentifier {
        let location = AbstractLocation::Register(name.into(), 64);
        AbstractIdentifier::new(Tid::new("time0"), location)
    }

    /// Create a pointer with a single target register and a constant offset.
    fn new_pointer_domain(location: &str, offset: i64) -> PointerDomain<BitvectorDomain> {
        PointerDomain::new(new_id(location), bv(offset))
    }

    #[test]
    fn pointer_domain() {
        let pointer = new_pointer_domain("Rax", 0);
        let offset = bv(3);

        // Offset arithmetic keeps the target and only changes the offset.
        assert_eq!(
            pointer.add_to_offset(&offset),
            new_pointer_domain("Rax", 3)
        );
        assert_eq!(
            pointer.sub_from_offset(&offset),
            new_pointer_domain("Rax", -3)
        );

        // Merging pointers with distinct targets keeps both targets unchanged.
        let merged = pointer.merge(&new_pointer_domain("Rbx", 5));
        assert_eq!(merged.0.len(), 2);
        assert_eq!(merged.0.get(&new_id("Rax")), Some(&bv(0)));
        assert_eq!(merged.0.get(&new_id("Rbx")), Some(&bv(5)));
    }

    #[test]
    fn replace_abstract_id() {
        let targets: BTreeMap<_, _> = vec![(new_id("Rax"), bv(5)), (new_id("Rbx"), bv(7))]
            .into_iter()
            .collect();
        let mut pointer = PointerDomain::with_targets(targets);

        pointer.replace_abstract_id(&new_id("Rax"), &new_id("replacement"), &bv(5));

        // Only the replaced target changes: new identifier, offset 5 + 5.
        let expected: BTreeMap<_, _> =
            vec![(new_id("replacement"), bv(10)), (new_id("Rbx"), bv(7))]
                .into_iter()
                .collect();
        assert_eq!(pointer.0, expected);
    }
}
pub mod abstract_domain;
pub mod fixpoint;
pub mod graph;
pub mod interprocedural_fixpoint;
pub mod mem_region;
pub mod pointer_inference;
use crate::analysis::abstract_domain::*;
use crate::abstract_domain::*;
use crate::analysis::graph::Graph;
use crate::bil::Expression;
use crate::prelude::*;
......@@ -7,9 +7,8 @@ use crate::term::*;
use crate::utils::log::*;
use std::collections::{BTreeMap, BTreeSet, HashSet};
use super::data::Data;
use super::identifier::*;
use super::state::State;
use super::Data;
pub struct Context<'a> {
pub graph: Graph<'a>,
......@@ -202,7 +201,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
self.log_debug(
callee_state.set_register(
&self.project.stack_pointer_register,
super::data::PointerDomain::new(
PointerDomain::new(
callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap())
.into(),
......@@ -311,7 +310,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
self.log_debug(
new_state.set_register(
stack_register,
stack_pointer.bin_op(crate::bil::BinOpType::PLUS, &Data::bitvector(offset)),
stack_pointer.bin_op(crate::bil::BinOpType::PLUS, &offset.into()),
),
Some(&call.tid),
);
......@@ -371,7 +370,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
super::object::ObjectType::Heap,
address_bitsize,
);
let pointer = super::data::PointerDomain::new(
let pointer = PointerDomain::new(
object_id,
Bitvector::zero((address_bitsize as usize).into()).into(),
);
......@@ -597,7 +596,7 @@ mod tests {
#[test]
fn context_problem_implementation() {
use crate::analysis::interprocedural_fixpoint::Problem;
use crate::analysis::pointer_inference::data::*;
use crate::analysis::pointer_inference::Data;
use crate::bil::*;
use Expression::*;
......@@ -765,8 +764,8 @@ mod tests {
#[test]
fn update_return() {
use crate::analysis::interprocedural_fixpoint::Problem;
use crate::analysis::pointer_inference::data::*;
use crate::analysis::pointer_inference::object::ObjectType;
use crate::analysis::pointer_inference::Data;
let project = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
......
use super::identifier::*;
use crate::analysis::abstract_domain::*;
use crate::bil::*;
use crate::prelude::*;
use std::collections::{BTreeMap, BTreeSet};
use std::convert::TryFrom;
/// An abstract value representing either a pointer or a constant value.
///
/// Every variant carries (or can compute) the size in bits of the represented value.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub enum Data {
    /// The *Top* element: only the bitsize of the value is recorded.
    Top(BitSize),
    /// A pointer to one or more abstract objects.
    Pointer(PointerDomain),
    /// A non-pointer value, given as an element of the bitvector domain.
    Value(BitvectorDomain),
}
impl Data {
    /// Wrap a concrete bitvector into a non-pointer `Data` value.
    pub fn bitvector(bitv: Bitvector) -> Data {
        let value = BitvectorDomain::Value(bitv);
        Data::Value(value)
    }

    /// If *self* is a pointer, replace the abstract identifier `old_id` with `new_id`
    /// and add `offset_adjustment` to the corresponding pointer offset.
    /// Non-pointer values are left untouched.
    ///
    /// This is needed to adjust stack pointer on call and return instructions.
    pub fn replace_abstract_id(
        &mut self,
        old_id: &AbstractIdentifier,
        new_id: &AbstractIdentifier,
        offset_adjustment: &BitvectorDomain,
    ) {
        match self {
            Self::Pointer(pointer) => {
                pointer.replace_abstract_id(old_id, new_id, offset_adjustment)
            }
            Self::Top(_) | Self::Value(_) => (),
        }
    }

    /// Return the set of all abstract identifiers that *self* may point to.
    /// The set is empty for non-pointer values.
    pub fn referenced_ids(&self) -> BTreeSet<AbstractIdentifier> {
        match self {
            Self::Pointer(pointer) => pointer.get_target_ids(),
            Self::Top(_) | Self::Value(_) => BTreeSet::new(),
        }
    }

    /// If *self* is a pointer, drop every target whose identifier is contained in `ids_to_remove`.
    /// A pointer that would end up without any targets is replaced by Data::Top(..) of the same bitsize.
    pub fn remove_ids(&mut self, ids_to_remove: &BTreeSet<AbstractIdentifier>) {
        // TODO: Some callers don't want to get Top(..) values. Probably has to be handled at the respective callsites.
        if let Data::Pointer(pointer) = self {
            let kept_targets: BTreeMap<AbstractIdentifier, BitvectorDomain> = pointer
                .iter_targets()
                .filter(|(id, _)| !ids_to_remove.contains(*id))
                .map(|(id, offset)| (id.clone(), offset.clone()))
                .collect();
            let replacement = if kept_targets.is_empty() {
                Data::new_top(self.bitsize())
            } else {
                Data::Pointer(PointerDomain::with_targets(kept_targets))
            };
            *self = replacement;
        }
    }
}
impl Data {
    /// Get a compact json-representation of the value.
    ///
    /// *Top* and non-pointer values are rendered as strings,
    /// pointers as an object with the single key `"Pointer"`
    /// whose value maps every target identifier to its offset.
    pub fn to_json_compact(&self) -> serde_json::Value {
        match self {
            Self::Top(bitsize) => serde_json::Value::String(format!("Top:{}", bitsize)),
            Self::Value(bitvector) => serde_json::Value::String(format!("Value: {}", bitvector)),
            Self::Pointer(pointer) => {
                // The pointer's own compact representation is exactly the target map.
                let mut wrapper = serde_json::Map::new();
                wrapper.insert("Pointer".to_string(), pointer.to_json_compact());
                serde_json::Value::Object(wrapper)
            }
        }
    }
}
impl<'a> TryFrom<&'a Data> for &'a Bitvector {
    type Error = ();

    /// Borrow the concrete bitvector contained in `value`.
    /// Fails for everything that is not a known, non-pointer bitvector value.
    fn try_from(value: &'a Data) -> Result<&'a Bitvector, Self::Error> {
        match value {
            Data::Value(BitvectorDomain::Value(bitvector)) => Ok(bitvector),
            Data::Value(BitvectorDomain::Top(_)) | Data::Top(_) | Data::Pointer(_) => Err(()),
        }
    }
}
impl From<BitvectorDomain> for Data {
fn from(value: BitvectorDomain) -> Data {
Data::Value(value)
}
}
/// An abstract value representing a pointer, given as a map from an abstract identifier
/// to the offset in the pointed-to object.
///
/// The map should never be empty. If the map contains more than one key,
/// it indicates that the pointer may point to any of the contained objects.
///
/// Note that `with_targets` does not check for emptiness itself,
/// while `bitsize` panics when called on a pointer without targets.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct PointerDomain(BTreeMap<AbstractIdentifier, BitvectorDomain>);
impl PointerDomain {
    /// Create a new pointer with exactly one target.
    pub fn new(target: AbstractIdentifier, offset: BitvectorDomain) -> PointerDomain {
        let mut map = BTreeMap::new();
        map.insert(target, offset);
        PointerDomain(map)
    }

    /// Create a new pointer from a map of targets.
    ///
    /// The map is taken as is; no check for emptiness is performed here.
    pub fn with_targets(targets: BTreeMap<AbstractIdentifier, BitvectorDomain>) -> PointerDomain {
        PointerDomain(targets)
    }

    /// Get the bitsize of the pointer, read from the offset of its first target.
    ///
    /// # Panics
    ///
    /// Panics if the pointer has no targets.
    pub fn bitsize(&self) -> BitSize {
        self.0
            .values()
            .next()
            .expect("Pointer without targets encountered")
            .bitsize()
    }

    /// Merge two pointers.
    ///
    /// The merged pointer contains all targets of `self` and `other`.
    /// For targets that are contained in both, the offsets are merged.
    pub fn merge(&self, other: &PointerDomain) -> PointerDomain {
        let mut merged_map = self.0.clone();
        for (location, other_offset) in &other.0 {
            let merged_offset = match merged_map.get(location) {
                Some(own_offset) => own_offset.merge(other_offset),
                None => other_offset.clone(),
            };
            merged_map.insert(location.clone(), merged_offset);
        }
        PointerDomain(merged_map)
    }

    /// Add a new target to the pointer.
    /// If the pointer already contains a target with the same abstract identifier, the offsets of both targets get merged.
    pub fn add_target(&mut self, target: AbstractIdentifier, offset: BitvectorDomain) {
        let merged_offset = match self.0.get(&target) {
            Some(old_offset) => old_offset.merge(&offset),
            None => offset,
        };
        self.0.insert(target, merged_offset);
    }

    /// Replace an abstract identifier with another one and add the offset_adjustment to the pointer offset.
    /// This is needed to adjust stack pointer on call and return instructions.
    ///
    /// Does nothing if `old_id` is not a target of the pointer.
    /// If `new_id` already is a target of the pointer,
    /// the adjusted offset gets merged with the existing offset of `new_id`
    /// instead of silently overwriting it, so that no possible offset is lost.
    pub fn replace_abstract_id(
        &mut self,
        old_id: &AbstractIdentifier,
        new_id: &AbstractIdentifier,
        offset_adjustment: &BitvectorDomain,
    ) {
        // Removing first also handles the case `old_id == new_id` correctly:
        // the adjusted offset is then simply re-inserted under the same identifier.
        if let Some(old_offset) = self.0.remove(old_id) {
            let new_offset = old_offset + offset_adjustment.clone();
            self.add_target(new_id.clone(), new_offset);
        }
    }

    /// Return a copy of the pointer where `value` was added to the offset of every target.
    pub fn add_to_offset(&self, value: &BitvectorDomain) -> PointerDomain {
        PointerDomain(
            self.0
                .iter()
                .map(|(id, offset)| (id.clone(), offset.bin_op(BinOpType::PLUS, value)))
                .collect(),
        )
    }

    /// Return a copy of the pointer where `value` was subtracted from the offset of every target.
    pub fn sub_from_offset(&self, value: &BitvectorDomain) -> PointerDomain {
        PointerDomain(
            self.0
                .iter()
                .map(|(id, offset)| (id.clone(), offset.bin_op(BinOpType::MINUS, value)))
                .collect(),
        )
    }

    /// Get an iterator over all possible abstract targets (together with the offset in the target) the pointer may point to.
    pub fn iter_targets(
        &self,
    ) -> std::collections::btree_map::Iter<AbstractIdentifier, BitvectorDomain> {
        self.0.iter()
    }

    /// Get the set of all abstract identifiers that the pointer may target.
    pub fn get_target_ids(&self) -> BTreeSet<AbstractIdentifier> {
        self.0.keys().cloned().collect()
    }
}
impl PointerDomain {
    /// Build a compact JSON object mapping each target identifier to its offset,
    /// both rendered through their `Display` implementations.
    pub fn to_json_compact(&self) -> serde_json::Value {
        let mut targets = serde_json::Map::new();
        for (id, offset) in &self.0 {
            targets.insert(
                id.to_string(),
                serde_json::Value::String(offset.to_string()),
            );
        }
        serde_json::Value::Object(targets)
    }
}
impl ValueDomain for Data {
fn bitsize(&self) -> BitSize {
use Data::*;
match self {
Top(size) => *size,
Pointer(pointer) => pointer.bitsize(),
Value(bitvec) => bitvec.bitsize(),
}
}
fn new_top(bitsize: BitSize) -> Data {
Data::Top(bitsize)
}
/// Compute the (abstract) result of a binary operation
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self {
use BinOpType::*;
use Data::*;
match (self, op, rhs) {
(Value(left), _, Value(right)) => Value(left.bin_op(op, right)),
(Pointer(pointer), PLUS, Value(value)) | (Value(value), PLUS, Pointer(pointer)) => {
Pointer(pointer.add_to_offset(value))
}
(Pointer(pointer), MINUS, Value(value)) => Pointer(pointer.sub_from_offset(value)),
// TODO: AND and OR binops may be used to compute pointers when alignment information about the pointer is known.
(_, EQ, _) | (_, NEQ, _) | (_, LT, _) | (_, LE, _) | (_, SLT, _) | (_, SLE, _) => {
BitvectorDomain::new_top(1).into()
}
(_, PLUS, _)
| (_, MINUS, _)
| (_, TIMES, _)
| (_, DIVIDE, _)
| (_, SDIVIDE, _)
| (_, MOD, _)
| (_, SMOD, _)
| (_, LSHIFT, _)
| (_, RSHIFT, _)
| (_, ARSHIFT, _)
| (_, AND, _)
| (_, OR, _)
| (_, XOR, _) => Data::new_top(self.bitsize()),
}
}
/// Compute the (abstract) result of a unary operation
fn un_op(&self, op: UnOpType) -> Self {
if let Data::Value(value) = self {
Data::Value(value.un_op(op))
} else {
Data::new_top(self.bitsize())
}
}
/// extract a sub-bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
if let Data::Value(value) = self {
Data::Value(value.extract(low_bit, high_bit))
} else {
Data::new_top(high_bit - low_bit + 1)
}
}
/// Extend a bitvector using the given cast type
fn cast(&self, kind: CastType, width: BitSize) -> Self {
if self.bitsize() == width {
// The cast is a no-op.
return self.clone();
}
if let Data::Value(value) = self {
Data::Value(value.cast(kind, width))
} else {
Data::new_top(width)
}
}
/// Concatenate two bitvectors
fn concat(&self, other: &Self) -> Self {
if let (Data::Value(upper_bits), Data::Value(lower_bits)) = (self, other) {
Data::Value(upper_bits.concat(lower_bits))
} else {
Data::new_top(self.bitsize() + other.bitsize())
}
}
}
impl AbstractDomain for Data {
fn top(&self) -> Self {
Data::Top(self.bitsize())
}
fn merge(&self, other: &Self) -> Self {
use Data::*;
match (self, other) {
(Top(bitsize), _) | (_, Top(bitsize)) => Top(*bitsize),
(Pointer(pointer1), Pointer(pointer2)) => Pointer(pointer1.merge(pointer2)),
(Value(val1), Value(val2)) => Value(val1.merge(val2)),
(Pointer(_), Value(_)) | (Value(_), Pointer(_)) => Top(self.bitsize()),
}
}
/// Return whether the element represents a top element or not.
fn is_top(&self) -> bool {
matches!(self, Self::Top(_))
}
}
impl From<PointerDomain> for Data {
fn from(val: PointerDomain) -> Data {
Data::Pointer(val)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap an `i64` constant into a concrete `BitvectorDomain` value.
    fn bv(value: i64) -> BitvectorDomain {
        BitvectorDomain::Value(Bitvector::from_i64(value))
    }

    /// Create an abstract identifier for the register with the given name.
    fn new_id(name: String) -> AbstractIdentifier {
        let location = AbstractLocation::Register(name, 64);
        AbstractIdentifier::new(Tid::new("time0"), location)
    }

    /// Create a pointer with a single target register and a constant offset.
    fn new_pointer_domain(location: String, offset: i64) -> PointerDomain {
        PointerDomain::new(new_id(location), bv(offset))
    }

    /// Like `new_pointer_domain`, but wrapped into a `Data` value.
    fn new_pointer(location: String, offset: i64) -> Data {
        Data::Pointer(new_pointer_domain(location, offset))
    }

    /// Create a non-pointer `Data` value from an `i64` constant.
    fn new_value(value: i64) -> Data {
        Data::Value(bv(value))
    }

    #[test]
    fn data_abstract_domain() {
        let pointer = new_pointer("Rax".into(), 0);
        let data = new_value(42);

        assert_eq!(pointer.merge(&pointer), pointer);
        assert_eq!(pointer.merge(&data), Data::new_top(64));
        assert_eq!(
            data.merge(&new_value(41)),
            Data::Value(BitvectorDomain::new_top(64))
        );

        // Merging two pointers with different targets must still yield a pointer.
        let other_pointer = new_pointer("Rbx".into(), 0);
        assert!(matches!(pointer.merge(&other_pointer), Data::Pointer(_)));
    }

    #[test]
    fn data_value_domain() {
        use crate::bil::BinOpType::*;
        use crate::bil::{CastType, UnOpType};

        let data = new_value(42);
        let three = new_value(3);
        let pointer = new_pointer("Rax".into(), 0);
        assert_eq!(data.bitsize(), 64);

        assert_eq!(data.bin_op(PLUS, &three), new_value(45));
        assert_eq!(pointer.bin_op(PLUS, &three), new_pointer("Rax".into(), 3));
        assert_eq!(three.un_op(UnOpType::NEG), new_value(-3));

        let low_half_of_three = Data::Value(BitvectorDomain::Value(Bitvector::from_i32(3)));
        assert_eq!(three.extract(0, 31), low_half_of_three);
        assert_eq!(data.cast(CastType::SIGNED, 128).bitsize(), 128);

        let one = Data::Value(BitvectorDomain::Value(Bitvector::from_i32(1)));
        let two = Data::Value(BitvectorDomain::Value(Bitvector::from_i32(2)));
        assert_eq!(one.concat(&two), new_value((1 << 32) + 2));
    }

    #[test]
    fn pointer_domain() {
        let pointer = new_pointer_domain("Rax".into(), 0);
        let offset = bv(3);

        assert_eq!(
            pointer.add_to_offset(&offset),
            new_pointer_domain("Rax".into(), 3)
        );
        assert_eq!(
            pointer.sub_from_offset(&offset),
            new_pointer_domain("Rax".into(), -3)
        );

        // Merging pointers with distinct targets keeps both targets unchanged.
        let merged = pointer.merge(&new_pointer_domain("Rbx".into(), 5));
        assert_eq!(merged.0.len(), 2);
        assert_eq!(merged.0.get(&new_id("Rax".into())), Some(&bv(0)));
        assert_eq!(merged.0.get(&new_id("Rbx".into())), Some(&bv(5)));
    }
}
use super::interprocedural_fixpoint::{Computation, NodeValue};
use crate::abstract_domain::{BitvectorDomain, DataDomain};
use crate::analysis::graph::{Graph, Node};
use crate::term::*;
use crate::utils::log::*;
......@@ -8,8 +9,6 @@ use petgraph::Direction;
use std::collections::HashMap;
mod context;
mod data;
mod identifier;
mod object;
mod object_list;
mod state;
......@@ -17,6 +16,9 @@ mod state;
use context::Context;
use state::State;
/// The abstract domain type for representing register values.
type Data = DataDomain<BitvectorDomain>;
pub struct PointerInference<'a> {
computation: Computation<'a, Context<'a>>,
log_collector: crossbeam_channel::Sender<LogMessage>,
......
use super::data::*;
use super::identifier::AbstractIdentifier;
use crate::analysis::abstract_domain::*;
use crate::analysis::mem_region::MemRegion;
use super::Data;
use crate::abstract_domain::*;
use crate::bil::Bitvector;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
......@@ -215,7 +213,7 @@ impl AbstractObjectInfo {
fn get_all_possible_pointer_targets(&self) -> BTreeSet<AbstractIdentifier> {
let mut targets = self.pointer_targets.clone();
for elem in self.memory.iter_values() {
for elem in self.memory.values() {
if let Data::Pointer(pointer) = elem {
for (id, _) in pointer.iter_targets() {
targets.insert(id.clone());
......@@ -233,9 +231,10 @@ impl AbstractObjectInfo {
new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain,
) {
for elem in self.memory.iter_values_mut() {
for elem in self.memory.values_mut() {
elem.replace_abstract_id(old_id, new_id, offset_adjustment);
}
self.memory.clear_top_values();
if self.pointer_targets.get(&old_id).is_some() {
self.pointer_targets.remove(&old_id);
self.pointer_targets.insert(new_id.clone());
......@@ -258,13 +257,14 @@ impl AbstractObjectInfo {
.difference(ids_to_remove)
.cloned()
.collect();
for value in self.memory.iter_values_mut() {
value.remove_ids(ids_to_remove);
for value in self.memory.values_mut() {
value.remove_ids(ids_to_remove); // TODO: This may leave *Top* values in the memory object. Remove them.
}
self.memory.clear_top_values()
}
}
impl AbstractDomain for AbstractObjectInfo {
impl HasTop for AbstractObjectInfo {
fn top(&self) -> Self {
AbstractObjectInfo {
pointer_targets: BTreeSet::new(),
......@@ -274,7 +274,9 @@ impl AbstractDomain for AbstractObjectInfo {
memory: MemRegion::new(self.memory.get_address_bitsize()),
}
}
}
impl AbstractDomain for AbstractObjectInfo {
fn merge(&self, other: &Self) -> Self {
AbstractObjectInfo {
pointer_targets: self
......@@ -288,6 +290,11 @@ impl AbstractDomain for AbstractObjectInfo {
memory: self.memory.merge(&other.memory),
}
}
/// The domain has no *Top* element, thus this function always returns false.
fn is_top(&self) -> bool {
false
}
}
fn same_or_none<T: Eq + Clone>(left: &Option<T>, right: &Option<T>) -> Option<T> {
......
use super::data::*;
use super::identifier::AbstractIdentifier;
use super::object::*;
use crate::analysis::abstract_domain::*;
use super::Data;
use crate::abstract_domain::*;
use crate::bil::Bitvector;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
......@@ -97,7 +96,11 @@ impl AbstractObjectList {
/// Returns an error if the given address has no targets.
/// If the address has more than one target, all targets are merged to one untracked object.
// TODO: Implement write-merging to still tracked objects!
pub fn set_value(&mut self, pointer: PointerDomain, value: Data) -> Result<(), Error> {
pub fn set_value(
&mut self,
pointer: PointerDomain<BitvectorDomain>,
value: Data,
) -> Result<(), Error> {
let mut target_object_set: BTreeSet<usize> = BTreeSet::new();
for (id, _offset) in pointer.iter_targets() {
target_object_set.insert(self.ids.get(id).unwrap().0);
......@@ -303,9 +306,9 @@ impl AbstractObjectList {
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &PointerDomain,
object_pointer: &PointerDomain<BitvectorDomain>,
) -> Result<(), Vec<AbstractIdentifier>> {
let ids = object_pointer.get_target_ids();
let ids: BTreeSet<AbstractIdentifier> = object_pointer.ids().cloned().collect();
let mut possible_double_free_ids = Vec::new();
if ids.len() > 1 {
for id in ids {
......@@ -455,7 +458,6 @@ impl AbstractObjectList {
#[cfg(test)]
mod tests {
use super::*;
use crate::analysis::pointer_inference::identifier::*;
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
......
use super::data::*;
use super::identifier::{AbstractIdentifier, AbstractLocation};
use super::object_list::AbstractObjectList;
use crate::analysis::abstract_domain::*;
use super::Data;
use crate::abstract_domain::*;
use crate::bil::*;
use crate::prelude::*;
use crate::term::symbol::ExternSymbol;
......@@ -154,7 +153,7 @@ impl State {
use Expression::*;
match expression {
Var(variable) => self.get_register(&variable),
Const(bitvector) => Ok(Data::bitvector(bitvector.clone())),
Const(bitvector) => Ok(bitvector.clone().into()),
// TODO: implement handling of endianness for loads and writes!
Load {
memory: _,
......@@ -386,7 +385,7 @@ impl State {
// then these values at positive offsets get overshadowed by the new callers,
// but they do not get properly merged with the values from the other callers!
if let Data::Pointer(pointer) = address {
let mut new_targets = PointerDomain::with_targets(BTreeMap::new());
let mut new_targets = BTreeMap::new();
for (id, offset) in pointer.iter_targets() {
if *id == self.stack_id {
match offset {
......@@ -395,26 +394,26 @@ impl State {
&& !self.caller_stack_ids.is_empty()
{
for caller_id in self.caller_stack_ids.iter() {
new_targets.add_target(caller_id.clone(), offset.clone());
new_targets.insert(caller_id.clone(), offset.clone());
}
// Note that the id of the current stack frame was *not* added.
} else {
new_targets.add_target(id.clone(), offset.clone());
new_targets.insert(id.clone(), offset.clone());
}
}
BitvectorDomain::Top(_bitsize) => {
for caller_id in self.caller_stack_ids.iter() {
new_targets.add_target(caller_id.clone(), offset.clone());
new_targets.insert(caller_id.clone(), offset.clone());
}
// Note that we also add the id of the current stack frame
new_targets.add_target(id.clone(), offset.clone());
new_targets.insert(id.clone(), offset.clone());
}
}
} else {
new_targets.add_target(id.clone(), offset.clone());
new_targets.insert(id.clone(), offset.clone());
}
}
Data::Pointer(new_targets)
Data::Pointer(PointerDomain::with_targets(new_targets))
} else {
address.clone()
}
......@@ -517,7 +516,7 @@ impl State {
/// an error with the list of possibly already freed objects is returned.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &PointerDomain,
object_pointer: &PointerDomain<BitvectorDomain>,
) -> Result<(), Vec<AbstractIdentifier>> {
self.memory.mark_mem_object_as_freed(object_pointer)
}
......@@ -539,7 +538,7 @@ impl State {
let mut ids_to_remove = self.caller_stack_ids.clone();
ids_to_remove.remove(caller_id);
for register_value in self.register.values_mut() {
register_value.remove_ids(&ids_to_remove);
register_value.remove_ids(&ids_to_remove); // TODO: This may leave *Top* elements in the register_value map. Should I remove them?
}
self.memory.remove_ids(&ids_to_remove);
self.caller_stack_ids = BTreeSet::new();
......
......@@ -7,6 +7,7 @@ Parts of the cwe_checker that are written in Rust.
#[macro_use]
extern crate ocaml;
pub mod abstract_domain;
pub mod analysis;
pub mod bil;
pub mod ffi;
......@@ -21,11 +22,3 @@ mod prelude {
pub use crate::term::Tid;
pub use anyhow::{anyhow, Error};
}
#[cfg(test)]
mod tests {
    /// Trivial smoke test checking that the test harness runs.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment