Unverified Commit 281a0207 by Enkelmann Committed by GitHub

Domain refactor (#76)

Code and documentation improvements for all abstract domains
parent 0d2777b0
use super::{AbstractDomain, HasBitSize, HasTop, RegisterDomain};
use crate::bil::*; use crate::bil::*;
use crate::prelude::*; use crate::prelude::*;
use serde::{Deserialize, Serialize};
/// The main trait describing an abstract domain. /// The `BitvectorDomain` is a simple abstract domain describing a bitvector of known length.
/// ///
/// Each abstract domain is partially ordered and has a maximal element (which can be generated by `top()`). /// As values it can only assume a known bitvector or *Top(bitsize)*.
/// Abstract domains of the same type can be merged. #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
/// pub enum BitvectorDomain {
/// TODO: Decide if and how to represent intersects and bottom values! Top(BitSize),
pub trait AbstractDomain: Sized + Eq + Clone { Value(Bitvector),
/// The maximal value of a domain. }
/// Usually it indicates a value for which nothing is known.
fn top(&self) -> Self;
impl AbstractDomain for BitvectorDomain {
/// merge two values. Returns *Top* if the values are not equal.
fn merge(&self, other: &Self) -> Self { fn merge(&self, other: &Self) -> Self {
if self == other { if self == other {
self.clone() self.clone()
...@@ -21,48 +21,21 @@ pub trait AbstractDomain: Sized + Eq + Clone { ...@@ -21,48 +21,21 @@ pub trait AbstractDomain: Sized + Eq + Clone {
} }
} }
/// Returns whether the element represents the top element or not. /// Check if the value is *Top*.
fn is_top(&self) -> bool { fn is_top(&self) -> bool {
*self == self.top() matches!(self, Self::Top(_))
} }
} }
/// A trait for abstract domains that represent values that can be loaded into register or written onto the stack. impl HasTop for BitvectorDomain {
/// Every value has a determined and immutable length (in bits). /// Return a *Top* value with the same bitsize as `self`.
pub trait ValueDomain: AbstractDomain { fn top(&self) -> BitvectorDomain {
/// Returns the size of the value in bits BitvectorDomain::Top(self.bitsize())
fn bitsize(&self) -> BitSize;
/// Return a new top element with the given bitsize
fn new_top(bitsize: BitSize) -> Self;
/// Compute the (abstract) result of a binary operation
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self;
/// Compute the (abstract) result of a unary operation
fn un_op(&self, op: UnOpType) -> Self;
/// extract a sub-bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
Self::new_top(high_bit - low_bit) // TODO: This needs a unit test whether the result has the correct bitwidth!
}
/// Extend a bitvector using the given cast type
fn cast(&self, kind: CastType, width: BitSize) -> Self;
/// Concatenate two bitvectors
fn concat(&self, other: &Self) -> Self {
Self::new_top(self.bitsize() + other.bitsize())
} }
} }
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] impl HasBitSize for BitvectorDomain {
pub enum BitvectorDomain { /// Return the bitsize of `self`.
Top(BitSize),
Value(Bitvector),
}
impl ValueDomain for BitvectorDomain {
fn bitsize(&self) -> BitSize { fn bitsize(&self) -> BitSize {
use BitvectorDomain::*; use BitvectorDomain::*;
match self { match self {
...@@ -70,14 +43,17 @@ impl ValueDomain for BitvectorDomain { ...@@ -70,14 +43,17 @@ impl ValueDomain for BitvectorDomain {
Value(bitvec) => bitvec.width().to_usize() as u16, Value(bitvec) => bitvec.width().to_usize() as u16,
} }
} }
}
impl RegisterDomain for BitvectorDomain {
/// Get a *Top* element with the given bitsize.
fn new_top(bitsize: BitSize) -> BitvectorDomain { fn new_top(bitsize: BitSize) -> BitvectorDomain {
BitvectorDomain::Top(bitsize) BitvectorDomain::Top(bitsize)
} }
/// Evaluate the given binary operation. /// Evaluate the given binary operation.
/// Note that this function assumes that both values have the same bitsize. ///
/// If not, this function should panic. /// For non-shift operations, this function will panic if the operands have different bitsizes.
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self { fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self {
use BinOpType::*; use BinOpType::*;
match op { match op {
...@@ -186,7 +162,6 @@ impl ValueDomain for BitvectorDomain { ...@@ -186,7 +162,6 @@ impl ValueDomain for BitvectorDomain {
/// Extract a sub-bitvector out of a bitvector /// Extract a sub-bitvector out of a bitvector
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self { fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self {
if let BitvectorDomain::Value(bitvec) = self { if let BitvectorDomain::Value(bitvec) = self {
// TODO: Check whether this is correct on a real world example and then write a unit test for it
BitvectorDomain::Value( BitvectorDomain::Value(
bitvec bitvec
.clone() .clone()
...@@ -200,6 +175,7 @@ impl ValueDomain for BitvectorDomain { ...@@ -200,6 +175,7 @@ impl ValueDomain for BitvectorDomain {
} }
} }
/// Perform a size-changing cast on a bitvector.
fn cast(&self, kind: CastType, width: BitSize) -> Self { fn cast(&self, kind: CastType, width: BitSize) -> Self {
if let BitvectorDomain::Value(bitvec) = self { if let BitvectorDomain::Value(bitvec) = self {
use CastType::*; use CastType::*;
...@@ -227,6 +203,7 @@ impl ValueDomain for BitvectorDomain { ...@@ -227,6 +203,7 @@ impl ValueDomain for BitvectorDomain {
} }
} }
/// Concatenate two bitvectors.
fn concat(&self, other: &Self) -> Self { fn concat(&self, other: &Self) -> Self {
match (self, other) { match (self, other) {
(BitvectorDomain::Value(left_bitvec), BitvectorDomain::Value(right_bitvec)) => { (BitvectorDomain::Value(left_bitvec), BitvectorDomain::Value(right_bitvec)) => {
...@@ -245,17 +222,10 @@ impl ValueDomain for BitvectorDomain { ...@@ -245,17 +222,10 @@ impl ValueDomain for BitvectorDomain {
} }
} }
impl AbstractDomain for BitvectorDomain {
fn top(&self) -> BitvectorDomain {
BitvectorDomain::Top(self.bitsize())
}
}
impl std::ops::Add for BitvectorDomain { impl std::ops::Add for BitvectorDomain {
type Output = BitvectorDomain; type Output = BitvectorDomain;
fn add(self, rhs: Self) -> Self { fn add(self, rhs: Self) -> Self {
assert_eq!(self.bitsize(), rhs.bitsize());
self.bin_op(crate::bil::BinOpType::PLUS, &rhs) self.bin_op(crate::bil::BinOpType::PLUS, &rhs)
} }
} }
...@@ -264,7 +234,6 @@ impl std::ops::Sub for BitvectorDomain { ...@@ -264,7 +234,6 @@ impl std::ops::Sub for BitvectorDomain {
type Output = BitvectorDomain; type Output = BitvectorDomain;
fn sub(self, rhs: Self) -> Self { fn sub(self, rhs: Self) -> Self {
assert_eq!(self.bitsize(), rhs.bitsize());
self.bin_op(crate::bil::BinOpType::MINUS, &rhs) self.bin_op(crate::bil::BinOpType::MINUS, &rhs)
} }
} }
...@@ -283,6 +252,16 @@ impl std::convert::From<Bitvector> for BitvectorDomain { ...@@ -283,6 +252,16 @@ impl std::convert::From<Bitvector> for BitvectorDomain {
} }
} }
impl std::convert::TryFrom<&BitvectorDomain> for Bitvector {
type Error = ();
fn try_from(bitvec_domain: &BitvectorDomain) -> Result<Bitvector, ()> {
match bitvec_domain {
BitvectorDomain::Value(bitvec) => Ok(bitvec.clone()),
BitvectorDomain::Top(_) => Err(()),
}
}
}
impl std::fmt::Display for BitvectorDomain { impl std::fmt::Display for BitvectorDomain {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self { match self {
......
...@@ -3,18 +3,29 @@ use crate::prelude::*; ...@@ -3,18 +3,29 @@ use crate::prelude::*;
use derive_more::Deref; use derive_more::Deref;
use std::sync::Arc; use std::sync::Arc;
// TODO: Right now abstract locations are used as giving the location where a pointer to an object is located. /// An abstract identifier is used to identify an object or a value in an abstract state.
// But it could also be used to point into the object (at offset 0). ///
// Can I solve this possible ambivalence in intended usage in a way such that accidentally wrong usage is prevented? /// Since many program states can be represented by the same abstract state in data-flow analysis,
// If not, I have to document the intended usage with a big warning sign. /// one sometimes needs a way to uniquely identify a variable or a memory object in all of the represented program states.
/// Abstract identifiers achieve this by identifying a *time*, i.e. a specific abstract state,
/// and a *location*, i.e. a recipe for abstracting a concrete value from any concrete state that is represented by the abstract state.
/// The value in question then serves as the identifier.
/// For example, a pointer may uniquely determine the memory object it is pointing to.
/// Or a value may represent the value of a variable at a certain time,
/// whereas the value of the variable in the current state is given as an offset to the value at the identified time.
///
/// Since program points may be visited several times during an execution trace (e.g. in loops),
/// the *time* component of an abstract identifier may not actually determine a unique point in time of an execution trace.
/// In this case the meaning of an abstract identifier depends upon its use case.
/// E.g. it may represent the union of all values at the specific *location* for each time the program point is visited during an execution trace
/// or it may only represent the value at the last time the program point was visited.
///
/// An abstract identifier is given by a time identifier and a location identifier. /// An abstract identifier is given by a time identifier and a location identifier.
/// ///
/// For the location identifier see `AbstractLocation`. /// For the location identifier see `AbstractLocation`.
/// The time identifier is given by a `Tid`. /// The time identifier is given by a `Tid`.
/// If it is the Tid of a basic block, then it describes the point in time *before* execution of the first instruction in the block. /// If it is the `Tid` of a basic block, then it describes the point in time *before* execution of the first instruction in the block.
/// If it is the Tid of a Def or Jmp, then it describes the point in time *after* the execution of the Def or Jmp. /// If it is the `Tid` of a `Def` or `Jmp`, then it describes the point in time *after* the execution of the `Def` or `Jmp`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Deref)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Deref)]
#[deref(forward)] #[deref(forward)]
pub struct AbstractIdentifier(Arc<AbstractIdentifierData>); pub struct AbstractIdentifier(Arc<AbstractIdentifierData>);
...@@ -75,6 +86,11 @@ impl AbstractLocation { ...@@ -75,6 +86,11 @@ impl AbstractLocation {
} }
} }
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
/// or an offset to a pointer to another memory location,
/// where the value can be found by (recursively) following the embedded `target` memory location.
///
/// The offset and size variables are given in bytes.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum AbstractMemoryLocation { pub enum AbstractMemoryLocation {
Location { Location {
......
//! This module defines traits describing general properties of abstract domains
//! as well as several abstract domain types implementing these traits.
use crate::bil::*;
mod bitvector;
pub use bitvector::*;
mod identifier;
pub use identifier::*;
mod pointer;
pub use pointer::*;
mod data;
pub use data::*;
mod mem_region;
pub use mem_region::*;
/// The main trait describing an abstract domain.
///
/// Each abstract domain is partially ordered.
/// Abstract domains of the same type can be merged.
///
/// Not every domain has a maximal element:
/// domains that do have one additionally implement `HasTop`,
/// while domains without a *Top* element simply return `false` from `is_top()`.
pub trait AbstractDomain: Sized + Eq + Clone {
/// Merge `self` with `other` and return the merged value as a new element of the domain.
fn merge(&self, other: &Self) -> Self;
/// Returns whether the element represents the top element or not.
fn is_top(&self) -> bool;
}
/// A trait for types representing values with a fixed size (in bits).
///
/// For abstract domains, the bitsize is a parameter of the domain itself,
/// i.e. you cannot merge values of different bitsizes,
/// since they lie in different posets (one for each bitsize).
///
/// It is a supertrait of `RegisterDomain`, so every register domain has a bitsize.
pub trait HasBitSize {
/// Return the size of the represented value in bits.
fn bitsize(&self) -> BitSize;
}
/// An abstract domain implementing this trait has a global maximum, i.e. a *Top* element.
///
/// This trait is separate from `AbstractDomain`, since not every abstract domain has a *Top* element.
pub trait HasTop {
/// Return an instance of the *Top* element.
///
/// Since an abstract domain type may represent a whole family of abstract domains,
/// this function takes an instance of the domain as a parameter,
/// so it can return the *Top* element of the same family member that the provided instance belongs to.
fn top(&self) -> Self;
}
/// A trait for abstract domains that can represent values loaded into CPU registers.
///
/// The domain implements all general operations used to manipulate register values.
/// The domain is parametrized by its bitsize (which represents the size of the register).
/// It has a *Top* element, which is only characterized by its bitsize.
pub trait RegisterDomain: AbstractDomain + HasBitSize + HasTop {
/// Return a new *Top* element with the given bitsize.
fn new_top(bitsize: BitSize) -> Self;
/// Compute the (abstract) result of the binary operation `op` with `self` as left and `rhs` as right operand.
///
/// Implementations may panic if the operands have different bitsizes
/// (the `BitvectorDomain` implementation documents this for non-shift operations).
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self;
/// Compute the (abstract) result of the unary operation `op` applied to `self`.
fn un_op(&self, op: UnOpType) -> Self;
/// Extract the sub-bitvector delimited by `low_bit` and `high_bit`.
// NOTE(review): whether `high_bit` is inclusive is not visible here — confirm against the implementations.
fn extract(&self, low_bit: BitSize, high_bit: BitSize) -> Self;
/// Perform a cast of the given `kind` on the value.
// NOTE(review): `width` is presumably the bitsize of the resulting value — confirm against the implementations.
fn cast(&self, kind: CastType, width: BitSize) -> Self;
/// Concatenate two bitvectors.
// NOTE(review): presumably `self` provides the high bits and `other` the low bits — confirm against the implementations.
fn concat(&self, other: &Self) -> Self;
}
use super::{AbstractDomain, AbstractIdentifier, HasBitSize, RegisterDomain};
use crate::bil::BinOpType;
use crate::prelude::*;
use std::collections::BTreeMap;
use std::fmt::Display;
/// An abstract value representing a pointer given as a map from an abstract identifier
/// to the offset in the pointed to object. The offset itself is also a member of an abstract domain.
///
/// If the map contains more than one key,
/// it indicates that the pointer may point to any of the contained objects.
///
/// A `PointerDomain` value always has at least one target.
/// Trying to create a pointer without targets should always lead to panics:
/// `with_targets()` asserts that the provided map is not empty
/// and `bitsize()` panics if it encounters a pointer without targets.
// NOTE(review): the derived `Deserialize` impl presumably does not check the non-empty invariant — confirm whether that matters for callers.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct PointerDomain<T: RegisterDomain>(BTreeMap<AbstractIdentifier, T>);
impl<T: RegisterDomain> AbstractDomain for PointerDomain<T> {
    /// Merge two pointers.
    ///
    /// The merged pointer contains all targets of `self` and `other`.
    /// For targets that are contained in both, the offsets are merged.
    fn merge(&self, other: &Self) -> Self {
        let mut merged_map = self.0.clone();
        for (location, offset) in other.0.iter() {
            // The entry API resolves the key only once,
            // instead of a `contains_key` check followed by an index access and an `insert`.
            merged_map
                .entry(location.clone())
                .and_modify(|own_offset| *own_offset = own_offset.merge(offset))
                .or_insert_with(|| offset.clone());
        }
        PointerDomain(merged_map)
    }

    /// Returns false, as PointerDomain has no *Top* element.
    fn is_top(&self) -> bool {
        false
    }
}
impl<T: RegisterDomain> HasBitSize for PointerDomain<T> {
    /// Return the bitsize of the pointer, which is read off the offset of the first target.
    /// Should always equal the pointer size of the CPU architecture.
    ///
    /// Panics if the pointer has no targets.
    fn bitsize(&self) -> BitSize {
        let first_offset = self
            .0
            .values()
            .next()
            .expect("Pointer without targets encountered");
        first_offset.bitsize()
    }
}
impl<T: RegisterDomain> PointerDomain<T> {
    /// Create a new pointer with exactly one target.
    pub fn new(target: AbstractIdentifier, offset: T) -> PointerDomain<T> {
        let mut map = BTreeMap::new();
        map.insert(target, offset);
        PointerDomain(map)
    }

    /// Create a new pointer with a set of targets. Panics if no targets are provided.
    pub fn with_targets(targets: BTreeMap<AbstractIdentifier, T>) -> PointerDomain<T> {
        assert!(!targets.is_empty());
        PointerDomain(targets)
    }

    /// Add a new target to the pointer.
    /// If the pointer already contains a target with the same abstract identifier, the offsets of both targets get merged.
    pub fn add_target(&mut self, target: AbstractIdentifier, offset: T) {
        use std::collections::btree_map::Entry;
        // Resolve the key once through the entry API instead of a `get` followed by an `insert`.
        match self.0.entry(target) {
            Entry::Occupied(mut known_target) => {
                let merged_offset = known_target.get().merge(&offset);
                known_target.insert(merged_offset);
            }
            Entry::Vacant(new_target) => {
                new_target.insert(offset);
            }
        }
    }

    /// Replace an abstract identifier with another one and add the offset_adjustment to the pointer offset.
    /// This is needed to adjust stack pointer on call and return instructions.
    ///
    /// Does nothing if `old_id` is not a target of the pointer.
    /// If `new_id` already is a target of the pointer, its offset gets overwritten (not merged).
    pub fn replace_abstract_id(
        &mut self,
        old_id: &AbstractIdentifier,
        new_id: &AbstractIdentifier,
        offset_adjustment: &T,
    ) {
        // `remove` hands back the old offset, so no separate lookup is needed.
        if let Some(old_offset) = self.0.remove(old_id) {
            let new_offset = old_offset.bin_op(BinOpType::PLUS, offset_adjustment);
            self.0.insert(new_id.clone(), new_offset);
        }
    }

    /// Return a copy of the pointer where `value` was added to the offsets of all targets.
    pub fn add_to_offset(&self, value: &T) -> PointerDomain<T> {
        // Build the new map directly, so that no offset is cloned only to be overwritten.
        PointerDomain(
            self.0
                .iter()
                .map(|(id, offset)| (id.clone(), offset.bin_op(BinOpType::PLUS, value)))
                .collect(),
        )
    }

    /// Return a copy of the pointer where `value` was subtracted from the offsets of all targets.
    pub fn sub_from_offset(&self, value: &T) -> PointerDomain<T> {
        PointerDomain(
            self.0
                .iter()
                .map(|(id, offset)| (id.clone(), offset.bin_op(BinOpType::MINUS, value)))
                .collect(),
        )
    }

    /// Get an iterator over all possible abstract targets (together with the offset in the target) the pointer may point to.
    pub fn iter_targets(&self) -> std::collections::btree_map::Iter<'_, AbstractIdentifier, T> {
        self.0.iter()
    }

    /// Get an iterator over all abstract IDs that the pointer may target.
    pub fn ids(&self) -> std::collections::btree_map::Keys<'_, AbstractIdentifier, T> {
        self.0.keys()
    }
}
impl<T: RegisterDomain + Display> PointerDomain<T> {
    /// Build a compact JSON object mapping the printed form of each target ID
    /// to the printed form of the corresponding offset.
    ///
    /// Intended for pretty printing, not useable for serialization/deserialization.
    pub fn to_json_compact(&self) -> serde_json::Value {
        let mut compact_targets = serde_json::Map::new();
        for (id, offset) in self.0.iter() {
            let printed_offset = serde_json::Value::String(offset.to_string());
            compact_targets.insert(id.to_string(), printed_offset);
        }
        serde_json::Value::Object(compact_targets)
    }
}
#[cfg(test)]
mod tests {
    use super::super::{AbstractLocation, BitvectorDomain};
    use super::*;

    /// Shorthand for a concrete bitvector value in the `BitvectorDomain`.
    fn bv(value: i64) -> BitvectorDomain {
        BitvectorDomain::Value(Bitvector::from_i64(value))
    }

    /// Create an abstract ID for the 64-bit register `name` at the mock time `time0`.
    fn new_id(name: &str) -> AbstractIdentifier {
        AbstractIdentifier::new(
            Tid::new("time0"),
            AbstractLocation::Register(name.into(), 64),
        )
    }

    /// Create a pointer with a single target given by the register `location` and the concrete `offset`.
    fn new_pointer_domain(location: &str, offset: i64) -> PointerDomain<BitvectorDomain> {
        let id = new_id(location);
        PointerDomain::new(id, bv(offset))
    }

    /// Check offset arithmetic and the merging of pointers with distinct targets.
    #[test]
    fn pointer_domain() {
        let pointer = new_pointer_domain("Rax", 0);
        let offset = bv(3);

        let pointer_plus = new_pointer_domain("Rax", 3);
        let pointer_minus = new_pointer_domain("Rax", -3);
        assert_eq!(pointer.add_to_offset(&offset), pointer_plus);
        assert_eq!(pointer.sub_from_offset(&offset), pointer_minus);

        let other_pointer = new_pointer_domain("Rbx", 5);
        let merged = pointer.merge(&other_pointer);
        assert_eq!(merged.0.len(), 2);
        assert_eq!(merged.0.get(&new_id("Rax")), Some(&bv(0)));
        assert_eq!(merged.0.get(&new_id("Rbx")), Some(&bv(5)));
    }

    /// Check that replacing an ID adjusts the offset and leaves the other targets untouched.
    #[test]
    fn replace_abstract_id() {
        let mut targets = BTreeMap::new();
        targets.insert(new_id("Rax"), bv(5));
        targets.insert(new_id("Rbx"), bv(7));
        let mut pointer = PointerDomain::with_targets(targets);

        pointer.replace_abstract_id(&new_id("Rax"), &new_id("replacement"), &bv(5));

        let mut new_targets = BTreeMap::new();
        new_targets.insert(new_id("replacement"), bv(10));
        new_targets.insert(new_id("Rbx"), bv(7));
        assert_eq!(pointer.0, new_targets);
    }
}
pub mod abstract_domain;
pub mod fixpoint; pub mod fixpoint;
pub mod graph; pub mod graph;
pub mod interprocedural_fixpoint; pub mod interprocedural_fixpoint;
pub mod mem_region;
pub mod pointer_inference; pub mod pointer_inference;
use crate::analysis::abstract_domain::*; use crate::abstract_domain::*;
use crate::analysis::graph::Graph; use crate::analysis::graph::Graph;
use crate::bil::Expression; use crate::bil::Expression;
use crate::prelude::*; use crate::prelude::*;
...@@ -7,9 +7,8 @@ use crate::term::*; ...@@ -7,9 +7,8 @@ use crate::term::*;
use crate::utils::log::*; use crate::utils::log::*;
use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::collections::{BTreeMap, BTreeSet, HashSet};
use super::data::Data;
use super::identifier::*;
use super::state::State; use super::state::State;
use super::Data;
pub struct Context<'a> { pub struct Context<'a> {
pub graph: Graph<'a>, pub graph: Graph<'a>,
...@@ -202,7 +201,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> ...@@ -202,7 +201,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
self.log_debug( self.log_debug(
callee_state.set_register( callee_state.set_register(
&self.project.stack_pointer_register, &self.project.stack_pointer_register,
super::data::PointerDomain::new( PointerDomain::new(
callee_stack_id.clone(), callee_stack_id.clone(),
Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap()) Bitvector::zero(apint::BitWidth::new(address_bitsize as usize).unwrap())
.into(), .into(),
...@@ -311,7 +310,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> ...@@ -311,7 +310,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
self.log_debug( self.log_debug(
new_state.set_register( new_state.set_register(
stack_register, stack_register,
stack_pointer.bin_op(crate::bil::BinOpType::PLUS, &Data::bitvector(offset)), stack_pointer.bin_op(crate::bil::BinOpType::PLUS, &offset.into()),
), ),
Some(&call.tid), Some(&call.tid),
); );
...@@ -371,7 +370,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a> ...@@ -371,7 +370,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Problem<'a> for Context<'a>
super::object::ObjectType::Heap, super::object::ObjectType::Heap,
address_bitsize, address_bitsize,
); );
let pointer = super::data::PointerDomain::new( let pointer = PointerDomain::new(
object_id, object_id,
Bitvector::zero((address_bitsize as usize).into()).into(), Bitvector::zero((address_bitsize as usize).into()).into(),
); );
...@@ -597,7 +596,7 @@ mod tests { ...@@ -597,7 +596,7 @@ mod tests {
#[test] #[test]
fn context_problem_implementation() { fn context_problem_implementation() {
use crate::analysis::interprocedural_fixpoint::Problem; use crate::analysis::interprocedural_fixpoint::Problem;
use crate::analysis::pointer_inference::data::*; use crate::analysis::pointer_inference::Data;
use crate::bil::*; use crate::bil::*;
use Expression::*; use Expression::*;
...@@ -765,8 +764,8 @@ mod tests { ...@@ -765,8 +764,8 @@ mod tests {
#[test] #[test]
fn update_return() { fn update_return() {
use crate::analysis::interprocedural_fixpoint::Problem; use crate::analysis::interprocedural_fixpoint::Problem;
use crate::analysis::pointer_inference::data::*;
use crate::analysis::pointer_inference::object::ObjectType; use crate::analysis::pointer_inference::object::ObjectType;
use crate::analysis::pointer_inference::Data;
let project = mock_project(); let project = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded(); let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded(); let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
......
use super::interprocedural_fixpoint::{Computation, NodeValue}; use super::interprocedural_fixpoint::{Computation, NodeValue};
use crate::abstract_domain::{BitvectorDomain, DataDomain};
use crate::analysis::graph::{Graph, Node}; use crate::analysis::graph::{Graph, Node};
use crate::term::*; use crate::term::*;
use crate::utils::log::*; use crate::utils::log::*;
...@@ -8,8 +9,6 @@ use petgraph::Direction; ...@@ -8,8 +9,6 @@ use petgraph::Direction;
use std::collections::HashMap; use std::collections::HashMap;
mod context; mod context;
mod data;
mod identifier;
mod object; mod object;
mod object_list; mod object_list;
mod state; mod state;
...@@ -17,6 +16,9 @@ mod state; ...@@ -17,6 +16,9 @@ mod state;
use context::Context; use context::Context;
use state::State; use state::State;
/// The abstract domain type for representing register values.
type Data = DataDomain<BitvectorDomain>;
pub struct PointerInference<'a> { pub struct PointerInference<'a> {
computation: Computation<'a, Context<'a>>, computation: Computation<'a, Context<'a>>,
log_collector: crossbeam_channel::Sender<LogMessage>, log_collector: crossbeam_channel::Sender<LogMessage>,
......
use super::data::*; use super::Data;
use super::identifier::AbstractIdentifier; use crate::abstract_domain::*;
use crate::analysis::abstract_domain::*;
use crate::analysis::mem_region::MemRegion;
use crate::bil::Bitvector; use crate::bil::Bitvector;
use crate::prelude::*; use crate::prelude::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
...@@ -215,7 +213,7 @@ impl AbstractObjectInfo { ...@@ -215,7 +213,7 @@ impl AbstractObjectInfo {
fn get_all_possible_pointer_targets(&self) -> BTreeSet<AbstractIdentifier> { fn get_all_possible_pointer_targets(&self) -> BTreeSet<AbstractIdentifier> {
let mut targets = self.pointer_targets.clone(); let mut targets = self.pointer_targets.clone();
for elem in self.memory.iter_values() { for elem in self.memory.values() {
if let Data::Pointer(pointer) = elem { if let Data::Pointer(pointer) = elem {
for (id, _) in pointer.iter_targets() { for (id, _) in pointer.iter_targets() {
targets.insert(id.clone()); targets.insert(id.clone());
...@@ -233,9 +231,10 @@ impl AbstractObjectInfo { ...@@ -233,9 +231,10 @@ impl AbstractObjectInfo {
new_id: &AbstractIdentifier, new_id: &AbstractIdentifier,
offset_adjustment: &BitvectorDomain, offset_adjustment: &BitvectorDomain,
) { ) {
for elem in self.memory.iter_values_mut() { for elem in self.memory.values_mut() {
elem.replace_abstract_id(old_id, new_id, offset_adjustment); elem.replace_abstract_id(old_id, new_id, offset_adjustment);
} }
self.memory.clear_top_values();
if self.pointer_targets.get(&old_id).is_some() { if self.pointer_targets.get(&old_id).is_some() {
self.pointer_targets.remove(&old_id); self.pointer_targets.remove(&old_id);
self.pointer_targets.insert(new_id.clone()); self.pointer_targets.insert(new_id.clone());
...@@ -258,13 +257,14 @@ impl AbstractObjectInfo { ...@@ -258,13 +257,14 @@ impl AbstractObjectInfo {
.difference(ids_to_remove) .difference(ids_to_remove)
.cloned() .cloned()
.collect(); .collect();
for value in self.memory.iter_values_mut() { for value in self.memory.values_mut() {
value.remove_ids(ids_to_remove); value.remove_ids(ids_to_remove); // TODO: This may leave *Top* values in the memory object. Remove them.
} }
self.memory.clear_top_values()
} }
} }
impl AbstractDomain for AbstractObjectInfo { impl HasTop for AbstractObjectInfo {
fn top(&self) -> Self { fn top(&self) -> Self {
AbstractObjectInfo { AbstractObjectInfo {
pointer_targets: BTreeSet::new(), pointer_targets: BTreeSet::new(),
...@@ -274,7 +274,9 @@ impl AbstractDomain for AbstractObjectInfo { ...@@ -274,7 +274,9 @@ impl AbstractDomain for AbstractObjectInfo {
memory: MemRegion::new(self.memory.get_address_bitsize()), memory: MemRegion::new(self.memory.get_address_bitsize()),
} }
} }
}
impl AbstractDomain for AbstractObjectInfo {
fn merge(&self, other: &Self) -> Self { fn merge(&self, other: &Self) -> Self {
AbstractObjectInfo { AbstractObjectInfo {
pointer_targets: self pointer_targets: self
...@@ -288,6 +290,11 @@ impl AbstractDomain for AbstractObjectInfo { ...@@ -288,6 +290,11 @@ impl AbstractDomain for AbstractObjectInfo {
memory: self.memory.merge(&other.memory), memory: self.memory.merge(&other.memory),
} }
} }
/// The domain has no *Top* element, thus this function always returns false.
fn is_top(&self) -> bool {
false
}
} }
fn same_or_none<T: Eq + Clone>(left: &Option<T>, right: &Option<T>) -> Option<T> { fn same_or_none<T: Eq + Clone>(left: &Option<T>, right: &Option<T>) -> Option<T> {
......
use super::data::*;
use super::identifier::AbstractIdentifier;
use super::object::*; use super::object::*;
use crate::analysis::abstract_domain::*; use super::Data;
use crate::abstract_domain::*;
use crate::bil::Bitvector; use crate::bil::Bitvector;
use crate::prelude::*; use crate::prelude::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
...@@ -97,7 +96,11 @@ impl AbstractObjectList { ...@@ -97,7 +96,11 @@ impl AbstractObjectList {
/// Returns an error if the given address has no targets. /// Returns an error if the given address has no targets.
/// If the address has more than one target, all targets are merged to one untracked object. /// If the address has more than one target, all targets are merged to one untracked object.
// TODO: Implement write-merging to still tracked objects! // TODO: Implement write-merging to still tracked objects!
pub fn set_value(&mut self, pointer: PointerDomain, value: Data) -> Result<(), Error> { pub fn set_value(
&mut self,
pointer: PointerDomain<BitvectorDomain>,
value: Data,
) -> Result<(), Error> {
let mut target_object_set: BTreeSet<usize> = BTreeSet::new(); let mut target_object_set: BTreeSet<usize> = BTreeSet::new();
for (id, _offset) in pointer.iter_targets() { for (id, _offset) in pointer.iter_targets() {
target_object_set.insert(self.ids.get(id).unwrap().0); target_object_set.insert(self.ids.get(id).unwrap().0);
...@@ -303,9 +306,9 @@ impl AbstractObjectList { ...@@ -303,9 +306,9 @@ impl AbstractObjectList {
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status. /// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
pub fn mark_mem_object_as_freed( pub fn mark_mem_object_as_freed(
&mut self, &mut self,
object_pointer: &PointerDomain, object_pointer: &PointerDomain<BitvectorDomain>,
) -> Result<(), Vec<AbstractIdentifier>> { ) -> Result<(), Vec<AbstractIdentifier>> {
let ids = object_pointer.get_target_ids(); let ids: BTreeSet<AbstractIdentifier> = object_pointer.ids().cloned().collect();
let mut possible_double_free_ids = Vec::new(); let mut possible_double_free_ids = Vec::new();
if ids.len() > 1 { if ids.len() > 1 {
for id in ids { for id in ids {
...@@ -455,7 +458,6 @@ impl AbstractObjectList { ...@@ -455,7 +458,6 @@ impl AbstractObjectList {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::analysis::pointer_inference::identifier::*;
fn bv(value: i64) -> BitvectorDomain { fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value)) BitvectorDomain::Value(Bitvector::from_i64(value))
......
use super::data::*;
use super::identifier::{AbstractIdentifier, AbstractLocation};
use super::object_list::AbstractObjectList; use super::object_list::AbstractObjectList;
use crate::analysis::abstract_domain::*; use super::Data;
use crate::abstract_domain::*;
use crate::bil::*; use crate::bil::*;
use crate::prelude::*; use crate::prelude::*;
use crate::term::symbol::ExternSymbol; use crate::term::symbol::ExternSymbol;
...@@ -154,7 +153,7 @@ impl State { ...@@ -154,7 +153,7 @@ impl State {
use Expression::*; use Expression::*;
match expression { match expression {
Var(variable) => self.get_register(&variable), Var(variable) => self.get_register(&variable),
Const(bitvector) => Ok(Data::bitvector(bitvector.clone())), Const(bitvector) => Ok(bitvector.clone().into()),
// TODO: implement handling of endianness for loads and writes! // TODO: implement handling of endianness for loads and writes!
Load { Load {
memory: _, memory: _,
...@@ -386,7 +385,7 @@ impl State { ...@@ -386,7 +385,7 @@ impl State {
// then these values at positive offsets get overshadowed by the new callers, // then these values at positive offsets get overshadowed by the new callers,
// but they get not properly merged with the values from the other callers! // but they get not properly merged with the values from the other callers!
if let Data::Pointer(pointer) = address { if let Data::Pointer(pointer) = address {
let mut new_targets = PointerDomain::with_targets(BTreeMap::new()); let mut new_targets = BTreeMap::new();
for (id, offset) in pointer.iter_targets() { for (id, offset) in pointer.iter_targets() {
if *id == self.stack_id { if *id == self.stack_id {
match offset { match offset {
...@@ -395,26 +394,26 @@ impl State { ...@@ -395,26 +394,26 @@ impl State {
&& !self.caller_stack_ids.is_empty() && !self.caller_stack_ids.is_empty()
{ {
for caller_id in self.caller_stack_ids.iter() { for caller_id in self.caller_stack_ids.iter() {
new_targets.add_target(caller_id.clone(), offset.clone()); new_targets.insert(caller_id.clone(), offset.clone());
} }
// Note that the id of the current stack frame was *not* added. // Note that the id of the current stack frame was *not* added.
} else { } else {
new_targets.add_target(id.clone(), offset.clone()); new_targets.insert(id.clone(), offset.clone());
} }
} }
BitvectorDomain::Top(_bitsize) => { BitvectorDomain::Top(_bitsize) => {
for caller_id in self.caller_stack_ids.iter() { for caller_id in self.caller_stack_ids.iter() {
new_targets.add_target(caller_id.clone(), offset.clone()); new_targets.insert(caller_id.clone(), offset.clone());
} }
// Note that we also add the id of the current stack frame // Note that we also add the id of the current stack frame
new_targets.add_target(id.clone(), offset.clone()); new_targets.insert(id.clone(), offset.clone());
} }
} }
} else { } else {
new_targets.add_target(id.clone(), offset.clone()); new_targets.insert(id.clone(), offset.clone());
} }
} }
Data::Pointer(new_targets) Data::Pointer(PointerDomain::with_targets(new_targets))
} else { } else {
address.clone() address.clone()
} }
...@@ -517,7 +516,7 @@ impl State { ...@@ -517,7 +516,7 @@ impl State {
/// an error with the list of possibly already freed objects is returned. /// an error with the list of possibly already freed objects is returned.
pub fn mark_mem_object_as_freed( pub fn mark_mem_object_as_freed(
&mut self, &mut self,
object_pointer: &PointerDomain, object_pointer: &PointerDomain<BitvectorDomain>,
) -> Result<(), Vec<AbstractIdentifier>> { ) -> Result<(), Vec<AbstractIdentifier>> {
self.memory.mark_mem_object_as_freed(object_pointer) self.memory.mark_mem_object_as_freed(object_pointer)
} }
...@@ -539,7 +538,7 @@ impl State { ...@@ -539,7 +538,7 @@ impl State {
let mut ids_to_remove = self.caller_stack_ids.clone(); let mut ids_to_remove = self.caller_stack_ids.clone();
ids_to_remove.remove(caller_id); ids_to_remove.remove(caller_id);
for register_value in self.register.values_mut() { for register_value in self.register.values_mut() {
register_value.remove_ids(&ids_to_remove); register_value.remove_ids(&ids_to_remove); // TODO: This may leave *Top* elements in the register_value map. Should I remove them?
} }
self.memory.remove_ids(&ids_to_remove); self.memory.remove_ids(&ids_to_remove);
self.caller_stack_ids = BTreeSet::new(); self.caller_stack_ids = BTreeSet::new();
......
...@@ -7,6 +7,7 @@ Parts of the cwe_checker that are written in Rust. ...@@ -7,6 +7,7 @@ Parts of the cwe_checker that are written in Rust.
#[macro_use] #[macro_use]
extern crate ocaml; extern crate ocaml;
pub mod abstract_domain;
pub mod analysis; pub mod analysis;
pub mod bil; pub mod bil;
pub mod ffi; pub mod ffi;
...@@ -21,11 +22,3 @@ mod prelude { ...@@ -21,11 +22,3 @@ mod prelude {
pub use crate::term::Tid; pub use crate::term::Tid;
pub use anyhow::{anyhow, Error}; pub use anyhow::{anyhow, Error};
} }
#[cfg(test)]
mod tests {
#[test]
fn it_works() {
assert_eq!(2 + 2, 4);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment