Unverified Commit 2f0c1e49 by Enkelmann Committed by GitHub

Pointer inference cleanup (#328)

parent 3cc6cf5b
......@@ -228,9 +228,6 @@
"calloc",
"realloc",
"xmalloc"
],
"deallocation_symbols": [
"free"
]
},
"StringAbstraction": {
......
......@@ -7,7 +7,6 @@ use crate::utils::log::*;
use std::collections::{BTreeMap, BTreeSet};
use super::state::State;
use super::ValueDomain;
use super::{Config, Data, VERSION};
/// Contains methods of the `Context` struct that deal with the manipulation of abstract IDs.
......@@ -36,8 +35,6 @@ pub struct Context<'a> {
pub log_collector: crossbeam_channel::Sender<LogThreadMsg>,
/// Names of `malloc`-like extern functions.
pub allocation_symbols: Vec<String>,
/// Names of `free`-like extern functions.
pub deallocation_symbols: Vec<String>,
}
impl<'a> Context<'a> {
......@@ -55,7 +52,6 @@ impl<'a> Context<'a> {
fn_signatures: analysis_results.function_signatures.unwrap(),
log_collector,
allocation_symbols: config.allocation_symbols,
deallocation_symbols: config.deallocation_symbols,
}
}
......@@ -120,61 +116,16 @@ impl<'a> Context<'a> {
}
}
/// If the given `extern_symbol` is a call to a known allocation function
/// return the size of the memory object allocated by it.
///
/// The function returns a `Top` element if the size could not be determined.
/// Known allocation functions: `malloc`, `realloc`, `calloc`.
fn get_allocation_size_of_alloc_call(
&self,
state: &State,
extern_symbol: &ExternSymbol,
) -> ValueDomain {
let address_bytesize = self.project.get_pointer_bytesize();
// Determine the requested size from the symbol-specific parameters.
// Any parameter that cannot be evaluated degrades to `Top` of pointer width.
let object_size = match extern_symbol.name.as_str() {
"malloc" => {
// malloc(size): the size is the first (and only) parameter.
let size_parameter = extern_symbol.parameters.get(0).unwrap();
state
.eval_parameter_arg(size_parameter, &self.project.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(address_bytesize))
}
"realloc" => {
// realloc(ptr, size): the size is the second parameter.
let size_parameter = extern_symbol.parameters.get(1).unwrap();
state
.eval_parameter_arg(size_parameter, &self.project.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(address_bytesize))
}
"calloc" => {
// calloc(nmemb, size): the total size is the product of both parameters.
let size_param1 = extern_symbol.parameters.get(0).unwrap();
let size_param2 = extern_symbol.parameters.get(1).unwrap();
let param1_value = state
.eval_parameter_arg(size_param1, &self.project.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(address_bytesize));
let param2_value = state
.eval_parameter_arg(size_param2, &self.project.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(address_bytesize));
param1_value.bin_op(BinOpType::IntMult, &param2_value)
}
// Unknown allocation symbol: size cannot be determined.
_ => DataDomain::new_top(address_bytesize),
};
// Only an absolute (non-pointer) value is a meaningful object size;
// anything else (relative pointer values, Top) is reported as `Top`.
object_size
.get_if_absolute_value()
.cloned()
.unwrap_or_else(|| ValueDomain::new_top(address_bytesize))
}
/// Add a new abstract object and a pointer to it in the return register of an extern call.
/// This models the behaviour of `malloc`-like functions,
/// except that we cannot represent possible `NULL` pointers as return values yet.
fn add_new_object_in_call_return_register(
&self,
state: &State,
mut new_state: State,
mut state: State,
call: &Term<Jmp>,
extern_symbol: &ExternSymbol,
) -> State {
let address_bytesize = self.project.get_pointer_bytesize();
let object_size = self.get_allocation_size_of_alloc_call(state, extern_symbol);
match extern_symbol.get_unique_return_register() {
Ok(return_register) => {
......@@ -182,168 +133,22 @@ impl<'a> Context<'a> {
call.tid.clone(),
AbstractLocation::from_var(return_register).unwrap(),
);
new_state.memory.add_abstract_object(
state.memory.add_abstract_object(
object_id.clone(),
address_bytesize,
Some(super::object::ObjectType::Heap),
);
new_state.memory.set_lower_index_bound(
&object_id,
&Bitvector::zero(address_bytesize.into()).into(),
);
new_state.memory.set_upper_index_bound(
&object_id,
&(object_size - Bitvector::one(address_bytesize.into()).into()),
);
let pointer = Data::from_target(
object_id,
Bitvector::zero(apint::BitWidth::from(address_bytesize)).into(),
);
new_state.set_register(return_register, pointer);
new_state
state.set_register(return_register, pointer);
state
}
Err(err) => {
// We cannot track the new object, since we do not know where to store the pointer to it.
self.log_debug(Err(err), Some(&call.tid));
new_state
}
}
}
/// Mark the object that the parameter of a call is pointing to as freed.
/// If the object may have been already freed, generate a CWE warning.
/// This models the behaviour of `free` and similar functions.
fn mark_parameter_object_as_freed(
&self,
state: &State,
mut new_state: State,
call: &Term<Jmp>,
extern_symbol: &ExternSymbol,
) -> State {
// `free`-like functions are expected to have exactly one parameter (the pointer to free).
match extern_symbol.get_unique_parameter() {
Ok(parameter) => {
let parameter_value =
state.eval_parameter_arg(parameter, &self.project.runtime_memory_image);
match parameter_value {
Ok(memory_object_pointer) => {
// Mark all possible pointer targets as freed.
// An error here indicates possible double frees, which are reported as CWE-415.
if let Err(possible_double_frees) =
new_state.mark_mem_object_as_freed(&memory_object_pointer)
{
let warning = CweWarning {
name: "CWE415".to_string(),
version: VERSION.to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: vec![possible_double_frees
.into_iter()
.map(|(id, err)| format!("{}: {}", id, err))
.collect()],
description: format!(
"(Double Free) Object may have been freed before at {}",
call.tid.address
),
};
// The send result is deliberately ignored; logging failures must not abort the analysis.
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
// Drop memory objects that are no longer referenced after the free.
new_state.remove_unreferenced_objects();
new_state
}
Err(err) => {
// The parameter could not be evaluated; only log for debugging.
self.log_debug(Err(err), Some(&call.tid));
new_state
}
}
}
Err(err) => {
// We do not know which memory object to free
self.log_debug(Err(err), Some(&call.tid));
new_state
}
}
}
/// Check all parameter registers of a call for dangling pointers and report possible use-after-frees.
///
/// For every evaluable parameter that points to a dangling memory object
/// a CWE-416 warning is sent to the log collector
/// and the affected objects are flagged to suppress duplicate warnings for the same root cause.
fn check_parameter_register_for_dangling_pointer<'iter, I>(
&self,
state: &mut State,
call: &Term<Jmp>,
parameters: I,
) where
I: Iterator<Item = &'iter Arg>,
{
for parameter in parameters {
match state.eval_parameter_arg(parameter, &self.project.runtime_memory_image) {
Ok(value) => {
// `true`: also report (unique) objects whose liveness state is unknown.
if state.memory.is_dangling_pointer(&value, true) {
// Flag the targets so that subsequent warnings with the same root cause are suppressed.
state
.memory
.mark_dangling_pointer_targets_as_flagged(&value);
let warning = CweWarning {
name: "CWE416".to_string(),
version: VERSION.to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Use After Free) Call at {} may access freed memory",
call.tid.address
),
};
// The send result is deliberately ignored; logging failures must not abort the analysis.
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
}
// Parameters that cannot be evaluated are only logged for debugging.
Err(err) => self.log_debug(
Err(err.context(format!(
"Function parameter {:?} could not be evaluated",
parameter
))),
Some(&call.tid),
),
}
}
}
/// Check whether a parameter of a call to an extern symbol may point outside of the bounds of a memory object.
/// If yes, generate a CWE-warning,
/// since the pointer may be used for an out-of-bounds memory access by the function.
fn check_parameter_register_for_out_of_bounds_pointer(
&self,
state: &State,
call: &Term<Jmp>,
extern_symbol: &ExternSymbol,
) {
for parameter in extern_symbol.parameters.iter() {
match state.eval_parameter_arg(parameter, &self.project.runtime_memory_image) {
Ok(data) => {
// If any possible target of the pointer lies out of bounds, emit a CWE-119 warning.
if state.pointer_contains_out_of_bounds_target(
&data,
&self.project.runtime_memory_image,
) {
let warning = CweWarning {
name: "CWE119".to_string(),
version: VERSION.to_string(),
addresses: vec![call.tid.address.clone()],
tids: vec![format!("{}", call.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Buffer Overflow) Call to {} at {} may access out-of-bounds memory",
extern_symbol.name,
call.tid.address
),
};
// The send result is deliberately ignored; logging failures must not abort the analysis.
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
}
// Parameters that cannot be evaluated are only logged for debugging.
Err(err) => self.log_debug(
Err(err.context(format!(
"Function parameter {:?} could not be evaluated",
parameter
))),
Some(&call.tid),
),
}
}
}
......
use super::super::ValueDomain;
use super::*;
fn bv(value: i64) -> ValueDomain {
......@@ -56,7 +57,6 @@ fn mock_project() -> (Project, Config) {
project,
Config {
allocation_symbols: vec!["malloc".into()],
deallocation_symbols: vec!["free".into()],
},
)
}
......
......@@ -16,22 +16,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
/// Update the state according to the effects of the given `Def` term.
fn update_def(&self, state: &Self::Value, def: &Term<Def>) -> Option<Self::Value> {
let mut new_state = state.clone();
// first check for use-after-frees
if new_state.contains_access_of_dangling_memory(&def.term) {
let warning = CweWarning {
name: "CWE416".to_string(),
version: VERSION.to_string(),
addresses: vec![def.tid.address.clone()],
tids: vec![format!("{}", def.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Use After Free) Access through a dangling pointer at {}",
def.tid.address
),
};
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
// check for null dereferences
match new_state.check_def_for_null_dereferences(def) {
Err(_) => {
......@@ -41,38 +25,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
Ok(true) => self.report_null_deref(&def.tid),
Ok(false) => (), // no null dereference detected
}
// check for out-of-bounds memory access
if new_state
.contains_out_of_bounds_mem_access(&def.term, &self.project.runtime_memory_image)
{
let (warning_name, warning_description) = match &def.term {
Def::Load { .. } => (
"CWE125",
format!(
"(Out-of-bounds Read) Memory load at {} may be out of bounds",
def.tid.address,
),
),
Def::Store { .. } => (
"CWE787",
format!(
"(Out-of-bounds Write) Memory write at {} may be out of bounds",
def.tid.address,
),
),
Def::Assign { .. } => panic!(),
};
let warning = CweWarning {
name: warning_name.to_string(),
version: VERSION.to_string(),
addresses: vec![def.tid.address.clone()],
tids: vec![format!("{}", def.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: warning_description,
};
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
match &def.term {
Def::Store { address, value } => {
......@@ -118,23 +70,12 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
/// The resulting state is the state at the start of the call target function.
fn update_call(
&self,
state: &State,
_state: &State,
call_term: &Term<Jmp>,
_target_node: &crate::analysis::graph::Node,
_calling_convention: &Option<String>,
) -> Option<State> {
if let Jmp::Call {
target: ref callee_tid,
return_: _,
} = call_term.term
{
// Check call parameters for dangling pointers
let callee_fn_sig = self.fn_signatures.get(callee_tid).unwrap();
self.check_parameter_register_for_dangling_pointer(
&mut state.clone(),
call_term,
callee_fn_sig.parameters.keys(),
);
if let Jmp::Call { .. } = call_term.term {
// No information flows from caller to the callee in the analysis.
None
} else if let Jmp::CallInd { .. } = call_term.term {
......@@ -287,20 +228,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
};
let mut new_state = state.clone();
if let Some(extern_symbol) = self.extern_symbol_map.get(call_target) {
// Generate a CWE-message if some argument is an out-of-bounds pointer.
self.check_parameter_register_for_out_of_bounds_pointer(state, call, extern_symbol);
// Check parameter for possible use-after-frees (except for possible double frees, which are handled later)
if !self
.deallocation_symbols
.iter()
.any(|free_like_fn| free_like_fn == extern_symbol.name.as_str())
{
self.check_parameter_register_for_dangling_pointer(
&mut new_state,
call,
extern_symbol.parameters.iter(),
);
}
// Clear non-callee-saved registers from the state.
let cconv = self.project.get_calling_convention(extern_symbol);
new_state.clear_non_callee_saved_register(&cconv.callee_saved_register[..]);
......@@ -310,15 +237,11 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
match extern_symbol.name.as_str() {
malloc_like_fn if self.allocation_symbols.iter().any(|x| x == malloc_like_fn) => {
Some(self.add_new_object_in_call_return_register(
state,
new_state,
call,
extern_symbol,
))
}
free_like_fn if self.deallocation_symbols.iter().any(|x| x == free_like_fn) => {
Some(self.mark_parameter_object_as_freed(state, new_state, call, extern_symbol))
}
_ => Some(self.handle_generic_extern_call(state, new_state, call, extern_symbol)),
}
} else {
......
......@@ -2,21 +2,19 @@
//!
//! The goal of the pointer inference analysis is to keep track of all memory objects and pointers
//! that the program knows about at specific program points during execution.
//! It is a combination of a points-to-analysis and a value-set-analysis.
//! The results of the pointer inference analysis are made available to other analyses,
//! which can use them to look up points-to and value set information.
//!
//! If the **Memory** check is enabled,
//! then the analysis also detects and reports possible memory management errors,
//! like "Use after free"-errors, to the user.
//! The result of the pointer inference analysis is also used as input for other analyses,
//! which allows them to keep track of memory objects and pointers.
//! then the analysis also reports some possible memory management errors,
//! like Null pointer dereferences, to the user.
//!
//! ## The Memory Check
//!
//! If the **Memory** check is enabled, the pointer inference detects instances of the following CWEs:
//! - [CWE-119](https://cwe.mitre.org/data/definitions/119.html) Buffer Overflow (generic case)
//! - [CWE-125](https://cwe.mitre.org/data/definitions/125.html) Buffer Overflow: Out-of-bounds Read
//! - [CWE-415](https://cwe.mitre.org/data/definitions/415.html): Double Free
//! - [CWE-416](https://cwe.mitre.org/data/definitions/416.html): Use After Free
//! - [CWE-476](https://cwe.mitre.org/data/definitions/476.html): NULL Pointer Dereference
//! - [CWE-787](https://cwe.mitre.org/data/definitions/787.html): Buffer Overflow: Out-of-bounds Write
//! If the **Memory** check is enabled, the pointer inference reports instances
//! of [CWE-476](https://cwe.mitre.org/data/definitions/476.html) (NULL Pointer Dereference)
//! that were detected during the analysis.
//!
//! The analysis operates on a best-effort basis.
//! In cases where we cannot know
......@@ -71,11 +69,6 @@ pub struct Config {
/// Names of extern functions that are `malloc`-like,
/// i.e. the unique return value is a pointer to a newly allocated chunk of memory or a NULL pointer.
pub allocation_symbols: Vec<String>,
/// Names of extern functions that are `free`-like,
/// i.e. the memory chunk that the unique parameter of the function points to gets deallocated.
/// Note that the analysis currently does not detect mismatching allocation-deallocation pairs,
/// i.e. it cannot distinguish between memory allocated by `malloc` and memory allocated by `new`.
pub deallocation_symbols: Vec<String>,
}
/// A wrapper struct for the pointer inference computation object.
......@@ -477,7 +470,6 @@ mod tests {
let analysis_results: &'a AnalysisResults = Box::leak(analysis_results);
let config = Config {
allocation_symbols: vec!["malloc".to_string()],
deallocation_symbols: vec!["free".to_string()],
};
let (log_sender, _) = crossbeam_channel::unbounded();
PointerInference::new(analysis_results, config, log_sender, false)
......
......@@ -37,20 +37,10 @@ struct Inner {
pointer_targets: BTreeSet<AbstractIdentifier>,
/// Tracks whether this may represent more than one actual memory object.
is_unique: bool,
/// Is the object alive or already destroyed
state: ObjectState,
/// Is the object a stack frame or a heap object
type_: Option<ObjectType>,
/// The actual content of the memory object
memory: MemRegion<Data>,
/// The smallest index still contained in the memory region.
/// A `Top` value represents an unknown bound.
/// The bound is not enforced, i.e. reading and writing to indices violating the bound is still allowed.
lower_index_bound: BitvectorDomain,
/// The largest index still contained in the memory region.
/// A `Top` value represents an unknown bound.
/// The bound is not enforced, i.e. reading and writing to indices violating the bound is still allowed.
upper_index_bound: BitvectorDomain,
}
/// An object is either a stack or a heap object.
......@@ -62,20 +52,6 @@ pub enum ObjectType {
Heap,
}
/// An object is either alive or dangling (because the memory was freed or a function return invalidated the stack frame).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy, PartialOrd, Ord)]
pub enum ObjectState {
/// The object is alive.
Alive,
/// The object is dangling, i.e. the memory has been freed already.
Dangling,
/// The state of the object is unknown (due to merging different object states).
Unknown,
/// The object was referenced in an "use-after-free" or "double-free" CWE-warning.
/// This state is meant to be temporary to prevent obvious subsequent CWE-warnings with the same root cause.
Flagged,
}
#[allow(clippy::from_over_into)]
impl std::convert::Into<AbstractObject> for Inner {
fn into(self) -> AbstractObject {
......@@ -91,11 +67,8 @@ impl AbstractObject {
let inner = Inner {
pointer_targets: BTreeSet::new(),
is_unique: true,
state: ObjectState::Alive,
type_,
memory: MemRegion::new(address_bytesize),
lower_index_bound: BitvectorDomain::Top(address_bytesize),
upper_index_bound: BitvectorDomain::Top(address_bytesize),
};
inner.into()
}
......@@ -112,86 +85,11 @@ impl AbstractObject {
inner.is_unique = false;
}
/// Set the lower index bound that is still considered to be contained in the abstract object.
pub fn set_lower_index_bound(&mut self, lower_bound: BitvectorDomain) {
// Copy-on-write: `Arc::make_mut` clones the shared inner object only if it is aliased.
let inner = Arc::make_mut(&mut self.inner);
inner.lower_index_bound = lower_bound;
}
/// Set the upper index bound that is still considered to be contained in the abstract object.
pub fn set_upper_index_bound(&mut self, upper_bound: BitvectorDomain) {
// Copy-on-write: `Arc::make_mut` clones the shared inner object only if it is aliased.
let inner = Arc::make_mut(&mut self.inner);
inner.upper_index_bound = upper_bound;
}
/// Add an offset to the upper index bound that is still considered to be contained in the abstract object.
pub fn add_to_upper_index_bound(&mut self, offset: i64) {
let inner = Arc::make_mut(&mut self.inner);
// Resize the offset to the byte size of the bound before adding (signed resize preserves the sign).
let offset =
Bitvector::from_i64(offset).into_resize_signed(inner.upper_index_bound.bytesize());
inner.upper_index_bound = inner.upper_index_bound.clone() + offset.into();
}
/// Get the state of the memory object.
/// The state tracks whether the object is alive, dangling, flagged or unknown.
pub fn get_state(&self) -> ObjectState {
self.inner.state
}
/// If `self.is_unique()==true`, set the state of the object. Else merge the new state with the old.
pub fn set_state(&mut self, new_state: ObjectState) {
let inner = Arc::make_mut(&mut self.inner);
if inner.is_unique {
// Unique object: a strong update is sound.
inner.state = new_state;
} else {
// The object may represent several actual objects: only a weak (merging) update is sound.
inner.state = inner.state.merge(new_state);
}
}
/// Get the type of the memory object (stack or heap), if known.
pub fn get_object_type(&self) -> Option<ObjectType> {
self.inner.type_
}
/// Mark the memory object as freed.
/// Returns an error if a possible double free is detected.
pub fn mark_as_freed(&mut self) -> Result<(), Error> {
let inner = Arc::make_mut(&mut self.inner);
match (inner.is_unique, inner.state) {
// Unique object that is not yet freed: strong update to `Dangling`.
(true, ObjectState::Alive) | (true, ObjectState::Flagged) => {
inner.state = ObjectState::Dangling;
Ok(())
}
// Non-unique flagged object: clear the flag, the resulting state is unknown.
(false, ObjectState::Flagged) => {
inner.state = ObjectState::Unknown;
Ok(())
}
// Freeing an object that may already be freed: possible double free.
// The object is flagged to suppress subsequent warnings with the same root cause.
(true, _) | (false, ObjectState::Dangling) => {
inner.state = ObjectState::Flagged;
Err(anyhow!("Object may already have been freed"))
}
// Non-unique object in any other state: weak update to `Unknown`.
(false, _) => {
inner.state = ObjectState::Unknown;
Ok(())
}
}
}
/// Mark the memory object as possibly (but not definitely) freed.
/// Returns an error if the object was definitely freed before.
pub fn mark_as_maybe_freed(&mut self) -> Result<(), Error> {
let inner = Arc::make_mut(&mut self.inner);
match inner.state {
// Already definitely dangling: possible double free.
// The object is flagged to suppress subsequent warnings with the same root cause.
ObjectState::Dangling => {
inner.state = ObjectState::Flagged;
Err(anyhow!("Object may already have been freed"))
}
// In all other cases only a weak update to `Unknown` is sound.
_ => {
inner.state = ObjectState::Unknown;
Ok(())
}
}
}
/// Overwrite the values in `self` with those in `other`
/// under the assumption that the zero offset in `other` corresponds to the offset `offset_other` in `self`.
///
......@@ -204,8 +102,6 @@ impl AbstractObject {
/// This approximates the fact that we currently do not track exactly which indices
/// in `other` were overwritten with a `Top` element and which indices simply were not
/// accessed at all in `other`.
///
/// The upper and lower index bounds of `self` are kept and not overwritten.
pub fn overwrite_with(&mut self, other: &AbstractObject, offset_other: &ValueDomain) {
if let Ok(obj_offset) = offset_other.try_to_offset() {
if self.inner.is_unique {
......@@ -219,11 +115,9 @@ impl AbstractObject {
}
// Merge all other properties with those of other.
inner.is_unique &= other.inner.is_unique;
inner.state = inner.state.merge(other.inner.state);
inner
.pointer_targets
.append(&mut other.inner.pointer_targets.clone());
// TODO: We should log cases where the index bounds are violated by `other`.
} else {
let inner = Arc::make_mut(&mut self.inner);
let mut other = other.clone();
......@@ -232,17 +126,14 @@ impl AbstractObject {
inner.memory = inner.memory.merge(&other_inner.memory);
inner.is_unique &= other.inner.is_unique;
inner.state = inner.state.merge(other.inner.state);
inner
.pointer_targets
.append(&mut other.inner.pointer_targets.clone());
// TODO: We should log cases where the index bounds are violated by `other`.
}
} else {
let inner = Arc::make_mut(&mut self.inner);
inner.memory.mark_all_values_as_top();
inner.is_unique &= other.inner.is_unique;
inner.state = inner.state.merge(other.inner.state);
inner
.pointer_targets
.append(&mut other.inner.pointer_targets.clone());
......@@ -250,19 +141,12 @@ impl AbstractObject {
}
/// Add an offset to all values contained in the abstract object.
/// The offset is also added to the lower and upper index bounds.
pub fn add_offset_to_all_indices(&mut self, offset: &ValueDomain) {
let inner = Arc::make_mut(&mut self.inner);
if let Ok(offset) = offset.try_to_offset() {
inner.memory.add_offset_to_all_indices(offset);
let offset =
Bitvector::from_i64(offset).into_resize_signed(inner.lower_index_bound.bytesize());
inner.lower_index_bound = inner.lower_index_bound.clone() + offset.clone().into();
inner.upper_index_bound = inner.upper_index_bound.clone() + offset.into();
} else {
inner.memory = MemRegion::new(inner.memory.get_address_bytesize());
inner.lower_index_bound = inner.lower_index_bound.top();
inner.upper_index_bound = inner.upper_index_bound.top();
}
}
}
......@@ -281,17 +165,8 @@ impl AbstractDomain for AbstractObject {
.cloned()
.collect(),
is_unique: self.inner.is_unique && other.inner.is_unique,
state: self.inner.state.merge(other.inner.state),
type_: same_or_none(&self.inner.type_, &other.inner.type_),
memory: self.inner.memory.merge(&other.inner.memory),
lower_index_bound: self
.inner
.lower_index_bound
.merge(&other.inner.lower_index_bound),
upper_index_bound: self
.inner
.upper_index_bound
.merge(&other.inner.upper_index_bound),
}
.into()
}
......@@ -313,21 +188,9 @@ impl AbstractObject {
serde_json::Value::String(format!("{}", self.inner.is_unique)),
),
(
"state".to_string(),
serde_json::Value::String(format!("{:?}", self.inner.state)),
),
(
"type".to_string(),
serde_json::Value::String(format!("{:?}", self.inner.type_)),
),
(
"lower_index_bound".to_string(),
serde_json::Value::String(format!("{}", self.inner.lower_index_bound)),
),
(
"upper_index_bound".to_string(),
serde_json::Value::String(format!("{}", self.inner.upper_index_bound)),
),
];
let memory = self
.inner
......@@ -351,20 +214,5 @@ fn same_or_none<T: Eq + Clone>(left: &Option<T>, right: &Option<T>) -> Option<T>
}
}
impl ObjectState {
/// Merge two object states.
/// If one of the two states is `Flagged`, then the resulting state is the other object state.
pub fn merge(self, other: Self) -> Self {
use ObjectState::*;
match (self, other) {
// `Flagged` is a temporary marker and is dominated by the other state.
(Flagged, state) | (state, Flagged) => state,
(Unknown, _) | (_, Unknown) => Unknown,
(Alive, Alive) => Alive,
(Dangling, Dangling) => Dangling,
// Conflicting definite states merge to `Unknown`.
(Alive, Dangling) | (Dangling, Alive) => Unknown,
}
}
}
#[cfg(test)]
mod tests;
......@@ -6,11 +6,8 @@ fn new_abstract_object() -> AbstractObject {
let inner = Inner {
pointer_targets: BTreeSet::new(),
is_unique: true,
state: ObjectState::Alive,
type_: Some(ObjectType::Heap),
memory: MemRegion::new(ByteSize::new(8)),
lower_index_bound: Bitvector::from_u64(0).into(),
upper_index_bound: Bitvector::from_u64(99).into(),
};
inner.into()
}
......@@ -99,15 +96,6 @@ fn remove_ids() {
}
#[test]
fn access_contained_in_bounds() {
// The mock object has inclusive index bounds [0, 99].
let object = new_abstract_object();
assert!(object.access_contained_in_bounds(&IntervalDomain::mock(0, 99), ByteSize::new(1)));
// Negative offsets lie below the lower bound.
assert!(!object.access_contained_in_bounds(&IntervalDomain::mock(-1, -1), ByteSize::new(8)));
// An 8-byte access at offset 92 ends at index 99: still in bounds.
assert!(object.access_contained_in_bounds(&IntervalDomain::mock(92, 92), ByteSize::new(8)));
// An 8-byte access at offset 93 ends at index 100: out of bounds.
assert!(!object.access_contained_in_bounds(&IntervalDomain::mock(93, 93), ByteSize::new(8)));
}
#[test]
fn overwrite_with() {
let mut object = new_abstract_object();
object.set_value(bv(1).into(), &bv(0).into()).unwrap();
......
use super::*;
impl AbstractObject {
/// Check whether a memory access to the abstract object at the given offset
/// and with the given size of the accessed value is contained in the bounds of the memory object.
/// If `offset` contains more than one possible index value,
/// then only return `true` if the access is contained in the abstract object for all possible offset values.
///
/// If `offset` is a `Top` value, then the function assumes this to be due to analysis inaccuracies
/// and does not flag them as possible out-of-bounds access.
pub fn access_contained_in_bounds(&self, offset: &ValueDomain, size: ByteSize) -> bool {
if offset.is_top() {
// Currently TOP offsets happen a lot due to inaccuracies in the analysis.
// So for the time being we do not flag them as possible CWEs.
return true;
}
if let Ok(offset_interval) = offset.try_to_interval() {
// Lower bound check: the smallest possible offset must not lie below the lower index bound.
// If the bound is not a constant bitvector, the check is skipped.
if let Ok(lower_bound) = self.inner.lower_index_bound.try_to_bitvec() {
if lower_bound.checked_sgt(&offset_interval.start).unwrap() {
return false;
}
}
// Upper bound check: the last accessed byte must not exceed the upper index bound.
if let Ok(upper_bound) = self.inner.upper_index_bound.try_to_bitvec() {
// Bring the access size to the same bit width as the offset before adding.
let mut size_as_bitvec = Bitvector::from_u64(u64::from(size));
match offset.bytesize().cmp(&size_as_bitvec.bytesize()) {
std::cmp::Ordering::Less => size_as_bitvec.truncate(offset.bytesize()).unwrap(),
std::cmp::Ordering::Greater => {
size_as_bitvec.sign_extend(offset.bytesize()).unwrap()
}
std::cmp::Ordering::Equal => (),
}
// The largest accessed index is `offset_interval.end + size - 1`.
let max_index = if let Some(val) = offset_interval
.end
.signed_add_overflow_checked(&size_as_bitvec)
{
val - &Bitvector::one(offset.bytesize().into())
} else {
return false; // The max index already causes an integer overflow
};
if upper_bound.checked_slt(&max_index).unwrap() {
return false;
}
}
true
} else {
// The offset could not be represented as an interval: conservatively report out-of-bounds.
false
}
}
/// Read the value at the given offset of the given size inside the memory region.
pub fn get_value(&self, offset: Bitvector, bytesize: ByteSize) -> Data {
self.inner.memory.get(offset, bytesize)
......
//! Methods of [`AbstractObjectList`] that manage memory access rules
//! or check whether they are violated.
//! E.g. checks for use-after-free or buffer overflow checks.
use crate::intermediate_representation::RuntimeMemoryImage;
use super::*;
impl AbstractObjectList {
/// Check the state of a memory object at a given address.
/// Returns `true` if at least one of the targets of the pointer is dangling.
/// If `report_unknown_states` is `true`,
/// then objects with unknown states get reported if they are unique.
/// I.e. objects representing more than one actual object (e.g. an array of object) will not get reported,
/// even if their state is unknown and `report_unknown_states` is `true`.
pub fn is_dangling_pointer(&self, address: &Data, report_unknown_states: bool) -> bool {
for id in address.referenced_ids() {
if let Some(object) = self.objects.get(id) {
match (report_unknown_states, object.get_state()) {
// A definitely dangling target is always reported.
(_, ObjectState::Dangling) => return true,
(true, ObjectState::Unknown) => {
// Unknown states are only reported for unique objects.
if object.is_unique() {
return true;
}
}
_ => (),
}
}
}
// No dangling pointer found
false
}
/// Mark all memory objects targeted by the given `address` pointer,
/// whose state is either dangling or unknown,
/// as flagged.
pub fn mark_dangling_pointer_targets_as_flagged(&mut self, address: &Data) {
for id in address.referenced_ids() {
// Every referenced ID is expected to have a corresponding object in the list.
let object = self.objects.get_mut(id).unwrap();
if matches!(
object.get_state(),
ObjectState::Unknown | ObjectState::Dangling
) {
object.set_state(ObjectState::Flagged);
}
}
}
/// Check whether a memory access at the given address (and accessing `size` many bytes)
/// may be an out-of-bounds memory access.
///
/// Note that `Top` values as addresses are not marked as out-of-bounds,
/// since they are more likely due to analysis imprecision than to actual out-of-bounds access.
pub fn is_out_of_bounds_mem_access(
&self,
address: &Data,
size: ByteSize,
global_data: &RuntimeMemoryImage,
) -> bool {
// Check absolute addresses against the readable global memory image.
if let Some(value) = address.get_absolute_value() {
if let Ok((start, end)) = value.try_to_offset_interval() {
// Negative or inverted intervals are considered out of bounds.
if start < 0 || end < start {
return true;
}
// The access covers the bytes `start ..= end + size - 1`.
if global_data
.is_interval_readable(start as u64, end as u64 + u64::from(size) - 1)
.is_err()
{
return true;
}
}
}
// Check pointers into tracked memory objects against the per-object index bounds.
for (id, offset) in address.get_relative_values() {
if let Some(object) = self.objects.get(id) {
if !object.access_contained_in_bounds(offset, size) {
return true;
}
}
}
false
}
/// Set the lower index bound for indices to be considered inside the memory object.
/// The bound is inclusive, i.e. the bound index itself is also considered to be inside the memory object.
///
/// Any `bound` value other than a constant bitvector is interpreted as the memory object not having a lower bound.
pub fn set_lower_index_bound(&mut self, object_id: &AbstractIdentifier, bound: &ValueDomain) {
// The object for the given ID is expected to exist in the list.
let object = self.objects.get_mut(object_id).unwrap();
// Non-constant bounds degrade to `Top`, i.e. "no known bound".
let bound = bound
.clone()
.try_to_bitvec()
.map(|bitvec| bitvec.into())
.unwrap_or_else(|_| BitvectorDomain::new_top(bound.bytesize()));
object.set_lower_index_bound(bound);
}
/// Set the upper index bound for indices to be considered inside the memory object.
/// The bound is inclusive, i.e. the bound index itself is also considered to be inside the memory object.
///
/// Any `bound` value other than a constant bitvector is interpreted as the memory object not having an upper bound.
pub fn set_upper_index_bound(&mut self, object_id: &AbstractIdentifier, bound: &ValueDomain) {
// The object for the given ID is expected to exist in the list.
let object = self.objects.get_mut(object_id).unwrap();
// Non-constant bounds degrade to `Top`, i.e. "no known bound".
let bound = bound
.clone()
.try_to_bitvec()
.map(|bitvec| bitvec.into())
.unwrap_or_else(|_| BitvectorDomain::new_top(bound.bytesize()));
object.set_upper_index_bound(bound);
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
///
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
/// Returns either a non-empty list of detected errors (like possible double frees) or `OK(())` if no errors were found.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &Data,
) -> Result<(), Vec<(AbstractIdentifier, Error)>> {
let ids: Vec<AbstractIdentifier> = object_pointer.referenced_ids().cloned().collect();
let mut possible_double_free_ids = Vec::new();
// If the pointer is imprecise (several possible targets, `Top` parts, or absolute values),
// only a weak update ("maybe freed") is sound for each possible target.
if ids.len() > 1
|| object_pointer.contains_top()
|| object_pointer.get_absolute_value().is_some()
{
for id in ids {
if let Err(error) = self.objects.get_mut(&id).unwrap().mark_as_maybe_freed() {
possible_double_free_ids.push((id.clone(), error));
}
}
} else if let Some(id) = ids.get(0) {
// The pointer has exactly one possible target: a strong update ("definitely freed") is sound.
if let Err(error) = self.objects.get_mut(id).unwrap().mark_as_freed() {
possible_double_free_ids.push((id.clone(), error));
}
}
if possible_double_free_ids.is_empty() {
Ok(())
} else {
Err(possible_double_free_ids)
}
}
}
use super::object::*;
use super::{Data, ValueDomain};
use super::Data;
use crate::abstract_domain::*;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
mod cwe_helpers;
mod id_manipulation;
mod list_manipulation;
......@@ -30,8 +29,7 @@ impl AbstractObjectList {
address_bytesize: ByteSize,
) -> AbstractObjectList {
let mut objects = BTreeMap::new();
let mut stack_object = AbstractObject::new(Some(ObjectType::Stack), address_bytesize);
stack_object.set_upper_index_bound(Bitvector::zero(address_bytesize.into()).into());
let stack_object = AbstractObject::new(Some(ObjectType::Stack), address_bytesize);
objects.insert(stack_id, stack_object);
AbstractObjectList { objects }
}
......
use crate::intermediate_representation::Variable;
use super::super::ValueDomain;
use super::*;
fn bv(value: i64) -> ValueDomain {
......@@ -100,17 +101,4 @@ fn abstract_object_list() {
other_obj_list.objects.iter().next().unwrap().0,
&new_id("RAX".into())
);
assert_eq!(
other_obj_list.objects.values().next().unwrap().get_state(),
crate::analysis::pointer_inference::object::ObjectState::Alive
);
let modified_heap_pointer = DataDomain::from_target(new_id("RAX".into()), bv(8));
other_obj_list
.mark_mem_object_as_freed(&modified_heap_pointer)
.unwrap();
assert_eq!(
other_obj_list.objects.values().next().unwrap().get_state(),
crate::analysis::pointer_inference::object::ObjectState::Dangling
);
}
......@@ -180,54 +180,6 @@ impl State {
}
}
/// Check if an expression contains a use-after-free.
/// If yes, mark the corresponding memory objects as flagged.
pub fn contains_access_of_dangling_memory(&mut self, def: &Def) -> bool {
match def {
Def::Load { address, .. } | Def::Store { address, .. } => {
let address_value = self.eval(address);
if self.memory.is_dangling_pointer(&address_value, true) {
self.memory
.mark_dangling_pointer_targets_as_flagged(&address_value);
true
} else {
false
}
}
_ => false,
}
}
/// Returns `true` if the given `Def` is a load or store instruction
/// which may access a memory object outside its bounds.
pub fn contains_out_of_bounds_mem_access(
&self,
def: &Def,
global_data: &RuntimeMemoryImage,
) -> bool {
let (address, size) = match def {
Def::Load { address, var } => (self.eval(address), var.size),
Def::Store { address, value } => (self.eval(address), value.bytesize()),
_ => return false,
};
self.memory
.is_out_of_bounds_mem_access(&address, size, global_data)
}
/// Returns `true` if `data` is a pointer pointing outside of the bounds of a memory buffer.
/// Does not check whether `data` may represent an out-of-bounds access to global memory,
/// since this function assumes that all absolute values are not pointers.
pub fn pointer_contains_out_of_bounds_target(
&self,
data: &Data,
global_data: &RuntimeMemoryImage,
) -> bool {
let mut data = data.clone();
data.set_absolute_value(None); // Do not check absolute_values
self.memory
.is_out_of_bounds_mem_access(&data, ByteSize::new(1), global_data)
}
/// Check whether the given `def` could result in a memory access through a NULL pointer.
///
/// If no NULL pointer dereference is detected then `Ok(false)` is returned.
......
......@@ -58,17 +58,6 @@ impl State {
) -> State {
let mock_global_memory = RuntimeMemoryImage::empty(true);
let mut state = State::new(stack_register, function_tid.clone());
// Adjust the upper bound of the stack frame to include all stack parameters
// (and the return address at stack offset 0 for x86).
let stack_upper_bound: i64 = match stack_register.name.as_str() {
"ESP" => 4,
"RSP" => 8,
_ => 0,
};
let stack_upper_bound =
std::cmp::max(stack_upper_bound, fn_sig.get_stack_params_total_size());
let stack_obj = state.memory.get_object_mut(&state.stack_id).unwrap();
stack_obj.add_to_upper_index_bound(stack_upper_bound);
// Set parameter values and create parameter memory objects.
for (arg, access_pattern) in &fn_sig.parameters {
let param_id = AbstractIdentifier::from_arg(&function_tid, arg);
......@@ -189,18 +178,6 @@ impl State {
self.memory.remove_unused_objects(&referenced_ids);
}
    /// Mark a memory object as already freed (i.e. pointers to it are dangling).
    /// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
    ///
    /// If this may cause double frees (i.e. the object in question may have been freed already),
    /// an error with the list of possibly already freed objects is returned.
    pub fn mark_mem_object_as_freed(
        &mut self,
        object_pointer: &Data,
    ) -> Result<(), Vec<(AbstractIdentifier, Error)>> {
        // Thin wrapper: the abstract object list performs the actual marking.
        self.memory.mark_mem_object_as_freed(object_pointer)
    }
/// Remove all knowledge about the contents of non-callee-saved registers from the state.
pub fn remove_non_callee_saved_register(&mut self, cconv: &CallingConvention) {
let mut callee_saved_register = BTreeMap::new();
......
......@@ -834,52 +834,6 @@ fn specialize_by_unsigned_comparison_op() {
}
#[test]
fn out_of_bounds_access_recognition() {
    let mut state = State::new(&register("RSP"), Tid::new("func_tid"));
    let global_data = RuntimeMemoryImage::mock();
    // Create a heap object of size 8 whose known valid index range is [0, 7].
    let heap_obj_id = new_id("heap_malloc", "RAX");
    state.memory.add_abstract_object(
        heap_obj_id.clone(),
        ByteSize::new(8),
        Some(ObjectType::Heap),
    );
    state
        .memory
        .set_lower_index_bound(&heap_obj_id, &Bitvector::from_u64(0).into());
    state
        .memory
        .set_upper_index_bound(&heap_obj_id, &Bitvector::from_u64(7).into());
    // Offsets -1 and 8 point outside the object, offsets 0 and 7 point inside.
    for (offset, expect_out_of_bounds) in [(-1i64, true), (0, false), (7, false), (8, true)] {
        let pointer = Data::from_target(heap_obj_id.clone(), Bitvector::from_i64(offset).into());
        assert_eq!(
            state.pointer_contains_out_of_bounds_target(&pointer, &global_data),
            expect_out_of_bounds
        );
    }
    // An 8-byte load through RAX pointing at offset 0 stays within bounds.
    let load_def = Def::load(
        "tid",
        Variable::mock("RBX", 8),
        Expression::Var(Variable::mock("RAX", 8)),
    );
    let rax_value = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(0).into());
    state.set_register(&Variable::mock("RAX", 8), rax_value);
    assert!(!state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
    let rax_value = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(0).into());
    state.set_register(&Variable::mock("RAX", 8), rax_value);
    assert!(!state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
    // At offset 1 the 8-byte load crosses the upper bound of 7.
    let rax_value = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(1).into());
    state.set_register(&Variable::mock("RAX", 8), rax_value);
    assert!(state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
    // A load at a negative offset into the current stack frame is not flagged.
    let rax_value = Data::from_target(state.stack_id.clone(), Bitvector::from_i64(-8).into());
    state.set_register(&Variable::mock("RAX", 8), rax_value);
    assert!(!state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
}
#[test]
fn specialize_pointer_comparison() {
let mut state = State::new(&register("RSP"), Tid::new("func_tid"));
let interval = IntervalDomain::mock(-5, 10);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment