Unverified Commit 2f0c1e49 by Enkelmann Committed by GitHub

Pointer inference cleanup (#328)

parent 3cc6cf5b
......@@ -228,9 +228,6 @@
"calloc",
"realloc",
"xmalloc"
],
"deallocation_symbols": [
"free"
]
},
"StringAbstraction": {
......
use super::super::ValueDomain;
use super::*;
fn bv(value: i64) -> ValueDomain {
......@@ -56,7 +57,6 @@ fn mock_project() -> (Project, Config) {
project,
Config {
allocation_symbols: vec!["malloc".into()],
deallocation_symbols: vec!["free".into()],
},
)
}
......
......@@ -16,22 +16,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
/// Update the state according to the effects of the given `Def` term.
fn update_def(&self, state: &Self::Value, def: &Term<Def>) -> Option<Self::Value> {
let mut new_state = state.clone();
// first check for use-after-frees
if new_state.contains_access_of_dangling_memory(&def.term) {
let warning = CweWarning {
name: "CWE416".to_string(),
version: VERSION.to_string(),
addresses: vec![def.tid.address.clone()],
tids: vec![format!("{}", def.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: format!(
"(Use After Free) Access through a dangling pointer at {}",
def.tid.address
),
};
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
// check for null dereferences
match new_state.check_def_for_null_dereferences(def) {
Err(_) => {
......@@ -41,38 +25,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
Ok(true) => self.report_null_deref(&def.tid),
Ok(false) => (), // no null dereference detected
}
// check for out-of-bounds memory access
if new_state
.contains_out_of_bounds_mem_access(&def.term, &self.project.runtime_memory_image)
{
let (warning_name, warning_description) = match &def.term {
Def::Load { .. } => (
"CWE125",
format!(
"(Out-of-bounds Read) Memory load at {} may be out of bounds",
def.tid.address,
),
),
Def::Store { .. } => (
"CWE787",
format!(
"(Out-of-bounds Write) Memory write at {} may be out of bounds",
def.tid.address,
),
),
Def::Assign { .. } => panic!(),
};
let warning = CweWarning {
name: warning_name.to_string(),
version: VERSION.to_string(),
addresses: vec![def.tid.address.clone()],
tids: vec![format!("{}", def.tid)],
symbols: Vec::new(),
other: Vec::new(),
description: warning_description,
};
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
match &def.term {
Def::Store { address, value } => {
......@@ -118,23 +70,12 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
/// The resulting state is the state at the start of the call target function.
fn update_call(
&self,
state: &State,
_state: &State,
call_term: &Term<Jmp>,
_target_node: &crate::analysis::graph::Node,
_calling_convention: &Option<String>,
) -> Option<State> {
if let Jmp::Call {
target: ref callee_tid,
return_: _,
} = call_term.term
{
// Check call parameters for dangling pointers
let callee_fn_sig = self.fn_signatures.get(callee_tid).unwrap();
self.check_parameter_register_for_dangling_pointer(
&mut state.clone(),
call_term,
callee_fn_sig.parameters.keys(),
);
if let Jmp::Call { .. } = call_term.term {
// No information flows from caller to the callee in the analysis.
None
} else if let Jmp::CallInd { .. } = call_term.term {
......@@ -287,20 +228,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
};
let mut new_state = state.clone();
if let Some(extern_symbol) = self.extern_symbol_map.get(call_target) {
// Generate a CWE-message if some argument is an out-of-bounds pointer.
self.check_parameter_register_for_out_of_bounds_pointer(state, call, extern_symbol);
// Check parameter for possible use-after-frees (except for possible double frees, which are handled later)
if !self
.deallocation_symbols
.iter()
.any(|free_like_fn| free_like_fn == extern_symbol.name.as_str())
{
self.check_parameter_register_for_dangling_pointer(
&mut new_state,
call,
extern_symbol.parameters.iter(),
);
}
// Clear non-callee-saved registers from the state.
let cconv = self.project.get_calling_convention(extern_symbol);
new_state.clear_non_callee_saved_register(&cconv.callee_saved_register[..]);
......@@ -310,15 +237,11 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
match extern_symbol.name.as_str() {
malloc_like_fn if self.allocation_symbols.iter().any(|x| x == malloc_like_fn) => {
Some(self.add_new_object_in_call_return_register(
state,
new_state,
call,
extern_symbol,
))
}
free_like_fn if self.deallocation_symbols.iter().any(|x| x == free_like_fn) => {
Some(self.mark_parameter_object_as_freed(state, new_state, call, extern_symbol))
}
_ => Some(self.handle_generic_extern_call(state, new_state, call, extern_symbol)),
}
} else {
......
......@@ -2,21 +2,19 @@
//!
//! The goal of the pointer inference analysis is to keep track of all memory objects and pointers
//! that the program knows about at specific program points during execution.
//! It is a combination of a points-to-analysis and a value-set-analysis.
//! The results of the pointer inference analysis are made available to other analyses,
//! which can use them to look up points-to and value set information.
//!
//! If the **Memory** check is enabled,
//! then the analysis also detects and reports possible memory management errors,
//! like "Use after free"-errors, to the user.
//! The result of the pointer inference analysis is also used as input for other analyses,
//! which allows them to keep track of memory objects and pointers.
//! then the analysis also reports some possible memory management errors,
//! like Null pointer dereferences, to the user.
//!
//! ## The Memory Check
//!
//! If the **Memory** check is enabled, the pointer inference detects instances of the following CWEs:
//! - [CWE-119](https://cwe.mitre.org/data/definitions/119.html) Buffer Overflow (generic case)
//! - [CWE-125](https://cwe.mitre.org/data/definitions/125.html) Buffer Overflow: Out-of-bounds Read
//! - [CWE-415](https://cwe.mitre.org/data/definitions/415.html): Double Free
//! - [CWE-416](https://cwe.mitre.org/data/definitions/416.html): Use After Free
//! - [CWE-476](https://cwe.mitre.org/data/definitions/476.html): NULL Pointer Dereference
//! - [CWE-787](https://cwe.mitre.org/data/definitions/787.html): Buffer Overflow: Out-of-bounds Write
//! If the **Memory** check is enabled, the pointer inference reports instances
//! of [CWE-476](https://cwe.mitre.org/data/definitions/476.html) (NULL Pointer Dereference)
//! that were detected during the analysis.
//!
//! The analysis operates on a best-effort basis.
//! In cases where we cannot know
......@@ -71,11 +69,6 @@ pub struct Config {
/// Names of extern functions that are `malloc`-like,
/// i.e. the unique return value is a pointer to a newly allocated chunk of memory or a NULL pointer.
pub allocation_symbols: Vec<String>,
/// Names of extern functions that are `free`-like,
/// i.e. the memory chunk that the unique parameter of the function points to gets deallocated.
/// Note that the analysis currently does not detect mismatching allocation-deallocation pairs,
/// i.e. it cannot distinguish between memory allocated by `malloc` and memory allocated by `new`.
pub deallocation_symbols: Vec<String>,
}
/// A wrapper struct for the pointer inference computation object.
......@@ -477,7 +470,6 @@ mod tests {
let analysis_results: &'a AnalysisResults = Box::leak(analysis_results);
let config = Config {
allocation_symbols: vec!["malloc".to_string()],
deallocation_symbols: vec!["free".to_string()],
};
let (log_sender, _) = crossbeam_channel::unbounded();
PointerInference::new(analysis_results, config, log_sender, false)
......
......@@ -6,11 +6,8 @@ fn new_abstract_object() -> AbstractObject {
let inner = Inner {
pointer_targets: BTreeSet::new(),
is_unique: true,
state: ObjectState::Alive,
type_: Some(ObjectType::Heap),
memory: MemRegion::new(ByteSize::new(8)),
lower_index_bound: Bitvector::from_u64(0).into(),
upper_index_bound: Bitvector::from_u64(99).into(),
};
inner.into()
}
......@@ -99,15 +96,6 @@ fn remove_ids() {
}
#[test]
fn access_contained_in_bounds() {
let object = new_abstract_object();
assert!(object.access_contained_in_bounds(&IntervalDomain::mock(0, 99), ByteSize::new(1)));
assert!(!object.access_contained_in_bounds(&IntervalDomain::mock(-1, -1), ByteSize::new(8)));
assert!(object.access_contained_in_bounds(&IntervalDomain::mock(92, 92), ByteSize::new(8)));
assert!(!object.access_contained_in_bounds(&IntervalDomain::mock(93, 93), ByteSize::new(8)));
}
#[test]
fn overwrite_with() {
let mut object = new_abstract_object();
object.set_value(bv(1).into(), &bv(0).into()).unwrap();
......
use super::*;
impl AbstractObject {
/// Check whether a memory access to the abstract object at the given offset
/// and with the given size of the accessed value is contained in the bounds of the memory object.
/// If `offset` contains more than one possible index value,
/// then only return `true` if the access is contained in the abstract object for all possible offset values.
///
/// If `offset` is a `Top` value, then the function assumes this to be due to analysis inaccuracies
/// and does not flag them as possible out-of-bounds access.
pub fn access_contained_in_bounds(&self, offset: &ValueDomain, size: ByteSize) -> bool {
if offset.is_top() {
// Currently TOP offsets happen a lot due to inaccuracies in the analysis.
// So for the time being we do not flag them as possible CWEs.
return true;
}
if let Ok(offset_interval) = offset.try_to_interval() {
if let Ok(lower_bound) = self.inner.lower_index_bound.try_to_bitvec() {
if lower_bound.checked_sgt(&offset_interval.start).unwrap() {
return false;
}
}
if let Ok(upper_bound) = self.inner.upper_index_bound.try_to_bitvec() {
let mut size_as_bitvec = Bitvector::from_u64(u64::from(size));
match offset.bytesize().cmp(&size_as_bitvec.bytesize()) {
std::cmp::Ordering::Less => size_as_bitvec.truncate(offset.bytesize()).unwrap(),
std::cmp::Ordering::Greater => {
size_as_bitvec.sign_extend(offset.bytesize()).unwrap()
}
std::cmp::Ordering::Equal => (),
}
let max_index = if let Some(val) = offset_interval
.end
.signed_add_overflow_checked(&size_as_bitvec)
{
val - &Bitvector::one(offset.bytesize().into())
} else {
return false; // The max index already causes an integer overflow
};
if upper_bound.checked_slt(&max_index).unwrap() {
return false;
}
}
true
} else {
false
}
}
/// Read the value at the given offset of the given size inside the memory region.
pub fn get_value(&self, offset: Bitvector, bytesize: ByteSize) -> Data {
self.inner.memory.get(offset, bytesize)
......
//! Methods of [`AbstractObjectList`] that manage memory access rules
//! or check whether they are violated.
//! E.g. checks for use-after-free or buffer overflow checks.
use crate::intermediate_representation::RuntimeMemoryImage;
use super::*;
impl AbstractObjectList {
/// Check the state of a memory object at a given address.
/// Returns `true` if at least one of the targets of the pointer is dangling.
/// If `report_unknown_states` is `true`,
/// then objects with unknown states get reported if they are unique.
/// I.e. objects representing more than one actual object (e.g. an array of object) will not get reported,
/// even if their state is unknown and `report_unknown_states` is `true`.
pub fn is_dangling_pointer(&self, address: &Data, report_unknown_states: bool) -> bool {
for id in address.referenced_ids() {
if let Some(object) = self.objects.get(id) {
match (report_unknown_states, object.get_state()) {
(_, ObjectState::Dangling) => return true,
(true, ObjectState::Unknown) => {
if object.is_unique() {
return true;
}
}
_ => (),
}
}
}
// No dangling pointer found
false
}
/// Mark all memory objects targeted by the given `address` pointer,
/// whose state is either dangling or unknown,
/// as flagged.
pub fn mark_dangling_pointer_targets_as_flagged(&mut self, address: &Data) {
for id in address.referenced_ids() {
let object = self.objects.get_mut(id).unwrap();
if matches!(
object.get_state(),
ObjectState::Unknown | ObjectState::Dangling
) {
object.set_state(ObjectState::Flagged);
}
}
}
/// Check whether a memory access at the given address (and accessing `size` many bytes)
/// may be an out-of-bounds memory access.
///
/// Note that `Top` values as addresses are not marked as out-of-bounds,
/// since they are more likely due to analysis imprecision than to actual out-of-bounds access.
pub fn is_out_of_bounds_mem_access(
&self,
address: &Data,
size: ByteSize,
global_data: &RuntimeMemoryImage,
) -> bool {
if let Some(value) = address.get_absolute_value() {
if let Ok((start, end)) = value.try_to_offset_interval() {
if start < 0 || end < start {
return true;
}
if global_data
.is_interval_readable(start as u64, end as u64 + u64::from(size) - 1)
.is_err()
{
return true;
}
}
}
for (id, offset) in address.get_relative_values() {
if let Some(object) = self.objects.get(id) {
if !object.access_contained_in_bounds(offset, size) {
return true;
}
}
}
false
}
/// Set the lower index bound for indices to be considered inside the memory object.
/// The bound is inclusive, i.e. the bound index itself is also considered to be inside the memory object.
///
/// Any `bound` value other than a constant bitvector is interpreted as the memory object not having a lower bound.
pub fn set_lower_index_bound(&mut self, object_id: &AbstractIdentifier, bound: &ValueDomain) {
let object = self.objects.get_mut(object_id).unwrap();
let bound = bound
.clone()
.try_to_bitvec()
.map(|bitvec| bitvec.into())
.unwrap_or_else(|_| BitvectorDomain::new_top(bound.bytesize()));
object.set_lower_index_bound(bound);
}
/// Set the upper index bound for indices to be considered inside the memory object.
/// The bound is inclusive, i.e. the bound index itself is also considered to be inside the memory object.
///
/// Any `bound` value other than a constant bitvector is interpreted as the memory object not having an upper bound.
pub fn set_upper_index_bound(&mut self, object_id: &AbstractIdentifier, bound: &ValueDomain) {
let object = self.objects.get_mut(object_id).unwrap();
let bound = bound
.clone()
.try_to_bitvec()
.map(|bitvec| bitvec.into())
.unwrap_or_else(|_| BitvectorDomain::new_top(bound.bytesize()));
object.set_upper_index_bound(bound);
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
///
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
/// Returns either a non-empty list of detected errors (like possible double frees) or `OK(())` if no errors were found.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &Data,
) -> Result<(), Vec<(AbstractIdentifier, Error)>> {
let ids: Vec<AbstractIdentifier> = object_pointer.referenced_ids().cloned().collect();
let mut possible_double_free_ids = Vec::new();
if ids.len() > 1
|| object_pointer.contains_top()
|| object_pointer.get_absolute_value().is_some()
{
for id in ids {
if let Err(error) = self.objects.get_mut(&id).unwrap().mark_as_maybe_freed() {
possible_double_free_ids.push((id.clone(), error));
}
}
} else if let Some(id) = ids.get(0) {
if let Err(error) = self.objects.get_mut(id).unwrap().mark_as_freed() {
possible_double_free_ids.push((id.clone(), error));
}
}
if possible_double_free_ids.is_empty() {
Ok(())
} else {
Err(possible_double_free_ids)
}
}
}
use super::object::*;
use super::{Data, ValueDomain};
use super::Data;
use crate::abstract_domain::*;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
mod cwe_helpers;
mod id_manipulation;
mod list_manipulation;
......@@ -30,8 +29,7 @@ impl AbstractObjectList {
address_bytesize: ByteSize,
) -> AbstractObjectList {
let mut objects = BTreeMap::new();
let mut stack_object = AbstractObject::new(Some(ObjectType::Stack), address_bytesize);
stack_object.set_upper_index_bound(Bitvector::zero(address_bytesize.into()).into());
let stack_object = AbstractObject::new(Some(ObjectType::Stack), address_bytesize);
objects.insert(stack_id, stack_object);
AbstractObjectList { objects }
}
......
use crate::intermediate_representation::Variable;
use super::super::ValueDomain;
use super::*;
fn bv(value: i64) -> ValueDomain {
......@@ -100,17 +101,4 @@ fn abstract_object_list() {
other_obj_list.objects.iter().next().unwrap().0,
&new_id("RAX".into())
);
assert_eq!(
other_obj_list.objects.values().next().unwrap().get_state(),
crate::analysis::pointer_inference::object::ObjectState::Alive
);
let modified_heap_pointer = DataDomain::from_target(new_id("RAX".into()), bv(8));
other_obj_list
.mark_mem_object_as_freed(&modified_heap_pointer)
.unwrap();
assert_eq!(
other_obj_list.objects.values().next().unwrap().get_state(),
crate::analysis::pointer_inference::object::ObjectState::Dangling
);
}
......@@ -180,54 +180,6 @@ impl State {
}
}
/// Check if an expression contains a use-after-free.
/// If yes, mark the corresponding memory objects as flagged.
pub fn contains_access_of_dangling_memory(&mut self, def: &Def) -> bool {
match def {
Def::Load { address, .. } | Def::Store { address, .. } => {
let address_value = self.eval(address);
if self.memory.is_dangling_pointer(&address_value, true) {
self.memory
.mark_dangling_pointer_targets_as_flagged(&address_value);
true
} else {
false
}
}
_ => false,
}
}
/// Returns `true` if the given `Def` is a load or store instruction
/// which may access a memory object outside its bounds.
pub fn contains_out_of_bounds_mem_access(
&self,
def: &Def,
global_data: &RuntimeMemoryImage,
) -> bool {
let (address, size) = match def {
Def::Load { address, var } => (self.eval(address), var.size),
Def::Store { address, value } => (self.eval(address), value.bytesize()),
_ => return false,
};
self.memory
.is_out_of_bounds_mem_access(&address, size, global_data)
}
/// Returns `true` if `data` is a pointer pointing outside of the bounds of a memory buffer.
/// Does not check whether `data` may represent an out-of-bounds access to global memory,
/// since this function assumes that all absolute values are not pointers.
pub fn pointer_contains_out_of_bounds_target(
&self,
data: &Data,
global_data: &RuntimeMemoryImage,
) -> bool {
let mut data = data.clone();
data.set_absolute_value(None); // Do not check absolute_values
self.memory
.is_out_of_bounds_mem_access(&data, ByteSize::new(1), global_data)
}
/// Check whether the given `def` could result in a memory access through a NULL pointer.
///
/// If no NULL pointer dereference is detected then `Ok(false)` is returned.
......
......@@ -58,17 +58,6 @@ impl State {
) -> State {
let mock_global_memory = RuntimeMemoryImage::empty(true);
let mut state = State::new(stack_register, function_tid.clone());
// Adjust the upper bound of the stack frame to include all stack parameters
// (and the return address at stack offset 0 for x86).
let stack_upper_bound: i64 = match stack_register.name.as_str() {
"ESP" => 4,
"RSP" => 8,
_ => 0,
};
let stack_upper_bound =
std::cmp::max(stack_upper_bound, fn_sig.get_stack_params_total_size());
let stack_obj = state.memory.get_object_mut(&state.stack_id).unwrap();
stack_obj.add_to_upper_index_bound(stack_upper_bound);
// Set parameter values and create parameter memory objects.
for (arg, access_pattern) in &fn_sig.parameters {
let param_id = AbstractIdentifier::from_arg(&function_tid, arg);
......@@ -189,18 +178,6 @@ impl State {
self.memory.remove_unused_objects(&referenced_ids);
}
/// Mark a memory object as already freed (i.e. pointers to it are dangling).
/// If the object cannot be identified uniquely, all possible targets are marked as having an unknown status.
///
/// If this may cause double frees (i.e. the object in question may have been freed already),
/// an error with the list of possibly already freed objects is returned.
pub fn mark_mem_object_as_freed(
&mut self,
object_pointer: &Data,
) -> Result<(), Vec<(AbstractIdentifier, Error)>> {
self.memory.mark_mem_object_as_freed(object_pointer)
}
/// Remove all knowledge about the contents of non-callee-saved registers from the state.
pub fn remove_non_callee_saved_register(&mut self, cconv: &CallingConvention) {
let mut callee_saved_register = BTreeMap::new();
......
......@@ -834,52 +834,6 @@ fn specialize_by_unsigned_comparison_op() {
}
#[test]
fn out_of_bounds_access_recognition() {
let mut state = State::new(&register("RSP"), Tid::new("func_tid"));
let global_data = RuntimeMemoryImage::mock();
let heap_obj_id = new_id("heap_malloc", "RAX");
state.memory.add_abstract_object(
heap_obj_id.clone(),
ByteSize::new(8),
Some(ObjectType::Heap),
);
state
.memory
.set_lower_index_bound(&heap_obj_id, &Bitvector::from_u64(0).into());
state
.memory
.set_upper_index_bound(&heap_obj_id, &Bitvector::from_u64(7).into());
let pointer = Data::from_target(heap_obj_id.clone(), Bitvector::from_i64(-1).into());
assert!(state.pointer_contains_out_of_bounds_target(&pointer, &global_data));
let pointer = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(0).into());
assert!(!state.pointer_contains_out_of_bounds_target(&pointer, &global_data));
let pointer = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(7).into());
assert!(!state.pointer_contains_out_of_bounds_target(&pointer, &global_data));
let pointer = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(8).into());
assert!(state.pointer_contains_out_of_bounds_target(&pointer, &global_data));
let address = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(0).into());
state.set_register(&Variable::mock("RAX", 8), address);
let load_def = Def::load(
"tid",
Variable::mock("RBX", 8),
Expression::Var(Variable::mock("RAX", 8)),
);
assert!(!state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
let address = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(0).into());
state.set_register(&Variable::mock("RAX", 8), address);
assert!(!state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
let address = Data::from_target(heap_obj_id.clone(), Bitvector::from_u64(1).into());
state.set_register(&Variable::mock("RAX", 8), address);
assert!(state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
let address = Data::from_target(state.stack_id.clone(), Bitvector::from_i64(-8).into());
state.set_register(&Variable::mock("RAX", 8), address);
assert!(!state.contains_out_of_bounds_mem_access(&load_def.term, &global_data));
}
#[test]
fn specialize_pointer_comparison() {
let mut state = State::new(&register("RSP"), Tid::new("func_tid"));
let interval = IntervalDomain::mock(-5, 10);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment