Unverified commit d7cb4c95 by Enkelmann, committed by GitHub

Reduce false positives in CWE-416 check (#433)

parent f6ced95c
use crate::intermediate_representation::*;
use crate::prelude::*;
use derive_more::Deref;
use std::ops::Deref;
use std::sync::Arc;
mod location;
@@ -150,6 +151,24 @@ impl AbstractIdentifier {
pub fn bytesize(&self) -> ByteSize {
self.location.bytesize()
}
/// If the abstract location of `self` has a parent location,
/// then return the ID one gets by replacing the abstract location in `self` with its parent location.
pub fn get_id_with_parent_location(
&self,
generic_pointer_size: ByteSize,
) -> Option<AbstractIdentifier> {
if let Ok((parent_location, _)) = self.location.get_parent_location(generic_pointer_size) {
let id_data = AbstractIdentifierData {
time: self.deref().time.clone(),
location: parent_location,
path_hints: self.deref().path_hints.clone(),
};
Some(AbstractIdentifier(Arc::new(id_data)))
} else {
None
}
}
}
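A minimal sketch of the method's behavior, using the `mock_nested` test constructor that also appears in the tests further down (the time strings and registers are illustrative):

// `r0->[0]` is the location obtained by dereferencing `r0` at offset 0.
let nested_id = AbstractIdentifier::mock_nested("time1", "r0:4", &[0], 4);
// Replacing the location with its parent drops the final dereference.
let parent_id = nested_id
    .get_id_with_parent_location(ByteSize::new(4))
    .unwrap();
assert_eq!(
    parent_id,
    AbstractIdentifier::mock_nested("time1", "r0:4", &[], 4)
);
// A plain register location has no parent, so no ID is returned.
assert!(parent_id
    .get_id_with_parent_location(ByteSize::new(4))
    .is_none());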
impl std::fmt::Display for AbstractIdentifier {
......
@@ -234,6 +234,16 @@ impl<T: Context> Computation<T> {
&self.node_values
}
/// Get a mutable iterator over all node values.
/// Also add all nodes that have values to the worklist, because one can change all their values through the iterator.
pub fn node_values_mut(&mut self) -> impl Iterator<Item = &mut T::NodeValue> {
for node in self.node_values.keys() {
let priority = self.node_priority_list[node.index()];
self.worklist.insert(priority);
}
self.node_values.values_mut()
}
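A short usage sketch, mirroring how this commit uses the new iterator in `compute_function_signatures` further down: mutate every stored node value, then resume the fixpoint computation so that the re-queued nodes propagate the changes.

// Sketch: adjust all node values, then recompute the fixpoint.
for node_value in computation.node_values_mut() {
    // ... mutate `node_value`; its node is re-added to the worklist ...
}
computation.compute_with_max_steps(100);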
/// Get a reference to the underlying graph
pub fn get_graph(&self) -> &DiGraph<T::NodeLabel, T::EdgeLabel> {
self.fp_context.get_graph()
......
@@ -68,6 +68,7 @@ fn test_call_stub_handling() {
&Tid::new("func"),
&project.stack_pointer_register,
project.get_standard_calling_convention().unwrap(),
2,
);
let extern_symbol = ExternSymbol::mock_malloc_symbol_arm();
let call_tid = Tid::new("call_malloc");
@@ -93,6 +94,7 @@ fn test_call_stub_handling() {
&Tid::new("func"),
&project.stack_pointer_register,
project.get_standard_calling_convention().unwrap(),
2,
);
// Set the format string param register to a pointer to the string 'cat %s %s %s %s'.
state.set_register(&variable!("r1:4"), bitvec!("0x6000:4").into());
......
@@ -31,6 +31,9 @@
//! Nevertheless, it should result in an overapproximation of parameters and their access patterns in most cases.
//! * The nesting depth for tracked nested parameters is limited
//! to avoid generating infinitely many parameters for recursive types like linked lists.
//! * For arrays, no parameters should be created for individual array elements.
//! However, if a particular element is accessed without iterating over the array,
//! then a parameter may still be generated for that element (see the sketch below).
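A source-level illustration of this heuristic (hypothetical code, not part of the analysis itself):

// Iterating over the array: the element offset is widened to `Top`,
// so no parameters are created for the individual elements.
for elem in buffer.iter_mut() {
    *elem = 0;
}
// A single access at a fixed offset: a parameter for this particular
// element may still be generated.
buffer[3] = 0;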
use crate::abstract_domain::AbstractDomain;
use crate::abstract_domain::AbstractLocation;
@@ -55,16 +58,12 @@ mod global_var_propagation;
use global_var_propagation::propagate_globals;
pub mod stubs;
/// The recursion depth limit for abstract locations to be tracked by the function signature analysis,
/// i.e. how many dereference operations an abstract location is allowed to contain
/// before the analysis stops tracking the location.
const POINTER_RECURSION_DEPTH_LIMIT: u64 = 2;
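For intuition, a small sketch using the `AbstractLocation::mock` test helper seen in the tests below (assuming it accepts an empty offset list, as `mock_nested` in the tests suggests): the recursion depth of a location is the number of dereference operations it contains.

let register_only = AbstractLocation::mock("r0:4", &[], 4); // just `r0`
let one_deref = AbstractLocation::mock("r0:4", &[0], 4); // `r0->[0]`
assert_eq!(register_only.recursion_depth(), 0);
assert_eq!(one_deref.recursion_depth(), 1);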
/// Generate the computation object for the fixpoint computation
/// and set the node values for all function entry nodes.
fn generate_fixpoint_computation<'a>(
project: &'a Project,
graph: &'a Graph,
pointer_recursion_depth_limit: u64,
) -> Computation<GeneralizedContext<'a, Context<'a>>> {
let context = Context::new(project, graph);
let mut computation = create_computation(context, None);
@@ -81,6 +80,7 @@ fn generate_fixpoint_computation<'a>(
&sub.tid,
&project.stack_pointer_register,
calling_convention,
pointer_recursion_depth_limit,
);
if project.cpu_architecture.contains("MIPS") {
let _ = fn_start_state
@@ -152,8 +152,35 @@ pub fn compute_function_signatures<'a>(
project: &'a Project,
graph: &'a Graph,
) -> (BTreeMap<Tid, FunctionSignature>, Vec<LogMessage>) {
let mut computation = generate_fixpoint_computation(project, graph);
let max_pointer_recursion_depth_limit: u64 = 2;
// We gradually increase the recursion depth limit used in the fixpoint computation.
// The idea is that for array accesses the offset then has time to converge to `Top`
// before IDs for nested objects are created.
// Otherwise, the algorithm would generate an object ID for the first element of an array
// before it could detect that the access is an array access that should be ignored.
let mut computation = generate_fixpoint_computation(project, graph, 0);
computation.compute_with_max_steps(100);
for pointer_recursion_limit in 1..=max_pointer_recursion_depth_limit {
for node_value in computation.node_values_mut() {
match node_value {
NodeValue::Value(state) => {
state.set_pointer_recursion_depth_limit(pointer_recursion_limit)
}
NodeValue::CallFlowCombinator {
call_stub,
interprocedural_flow,
} => {
for state in call_stub.iter_mut() {
state.set_pointer_recursion_depth_limit(pointer_recursion_limit);
}
for state in interprocedural_flow.iter_mut() {
state.set_pointer_recursion_depth_limit(pointer_recursion_limit);
}
}
}
}
computation.compute_with_max_steps(100);
}
let mut fn_sig_map = extract_fn_signatures_from_fixpoint(project, graph, computation);
// Sanitize the parameters
let mut logs = Vec::new();
......
use super::State;
use super::POINTER_RECURSION_DEPTH_LIMIT;
use crate::abstract_domain::*;
use crate::intermediate_representation::*;
@@ -48,7 +47,7 @@ impl State {
Err(_) => DataDomain::new_top(size),
}
} else if let (true, Ok(constant_offset)) = (
id.get_location().recursion_depth() < POINTER_RECURSION_DEPTH_LIMIT,
id.get_location().recursion_depth() < self.pointer_recursion_depth_limit,
offset.try_to_offset(),
) {
// Extend the abstract location string
......
use super::AccessPattern;
use super::POINTER_RECURSION_DEPTH_LIMIT;
use crate::abstract_domain::*;
use crate::intermediate_representation::*;
use crate::prelude::*;
@@ -27,6 +26,10 @@ pub struct State {
stack: MemRegion<DataDomain<BitvectorDomain>>,
/// Maps each tracked ID to an [`AccessPattern`], which tracks known access patterns to the object.
tracked_ids: DomainMap<AbstractIdentifier, AccessPattern, UnionMergeStrategy>,
/// The recursion depth limit for abstract locations to be tracked by the function signature analysis,
/// i.e. how many dereference operations an abstract location is allowed to contain
/// before the analysis stops tracking the location.
pointer_recursion_depth_limit: u64,
}
impl State {
@@ -37,6 +40,7 @@ impl State {
func_tid: &Tid,
stack_register: &Variable,
calling_convention: &CallingConvention,
pointer_recursion_depth_limit: u64,
) -> State {
let mut register_map = BTreeMap::new();
let mut tracked_ids = BTreeMap::new();
@@ -64,9 +68,18 @@ impl State {
stack_id,
stack,
tracked_ids: DomainMap::from(tracked_ids),
pointer_recursion_depth_limit,
}
}
/// Set the pointer recursion depth limit to the provided value.
///
/// Note that one should call this function for all states,
/// because merging two states with different depth limits will panic.
pub fn set_pointer_recursion_depth_limit(&mut self, limit: u64) {
self.pointer_recursion_depth_limit = limit;
}
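Given the panic noted above, a minimal sketch of the intended call pattern (the state names are hypothetical):

// Keep the limits of all states in sync before merging;
// `State::merge` below asserts that both limits are equal.
state_a.set_pointer_recursion_depth_limit(1);
state_b.set_pointer_recursion_depth_limit(1);
let merged = state_a.merge(&state_b); // equal limits, no panic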
/// Set the MIPS link register `t9` to the address of the function TID.
///
/// According to the System V ABI for MIPS the caller has to save the callee address in register `t9`
@@ -219,7 +232,7 @@ impl State {
}
};
location.extend(mem_location, self.stack_id.bytesize());
if location.recursion_depth() <= POINTER_RECURSION_DEPTH_LIMIT {
if location.recursion_depth() <= self.pointer_recursion_depth_limit {
eval_result = eval_result.merge(&DataDomain::from_target(
AbstractIdentifier::new(id.get_tid().clone(), location),
Bitvector::zero(target_size.into()).into(),
@@ -382,6 +395,10 @@ fn get_pointer_inputs_vars_of_address_expression(expr: &Expression) -> Vec<&Variable> {
impl AbstractDomain for State {
/// Merge two states
fn merge(&self, other: &Self) -> Self {
assert_eq!(
self.pointer_recursion_depth_limit,
other.pointer_recursion_depth_limit
);
let stack_id = self.stack_id.clone();
let stack = self.stack.merge(&other.stack);
State {
@@ -389,6 +406,7 @@ impl AbstractDomain for State {
stack_id,
stack,
tracked_ids: self.tracked_ids.merge(&other.tracked_ids),
pointer_recursion_depth_limit: self.pointer_recursion_depth_limit,
}
}
......
use super::*;
use crate::{bitvec, expr, variable};
use crate::{
analysis::{
forward_interprocedural_fixpoint::Context as _, function_signature::context::Context,
},
bitvec, defs, expr, variable,
};
impl State {
/// Generate a mock state for an ARM-32 state.
@@ -8,6 +13,7 @@ impl State {
&Tid::new("mock_fn"),
&variable!("sp:4"),
&CallingConvention::mock_arm32(),
2,
)
}
@@ -17,6 +23,7 @@ impl State {
&Tid::new(tid_name),
&variable!("RSP:8"),
&CallingConvention::mock_x64(),
2,
)
}
}
@@ -137,3 +144,52 @@ fn test_substitute_global_mem_address() {
DataDomain::from_target(expected_global_id, bitvec!("0x0:4").into())
);
}
#[test]
fn test_pointer_recursion_depth_limit_handling() {
let project = Project::mock_arm32();
let graph = crate::analysis::graph::get_program_cfg(&project.program);
let context = Context::new(&project, &graph);
// Test interaction of gradually increasing the pointer recursion depth limit with a loop that
// - iterates over an array
// - recursively dereferences a variable
let mut state = State::mock_arm32();
let defs = defs![
"instr_1: Store at r1:4 := r0:4",
"instr_2: r1:4 = r1:4 + 0x1:4",
"instr_3: r3:4 := Load from r3:4"
];
let array_elem_location = AbstractLocation::mock("r1:4", &[0], 4);
let array_elem_id = AbstractIdentifier::new(
state.get_current_function_tid().clone(),
array_elem_location,
);
let recursive_elem_location = AbstractLocation::mock("r3:4", &[0], 4);
let recursive_elem_id = AbstractIdentifier::new(
state.get_current_function_tid().clone(),
recursive_elem_location,
);
// Iteration with depth limit 0
state.pointer_recursion_depth_limit = 0;
let prev_state = state.clone();
for def in &defs {
state = context.update_def(&state, def).unwrap();
}
state = state.merge(&prev_state);
// No array element ID should have been created.
assert!(state.tracked_ids.get(&array_elem_id).is_none());
// No recursive access ID should have been created.
assert!(state.tracked_ids.get(&recursive_elem_id).is_none());
// Iteration with depth limit 1
state.pointer_recursion_depth_limit = 1;
let prev_state = state.clone();
for def in &defs {
state = context.update_def(&state, def).unwrap();
}
state = state.merge(&prev_state);
// No array element ID should have been created.
assert!(state.tracked_ids.get(&array_elem_id).is_none());
// But the recursive access ID should now exist.
assert!(state.tracked_ids.get(&recursive_elem_id).is_some());
}
@@ -47,6 +47,13 @@
//! - Memory objects not tracked by the Pointer Inference analysis or pointer targets missed by the Pointer Inference
//! may lead to missed CWEs in this check.
//! - Pointers freed by operations other than calls to the deallocation symbols contained in the config.json will be missed by the analysis.
//! - Pointers freed and flagged in the same call are not marked as freed in the caller.
//! This reduces false positives and duplicates, but may also result in some false negatives.
//! - Objects freed in the same call where they are created are not marked as freed in the caller.
//! This reduces false positives, but may also result in some false negatives.
//! - Pointers to recursively defined data structures like linked lists are heuristically identified and ignored.
//! This reduces false positives generated when such structures are recursively freed in a loop,
//! but also prevents detection of bugs involving such pointers.
use crate::abstract_domain::AbstractIdentifier;
use crate::prelude::*;
......
@@ -4,7 +4,7 @@ use crate::{
analysis::pointer_inference::Data,
prelude::*,
};
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
/// The state of a memory object for which at least one possible call to a `free`-like function was detected.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
@@ -159,6 +159,9 @@ impl State {
/// All TIDs that the given `param` may point to are marked as freed, i.e. pointers to them are dangling.
/// For each ID that was already marked as dangling return a string describing the root cause of a possible double free bug.
///
/// The function heuristically detects IDs related to recursive data structures (e.g. linked lists).
/// Such IDs are ignored when marking objects as freed.
pub fn handle_param_of_free_call(
&mut self,
call_tid: &Tid,
@@ -168,7 +171,9 @@
// FIXME: This function could also generate debug log messages whenever nonsensical information is detected.
// E.g. stack frame IDs or non-zero ID offsets can be indicators of other bugs.
let mut warnings = Vec::new();
for id in param.get_relative_values().keys() {
let generic_pointer_size = pi_state.stack_id.bytesize();
// Heuristically ignore recursive IDs
for id in get_non_recursive_ids(param, generic_pointer_size) {
if let Some(warning_data) = self.mark_as_freed(id, vec![call_tid.clone()], pi_state) {
warnings.push(warning_data);
}
@@ -182,6 +187,10 @@
/// Add objects that were freed in the callee of a function call to the list of dangling pointers of `self`.
/// Note that this function does not check for double frees.
///
/// The function heuristically detects when parameter values contain IDs
/// corresponding to recursive data structures (e.g. linked lists).
/// Such IDs are ignored, i.e. their object status is not transferred from the callee.
pub fn collect_freed_objects_from_called_function(
&mut self,
state_before_return: &State,
@@ -189,19 +198,33 @@
call_tid: &Tid,
pi_state: &PiState,
) {
let generic_pointer_size = pi_state.stack_id.bytesize();
let call_tid_with_suffix = call_tid.clone().with_id_suffix("_param");
for (callee_id, callee_object_state) in state_before_return.dangling_objects.iter() {
if let Some(caller_value) = id_replacement_map.get(callee_id) {
for caller_id in caller_value.get_relative_values().keys() {
// Heuristically filter out recursive IDs
for caller_id in get_non_recursive_ids(caller_value, generic_pointer_size) {
if caller_id.get_tid() == call_tid
|| caller_id.get_tid() == &call_tid_with_suffix
{
// FIXME: We heuristically ignore free operations if they happen in the same call as the creation of the object.
// This reduces false positives, but also produces false negatives for some returned dangling pointers.
continue;
}
match callee_object_state {
ObjectState::Dangling(callee_free_path)
| ObjectState::AlreadyFlagged(callee_free_path) => {
ObjectState::Dangling(callee_free_path) => {
let mut free_id_path = callee_free_path.clone();
free_id_path.push(call_tid.clone());
// FIXME: If the object was not created in the callee and it is also marked as flagged in the callee
// then one could interpret accesses in the caller as duplicates and mark the object ID as already flagged.
// But analysis errors in the callee could mask real Use-After-Frees in the caller if we do that.
let _ = self.mark_as_freed(caller_id, free_id_path, pi_state);
}
// FIXME: To reduce false positives and duplicates, we heuristically assume
// that if an object is already flagged in the callee,
// then Use-After-Frees in the caller are duplicates of the flagged access in the callee
// and that the corresponding dangling objects do not reach the caller in this case.
// Note that this heuristic will produce false negatives in some cases.
ObjectState::AlreadyFlagged(_) => (),
}
}
}
@@ -277,10 +300,34 @@
}
}
/// Return the set of relative IDs contained in the input `data` after filtering out recursive IDs.
///
/// An ID is *recursive*, i.e. assumed to correspond to a recursive data structure like a linked list,
/// if its parent abstract location is also contained as an ID in `data`
/// or if some ID contained in `data` has this ID as its parent.
fn get_non_recursive_ids(
data: &Data,
generic_pointer_size: ByteSize,
) -> BTreeSet<&AbstractIdentifier> {
let ids: BTreeSet<_> = data.get_relative_values().keys().collect();
let mut filtered_ids = ids.clone();
for id in &ids {
if let Some(parent_id) = id.get_id_with_parent_location(generic_pointer_size) {
if ids.contains(&parent_id) {
filtered_ids.remove(*id);
filtered_ids.remove(&parent_id);
}
}
}
filtered_ids
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::{bitvec, intermediate_representation::parsing, variable};
use crate::{
abstract_domain::DataDomain, bitvec, intermediate_representation::parsing, variable,
};
use std::collections::BTreeSet;
#[test]
@@ -386,4 +433,34 @@
&ObjectState::Dangling(vec![Tid::new("free_tid"), Tid::new("call_tid")])
);
}
#[test]
fn test_filtering_of_recursive_ids() {
let data = DataDomain::mock_from_target_map(BTreeMap::from([
(
AbstractIdentifier::mock_nested("time1", "r0:4", &[], 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock_nested("time1", "r0:4", &[0], 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock_nested("unique1", "r0:4", &[], 4),
bitvec!("0x0:4").into(),
),
(
AbstractIdentifier::mock_nested("unique2", "r0:4", &[0], 4),
bitvec!("0x0:4").into(),
),
]));
let filtered_ids = get_non_recursive_ids(&data, ByteSize::new(4));
assert_eq!(
filtered_ids,
BTreeSet::from([
&AbstractIdentifier::mock_nested("unique1", "r0:4", &[], 4),
&AbstractIdentifier::mock_nested("unique2", "r0:4", &[0], 4)
])
);
}
}