Unverified Commit 270b4d4e by Enkelmann Committed by GitHub

refactor CWE-416 check (#429)

parent 2e11e843
0.8-dev
===
- Improve logic and context information generation of CWE-416 (use-after-free) check (PRs #423, #429)
0.7 (2023-06)
====
......
[workspace]
members = ["src/cwe_checker_lib", "src/caller", "test", "src/installer"]
resolver = "2"
......@@ -76,7 +76,7 @@ impl<'a> Context<'a> {
state: &mut State,
call_tid: &Tid,
call_params: impl IntoIterator<Item = &'b Arg>,
) -> Option<Vec<(AbstractIdentifier, Tid)>> {
) -> Option<Vec<(AbstractIdentifier, Vec<Tid>)>> {
let mut warnings = Vec::new();
for arg in call_params {
if let Some(arg_value) = self
......@@ -174,7 +174,7 @@ impl<'a> Context<'a> {
name: &str,
description: String,
location: &Tid,
warning_causes: Vec<(AbstractIdentifier, Tid)>,
warning_causes: Vec<(AbstractIdentifier, Vec<Tid>)>,
root_function: &Tid,
) {
let cwe_warning = CweWarning {
......
......@@ -48,21 +48,13 @@
//! may lead to missed CWEs in this check.
//! - Pointers freed by other operations than calls to the deallocation symbols contained in the config.json will be missed by the analysis.
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::collections::HashSet;
use crate::abstract_domain::AbstractDomain;
use crate::abstract_domain::AbstractIdentifier;
use crate::analysis::fixpoint::Computation;
use crate::analysis::forward_interprocedural_fixpoint::GeneralizedContext;
use crate::analysis::graph::Node;
use crate::analysis::interprocedural_fixpoint_generic::NodeValue;
use crate::analysis::pointer_inference::PointerInference;
use crate::prelude::*;
use crate::utils::log::CweWarning;
use crate::utils::log::LogMessage;
use crate::CweModule;
use std::collections::BTreeSet;
use std::collections::HashSet;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
......@@ -123,16 +115,13 @@ pub fn check_cwe(
fixpoint_computation.compute_with_max_steps(100);
let mut warnings = HashSet::new();
let mut warnings = BTreeSet::new();
while let Ok(warning) = cwe_warning_receiver.try_recv() {
warnings.insert(warning);
}
let return_site_states = collect_return_site_states(&fixpoint_computation);
let cwes = generate_context_information_for_warnings(
return_site_states,
warnings,
config.always_include_full_path_to_free_site,
analysis_results.pointer_inference.unwrap(),
);
let mut logs = BTreeSet::new();
......@@ -151,18 +140,15 @@ pub struct WarningContext {
cwe: CweWarning,
/// The TID of the function in which the CWE warning was generated.
root_function: Tid,
/// Pairs of object IDs and the sites where the object was freed.
/// If the free-site is the same function call from which the object ID originates
/// then the CWE needs to be post-processed to give more exact information about the
/// free-site inside the function call.
object_and_free_ids: Vec<(AbstractIdentifier, Tid)>,
/// Pairs of object IDs and the paths to the actual free sites.
object_and_free_ids: Vec<(AbstractIdentifier, Vec<Tid>)>,
}
impl WarningContext {
/// Generate a new warning context object.
pub fn new(
cwe: CweWarning,
object_and_free_ids: Vec<(AbstractIdentifier, Tid)>,
object_and_free_ids: Vec<(AbstractIdentifier, Vec<Tid>)>,
root_function: Tid,
) -> Self {
WarningContext {
......@@ -173,203 +159,95 @@ impl WarningContext {
}
}
/// For each function call TID collect the state of the callee just before returning to the caller.
fn collect_return_site_states<'a>(
fixpoint: &Computation<GeneralizedContext<'a, Context<'a>>>,
) -> HashMap<Tid, State> {
let mut call_tid_to_return_state_map: HashMap<Tid, State> = HashMap::new();
let graph = fixpoint.get_graph();
for node in graph.node_indices() {
let call_tid = match graph[node] {
Node::CallReturn { call, return_: _ } => call.0.term.jmps[0].tid.clone(),
_ => continue,
};
let node_value = match fixpoint.get_node_value(node) {
Some(value) => value,
None => continue,
};
let return_state = match node_value {
NodeValue::CallFlowCombinator {
call_stub: _,
interprocedural_flow,
} => {
if let Some(state) = interprocedural_flow {
state.clone()
} else {
continue;
}
}
_ => panic!("Unexpexted NodeValue type encountered."),
};
// There exists one CallReturn node for each return instruction in the callee,
// so we have to merge the corresponding states here.
call_tid_to_return_state_map
.entry(call_tid)
.and_modify(|saved_return_state| {
*saved_return_state = saved_return_state.merge(&return_state)
})
.or_insert(return_state);
}
call_tid_to_return_state_map
}
/// If the ID of the "free"-site is the same call from which the object ID originates from
/// then (recursively) identify the real "free"-site inside the call.
/// Also return a list of call TIDs that lead to the real "free"-site.
///
/// The function returns an error if the source object was already flagged in some of the callees.
/// In such a case the corresponding CWE warning should be removed,
/// since there already exists another CWE warning with the same root cause.
/// Shorten the path to the "free"-site so that it ends in the first call
/// that is not contained in the path to the object origin.
fn get_shortended_path_to_source_of_free(
object_id: &AbstractIdentifier,
free_id: &Tid,
return_site_states: &HashMap<Tid, State>,
) -> Result<(Tid, Vec<Tid>), ()> {
if let (inner_object, Some(path_hint_id)) = object_id.without_last_path_hint() {
if path_hint_id == *free_id {
if let Some(return_state) = return_site_states.get(free_id) {
if return_state.is_id_already_flagged(&inner_object) {
return Err(());
}
if let Some(inner_free) = return_state.get_free_tid_if_dangling(&inner_object) {
let (root_free, mut callgraph_ids) = get_shortended_path_to_source_of_free(
&inner_object,
inner_free,
return_site_states,
)?;
callgraph_ids.push(path_hint_id);
return Ok((root_free, callgraph_ids));
}
free_path: &[Tid],
) -> Vec<Tid> {
let mut object_id = object_id.clone();
let mut shortened_free_path = free_path.to_vec();
while let (shortened_id, Some(last_path_hint)) = object_id.without_last_path_hint() {
if Some(&last_path_hint) == shortened_free_path.last() {
object_id = shortened_id;
shortened_free_path.pop();
} else {
break;
}
}
// Return the free path without the shortened free path
if shortened_free_path.is_empty() {
free_path.to_vec()
} else {
free_path[(shortened_free_path.len() - 1)..].to_vec()
}
// No inner source apart from the given free_id could be identified
Ok((free_id.clone(), Vec::new()))
}
/// Get the full path in the call-graph connecting the `object_id` to the site where it gets freed.
/// Note that there may be several paths to "free" sites in the call-graph.
/// This function returns just one (random) path to such a "free" site.
///
/// When calling this function non-recursively, the `collectect_callgraph_ids` should be empty.
///
/// The function returns an error if the source object was already flagged in some of the callees.
/// In such a case the corresponding CWE warning should be removed,
/// since there already exists another CWE warning with the same root cause.
fn get_full_path_to_source_of_free<'a>(
/// Get the part of the path to the "free"-site that is not shared with the path to the object origin site.
fn get_root_cause_for_returned_dangling_pointers(
object_id: &AbstractIdentifier,
free_id: &Tid,
return_site_states: &HashMap<Tid, State>,
pointer_inference: &'a PointerInference<'a>,
mut collected_callgraph_ids: Vec<Tid>,
) -> Result<(Tid, Vec<Tid>), ()> {
if collected_callgraph_ids.contains(free_id) {
// This path is recursive and thus not a (shortest) path to an actual `free`-site.
return Err(());
}
// Get callee information. If unsuccessful, then the `free_id` should already be the source site.
let id_replacement_map = match pointer_inference.get_id_renaming_map_at_call_tid(free_id) {
Some(map) => map,
None => return Ok((free_id.clone(), collected_callgraph_ids)),
};
let return_state = match return_site_states.get(free_id) {
Some(state) => state,
None => return Ok((free_id.clone(), collected_callgraph_ids)),
};
// Check whether the free site in the callee is already flagged.
for flagged_id in return_state.get_already_flagged_objects() {
if let Some(caller_data) = id_replacement_map.get(&flagged_id) {
if caller_data.get_relative_values().contains_key(object_id) {
// A corresponding object ID was already flagged in a callee,
// so we want to suppress this CWE warning as a duplicate of the already flagged CWE in the callee.
if object_id.get_tid() != &return_state.current_fn_tid {
return Err(());
free_path: &[Tid],
) -> Vec<Tid> {
let mut object_id = object_id.clone();
let mut shortened_free_path = free_path.to_vec();
while let (shortened_id, Some(last_path_hint)) = object_id.without_last_path_hint() {
if Some(&last_path_hint) == shortened_free_path.last() {
object_id = shortened_id;
shortened_free_path.pop();
} else {
// This is a recursive call and the object is a parameter to this call.
// We treat the call as the root cause
// to avoid erroneously suppressing some CWE warnings based on recursive calls.
return Ok((free_id.clone(), collected_callgraph_ids));
}
}
}
}
// If the object is a parameter to the callee then recursively find the real free site inside the callee
for (callee_id, free_site_in_callee) in return_state.get_dangling_objects() {
if collected_callgraph_ids.contains(&free_site_in_callee) {
// we skip potentially recursive paths
continue;
}
if let Some(caller_data) = id_replacement_map.get(&callee_id) {
if caller_data.get_relative_values().contains_key(object_id) {
collected_callgraph_ids.push(free_id.clone());
return get_full_path_to_source_of_free(
&callee_id,
&free_site_in_callee,
return_site_states,
pointer_inference,
collected_callgraph_ids,
);
}
}
}
// If the object originates from the same call that also frees the object,
// then use the path hints of the object ID to find the `free` site inside the callee.
if let (inner_object, Some(path_hint_id)) = object_id.without_last_path_hint() {
if path_hint_id == *free_id {
if let Some(return_state) = return_site_states.get(free_id) {
if return_state.is_id_already_flagged(&inner_object) {
return Err(());
}
if let Some(inner_free) = return_state.get_free_tid_if_dangling(&inner_object) {
collected_callgraph_ids.push(free_id.clone());
return get_full_path_to_source_of_free(
&inner_object,
inner_free,
return_site_states,
pointer_inference,
collected_callgraph_ids,
);
}
break;
}
}
if shortened_free_path.is_empty() {
vec![free_path[0].clone()]
} else {
shortened_free_path
}
// The `free_id` is an internal call, but no `free` site was found inside the callee.
// In theory, this case should never happen.
// We treat it like the `free_id` is the source `free` to at least return some useful information if it happens anyway.
Ok((free_id.clone(), collected_callgraph_ids))
}
/// Return `true` if the object originates in the same call as the "free"-site.
fn is_case_of_returned_dangling_pointer(object_id: &AbstractIdentifier, free_path: &[Tid]) -> bool {
// This implicitly uses that the `free_path` is never empty.
object_id.get_path_hints().last() == free_path.last()
}
/// Generate context information for CWE warnings.
/// E.g. relevant callgraph addresses are added to each CWE here.
fn generate_context_information_for_warnings<'a>(
return_site_states: HashMap<Tid, State>,
warnings: HashSet<WarningContext>,
fn generate_context_information_for_warnings(
warnings: BTreeSet<WarningContext>,
generate_full_paths_to_free_site: bool,
pointer_inference: &'a PointerInference<'a>,
) -> BTreeSet<CweWarning> {
let mut processed_warnings = BTreeSet::new();
let mut root_causes_for_returned_dangling_pointers = HashSet::new();
for mut warning in warnings {
let mut context_infos = Vec::new();
let mut relevant_callgraph_tids = Vec::new();
for (object_id, free_id) in warning.object_and_free_ids.iter() {
let source_free_site_info = if generate_full_paths_to_free_site {
get_full_path_to_source_of_free(
object_id,
free_id,
&return_site_states,
pointer_inference,
Vec::new(),
)
let mut relevant_callgraph_tids = BTreeSet::new();
for (object_id, mut free_path) in warning.object_and_free_ids.into_iter() {
if is_case_of_returned_dangling_pointer(&object_id, &free_path) {
let root_cause =
get_root_cause_for_returned_dangling_pointers(&object_id, &free_path);
if root_causes_for_returned_dangling_pointers.contains(&root_cause) {
// Skip this warning root cause, since another warning with the same root cause was already generated.
// FIXME: This is a coarse heuristic to reduce false positives.
// However, it is still possible that some but not all of these cases are real bugs
// and that this heuristic chooses the wrong representative.
continue;
} else {
get_shortended_path_to_source_of_free(object_id, free_id, &return_site_states)
};
if let Ok((root_free_id, mut callgraph_ids_to_free)) = source_free_site_info {
relevant_callgraph_tids.append(&mut callgraph_ids_to_free);
root_causes_for_returned_dangling_pointers.insert(root_cause);
}
}
if !generate_full_paths_to_free_site {
free_path = get_shortended_path_to_source_of_free(&object_id, &free_path);
}
for id in &free_path[1..] {
relevant_callgraph_tids.insert(id.clone());
}
context_infos.push(format!(
"Accessed ID {object_id} may have been freed before at {root_free_id}."
"Accessed ID {object_id} may have been freed before at {}.",
free_path[0]
));
}
}
if context_infos.is_empty() {
// Skip (delete) this CWE warning,
// since another warning with the same root cause was already generated in some callee.
......@@ -402,33 +280,23 @@ pub mod tests {
#[test]
fn test_warning_context_generation() {
let project = Project::mock_x64();
let pointer_inference = PointerInference::mock(&project);
let id = AbstractIdentifier::new(
Tid::new("object_origin_tid"),
AbstractLocation::Register(variable!("RAX:8")),
);
let path_id = id.with_path_hint(Tid::new("call_tid")).unwrap();
let object_and_free_ids = vec![(path_id, Tid::new("call_tid"))];
let object_id = id.with_path_hint(Tid::new("call_tid")).unwrap();
let object_and_free_ids = vec![(
object_id.clone(),
vec![Tid::new("free_tid"), Tid::new("call_tid")],
)];
let cwe = CweWarning::new("CWE416", "test", "mock_cwe");
let warning_context =
WarningContext::new(cwe, object_and_free_ids, Tid::new("root_func_tid"));
let warnings = HashSet::from([warning_context]);
let warnings = BTreeSet::from([warning_context.clone()]);
// Test warning context generation
let return_state = State::mock(
Tid::new("callee_tid"),
&[(id.clone(), Tid::new("free_tid"))],
&[],
);
let return_site_states = HashMap::from([(Tid::new("call_tid"), return_state)]);
let processed_warnings = generate_context_information_for_warnings(
return_site_states,
warnings.clone(),
false,
&pointer_inference,
);
let processed_warnings = generate_context_information_for_warnings(warnings.clone(), false);
assert_eq!(processed_warnings.len(), 1);
let processed_cwe = processed_warnings.iter().next().unwrap();
assert_eq!(&processed_cwe.other[0], &[
......@@ -437,14 +305,21 @@ pub mod tests {
]);
// Test warning filtering
let return_state = State::mock(Tid::new("callee_tid"), &[], &[id.clone()]);
let return_site_states = HashMap::from([(Tid::new("call_tid"), return_state)]);
let processed_warnings = generate_context_information_for_warnings(
return_site_states,
warnings,
false,
&pointer_inference,
);
assert_eq!(processed_warnings.len(), 0)
let object_and_free_ids_2 = vec![(
object_id
.with_path_hint(Tid::new("outer_call_tid"))
.unwrap(),
vec![
Tid::new("free_tid"),
Tid::new("call_tid"),
Tid::new("outer_call_tid"),
],
)];
let cwe_2 = CweWarning::new("CWE416", "test", "mock_cwe_2");
let warning_context_2 =
WarningContext::new(cwe_2, object_and_free_ids_2, Tid::new("root_func_tid_2"));
let warnings = BTreeSet::from([warning_context, warning_context_2]);
let processed_warnings = generate_context_information_for_warnings(warnings, false);
assert_eq!(processed_warnings.len(), 1)
}
}
......@@ -10,23 +10,44 @@ use std::collections::BTreeMap;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
enum ObjectState {
/// The object is already freed, i.e. pointers to it are dangling.
/// The associated TID denotes the point in time when the object was freed.
Dangling(Tid),
/// The associated TIDs denote the point in time when the object was freed
/// and possibly the call path taken to that point in time.
Dangling(Vec<Tid>),
/// The object is already freed and a use-after-free CWE message for it was already generated.
/// This object state is used to prevent duplicate CWE warnings with the same root cause.
AlreadyFlagged,
/// It still holds a path to a point in time where the object was freed.
AlreadyFlagged(Vec<Tid>),
}
impl AbstractDomain for ObjectState {
/// Merge two object states.
/// If both object states are dangling then use the source TID of `self` in the result.
/// If both object states are identical then use the shorter path to `free` in the result.
fn merge(&self, other: &Self) -> Self {
use std::cmp::Ordering;
match (self, other) {
(ObjectState::AlreadyFlagged, _) | (_, ObjectState::AlreadyFlagged) => {
ObjectState::AlreadyFlagged
}
(ObjectState::Dangling(tid), ObjectState::Dangling(other_tid)) => {
ObjectState::Dangling(std::cmp::min(tid, other_tid).clone())
(
ObjectState::AlreadyFlagged(free_path),
ObjectState::AlreadyFlagged(other_free_path),
) => {
let shortest_path = match free_path.len().cmp(&other_free_path.len()) {
Ordering::Less => free_path.clone(),
Ordering::Equal => std::cmp::min(free_path, other_free_path).clone(),
Ordering::Greater => other_free_path.clone(),
};
ObjectState::AlreadyFlagged(shortest_path)
}
(ObjectState::AlreadyFlagged(free_path), _)
| (_, ObjectState::AlreadyFlagged(free_path)) => {
ObjectState::AlreadyFlagged(free_path.clone())
}
(ObjectState::Dangling(free_path), ObjectState::Dangling(other_free_path)) => {
let shortest_path = match free_path.len().cmp(&other_free_path.len()) {
Ordering::Less => free_path.clone(),
Ordering::Equal => std::cmp::min(free_path, other_free_path).clone(),
Ordering::Greater => other_free_path.clone(),
};
ObjectState::Dangling(shortest_path)
}
}
}
......@@ -37,12 +58,19 @@ impl AbstractDomain for ObjectState {
}
}
/// The `State` currently only keeps track of the list of TIDs of memory object that may have been freed already
/// The `State` keeps track of the list of abstract IDs of memory objects that may have been freed already
/// together with the corresponding object states.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct State {
/// The TID of the current function.
pub current_fn_tid: Tid,
/// Map from the abstract ID of dangling objects to their object state.
dangling_objects: DomainMap<AbstractIdentifier, ObjectState, UnionMergeStrategy>,
/// Memory objects that were generated and freed in the same call are tracked in a separate map.
/// Such objects are often analysis errors.
/// Tracking them separately prevents them from masking genuine Use-After-Free cases in the caller.
dangling_objects_generated_and_freed_in_same_call:
DomainMap<AbstractIdentifier, ObjectState, UnionMergeStrategy>,
}
impl State {
......@@ -51,64 +79,37 @@ impl State {
State {
current_fn_tid,
dangling_objects: BTreeMap::new().into(),
dangling_objects_generated_and_freed_in_same_call: BTreeMap::new().into(),
}
}
/// Return whether the given object ID is already flagged in this state,
/// i.e. whether a CWE warning was already generated for this object.
pub fn is_id_already_flagged(&self, object_id: &AbstractIdentifier) -> bool {
self.dangling_objects.get(object_id) == Some(&ObjectState::AlreadyFlagged)
}
/// If the given `object_id` represents a dangling object, return the TID of the site where it was freed.
pub fn get_free_tid_if_dangling(&self, object_id: &AbstractIdentifier) -> Option<&Tid> {
if let Some(ObjectState::Dangling(free_tid)) = self.dangling_objects.get(object_id) {
Some(free_tid)
} else {
None
}
}
/// Return a list of abstract identifiers that are marked as "flagged" in the current state,
/// i.e. they already triggered the generation of a CWE warning.
pub fn get_already_flagged_objects(&self) -> Vec<AbstractIdentifier> {
self.dangling_objects
.iter()
.filter_map(|(id, object_state)| match object_state {
ObjectState::AlreadyFlagged => Some(id.clone()),
_ => None,
})
.collect()
}
/// Return a list of abstract identifiers that are marked as "dangling" in the current state
/// together with the TIDs of the corresponding `free` instruction.
pub fn get_dangling_objects(&self) -> Vec<(AbstractIdentifier, Tid)> {
self.dangling_objects
.iter()
.filter_map(|(id, object_state)| match object_state {
ObjectState::AlreadyFlagged => None,
ObjectState::Dangling(free_id) => Some((id.clone(), free_id.clone())),
})
.collect()
}
/// Check the given address on whether it may point to already freed memory.
/// For each possible dangling pointer target the abstract ID of the object
/// and the TID of the corresponding site where the object was freed is returned.
/// and the path to the corresponding site where the object was freed is returned.
/// The object states of corresponding memory objects are set to [`ObjectState::AlreadyFlagged`]
/// to prevent reporting duplicate CWE messages with the same root cause.
pub fn check_address_for_use_after_free(
&mut self,
address: &Data,
) -> Option<Vec<(AbstractIdentifier, Tid)>> {
) -> Option<Vec<(AbstractIdentifier, Vec<Tid>)>> {
let mut free_ids_of_dangling_pointers = Vec::new();
for id in address.get_relative_values().keys() {
if let Some(ObjectState::Dangling(free_id)) = self.dangling_objects.get(id) {
free_ids_of_dangling_pointers.push((id.clone(), free_id.clone()));
if let Some(ObjectState::Dangling(free_id_path)) = self.dangling_objects.get(id) {
let free_id_path = free_id_path.clone();
free_ids_of_dangling_pointers.push((id.clone(), free_id_path.clone()));
self.dangling_objects
.insert(id.clone(), ObjectState::AlreadyFlagged);
.insert(id.clone(), ObjectState::AlreadyFlagged(free_id_path));
}
if let Some(ObjectState::Dangling(free_id_path)) = self
.dangling_objects_generated_and_freed_in_same_call
.get(id)
{
let free_id_path = free_id_path.clone();
free_ids_of_dangling_pointers.push((id.clone(), free_id_path.clone()));
self.dangling_objects_generated_and_freed_in_same_call
.insert(id.clone(), ObjectState::AlreadyFlagged(free_id_path));
}
}
if free_ids_of_dangling_pointers.is_empty() {
......@@ -118,6 +119,44 @@ impl State {
}
}
/// Mark the given object ID as freed with the given `free_id_path` denoting the path to the site where it is freed.
///
/// If the object ID was already marked as dangling,
/// return it plus the (previously saved) path to the site where it was freed.
#[must_use]
fn mark_as_freed(
&mut self,
object_id: &AbstractIdentifier,
free_id_path: Vec<Tid>,
pi_state: &PiState,
) -> Option<(AbstractIdentifier, Vec<Tid>)> {
if pi_state.memory.is_unique_object(object_id).ok() == Some(false) {
// FIXME: We cannot distinguish different objects represented by the same ID.
// So to avoid producing lots of false positive warnings
// we ignore these cases by not marking these IDs as freed.
return None;
}
if object_id.get_path_hints().last() == free_id_path.last() {
// The object was created in the same call as it is now freed.
if let Some(ObjectState::Dangling(old_free_id_path)) = self
.dangling_objects_generated_and_freed_in_same_call
.insert(
object_id.clone(),
ObjectState::Dangling(free_id_path.clone()),
)
{
return Some((object_id.clone(), old_free_id_path.clone()));
}
} else if let Some(ObjectState::Dangling(old_free_id_path)) = self.dangling_objects.insert(
object_id.clone(),
ObjectState::Dangling(free_id_path.clone()),
) {
return Some((object_id.clone(), old_free_id_path.clone()));
}
None
}
/// All TIDs that the given `param` may point to are marked as freed, i.e. pointers to them are dangling.
/// For each ID that was already marked as dangling return a string describing the root cause of a possible double free bug.
pub fn handle_param_of_free_call(
......@@ -125,22 +164,13 @@ impl State {
call_tid: &Tid,
param: &Data,
pi_state: &PiState,
) -> Option<Vec<(AbstractIdentifier, Tid)>> {
) -> Option<Vec<(AbstractIdentifier, Vec<Tid>)>> {
// FIXME: This function could also generate debug log messages whenever nonsensical information is detected.
// E.g. stack frame IDs or non-zero ID offsets can be indicators of other bugs.
let mut warnings = Vec::new();
for id in param.get_relative_values().keys() {
if pi_state.memory.is_unique_object(id).ok() == Some(false) {
// FIXME: We cannot distinguish different objects represented by the same ID.
// So to avoid producing lots of false positive warnings
// we ignore these cases by not marking these IDs as freed.
continue;
}
if let Some(ObjectState::Dangling(old_free_id)) = self
.dangling_objects
.insert(id.clone(), ObjectState::Dangling(call_tid.clone()))
{
warnings.push((id.clone(), old_free_id.clone()));
if let Some(warning_data) = self.mark_as_freed(id, vec![call_tid.clone()], pi_state) {
warnings.push(warning_data);
}
}
if !warnings.is_empty() {
......@@ -151,8 +181,7 @@ impl State {
}
/// Add objects that were freed in the callee of a function call to the list of dangling pointers of `self`.
/// May return a list of warnings if cases of possible double frees are detected,
/// i.e. if an already freed object may also have been freed in the callee.
/// Note that this function does not check for double frees.
pub fn collect_freed_objects_from_called_function(
&mut self,
state_before_return: &State,
......@@ -163,23 +192,15 @@ impl State {
for (callee_id, callee_object_state) in state_before_return.dangling_objects.iter() {
if let Some(caller_value) = id_replacement_map.get(callee_id) {
for caller_id in caller_value.get_relative_values().keys() {
if pi_state.memory.is_unique_object(caller_id).ok() != Some(false) {
// FIXME: We cannot distinguish different objects represented by the same ID.
// So to avoid producing lots of false positive warnings we ignore these cases.
match (callee_object_state, self.dangling_objects.get(caller_id)) {
// Case 1: The dangling object is unknown to the caller, so we add it.
(ObjectState::Dangling(_), None)
| (ObjectState::AlreadyFlagged, None) => {
self.dangling_objects.insert(
caller_id.clone(),
ObjectState::Dangling(call_tid.clone()),
);
}
// Case 2: The dangling object is already known to the caller.
// If this were a case of Use-After-Free, then this should have been flagged when checking the call parameters.
// Thus we can simply leave the object state as it is.
(_, Some(ObjectState::Dangling(_)))
| (_, Some(&ObjectState::AlreadyFlagged)) => (),
match callee_object_state {
ObjectState::Dangling(callee_free_path)
| ObjectState::AlreadyFlagged(callee_free_path) => {
let mut free_id_path = callee_free_path.clone();
free_id_path.push(call_tid.clone());
// FIXME: If the object was not created in the callee and it is also marked as flagged in the callee
// then one could interpret accesses in the caller as duplicates and mark the object ID as already flagged.
// But analysis errors in the callee could mask real Use-After-Frees in the caller if we do that.
let _ = self.mark_as_freed(caller_id, free_id_path, pi_state);
}
}
}
......@@ -194,6 +215,9 @@ impl AbstractDomain for State {
State {
current_fn_tid: self.current_fn_tid.clone(),
dangling_objects: self.dangling_objects.merge(&other.dangling_objects),
dangling_objects_generated_and_freed_in_same_call: self
.dangling_objects_generated_and_freed_in_same_call
.merge(&other.dangling_objects_generated_and_freed_in_same_call),
}
}
......@@ -209,23 +233,45 @@ impl State {
#[allow(dead_code)]
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let format_vec = |vec| {
let mut string = String::new();
for elem in vec {
string += &format!("{},", elem);
}
string
};
let mut state_map = Map::new();
state_map.insert(
"current_function".to_string(),
Value::String(format!("{}", self.current_fn_tid)),
);
for (id, object_state) in self.dangling_objects.iter() {
if let ObjectState::Dangling(free_tid) = object_state {
state_map.insert(
match object_state {
ObjectState::Dangling(free_path) => state_map.insert(
format!("{id}"),
Value::String(format!("Dangling({free_tid})")),
);
} else {
state_map.insert(
Value::String(format!("Dangling([{}])", format_vec(free_path))),
),
ObjectState::AlreadyFlagged(free_path) => state_map.insert(
format!("{id}"),
Value::String("Already flagged".to_string()),
);
Value::String(format!("Already flagged([{}])", format_vec(free_path))),
),
};
}
for (id, object_state) in self
.dangling_objects_generated_and_freed_in_same_call
.iter()
{
match object_state {
ObjectState::Dangling(free_path) => state_map.insert(
format!("{id} (already dangling in callee)"),
Value::String(format!("Dangling([{}])", format_vec(free_path))),
),
ObjectState::AlreadyFlagged(free_path) => state_map.insert(
format!("{id} (already dangling in callee)"),
Value::String(format!("Already flagged([{}])", format_vec(free_path))),
),
};
}
Value::Object(state_map)
}
......@@ -237,37 +283,16 @@ pub mod tests {
use crate::{bitvec, intermediate_representation::parsing, variable};
use std::collections::BTreeSet;
impl State {
pub fn mock(
current_fn_tid: Tid,
dangling_ids: &[(AbstractIdentifier, Tid)],
already_flagged_ids: &[AbstractIdentifier],
) -> Self {
let mut state = State::new(current_fn_tid);
for (id, free_id) in dangling_ids.iter() {
state
.dangling_objects
.insert(id.clone(), ObjectState::Dangling(free_id.clone()));
}
for id in already_flagged_ids.iter() {
state
.dangling_objects
.insert(id.clone(), ObjectState::AlreadyFlagged);
}
state
}
}
#[test]
fn test_check_address_for_use_after_free() {
let mut state = State::new(Tid::new("current_fn"));
state.dangling_objects.insert(
AbstractIdentifier::mock("obj_id", "RAX", 8),
ObjectState::Dangling(Tid::new("free_call")),
ObjectState::Dangling(vec![Tid::new("free_call")]),
);
state.dangling_objects.insert(
AbstractIdentifier::mock("flagged_obj_id", "RAX", 8),
ObjectState::AlreadyFlagged,
ObjectState::AlreadyFlagged(vec![Tid::new("free_call")]),
);
let address = Data::mock_from_target_map(BTreeMap::from([
(
......@@ -293,14 +318,14 @@ pub mod tests {
.dangling_objects
.get(&AbstractIdentifier::mock("obj_id", "RAX", 8))
.unwrap(),
ObjectState::AlreadyFlagged
ObjectState::AlreadyFlagged(vec![Tid::new("free_call")])
);
assert_eq!(
*state
.dangling_objects
.get(&AbstractIdentifier::mock("flagged_obj_id", "RAX", 8))
.unwrap(),
ObjectState::AlreadyFlagged
ObjectState::AlreadyFlagged(vec![Tid::new("free_call")])
);
}
......@@ -321,7 +346,7 @@ pub mod tests {
.dangling_objects
.get(&AbstractIdentifier::mock("obj_id", "RAX", 8))
.unwrap(),
ObjectState::Dangling(Tid::new("free_call"))
ObjectState::Dangling(vec![Tid::new("free_call")])
);
// Check that a second free operation yields a double free warning.
assert!(state
......@@ -335,7 +360,7 @@ pub mod tests {
let mut state_before_return = State::new(Tid::new("callee_fn_tid"));
state_before_return.dangling_objects.insert(
AbstractIdentifier::mock("callee_obj_tid", "RAX", 8),
ObjectState::Dangling(Tid::new("free_tid")),
ObjectState::Dangling(vec![Tid::new("free_tid")]),
);
let pi_state = PiState::new(&variable!("RSP:8"), Tid::new("call"), BTreeSet::new());
let id_replacement_map = BTreeMap::from([(
......@@ -358,7 +383,7 @@ pub mod tests {
.dangling_objects
.get(&AbstractIdentifier::mock("caller_tid", "RBX", 8))
.unwrap(),
&ObjectState::Dangling(Tid::new("call_tid"))
&ObjectState::Dangling(vec![Tid::new("free_tid"), Tid::new("call_tid")])
);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment