Unverified Commit cbe2f035 by Enkelmann Committed by GitHub

More detailed reports for CWE-119 check (#333)

parent e23a4fcd
use super::Context;
use crate::abstract_domain::{AbstractIdentifier, DataDomain, IntervalDomain, TryToBitvec};
use crate::prelude::*;
/// This struct contains the computed bound for a memory object.
/// If the object is a parameter object,
/// it also contains metadata about the source object used to determine the bound for the parameter object.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct BoundsMetadata {
    /// The source object (and the offset into the source object that the object points to)
    /// if the bound of the memory object is derived from another object (e.g. for parameter objects).
    /// `None` if the bound was not derived from another object.
    pub source: Option<DataDomain<IntervalDomain>>,
    /// The resulting bound for the memory object,
    /// given as a byte offset relative to the pointer into the object
    /// (e.g. `0` and the object size for a pointer to the start of a heap object).
    pub resulting_bound: i64,
}
impl BoundsMetadata {
    /// Construct a bound that carries no source object metadata.
    pub fn new(resulting_bound: i64) -> BoundsMetadata {
        BoundsMetadata {
            resulting_bound,
            source: None,
        }
    }

    /// Construct a bound whose source is given by the pointer `id + offset`.
    pub fn from_source(
        id: &AbstractIdentifier,
        offset: &IntervalDomain,
        resulting_bound: i64,
    ) -> BoundsMetadata {
        let source_pointer = DataDomain::from_target(id.clone(), offset.clone());
        BoundsMetadata {
            resulting_bound,
            source: Some(source_pointer),
        }
    }
}
/// Ensure that `bound` holds the smallest bound seen so far:
/// if `bound` is `None` it is set to `new_bound`,
/// otherwise `new_bound` replaces the current value only if it is strictly smaller.
fn replace_if_smaller_bound(bound: &mut Option<BoundsMetadata>, new_bound: BoundsMetadata) {
    let should_replace = match bound {
        Some(old_bound) => old_bound.resulting_bound > new_bound.resulting_bound,
        None => true,
    };
    if should_replace {
        *bound = Some(new_bound);
    }
}
/// Ensure that `bound` holds the largest bound seen so far:
/// if `bound` is `None` it is set to `new_bound`,
/// otherwise `new_bound` replaces the current value only if it is strictly larger.
fn replace_if_larger_bound(bound: &mut Option<BoundsMetadata>, new_bound: BoundsMetadata) {
    let keep_old = bound
        .as_ref()
        .map_or(false, |old_bound| {
            old_bound.resulting_bound >= new_bound.resulting_bound
        });
    if !keep_old {
        *bound = Some(new_bound);
    }
}
impl<'a> Context<'a> {
    /// Compute lower and upper bounds for the memory object behind the given parameter ID.
    ///
    /// Since the memory object associated to a parameter may not be unique,
    /// the bounds are approximated from those objects where exact bounds could be determined.
    /// If several objects are found, the strictest bounds among them are used.
    fn compute_bounds_of_param_id(
        &self,
        param_object_id: &AbstractIdentifier,
    ) -> (Option<BoundsMetadata>, Option<BoundsMetadata>) {
        // Resolve the parameter to the caller objects it may point to.
        let param_pointer = DataDomain::from_target(
            param_object_id.clone(),
            Bitvector::zero(param_object_id.bytesize().into()).into(),
        );
        let object_data = self.recursively_substitute_param_values(&param_pointer);
        let mut lower_bound: Option<BoundsMetadata> = None;
        let mut upper_bound: Option<BoundsMetadata> = None;
        for (id, offset) in object_data.get_relative_values() {
            // Cases where the exact offset into the object is unknown are ignored for now.
            let concrete_offset = match offset.try_to_offset() {
                Ok(concrete_offset) => concrete_offset,
                Err(_) => continue,
            };
            let is_heap_object = self
                .malloc_tid_to_object_size_map
                .contains_key(id.get_tid());
            if is_heap_object {
                // The pointer points `concrete_offset` bytes into the source object,
                // so the lower bound relative to the pointer is its negation.
                replace_if_larger_bound(
                    &mut lower_bound,
                    BoundsMetadata::from_source(id, offset, -concrete_offset),
                );
                if let Ok(concrete_object_size) =
                    self.compute_size_of_heap_object(id).try_to_offset()
                {
                    replace_if_smaller_bound(
                        &mut upper_bound,
                        BoundsMetadata::from_source(
                            id,
                            offset,
                            concrete_object_size - concrete_offset,
                        ),
                    );
                }
            } else if self.is_stack_frame_id(id) {
                let stack_params_total_size = self
                    .function_signatures
                    .get(id.get_tid())
                    .unwrap()
                    .get_stack_params_total_size();
                replace_if_smaller_bound(
                    &mut upper_bound,
                    BoundsMetadata::from_source(
                        id,
                        offset,
                        stack_params_total_size - concrete_offset,
                    ),
                );
                // No lower bound is set here: without knowing the concrete call site
                // of a stack pointer a correct lower bound cannot be determined.
            }
            // FIXME: Cases not handled here include unresolved parameter IDs, unknown IDs and global pointers.
            // For the first two we do not have any size information.
            // For global pointers we need some kind of pre-analysis so that we do not have to assume
            // that the pointer may address the complete range of global data addresses.
        }
        (lower_bound, upper_bound)
    }

    /// Compute the bounds of the memory object given by the provided `object_id`.
    ///
    /// Returns `(lower_bound, upper_bound)`;
    /// a bound is `None` if it could not be determined.
    pub fn compute_bounds_of_id(
        &self,
        object_id: &AbstractIdentifier,
        current_stack_frame_id: &AbstractIdentifier,
    ) -> (Option<BoundsMetadata>, Option<BoundsMetadata>) {
        if self
            .malloc_tid_to_object_size_map
            .contains_key(object_id.get_tid())
        {
            // Heap objects start at offset 0; the upper bound is the object size, if known.
            let upper_bound = match self.compute_size_of_heap_object(object_id).try_to_offset() {
                Ok(object_size) => Some(BoundsMetadata::new(object_size)),
                Err(_) => None,
            };
            (Some(BoundsMetadata::new(0)), upper_bound)
        } else if object_id == current_stack_frame_id {
            // For the current stack frame only the upper bound
            // (the total size of the stack parameters) is known.
            let stack_params_total_size = self
                .function_signatures
                .get(object_id.get_tid())
                .unwrap()
                .get_stack_params_total_size();
            (None, Some(BoundsMetadata::new(stack_params_total_size)))
        } else if object_id.get_tid() == current_stack_frame_id.get_tid()
            && object_id.get_path_hints().is_empty()
        {
            // The ID is a parameter ID of the current function.
            self.compute_bounds_of_param_id(object_id)
        } else {
            // The type of object is unknown, thus the size restrictions are also unknown.
            (None, None)
        }
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::analysis::pointer_inference::Data;
    use std::collections::{HashMap, HashSet};

    /// Check bound computation for parameter IDs that resolve
    /// to a heap object (both bounds known) and to a caller stack frame (upper bound only).
    #[test]
    fn test_compute_bounds_of_param_id() {
        let mut context = Context::mock_x64();
        // Two parameters of `func` and the corresponding IDs at the call site.
        let param_id = AbstractIdentifier::mock("func", "RDI", 8);
        let param_id_2 = AbstractIdentifier::mock("func", "RSI", 8);
        let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
        let callsite_id_2 = AbstractIdentifier::mock("callsite_id", "RSI", 8);
        let malloc_call_id = AbstractIdentifier::mock("malloc_call", "RAX", 8);
        let main_stack_id = AbstractIdentifier::mock("main", "RSP", 8);
        // At the call site, RDI points 2 bytes into a heap object
        // and RSI points to offset -10 of the caller stack frame.
        let param_value = Data::from_target(malloc_call_id.clone(), Bitvector::from_i64(2).into());
        let param_value_2 =
            Data::from_target(main_stack_id.clone(), Bitvector::from_i64(-10).into());
        let param_replacement_map = HashMap::from([
            (callsite_id, param_value.clone()),
            (callsite_id_2, param_value_2.clone()),
        ]);
        let callee_to_callsites_map =
            HashMap::from([(Tid::new("func"), HashSet::from([Tid::new("callsite_id")]))]);
        context.param_replacement_map = param_replacement_map;
        context.callee_to_callsites_map = callee_to_callsites_map;
        // The heap object allocated at `malloc_call` is 42 bytes large.
        context
            .malloc_tid_to_object_size_map
            .insert(Tid::new("malloc_call"), Data::from(Bitvector::from_i64(42)));
        context.call_to_caller_fn_map = HashMap::from([
            (Tid::new("malloc_call"), Tid::new("main")),
            (Tid::new("callsite_id"), Tid::new("main")),
        ]);
        // Test bound computation if the param gets resolved to a heap object:
        // pointing 2 bytes into a 42-byte object yields bounds [-2, 40].
        let (lower_bound, upper_bound) = context.compute_bounds_of_param_id(&param_id);
        assert_eq!(lower_bound.unwrap().resulting_bound, -2);
        assert_eq!(upper_bound.unwrap().resulting_bound, 40);
        // Test bound computation if the param gets resolved to a caller stack frame:
        // no lower bound can be derived; the upper bound is the stack params size minus the -10 offset
        // (the mock's stack params size is presumably 0 here — the expected value is 0 - (-10) = 10).
        let (lower_bound, upper_bound) = context.compute_bounds_of_param_id(&param_id_2);
        assert_eq!(lower_bound, None);
        assert_eq!(upper_bound.unwrap().resulting_bound, 10);
    }
}
......@@ -9,6 +9,11 @@ use std::collections::{BTreeMap, HashMap, HashSet};
use super::state::State;
/// Methods of [`Context`] related to computing bounds of memory objects.
mod bounds_computation;
pub use bounds_computation::BoundsMetadata;
/// Methods of [`Context`] and other helper functions related to replacing parameter IDs with possible caller values.
mod param_replacement;
/// Trait implementations for the [`Context`] struct,
/// especially the implementation of the [forward interprocedural fixpoint context](`crate::analysis::forward_interprocedural_fixpoint::Context`) trait.
mod trait_impls;
......@@ -53,7 +58,9 @@ impl<'a> Context<'a> {
pointer_inference: analysis_results.pointer_inference.unwrap(),
function_signatures: analysis_results.function_signatures.unwrap(),
callee_to_callsites_map: compute_callee_to_call_sites_map(project),
param_replacement_map: compute_param_replacement_map(analysis_results),
param_replacement_map: param_replacement::compute_param_replacement_map(
analysis_results,
),
malloc_tid_to_object_size_map: compute_size_values_of_malloc_calls(analysis_results),
call_to_caller_fn_map: compute_call_to_caller_map(project),
log_collector,
......@@ -109,149 +116,6 @@ impl<'a> Context<'a> {
}
}
/// Merge all possible caller values for the given parameter ID.
/// The absolute values are also merged separately to prevent widening operations during the merge.
fn substitute_param_values(
&self,
param_id: &AbstractIdentifier,
) -> (Option<IntervalDomain>, Data) {
let mut merged_absolute_value: Option<IntervalDomain> = None;
let mut merged_data: Option<Data> = None;
let function_tid = param_id.get_tid();
if let Some(callsites) = self.callee_to_callsites_map.get(function_tid) {
for callsite in callsites {
let param_id_at_callsite =
AbstractIdentifier::new(callsite.clone(), param_id.get_location().clone());
let value_at_callsite = match self.param_replacement_map.get(&param_id_at_callsite)
{
Some(val) => val,
None => continue,
};
merged_absolute_value = match (
&merged_absolute_value,
value_at_callsite.get_absolute_value(),
) {
(Some(val_left), Some(val_right)) => Some(val_left.signed_merge(val_right)),
(Some(val), None) | (None, Some(val)) => Some(val.clone()),
(None, None) => None,
};
merged_data = merged_data
.map(|val| val.merge(value_at_callsite))
.or_else(|| Some(value_at_callsite.clone()));
}
}
let merged_data = merged_data.unwrap_or_else(|| Data::new_top(param_id.bytesize()));
(merged_absolute_value, merged_data)
}
/// Recursively merge and insert all possible caller values for all parameter IDs contained in the given value.
/// Absolute values are merged separately to prevent widening operations during the merge.
///
/// Since recursive function calls could lead to infinite loops during the merge operation,
/// each parameter ID is substituted at most once during the algorithm.
/// This can lead to unresolved parameter IDs still contained in the final result,
/// in some cases this can also happen without the presence of recursive function calls.
pub fn recursively_substitute_param_values(&self, value: &Data) -> Data {
let subs_list = &self.project.program.term.subs;
let mut already_handled_ids = HashSet::new();
let mut merged_absolute_value: Option<IntervalDomain> = value.get_absolute_value().cloned();
let mut merged_data = value.clone();
let mut has_stabilized = false;
while !has_stabilized {
has_stabilized = true;
let mut replacement_map: BTreeMap<AbstractIdentifier, Data> = BTreeMap::new();
for (id, offset) in merged_data.get_relative_values().clone() {
if !already_handled_ids.insert(id.clone())
|| !id.get_path_hints().is_empty()
|| !subs_list.contains_key(id.get_tid())
|| *id.get_location()
== AbstractLocation::Register(self.project.stack_pointer_register.clone())
{
// ID was already present in `already_handled_ids` or it is not a parameter ID
replacement_map.insert(
id.clone(),
Data::from_target(id, Bitvector::zero(offset.bytesize().into()).into()),
);
} else {
has_stabilized = false;
let (caller_absolute_value, caller_data) = self.substitute_param_values(&id);
replacement_map.insert(id, caller_data);
merged_absolute_value = match (
merged_absolute_value,
caller_absolute_value.map(|val| val + offset),
) {
(Some(val_left), Some(val_right)) => {
Some(val_left.signed_merge(&val_right))
}
(Some(val), None) | (None, Some(val)) => Some(val.clone()),
(None, None) => None,
};
}
}
merged_data.replace_all_ids(&replacement_map);
}
merged_data.set_absolute_value(merged_absolute_value);
merged_data
}
/// Replace all parameter IDs in the given value.
/// The replaced values are those of the parameters at the given call,
/// i.e. the replacement is context-sensitive to a specific call.
fn substitute_param_values_context_sensitive(
&self,
value: &Data,
call_tid: &Tid,
current_fn_tid: &Tid,
) -> Data {
let mut replacement_map: BTreeMap<AbstractIdentifier, Data> = BTreeMap::new();
for (id, offset) in value.get_relative_values().clone() {
if id.get_tid() == current_fn_tid && id.get_path_hints().is_empty() {
// Possible function param ID
let param_id_at_callsite =
AbstractIdentifier::new(call_tid.clone(), id.get_location().clone());
if let Some(value_at_callsite) =
self.param_replacement_map.get(&param_id_at_callsite)
{
replacement_map.insert(id, value_at_callsite.clone());
} // Else it is a pointer to the current stack frame, which is invalid in the caller.
} else {
// Not a function param.
replacement_map.insert(
id.clone(),
Data::from_target(id, Bitvector::zero(offset.bytesize().into()).into()),
);
}
}
let mut result = value.clone();
result.replace_all_ids(&replacement_map);
result
}
/// Replace all parameter IDs in the given value using the given path hints
/// to replace them with the corresponding values in the calling context of the path hints.
pub fn recursively_substitute_param_values_context_sensitive(
&self,
value: &Data,
current_fn_tid: &Tid,
path_hints: &[Tid],
) -> Data {
let mut substituted_value = value.clone();
let mut current_fn_tid = current_fn_tid.clone();
if path_hints.is_empty() {
return substituted_value;
}
for call_tid in path_hints {
substituted_value = self.substitute_param_values_context_sensitive(
&substituted_value,
call_tid,
&current_fn_tid,
);
// Now set the new current_fn_tid to the TID of the caller function.
current_fn_tid = self.call_to_caller_fn_map[call_tid].clone();
}
substituted_value
}
/// Log a debug log message in the log collector of `self`.
fn log_debug(&self, tid: &Tid, msg: impl ToString) {
let log_msg = LogMessage {
......@@ -319,70 +183,6 @@ fn compute_callee_to_call_sites_map(project: &Project) -> HashMap<Tid, HashSet<T
callee_to_call_sites_map
}
/// Compute a mapping that maps each parameter of each call (given by an abstract identifier representing the parameter value at the callsite)
/// to its value at the callsite according to the pointer inference analysis.
fn compute_param_replacement_map(
analysis_results: &AnalysisResults,
) -> HashMap<AbstractIdentifier, Data> {
let mut param_replacement_map = HashMap::new();
for sub in analysis_results.project.program.term.subs.values() {
for blk in &sub.term.blocks {
for jmp in &blk.term.jmps {
match &jmp.term {
Jmp::Call { target, .. } => add_param_replacements_for_call(
analysis_results,
jmp,
target,
&mut param_replacement_map,
),
Jmp::CallInd { .. } => (), // FIXME: indirect call targets not yet supported.
_ => (),
}
}
}
}
param_replacement_map
}
/// For each parameter of the given call term map the abstract identifier representing the value of the parameter at the callsite
/// to its concrete value (in the context of the caller).
/// Add the mappings to the given `replacement_map`.
fn add_param_replacements_for_call(
analysis_results: &AnalysisResults,
call: &Term<Jmp>,
callee_tid: &Tid,
replacement_map: &mut HashMap<AbstractIdentifier, Data>,
) {
let vsa_results = analysis_results.pointer_inference.unwrap();
if let Some(fn_sig) = analysis_results
.function_signatures
.unwrap()
.get(callee_tid)
{
for param_arg in fn_sig.parameters.keys() {
if let Some(param_value) = vsa_results.eval_parameter_arg_at_call(&call.tid, param_arg)
{
let param_id = AbstractIdentifier::from_arg(&call.tid, param_arg);
replacement_map.insert(param_id, param_value);
}
}
} else if let Some(extern_symbol) = analysis_results
.project
.program
.term
.extern_symbols
.get(callee_tid)
{
for param_arg in &extern_symbol.parameters {
if let Some(param_value) = vsa_results.eval_parameter_arg_at_call(&call.tid, param_arg)
{
let param_id = AbstractIdentifier::from_arg(&call.tid, param_arg);
replacement_map.insert(param_id, param_value);
}
}
}
}
/// Compute a map mapping the TIDs of malloc-like calls (e.g. malloc, realloc, calloc)
/// to the size value of the allocated object according to the pointer inference analysis.
fn compute_size_values_of_malloc_calls(analysis_results: &AnalysisResults) -> HashMap<Tid, Data> {
......
use super::Context;
use crate::analysis::pointer_inference::Data;
use crate::analysis::vsa_results::VsaResult;
use crate::intermediate_representation::*;
use crate::{abstract_domain::*, AnalysisResults};
use std::collections::{BTreeMap, HashMap, HashSet};
impl<'a> Context<'a> {
    /// Merge all possible caller values for the given parameter ID.
    /// The absolute values are also merged separately to prevent widening operations during the merge.
    ///
    /// Returns the separately merged absolute value (if any caller value had one)
    /// together with the merged data; the data is `Top` if no caller value was found.
    fn substitute_param_values(
        &self,
        param_id: &AbstractIdentifier,
    ) -> (Option<IntervalDomain>, Data) {
        let mut merged_absolute_value: Option<IntervalDomain> = None;
        let mut merged_data: Option<Data> = None;
        let function_tid = param_id.get_tid();
        if let Some(callsites) = self.callee_to_callsites_map.get(function_tid) {
            for callsite in callsites {
                // The ID of the same parameter, but relative to the callsite instead of the callee.
                let param_id_at_callsite =
                    AbstractIdentifier::new(callsite.clone(), param_id.get_location().clone());
                let value_at_callsite = match self.param_replacement_map.get(&param_id_at_callsite)
                {
                    Some(val) => val,
                    None => continue,
                };
                // Merge the absolute value separately using a signed merge (avoids widening).
                merged_absolute_value = match (
                    &merged_absolute_value,
                    value_at_callsite.get_absolute_value(),
                ) {
                    (Some(val_left), Some(val_right)) => Some(val_left.signed_merge(val_right)),
                    (Some(val), None) | (None, Some(val)) => Some(val.clone()),
                    (None, None) => None,
                };
                merged_data = merged_data
                    .map(|val| val.merge(value_at_callsite))
                    .or_else(|| Some(value_at_callsite.clone()));
            }
        }
        let merged_data = merged_data.unwrap_or_else(|| Data::new_top(param_id.bytesize()));
        (merged_absolute_value, merged_data)
    }

    /// Recursively merge and insert all possible caller values for all parameter IDs contained in the given value.
    /// Absolute values are merged separately to prevent widening operations during the merge.
    ///
    /// Since recursive function calls could lead to infinite loops during the merge operation,
    /// each parameter ID is substituted at most once during the algorithm.
    /// This can lead to unresolved parameter IDs still contained in the final result,
    /// in some cases this can also happen without the presence of recursive function calls.
    pub fn recursively_substitute_param_values(&self, value: &Data) -> Data {
        let subs_list = &self.project.program.term.subs;
        let mut already_handled_ids = HashSet::new();
        let mut merged_absolute_value: Option<IntervalDomain> = value.get_absolute_value().cloned();
        let mut merged_data = value.clone();
        let mut has_stabilized = false;
        // Fixpoint loop: repeat until an iteration performs no substitution.
        while !has_stabilized {
            has_stabilized = true;
            let mut replacement_map: BTreeMap<AbstractIdentifier, Data> = BTreeMap::new();
            for (id, offset) in merged_data.get_relative_values().clone() {
                if !already_handled_ids.insert(id.clone())
                    || !id.get_path_hints().is_empty()
                    || !subs_list.contains_key(id.get_tid())
                    || *id.get_location()
                        == AbstractLocation::Register(self.project.stack_pointer_register.clone())
                {
                    // ID was already present in `already_handled_ids` or it is not a parameter ID.
                    // Keep the ID unchanged by mapping it to itself (with a zero offset).
                    replacement_map.insert(
                        id.clone(),
                        Data::from_target(id, Bitvector::zero(offset.bytesize().into()).into()),
                    );
                } else {
                    has_stabilized = false;
                    let (caller_absolute_value, caller_data) = self.substitute_param_values(&id);
                    replacement_map.insert(id, caller_data);
                    // Shift the caller's absolute value by this ID's offset before merging it in.
                    merged_absolute_value = match (
                        merged_absolute_value,
                        caller_absolute_value.map(|val| val + offset),
                    ) {
                        (Some(val_left), Some(val_right)) => {
                            Some(val_left.signed_merge(&val_right))
                        }
                        (Some(val), None) | (None, Some(val)) => Some(val.clone()),
                        (None, None) => None,
                    };
                }
            }
            merged_data.replace_all_ids(&replacement_map);
        }
        merged_data.set_absolute_value(merged_absolute_value);
        merged_data
    }

    /// Replace all parameter IDs in the given value.
    /// The replaced values are those of the parameters at the given call,
    /// i.e. the replacement is context-sensitive to a specific call.
    fn substitute_param_values_context_sensitive(
        &self,
        value: &Data,
        call_tid: &Tid,
        current_fn_tid: &Tid,
    ) -> Data {
        let mut replacement_map: BTreeMap<AbstractIdentifier, Data> = BTreeMap::new();
        for (id, offset) in value.get_relative_values().clone() {
            if id.get_tid() == current_fn_tid && id.get_path_hints().is_empty() {
                // Possible function param ID
                let param_id_at_callsite =
                    AbstractIdentifier::new(call_tid.clone(), id.get_location().clone());
                if let Some(value_at_callsite) =
                    self.param_replacement_map.get(&param_id_at_callsite)
                {
                    replacement_map.insert(id, value_at_callsite.clone());
                } // Else it is a pointer to the current stack frame, which is invalid in the caller.
            } else {
                // Not a function param: keep the ID unchanged by mapping it to itself.
                replacement_map.insert(
                    id.clone(),
                    Data::from_target(id, Bitvector::zero(offset.bytesize().into()).into()),
                );
            }
        }
        let mut result = value.clone();
        result.replace_all_ids(&replacement_map);
        result
    }

    /// Replace all parameter IDs in the given value using the given path hints
    /// to replace them with the corresponding values in the calling context of the path hints.
    pub fn recursively_substitute_param_values_context_sensitive(
        &self,
        value: &Data,
        current_fn_tid: &Tid,
        path_hints: &[Tid],
    ) -> Data {
        let mut substituted_value = value.clone();
        let mut current_fn_tid = current_fn_tid.clone();
        if path_hints.is_empty() {
            return substituted_value;
        }
        // Walk the call chain given by the path hints, substituting one call at a time.
        for call_tid in path_hints {
            substituted_value = self.substitute_param_values_context_sensitive(
                &substituted_value,
                call_tid,
                &current_fn_tid,
            );
            // Now set the new current_fn_tid to the TID of the caller function.
            current_fn_tid = self.call_to_caller_fn_map[call_tid].clone();
        }
        substituted_value
    }
}
/// Compute a mapping from each parameter of each call
/// (given by an abstract identifier representing the parameter value at the callsite)
/// to its value at the callsite according to the pointer inference analysis.
pub fn compute_param_replacement_map(
    analysis_results: &AnalysisResults,
) -> HashMap<AbstractIdentifier, Data> {
    let mut param_replacement_map = HashMap::new();
    // Visit every jump term of every block of every function in the program.
    let all_jmps = analysis_results
        .project
        .program
        .term
        .subs
        .values()
        .flat_map(|sub| sub.term.blocks.iter())
        .flat_map(|blk| blk.term.jmps.iter());
    for jmp in all_jmps {
        // FIXME: indirect call targets (`Jmp::CallInd`) are not yet supported.
        if let Jmp::Call { target, .. } = &jmp.term {
            add_param_replacements_for_call(
                analysis_results,
                jmp,
                target,
                &mut param_replacement_map,
            );
        }
    }
    param_replacement_map
}
/// For each parameter of the given call term, map the abstract identifier representing
/// the value of the parameter at the callsite to its concrete value (in the context of the caller).
/// Add the mappings to the given `replacement_map`.
fn add_param_replacements_for_call(
    analysis_results: &AnalysisResults,
    call: &Term<Jmp>,
    callee_tid: &Tid,
    replacement_map: &mut HashMap<AbstractIdentifier, Data>,
) {
    let vsa_results = analysis_results.pointer_inference.unwrap();
    // Shared logic for both callee kinds: record the value of one parameter argument
    // at the callsite if the pointer inference could evaluate it.
    let mut record_param_value = |param_arg| {
        if let Some(param_value) = vsa_results.eval_parameter_arg_at_call(&call.tid, param_arg) {
            let param_id = AbstractIdentifier::from_arg(&call.tid, param_arg);
            replacement_map.insert(param_id, param_value);
        }
    };
    if let Some(fn_sig) = analysis_results
        .function_signatures
        .unwrap()
        .get(callee_tid)
    {
        // Internal function: parameters are taken from the computed function signature.
        for param_arg in fn_sig.parameters.keys() {
            record_param_value(param_arg);
        }
    } else if let Some(extern_symbol) = analysis_results
        .project
        .program
        .term
        .extern_symbols
        .get(callee_tid)
    {
        // External symbol: parameters are taken from the symbol definition.
        for param_arg in &extern_symbol.parameters {
            record_param_value(param_arg);
        }
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Check context-sensitive parameter substitution along a concrete chain of callsites
    /// (given as path hints), both for a single substitution step and recursively.
    #[test]
    fn test_substitute_param_values_context_sensitive() {
        let mut context = Context::mock_x64();
        let param_id = AbstractIdentifier::mock("func", "RDI", 8);
        let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
        let recursive_param_id = AbstractIdentifier::mock("main", "RSI", 8);
        let recursive_callsite_id = AbstractIdentifier::mock("recursive_callsite_id", "RSI", 8);
        // `func`'s RDI param resolves to `main`'s RSI param plus 1;
        // that param in turn resolves to the absolute value 41.
        let param_value =
            Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(1).into());
        let recursive_param_value = Data::from(Bitvector::from_i64(41));
        let param_replacement_map = HashMap::from([
            (callsite_id, param_value.clone()),
            (recursive_callsite_id.clone(), recursive_param_value),
        ]);
        let callee_to_callsites_map = HashMap::from([
            (Tid::new("func"), HashSet::from([Tid::new("callsite_id")])),
            (
                Tid::new("main"),
                HashSet::from([Tid::new("recursive_callsite_id")]),
            ),
        ]);
        let call_to_caller_map = HashMap::from([
            (Tid::new("callsite_id"), Tid::new("main")),
            (
                Tid::new("recursive_callsite_id"),
                Tid::new("somer_other_fn_id"),
            ),
        ]);
        context.param_replacement_map = param_replacement_map;
        context.callee_to_callsites_map = callee_to_callsites_map;
        context.call_to_caller_fn_map = call_to_caller_map;
        // non-recursive substitution: RDI + 5 becomes (main's RSI) + 1 + 5 = RSI + 6.
        let result = context.substitute_param_values_context_sensitive(
            &Data::from_target(param_id.clone(), Bitvector::from_i64(5).into()),
            &Tid::new("callsite_id"),
            &Tid::new("func"),
        );
        assert_eq!(
            result,
            Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(6).into())
        );
        // recursive substitution along both callsites: 41 + 1 + 5 = 47.
        let result = context.recursively_substitute_param_values_context_sensitive(
            &Data::from_target(param_id, Bitvector::from_i64(5).into()),
            &Tid::new("func"),
            &[Tid::new("callsite_id"), Tid::new("recursive_callsite_id")],
        );
        println!("{:#}", result.to_json_compact());
        assert_eq!(result, Bitvector::from_i64(47).into());
    }

    /// Check context-insensitive parameter substitution (merging over all known callsites),
    /// both for a single substitution step and recursively.
    #[test]
    fn test_substitute_param_values() {
        let mut context = Context::mock_x64();
        let param_id = AbstractIdentifier::mock("func", "RDI", 8);
        let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
        let recursive_param_id = AbstractIdentifier::mock("main", "RSI", 8);
        let recursive_callsite_id = AbstractIdentifier::mock("recursive_callsite_id", "RSI", 8);
        // `func`'s RDI param resolves to `main`'s RSI param plus 1;
        // that param in turn resolves to the absolute value 39.
        let param_value =
            Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(1).into());
        let recursive_param_value = Data::from(Bitvector::from_i64(39));
        let param_replacement_map = HashMap::from([
            (callsite_id, param_value.clone()),
            (recursive_callsite_id.clone(), recursive_param_value),
        ]);
        let callee_to_callsites_map = HashMap::from([
            (Tid::new("func"), HashSet::from([Tid::new("callsite_id")])),
            (
                Tid::new("main"),
                HashSet::from([Tid::new("recursive_callsite_id")]),
            ),
        ]);
        context.param_replacement_map = param_replacement_map;
        context.callee_to_callsites_map = callee_to_callsites_map;
        // non-recursive substitution: a single step yields the caller value unchanged.
        let (result_absolute, result) = context.substitute_param_values(&param_id);
        assert!(result_absolute.is_none());
        assert_eq!(result, param_value);
        // recursive substitution: 39 + 1 + 5 = 45.
        let result = context.recursively_substitute_param_values(&Data::from_target(
            param_id,
            Bitvector::from_i64(5).into(),
        ));
        assert_eq!(result, Bitvector::from_i64(45).into());
    }
}
......@@ -48,91 +48,3 @@ fn test_compute_size_value_of_malloc_like_call() {
)
.is_none());
}
#[test]
fn test_substitute_param_values_context_sensitive() {
let mut context = Context::mock_x64();
let param_id = AbstractIdentifier::mock("func", "RDI", 8);
let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
let recursive_param_id = AbstractIdentifier::mock("main", "RSI", 8);
let recursive_callsite_id = AbstractIdentifier::mock("recursive_callsite_id", "RSI", 8);
let param_value = Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(1).into());
let recursive_param_value = Data::from(Bitvector::from_i64(41));
let param_replacement_map = HashMap::from([
(callsite_id, param_value.clone()),
(recursive_callsite_id.clone(), recursive_param_value),
]);
let callee_to_callsites_map = HashMap::from([
(Tid::new("func"), HashSet::from([Tid::new("callsite_id")])),
(
Tid::new("main"),
HashSet::from([Tid::new("recursive_callsite_id")]),
),
]);
let call_to_caller_map = HashMap::from([
(Tid::new("callsite_id"), Tid::new("main")),
(
Tid::new("recursive_callsite_id"),
Tid::new("somer_other_fn_id"),
),
]);
context.param_replacement_map = param_replacement_map;
context.callee_to_callsites_map = callee_to_callsites_map;
context.call_to_caller_fn_map = call_to_caller_map;
// non-recursive substitution
let result = context.substitute_param_values_context_sensitive(
&Data::from_target(param_id.clone(), Bitvector::from_i64(5).into()),
&Tid::new("callsite_id"),
&Tid::new("func"),
);
assert_eq!(
result,
Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(6).into())
);
// recursive substitution
let result = context.recursively_substitute_param_values_context_sensitive(
&Data::from_target(param_id, Bitvector::from_i64(5).into()),
&Tid::new("func"),
&[Tid::new("callsite_id"), Tid::new("recursive_callsite_id")],
);
println!("{:#}", result.to_json_compact());
assert_eq!(result, Bitvector::from_i64(47).into());
}
#[test]
fn test_substitute_param_values() {
let mut context = Context::mock_x64();
let param_id = AbstractIdentifier::mock("func", "RDI", 8);
let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
let recursive_param_id = AbstractIdentifier::mock("main", "RSI", 8);
let recursive_callsite_id = AbstractIdentifier::mock("recursive_callsite_id", "RSI", 8);
let param_value = Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(1).into());
let recursive_param_value = Data::from(Bitvector::from_i64(39));
let param_replacement_map = HashMap::from([
(callsite_id, param_value.clone()),
(recursive_callsite_id.clone(), recursive_param_value),
]);
let callee_to_callsites_map = HashMap::from([
(Tid::new("func"), HashSet::from([Tid::new("callsite_id")])),
(
Tid::new("main"),
HashSet::from([Tid::new("recursive_callsite_id")]),
),
]);
context.param_replacement_map = param_replacement_map;
context.callee_to_callsites_map = callee_to_callsites_map;
// non-recursive substitution
let (result_absolute, result) = context.substitute_param_values(&param_id);
assert!(result_absolute.is_none());
assert_eq!(result, param_value);
// recursive substitution
let result = context.recursively_substitute_param_values(&Data::from_target(
param_id,
Bitvector::from_i64(5).into(),
));
assert_eq!(result, Bitvector::from_i64(45).into());
}
use super::context::BoundsMetadata;
use super::Context;
use super::Data;
use crate::abstract_domain::*;
......@@ -77,6 +78,16 @@ impl State {
lower_offset,
lower_bound,
));
if let (
Some(BoundsMetadata {
source: Some(source),
..
}),
_,
) = context.compute_bounds_of_id(id, &self.stack_id)
{
out_of_bounds_access_warnings.push(format!("The object bound is based on the possible source value {:#} for the object ID.", source.to_json_compact()));
}
// Replace the bound with `Top` to prevent duplicate CWE warnings with the same root cause.
self.object_lower_bounds
.insert(id.clone(), BitvectorDomain::new_top(address.bytesize()));
......@@ -84,11 +95,22 @@ impl State {
}
if let Ok(upper_bound) = self.object_upper_bounds.get(id).unwrap().try_to_offset() {
if upper_bound < upper_offset + (u64::from(value_size) as i64) {
out_of_bounds_access_warnings.push(format!("For the object ID {} access to the offset {} may be larger than the upper object bound of {}.",
out_of_bounds_access_warnings.push(format!("For the object ID {} access to the offset {} (size {}) may overflow the upper object bound of {}.",
id,
upper_offset + (u64::from(value_size) as i64),
upper_offset,
u64::from(value_size),
upper_bound,
));
if let (
_,
Some(BoundsMetadata {
source: Some(source),
..
}),
) = context.compute_bounds_of_id(id, &self.stack_id)
{
out_of_bounds_access_warnings.push(format!("The object bound is based on the possible source value {:#} for the object ID.", source.to_json_compact()));
}
// Replace the bound with `Top` to prevent duplicate CWE warnings with the same root cause.
self.object_upper_bounds
.insert(id.clone(), BitvectorDomain::new_top(address.bytesize()));
......@@ -107,107 +129,23 @@ impl State {
/// Compute the bounds of the memory object given by `object_id`
/// and insert them into the object bounds maps of `self`.
/// For bounds that could not be determined (e.g. because the source for the object ID is unknown)
/// we insert `Top` bounds into the bounds maps.
fn compute_bounds_of_id(&mut self, object_id: &AbstractIdentifier, context: &Context) {
    if context
        .malloc_tid_to_object_size_map
        .contains_key(object_id.get_tid())
    {
        // Heap object: the lower bound is 0 (the object start),
        // the upper bound is the computed object size.
        let object_size = context.compute_size_of_heap_object(object_id);
        self.object_lower_bounds.insert(
            object_id.clone(),
            Bitvector::zero(object_id.bytesize().into()).into(),
        );
        self.object_upper_bounds
            .insert(object_id.clone(), object_size);
    } else if *object_id == self.stack_id {
        // The bounds of the current stack frame are expected to be set elsewhere,
        // so reaching this branch indicates an internal logic error.
        panic!("Current stack frame bounds not set.");
    } else if object_id.get_tid() == self.stack_id.get_tid()
        && object_id.get_path_hints().is_empty()
    {
        // Handle parameter IDs
        self.compute_bounds_of_param_id(object_id, context);
    } else {
        // The type of object is unknown, thus the size restrictions are also unknown.
        self.object_lower_bounds.insert(
            object_id.clone(),
            BitvectorDomain::new_top(object_id.bytesize()),
        );
        self.object_upper_bounds.insert(
            object_id.clone(),
            BitvectorDomain::new_top(object_id.bytesize()),
        );
    }
}
/// Compute the bounds of the memory object associated with the given parameter ID
/// and add the results to the known object bounds of `self`.
///
/// Since the memory object associated to a parameter may not be unique
/// the bounds are only approximated from those objects where exact bounds could be determined.
/// If different objects were found the bounds are approximated by the strictest bounds that were found.
fn compute_bounds_of_param_id(
    &mut self,
    param_object_id: &AbstractIdentifier,
    context: &Context,
) {
    // Substitute the parameter ID with the possible source values at the call sites of the function.
    let object_data = context.recursively_substitute_param_values(&DataDomain::from_target(
        param_object_id.clone(),
        Bitvector::zero(param_object_id.bytesize().into()).into(),
    ));
    let mut lower_bound = None;
    let mut upper_bound = None;
    for (id, offset) in object_data.get_relative_values() {
        // Right now we ignore cases where we do not know the exact offset into the object.
        let offset = match offset.try_to_offset() {
            Ok(offset) => offset,
            Err(_) => continue,
        };
        if context
            .malloc_tid_to_object_size_map
            .contains_key(id.get_tid())
        {
            // Heap object: the parameter points `offset` bytes into it,
            // so its lower bound is `-offset` and its upper bound is `size - offset`.
            let object_size = context.compute_size_of_heap_object(id);
            lower_bound = lower_bound
                .map(|old_bound| std::cmp::max(old_bound, -offset))
                .or(Some(-offset));
            if let Ok(concrete_object_size) = object_size.try_to_offset() {
                upper_bound = upper_bound
                    .map(|old_bound| std::cmp::min(old_bound, concrete_object_size - offset))
                    .or(Some(concrete_object_size - offset));
            }
        } else if context.is_stack_frame_id(id) {
            // Caller stack frame: the upper bound is given by the total size of the stack parameters.
            let stack_frame_upper_bound = context
                .function_signatures
                .get(id.get_tid())
                .unwrap()
                .get_stack_params_total_size();
            upper_bound = upper_bound
                .map(|old_bound| std::cmp::min(old_bound, stack_frame_upper_bound - offset))
                .or(Some(stack_frame_upper_bound - offset));
            // We do not set a lower bound since we do not know the concrete call site for stack pointers,
            // which we would need to determine a correct lower bound.
        }
        // FIXME: Cases not handled here include unresolved parameter IDs, unknown IDs and global pointers.
        // For the first two we do not have any size information.
        // For global pointers we need some kind of pre-analysis so that we do not have to assume
        // that the pointer may address the complete range of global data addresses.
    }
    // Convert the approximated bounds to bitvectors; bounds that could not be determined become `Top`.
    let lower_bound = match lower_bound {
        Some(bound) => Bitvector::from_i64(bound)
            .into_resize_signed(param_object_id.bytesize())
            .into(),
        None => BitvectorDomain::new_top(param_object_id.bytesize()),
    };
    let upper_bound = match upper_bound {
        Some(bound) => Bitvector::from_i64(bound)
            .into_resize_signed(param_object_id.bytesize())
            .into(),
        None => BitvectorDomain::new_top(param_object_id.bytesize()),
    };
    self.object_lower_bounds
        .insert(param_object_id.clone(), lower_bound);
    self.object_upper_bounds
        .insert(param_object_id.clone(), upper_bound);
}
}
......@@ -260,7 +198,6 @@ impl State {
pub mod tests {
use super::*;
use crate::intermediate_representation::Variable;
use std::collections::{HashMap, HashSet};
#[test]
fn test_new() {
......@@ -340,60 +277,4 @@ pub mod tests {
Bitvector::from_i64(42).into()
);
}
#[test]
fn test_compute_bounds_of_param_id() {
    let mut context = Context::mock_x64();
    let param_id = AbstractIdentifier::mock("func", "RDI", 8);
    let param_id_2 = AbstractIdentifier::mock("func", "RSI", 8);
    let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
    let callsite_id_2 = AbstractIdentifier::mock("callsite_id", "RSI", 8);
    let malloc_call_id = AbstractIdentifier::mock("malloc_call", "RAX", 8);
    let main_stack_id = AbstractIdentifier::mock("main", "RSP", 8);
    // The first parameter resolves to a heap object, the second to the caller stack frame.
    // The IDs and values are moved into the replacement map (no clones needed).
    let param_value = Data::from_target(malloc_call_id, Bitvector::from_i64(2).into());
    let param_value_2 = Data::from_target(main_stack_id, Bitvector::from_i64(-10).into());
    let param_replacement_map = HashMap::from([
        (callsite_id, param_value),
        (callsite_id_2, param_value_2),
    ]);
    let callee_to_callsites_map =
        HashMap::from([(Tid::new("func"), HashSet::from([Tid::new("callsite_id")]))]);
    context.param_replacement_map = param_replacement_map;
    context.callee_to_callsites_map = callee_to_callsites_map;
    // The heap object has a concrete size of 42 bytes.
    context
        .malloc_tid_to_object_size_map
        .insert(Tid::new("malloc_call"), Data::from(Bitvector::from_i64(42)));
    context.call_to_caller_fn_map = HashMap::from([
        (Tid::new("malloc_call"), Tid::new("main")),
        (Tid::new("callsite_id"), Tid::new("main")),
    ]);
    let mut state = State::new(
        &Tid::new("func"),
        &FunctionSignature::mock_x64(),
        context.project,
    );
    // Test bound computation if the param gets resolved to a heap object:
    // the parameter points 2 bytes into a 42-byte object, so bounds are [-2, 40].
    state.compute_bounds_of_param_id(&param_id, &context);
    assert_eq!(state.object_lower_bounds.len(), 2);
    assert_eq!(
        state.object_lower_bounds[&AbstractIdentifier::mock("func", "RDI", 8)],
        Bitvector::from_i64(-2).into()
    );
    assert_eq!(
        state.object_upper_bounds[&AbstractIdentifier::mock("func", "RDI", 8)],
        Bitvector::from_i64(40).into()
    );
    // Test bound computation if the param gets resolved to a caller stack frame:
    // no lower bound can be determined, the upper bound is derived from the stack parameter size.
    state.compute_bounds_of_param_id(&param_id_2, &context);
    assert_eq!(
        state.object_lower_bounds[&AbstractIdentifier::mock("func", "RSI", 8)],
        BitvectorDomain::new_top(ByteSize::new(8))
    );
    assert_eq!(
        state.object_upper_bounds[&AbstractIdentifier::mock("func", "RSI", 8)],
        Bitvector::from_i64(10).into()
    );
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment