Unverified commit 26f9844d by Enkelmann, committed by GitHub

implement CWE-119 check (#315)

parent d77159c5
......@@ -114,9 +114,9 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53"
dependencies = [
"cfg-if",
"crossbeam-utils",
......@@ -124,9 +124,9 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38"
dependencies = [
"cfg-if",
"lazy_static",
......@@ -222,9 +222,9 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "fixedbitset"
version = "0.4.0"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "398ea4fabe40b9b0d885340a2a991a44c8a645624075ad966d21f88688e2b69e"
checksum = "279fb028e20b3c4c320317955b77c5e0c9701f05a1d309905d6fc702cdc5053e"
[[package]]
name = "fnv"
......@@ -356,18 +356,18 @@ checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memoffset"
version = "0.6.4"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "nix"
version = "0.23.0"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f305c2c2e4c39a82f7bf0bf65fb557f9070ce06781d4f2454295cc34b1c43188"
checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6"
dependencies = [
"bitflags",
"cc",
......
......@@ -178,7 +178,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
let modules_depending_on_string_abstraction = BTreeSet::from_iter(["CWE78"]);
let modules_depending_on_pointer_inference =
BTreeSet::from_iter(["CWE134", "CWE476", "Memory"]);
BTreeSet::from_iter(["CWE119", "CWE134", "CWE476", "Memory"]);
let string_abstraction_needed = modules
.iter()
......
......@@ -119,10 +119,15 @@ impl AbstractIdentifier {
&self.time
}
/// Get the location component of the abstract ID
/// Get the location component of the abstract ID.
pub fn get_location(&self) -> &AbstractLocation {
&self.location
}
/// Get the bytesize of the value represented by the abstract ID.
pub fn bytesize(&self) -> ByteSize {
self.location.bytesize()
}
}
impl std::fmt::Display for AbstractIdentifier {
......@@ -187,6 +192,14 @@ impl AbstractLocation {
let stack_pos = AbstractMemoryLocation::Location { offset, size };
AbstractLocation::Pointer(stack_register.clone(), stack_pos)
}
/// Get the bytesize of the value represented by the abstract location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Register(var) => var.size,
Self::Pointer(_pointer_var, mem_location) => mem_location.bytesize(),
}
}
}
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
......@@ -212,6 +225,16 @@ pub enum AbstractMemoryLocation {
},
}
impl AbstractMemoryLocation {
/// Get the bytesize of the value represented by the abstract memory location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Location { size, .. } => *size,
Self::Pointer { target, .. } => target.bytesize(),
}
}
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
......@@ -222,9 +245,23 @@ impl std::fmt::Display for AbstractMemoryLocation {
}
#[cfg(test)]
mod tests {
pub mod tests {
use super::*;
impl AbstractIdentifier {
/// Mock an abstract identifier with the given TID name, pointing to the value in the register with the given name.
pub fn mock(
tid: impl ToString,
register: impl ToString,
size_in_bytes: u64,
) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new(tid.to_string()),
AbstractLocation::from_var(&Variable::mock(register, size_in_bytes)).unwrap(),
)
}
}
#[test]
fn test_constraint_enforcements() {
// Test that no temporary registers are allowed as abstract locations.
......@@ -243,4 +280,12 @@ mod tests {
let id = id.with_path_hint(Tid::new("second_hint")).unwrap();
assert!(id.with_path_hint(Tid::new("first_hint")).is_err());
}
#[test]
fn test_bytesize() {
let location =
AbstractLocation::from_stack_position(&Variable::mock("RSP", 8), 10, ByteSize::new(4));
let id = AbstractIdentifier::new(Tid::new("id"), location);
assert_eq!(id.bytesize(), ByteSize::new(4));
}
}
......@@ -143,7 +143,7 @@ fn test_eval() {
#[test]
fn test_extern_symbol_handling() {
let mut state = State::mock_arm32();
let extern_symbol = ExternSymbol::mock_arm32();
let extern_symbol = ExternSymbol::mock_arm32("mock_symbol");
let cconv = CallingConvention::mock_arm32();
let call = Term {
tid: Tid::new("call_tid"),
......
......@@ -494,6 +494,22 @@ pub fn get_program_cfg(program: &Term<Program>, extern_subs: HashSet<Tid>) -> Gr
builder.build()
}
/// Returns a map from function TIDs to the node index of the `BlkStart` node of the first block in the function.
pub fn get_entry_nodes_of_subs(graph: &Graph) -> HashMap<Tid, NodeIndex> {
let mut sub_to_entry_node_map: HashMap<Tid, NodeIndex> = HashMap::new();
for node in graph.node_indices() {
if let Node::BlkStart(block, sub) = graph[node] {
if let Some(entry_block) = sub.term.blocks.get(0) {
if block.tid == entry_block.tid {
sub_to_entry_node_map.insert(sub.tid.clone(), node);
}
}
}
}
sub_to_entry_node_map
}
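A minimal usage sketch (mirroring the call site in the CWE-119 check further below); `fixpoint_computation` and `mk_start_state` are hypothetical placeholders, not part of this commit:

```rust
// Hedged sketch: seed a fixpoint computation with one start state per function.
// `fixpoint_computation` and `mk_start_state` are assumed placeholders.
for (sub_tid, entry_node) in get_entry_nodes_of_subs(&graph) {
    let start_state = mk_start_state(&sub_tid);
    fixpoint_computation.set_node_value(entry_node, NodeValue::Value(start_state));
}
```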
#[cfg(test)]
mod tests {
use super::*;
......
......@@ -10,3 +10,4 @@ pub mod graph;
pub mod interprocedural_fixpoint_generic;
pub mod pointer_inference;
pub mod string_abstraction;
pub mod vsa_results;
......@@ -29,20 +29,22 @@
use super::fixpoint::Computation;
use super::forward_interprocedural_fixpoint::GeneralizedContext;
use super::interprocedural_fixpoint_generic::NodeValue;
use crate::abstract_domain::{DataDomain, IntervalDomain};
use crate::abstract_domain::{DataDomain, IntervalDomain, SizedDomain};
use crate::analysis::forward_interprocedural_fixpoint::Context as _;
use crate::analysis::graph::{Graph, Node};
use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::log::*;
use petgraph::graph::NodeIndex;
use petgraph::visit::IntoNodeReferences;
use std::collections::{BTreeMap, HashMap};
use std::collections::HashMap;
mod context;
pub mod object;
mod object_list;
mod state;
mod statistics;
mod vsa_result_impl;
use context::Context;
pub use state::State;
......@@ -77,11 +79,23 @@ pub struct Config {
}
/// A wrapper struct for the pointer inference computation object.
/// Also contains various analysis results computed through the fixpoint computation, including generated log messages.
pub struct PointerInference<'a> {
/// The pointer inference fixpoint computation object.
computation: Computation<GeneralizedContext<'a, Context<'a>>>,
/// A sender channel that can be used to collect logs in the corresponding log thread.
log_collector: crossbeam_channel::Sender<LogThreadMsg>,
/// The log messages and CWE warnings that have been generated during the pointer inference analysis.
pub collected_logs: (Vec<LogMessage>, Vec<CweWarning>),
/// Maps the TIDs of assignment, load or store [`Def`] instructions to the computed value data.
/// The map is filled after the fixpoint computation has finished.
values_at_defs: HashMap<Tid, Data>,
/// Maps the TIDs of load or store [`Def`] instructions to the computed address data.
/// The map will be filled after the fixpoint computation finished.
addresses_at_defs: HashMap<Tid, Data>,
/// Maps certain TIDs like the TIDs of [`Jmp`] instructions to the pointer inference state at that TID.
/// The map will be filled after the fixpoint computation finished.
states_at_tids: HashMap<Tid, State>,
}
impl<'a> PointerInference<'a> {
......@@ -95,27 +109,8 @@ impl<'a> PointerInference<'a> {
let context = Context::new(analysis_results, config, log_sender.clone());
let project = analysis_results.project;
let function_signatures = analysis_results.function_signatures.unwrap();
let sub_to_entry_node_map = crate::analysis::graph::get_entry_nodes_of_subs(context.graph);
let mut sub_to_entry_blocks_map = HashMap::new();
for (sub_tid, sub) in project.program.term.subs.iter() {
if let Some(entry_block) = sub.term.blocks.get(0) {
sub_to_entry_blocks_map.insert(sub_tid, entry_block.tid.clone());
}
}
let mut tid_to_graph_indices_map = HashMap::new();
for node in context.graph.node_indices() {
if let super::graph::Node::BlkStart(block, sub) = context.graph[node] {
tid_to_graph_indices_map.insert((block.tid.clone(), sub.tid.clone()), node);
}
}
let sub_to_entry_node_map: HashMap<Tid, NodeIndex> = sub_to_entry_blocks_map
.into_iter()
.filter_map(|(sub_tid, block_tid)| {
tid_to_graph_indices_map
.get(&(block_tid, sub_tid.clone()))
.map(|start_node_index| (sub_tid.clone(), *start_node_index))
})
.collect();
let mut fixpoint_computation =
super::forward_interprocedural_fixpoint::create_computation_with_alternate_worklist_order(context, None);
if print_stats {
......@@ -147,6 +142,9 @@ impl<'a> PointerInference<'a> {
computation: fixpoint_computation,
log_collector: log_sender,
collected_logs: (Vec::new(), Vec::new()),
values_at_defs: HashMap::new(),
addresses_at_defs: HashMap::new(),
states_at_tids: HashMap::new(),
}
}
......@@ -247,11 +245,68 @@ impl<'a> PointerInference<'a> {
));
}
/// Send an info log message to the log collector.
fn log_info(&self, msg: impl Into<String>) {
let log_msg = LogMessage::new_info(msg.into()).source("Pointer Inference");
let _ = self.log_collector.send(LogThreadMsg::Log(log_msg));
}
/// Fill the various result maps of `self` that are needed for the [`VsaResult`](crate::analysis::vsa_results::VsaResult) trait implementation.
fn fill_vsa_result_maps(&mut self) {
let context = self.computation.get_context().get_context();
let graph = self.computation.get_graph();
for node in graph.node_indices() {
let node_state = match self.computation.get_node_value(node) {
Some(NodeValue::Value(value)) => value,
_ => continue,
};
match graph[node] {
Node::BlkStart(blk, _sub) => {
let mut state = node_state.clone();
for def in &blk.term.defs {
match &def.term {
Def::Assign { var: _, value } => {
self.values_at_defs
.insert(def.tid.clone(), state.eval(value));
}
Def::Load { var, address } => {
let loaded_value = state
.load_value(address, var.size, context.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(var.size));
self.values_at_defs.insert(def.tid.clone(), loaded_value);
self.addresses_at_defs
.insert(def.tid.clone(), state.eval(address));
}
Def::Store { address, value } => {
self.values_at_defs
.insert(def.tid.clone(), state.eval(value));
self.addresses_at_defs
.insert(def.tid.clone(), state.eval(address));
}
}
state = match context.update_def(&state, def) {
Some(new_state) => new_state,
None => break,
}
}
}
Node::BlkEnd(blk, _sub) => {
for jmp in &blk.term.jmps {
self.states_at_tids
.insert(jmp.tid.clone(), node_state.clone());
}
}
Node::CallSource { .. } | Node::CallReturn { .. } => (),
}
}
}
/// Get the state of the fixpoint computation at the block end node before the given jump instruction.
/// This function only yields results after the fixpoint has been computed.
pub fn get_state_at_jmp_tid(&self, jmp_tid: &Tid) -> Option<&State> {
self.states_at_tids.get(jmp_tid)
}
/// Print information on dead ends in the control flow graph for debugging purposes.
/// Ignore returns where there is no known caller stack id.
#[allow(dead_code)]
......@@ -343,7 +398,7 @@ pub fn run<'a>(
print_debug: bool,
print_stats: bool,
) -> PointerInference<'a> {
let logging_thread = LogThread::spawn(collect_all_logs);
let logging_thread = LogThread::spawn(LogThread::collect_and_deduplicate);
let mut computation = PointerInference::new(
analysis_results,
......@@ -353,6 +408,7 @@ pub fn run<'a>(
);
computation.compute(print_stats);
computation.fill_vsa_result_maps();
if print_debug {
computation.print_compact_json();
......@@ -363,47 +419,6 @@ pub fn run<'a>(
computation
}
/// This function is responsible for collecting logs and CWE warnings.
/// For warnings with the same origin address only the last one is kept.
/// This prevents duplicates but may suppress some log messages
/// in the rare case that several different log messages with the same origin address are generated.
fn collect_all_logs(
receiver: crossbeam_channel::Receiver<LogThreadMsg>,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let mut logs_with_address = BTreeMap::new();
let mut general_logs = Vec::new();
let mut collected_cwes = BTreeMap::new();
while let Ok(log_thread_msg) = receiver.recv() {
match log_thread_msg {
LogThreadMsg::Log(log_message) => {
if let Some(ref tid) = log_message.location {
logs_with_address.insert(tid.address.clone(), log_message);
} else {
general_logs.push(log_message);
}
}
LogThreadMsg::Cwe(cwe_warning) => match &cwe_warning.addresses[..] {
[] => panic!("Unexpected CWE warning without origin address"),
[address, ..] => {
collected_cwes.insert(address.clone(), cwe_warning);
}
},
LogThreadMsg::Terminate => break,
}
}
let logs = logs_with_address
.values()
.cloned()
.chain(general_logs.into_iter())
.collect();
let cwes = collected_cwes
.into_iter()
.map(|(_key, value)| value)
.collect();
(logs, cwes)
}
#[cfg(test)]
mod tests {
use super::*;
......@@ -424,5 +439,17 @@ mod tests {
self.computation
.set_node_value(node_index, NodeValue::Value(node_value));
}
pub fn get_mut_values_at_defs(&mut self) -> &mut HashMap<Tid, Data> {
&mut self.values_at_defs
}
pub fn get_mut_addresses_at_defs(&mut self) -> &mut HashMap<Tid, Data> {
&mut self.addresses_at_defs
}
pub fn get_mut_states_at_tids(&mut self) -> &mut HashMap<Tid, State> {
&mut self.states_at_tids
}
}
}
use super::*;
use crate::analysis::vsa_results::VsaResult;
/// Implementation of the [`VsaResult`] trait for providing other analyses with an easy-to-use interface
/// to use the value set and points-to analysis results of the pointer inference.
impl<'a> VsaResult for PointerInference<'a> {
type ValueDomain = Data;
/// Return the value of the address at the given read or store instruction.
fn eval_address_at_def(&self, def_tid: &Tid) -> Option<Data> {
self.addresses_at_defs.get(def_tid).cloned()
}
/// Return the assigned value for store or assignment instructions or the value read for load instructions.
fn eval_value_at_def(&self, def_tid: &Tid) -> Option<Data> {
self.values_at_defs.get(def_tid).cloned()
}
/// Evaluate the value of the given expression at the given jump instruction.
fn eval_at_jmp(&self, jmp_tid: &Tid, expression: &Expression) -> Option<Data> {
let state = self.states_at_tids.get(jmp_tid)?;
Some(state.eval(expression))
}
/// Evaluate the value of the given parameter at the given jump instruction.
fn eval_parameter_arg_at_call(&self, jmp_tid: &Tid, parameter: &Arg) -> Option<Data> {
let state = self.states_at_tids.get(jmp_tid)?;
let context = self.computation.get_context().get_context();
state
.eval_parameter_arg(parameter, context.runtime_memory_image)
.ok()
}
}
//! This module provides the [`VsaResult`] trait
//! which defines an interface for the results of analyses similar to a value set analysis.
use crate::intermediate_representation::{Arg, Expression};
use crate::prelude::*;
/// A trait providing an interface for accessing the results of a value set analysis.
/// Note that the returned values may be any type of information associated with values at certain program points,
/// i.e. the trait can also be used for other analyses than just value set analyses.
///
/// Every returned value is wrapped into an `Option<..>`.
/// This should mainly be used to indicate that the analysis did not compute a value at a certain point,
/// e.g. because the code point was deemed to be dead code.
/// If the analysis wants to indicate that no specific information is known about a certain value
/// then this should be encoded in the `ValueDomain` itself instead of returning `None`.
pub trait VsaResult {
/// The type of the returned values.
/// Usually this should be an [`AbstractDomain`](crate::abstract_domain::AbstractDomain),
/// although this is not strictly required.
type ValueDomain;
/// Return the value stored for write instructions, the value read for read instructions or the value assigned for assignments.
fn eval_value_at_def(&self, def_tid: &Tid) -> Option<Self::ValueDomain>;
/// Return the value of the address that is read from or written to at load or store instructions.
fn eval_address_at_def(&self, def_tid: &Tid) -> Option<Self::ValueDomain>;
/// Return the value of a parameter at the given jump instruction.
fn eval_parameter_arg_at_call(&self, jmp_tid: &Tid, param: &Arg) -> Option<Self::ValueDomain>;
/// Evaluate the value of the given expression at the given jump instruction.
fn eval_at_jmp(&self, jmp_tid: &Tid, expression: &Expression) -> Option<Self::ValueDomain>;
}
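As a usage sketch, a check can stay agnostic of the concrete analysis by being generic over `VsaResult`; the helper below is hypothetical and only illustrates the access pattern:

```rust
// Hedged sketch: a check that only needs parameter values at call sites can
// accept any `VsaResult` implementation (e.g. the pointer inference results).
fn param_is_known<V: VsaResult>(vsa: &V, call_tid: &Tid, param: &Arg) -> bool {
    // `None` means the analysis computed no value at this point (e.g. dead code).
    vsa.eval_parameter_arg_at_call(call_tid, param).is_some()
}
```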
......@@ -5,6 +5,7 @@
//! but directly incorporated into the [`pointer_inference`](crate::analysis::pointer_inference) module.
//! See there for detailed information about this check.
pub mod cwe_119;
pub mod cwe_134;
pub mod cwe_190;
pub mod cwe_215;
......
use crate::abstract_domain::*;
use crate::analysis::function_signature::FunctionSignature;
use crate::analysis::graph::Graph;
use crate::analysis::pointer_inference::{Data, PointerInference};
use crate::intermediate_representation::*;
use crate::utils::log::{CweWarning, LogMessage, LogThreadMsg};
use crate::{analysis::vsa_results::VsaResult, prelude::*};
use std::collections::{BTreeMap, HashMap, HashSet};
use super::state::State;
/// Trait implementations for the [`Context`] struct,
/// especially the implementation of the [forward interprocedural fixpoint context](`crate::analysis::forward_interprocedural_fixpoint::Context`) trait.
mod trait_impls;
/// The context struct for the analysis.
pub struct Context<'a> {
/// A pointer to the project struct.
pub project: &'a Project,
/// A pointer to the control flow graph.
pub graph: &'a Graph<'a>,
/// A pointer to the results of the pointer inference analysis.
pub pointer_inference: &'a PointerInference<'a>,
/// A pointer to the computed function signatures for all internal functions.
pub function_signatures: &'a BTreeMap<Tid, FunctionSignature>,
/// A map mapping the TID of a function to the set of all known callsites of that function.
pub callee_to_callsites_map: HashMap<Tid, HashSet<Tid>>,
/// A map that maps abstract identifiers representing the values of parameters at callsites
/// to the corresponding value (in the context of the caller) according to the pointer inference analysis.
pub param_replacement_map: HashMap<AbstractIdentifier, Data>,
/// A map that maps the TIDs of calls to allocating functions (like malloc, realloc and calloc)
/// to the value representing the size of the allocated memory object according to the pointer inference analysis.
pub malloc_tid_to_object_size_map: HashMap<Tid, Data>,
/// A map that maps the TIDs of jump instructions to the function TID of the caller.
pub call_to_caller_fn_map: HashMap<Tid, Tid>,
/// A sender channel that can be used to collect logs in the corresponding logging thread.
pub log_collector: crossbeam_channel::Sender<LogThreadMsg>,
}
impl<'a> Context<'a> {
/// Create a new context object.
pub fn new(
project: &'a Project,
graph: &'a Graph<'a>,
pointer_inference: &'a PointerInference<'a>,
function_signatures: &'a BTreeMap<Tid, FunctionSignature>,
analysis_results: &AnalysisResults,
log_collector: crossbeam_channel::Sender<LogThreadMsg>,
) -> Self {
Context {
project,
graph,
pointer_inference,
function_signatures,
callee_to_callsites_map: compute_callee_to_call_sites_map(project),
param_replacement_map: compute_param_replacement_map(analysis_results),
malloc_tid_to_object_size_map: compute_size_values_of_malloc_calls(analysis_results),
call_to_caller_fn_map: compute_call_to_caller_map(project),
log_collector,
}
}
/// Returns `true` if the given abstract ID is the identifier of a stack frame of some function.
pub fn is_stack_frame_id(&self, id: &AbstractIdentifier) -> bool {
self.project.program.term.subs.contains_key(id.get_tid())
&& *id
== AbstractIdentifier::from_var(
id.get_tid().clone(),
&self.project.stack_pointer_register,
)
}
/// Compute the size of a heap object created by a malloc-like function call.
///
/// Uses the path hints in the given `object_id` to concretize the size if possible.
/// If the size may be unknown but at least one possible absolute value for the size is found,
/// then the absolute value is used and unknown origins of the size value are ignored.
/// If more than one possible absolute value for the size is found, then the minimum value is used if it is positive, otherwise the maximum.
pub fn compute_size_of_heap_object(&self, object_id: &AbstractIdentifier) -> BitvectorDomain {
if let Some(object_size) = self.malloc_tid_to_object_size_map.get(object_id.get_tid()) {
let fn_tid_at_malloc_call = self.call_to_caller_fn_map[object_id.get_tid()].clone();
let object_size = self.recursively_substitute_param_values_context_sensitive(
object_size,
&fn_tid_at_malloc_call,
object_id.get_path_hints(),
);
let object_size = self.recursively_substitute_param_values(&object_size);
let object_size = match object_size.get_absolute_value() {
Some(size) => {
if let Ok((lower_bound, upper_bound)) = size.try_to_offset_interval() {
// If the lower bound is a reasonable value we approximate the object size by the lower bound instead of the upper bound.
let bound = if lower_bound > 0 {
lower_bound
} else {
upper_bound
};
Bitvector::from_i64(bound)
.into_resize_signed(object_size.bytesize())
.into()
} else {
BitvectorDomain::new_top(object_size.bytesize())
}
}
None => BitvectorDomain::new_top(object_size.bytesize()),
};
object_size
} else {
BitvectorDomain::new_top(object_id.bytesize())
}
}
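For illustration, the selection step above boils down to the following sketch with concrete numbers:

```rust
// Hedged sketch of the bound-selection step: prefer the lower bound of the
// size interval when it is positive, otherwise fall back to the upper bound.
fn approximate_size(lower_bound: i64, upper_bound: i64) -> i64 {
    if lower_bound > 0 { lower_bound } else { upper_bound }
}
// approximate_size(16, 32) == 16; approximate_size(0, 32) == 32
```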
/// Merge all possible caller values for the given parameter ID.
/// The absolute values are also merged separately to prevent widening operations during the merge.
fn substitute_param_values(
&self,
param_id: &AbstractIdentifier,
) -> (Option<IntervalDomain>, Data) {
let mut merged_absolute_value: Option<IntervalDomain> = None;
let mut merged_data: Option<Data> = None;
let function_tid = param_id.get_tid();
if let Some(callsites) = self.callee_to_callsites_map.get(function_tid) {
for callsite in callsites {
let param_id_at_callsite =
AbstractIdentifier::new(callsite.clone(), param_id.get_location().clone());
let value_at_callsite = match self.param_replacement_map.get(&param_id_at_callsite)
{
Some(val) => val,
None => continue,
};
merged_absolute_value = match (
&merged_absolute_value,
value_at_callsite.get_absolute_value(),
) {
(Some(val_left), Some(val_right)) => Some(val_left.signed_merge(val_right)),
(Some(val), None) | (None, Some(val)) => Some(val.clone()),
(None, None) => None,
};
merged_data = merged_data
.map(|val| val.merge(value_at_callsite))
.or_else(|| Some(value_at_callsite.clone()));
}
}
let merged_data = merged_data.unwrap_or_else(|| Data::new_top(param_id.bytesize()));
(merged_absolute_value, merged_data)
}
/// Recursively merge and insert all possible caller values for all parameter IDs contained in the given value.
/// Absolute values are merged separately to prevent widening operations during the merge.
///
/// Since recursive function calls could lead to infinite loops during the merge operation,
/// each parameter ID is substituted at most once during the algorithm.
/// This can lead to unresolved parameter IDs still being contained in the final result;
/// in some cases this can even happen without the presence of recursive function calls.
pub fn recursively_substitute_param_values(&self, value: &Data) -> Data {
let subs_list = &self.project.program.term.subs;
let mut already_handled_ids = HashSet::new();
let mut merged_absolute_value: Option<IntervalDomain> = value.get_absolute_value().cloned();
let mut merged_data = value.clone();
let mut has_stabilized = false;
while !has_stabilized {
has_stabilized = true;
let mut replacement_map: BTreeMap<AbstractIdentifier, Data> = BTreeMap::new();
for (id, offset) in merged_data.get_relative_values().clone() {
if !already_handled_ids.insert(id.clone())
|| !id.get_path_hints().is_empty()
|| !subs_list.contains_key(id.get_tid())
{
// ID was already present in `already_handled_ids` or it is not a parameter ID
replacement_map.insert(
id.clone(),
Data::from_target(id, Bitvector::zero(offset.bytesize().into()).into()),
);
} else {
has_stabilized = false;
let (caller_absolute_value, caller_data) = self.substitute_param_values(&id);
replacement_map.insert(id, caller_data);
merged_absolute_value = match (
merged_absolute_value,
caller_absolute_value.map(|val| val + offset),
) {
(Some(val_left), Some(val_right)) => {
Some(val_left.signed_merge(&val_right))
}
(Some(val), None) | (None, Some(val)) => Some(val.clone()),
(None, None) => None,
};
}
}
merged_data.replace_all_ids(&replacement_map);
}
merged_data.set_absolute_value(merged_absolute_value);
merged_data
}
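A worked example of the recursive substitution (mirroring `test_substitute_param_values` in the tests of this module):

```rust
// Hedged worked example: for the value `param + 5`, where one caller passes
// `param = recursive_param + 1` and the next caller passes
// `recursive_param = 39`, the substitution yields the constant 45.
let (offset_in_value, offset_at_callsite, constant) = (5i64, 1i64, 39i64);
assert_eq!(constant + offset_at_callsite + offset_in_value, 45);
```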
/// Replace all parameter IDs in the given value.
/// The replacement values are those of the parameters at the given call,
/// i.e. the replacement is context-sensitive to a specific call.
fn substitute_param_values_context_sensitive(
&self,
value: &Data,
call_tid: &Tid,
current_fn_tid: &Tid,
) -> Data {
let mut replacement_map: BTreeMap<AbstractIdentifier, Data> = BTreeMap::new();
for (id, offset) in value.get_relative_values().clone() {
if id.get_tid() == current_fn_tid && id.get_path_hints().is_empty() {
// Possible function param ID
let param_id_at_callsite =
AbstractIdentifier::new(call_tid.clone(), id.get_location().clone());
if let Some(value_at_callsite) =
self.param_replacement_map.get(&param_id_at_callsite)
{
replacement_map.insert(id, value_at_callsite.clone());
} // Else it is a pointer to the current stack frame, which is invalid in the caller.
} else {
// Not a function param.
replacement_map.insert(
id.clone(),
Data::from_target(id, Bitvector::zero(offset.bytesize().into()).into()),
);
}
}
let mut result = value.clone();
result.replace_all_ids(&replacement_map);
result
}
/// Replace all parameter IDs in the given value using the given path hints
/// to replace them with the corresponding values in the calling context of the path hints.
pub fn recursively_substitute_param_values_context_sensitive(
&self,
value: &Data,
current_fn_tid: &Tid,
path_hints: &[Tid],
) -> Data {
let mut substituted_value = value.clone();
let mut current_fn_tid = current_fn_tid.clone();
if path_hints.is_empty() {
return substituted_value;
}
for call_tid in path_hints {
substituted_value = self.substitute_param_values_context_sensitive(
&substituted_value,
call_tid,
&current_fn_tid,
);
// Now set the new current_fn_tid to the TID of the caller function.
current_fn_tid = self.call_to_caller_fn_map[call_tid].clone();
}
substituted_value
}
/// Log a debug log message in the log collector of `self`.
fn log_debug(&self, tid: &Tid, msg: impl ToString) {
let log_msg = LogMessage {
text: msg.to_string(),
level: crate::utils::log::LogLevel::Debug,
location: Some(tid.clone()),
source: Some(super::CWE_MODULE.name.to_string()),
};
self.log_collector.send(log_msg.into()).unwrap();
}
/// Check whether the given parameter at the given callsite may point outside of its corresponding memory object.
/// If yes, then generate a CWE warning.
fn check_param_at_call(
&self,
state: &mut State,
param: &Arg,
call_tid: &Tid,
target_fn_name: Option<&str>,
) {
if let Some(possible_address) = self
.pointer_inference
.eval_parameter_arg_at_call(call_tid, param)
{
let warnings = state.check_address_access(&possible_address, ByteSize::new(1), self);
if !warnings.is_empty() {
let description = match target_fn_name {
Some(target_name) => format!(
"(Buffer Overflow) Call to {} at {} may access out-of-bounds memory.",
target_name, &call_tid.address
),
None => format!(
"(Buffer Overflow) Call at {} may access out-of-bounds memory.",
&call_tid.address
),
};
let mut cwe_warning =
CweWarning::new("CWE119", super::CWE_MODULE.version, description);
cwe_warning.tids = vec![format!("{}", call_tid)];
cwe_warning.addresses = vec![call_tid.address.to_string()];
cwe_warning.other = vec![warnings];
self.log_collector.send(cwe_warning.into()).unwrap();
}
}
}
}
/// Compute a map that maps the TIDs of functions to the set of TIDs of all known callsites to the corresponding function.
fn compute_callee_to_call_sites_map(project: &Project) -> HashMap<Tid, HashSet<Tid>> {
let mut callee_to_call_sites_map: HashMap<Tid, HashSet<Tid>> = HashMap::new();
for sub in project.program.term.subs.values() {
for blk in &sub.term.blocks {
for jmp in &blk.term.jmps {
match &jmp.term {
Jmp::Call { target, .. } => {
let callsites = callee_to_call_sites_map.entry(target.clone()).or_default();
callsites.insert(jmp.tid.clone());
}
Jmp::CallInd { .. } => (), // FIXME: indirect call targets not yet supported.
_ => (),
}
}
}
}
callee_to_call_sites_map
}
/// Compute a mapping that maps each parameter of each call (given by an abstract identifier representing the parameter value at the callsite)
/// to its value at the callsite according to the pointer inference analysis.
fn compute_param_replacement_map(
analysis_results: &AnalysisResults,
) -> HashMap<AbstractIdentifier, Data> {
let mut param_replacement_map = HashMap::new();
for sub in analysis_results.project.program.term.subs.values() {
for blk in &sub.term.blocks {
for jmp in &blk.term.jmps {
match &jmp.term {
Jmp::Call { target, .. } => add_param_replacements_for_call(
analysis_results,
jmp,
target,
&mut param_replacement_map,
),
Jmp::CallInd { .. } => (), // FIXME: indirect call targets not yet supported.
_ => (),
}
}
}
}
param_replacement_map
}
/// For each parameter of the given call term, map the abstract identifier representing the value of the parameter at the callsite
/// to its concrete value (in the context of the caller).
/// Add the mappings to the given `replacement_map`.
fn add_param_replacements_for_call(
analysis_results: &AnalysisResults,
call: &Term<Jmp>,
callee_tid: &Tid,
replacement_map: &mut HashMap<AbstractIdentifier, Data>,
) {
let vsa_results = analysis_results.pointer_inference.unwrap();
if let Some(fn_sig) = analysis_results
.function_signatures
.unwrap()
.get(callee_tid)
{
for param_arg in fn_sig.parameters.keys() {
if let Some(param_value) = vsa_results.eval_parameter_arg_at_call(&call.tid, param_arg)
{
let param_id = AbstractIdentifier::from_arg(&call.tid, param_arg);
replacement_map.insert(param_id, param_value);
}
}
} else if let Some(extern_symbol) = analysis_results
.project
.program
.term
.extern_symbols
.get(callee_tid)
{
for param_arg in &extern_symbol.parameters {
if let Some(param_value) = vsa_results.eval_parameter_arg_at_call(&call.tid, param_arg)
{
let param_id = AbstractIdentifier::from_arg(&call.tid, param_arg);
replacement_map.insert(param_id, param_value);
}
}
}
}
/// Compute a map mapping the TIDs of malloc-like calls (e.g. malloc, realloc, calloc)
/// to the size value of the allocated object according to the pointer inference analysis.
fn compute_size_values_of_malloc_calls(analysis_results: &AnalysisResults) -> HashMap<Tid, Data> {
let project = analysis_results.project;
let pointer_inference = analysis_results.pointer_inference.unwrap();
let mut malloc_size_map = HashMap::new();
for sub in analysis_results.project.program.term.subs.values() {
for blk in &sub.term.blocks {
for jmp in &blk.term.jmps {
if let Jmp::Call { target, .. } = &jmp.term {
if let Some(symbol) = project.program.term.extern_symbols.get(target) {
if let Some(size_value) = compute_size_value_of_malloc_like_call(
&jmp.tid,
symbol,
pointer_inference,
) {
malloc_size_map.insert(jmp.tid.clone(), size_value);
}
}
}
}
}
}
malloc_size_map
}
/// Compute the size value of a call to a malloc-like function according to the pointer inference and return it.
/// Returns `None` if the called symbol is not an allocating function or the size computation for the symbol is not yet implemented.
///
/// Currently this function computes the size values for the symbols `malloc`, `realloc` and `calloc`.
fn compute_size_value_of_malloc_like_call(
jmp_tid: &Tid,
called_symbol: &ExternSymbol,
pointer_inference: &PointerInference,
) -> Option<Data> {
match called_symbol.name.as_str() {
"malloc" => {
let size_param = &called_symbol.parameters[0];
match pointer_inference.eval_parameter_arg_at_call(jmp_tid, size_param) {
Some(size_value) => Some(size_value),
None => Some(Data::new_top(size_param.bytesize())),
}
}
"realloc" => {
let size_param = &called_symbol.parameters[1];
match pointer_inference.eval_parameter_arg_at_call(jmp_tid, size_param) {
Some(size_value) => Some(size_value),
None => Some(Data::new_top(size_param.bytesize())),
}
}
"calloc" => {
let count_param = &called_symbol.parameters[0];
let size_param = &called_symbol.parameters[1];
match (
pointer_inference.eval_parameter_arg_at_call(jmp_tid, count_param),
pointer_inference.eval_parameter_arg_at_call(jmp_tid, size_param),
) {
(Some(count_value), Some(size_value)) => {
Some(count_value.bin_op(BinOpType::IntMult, &size_value))
}
_ => Some(Data::new_top(size_param.bytesize())),
}
}
_ => None,
}
}
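For `calloc` the two parameter values are multiplied; a concrete sketch of that arithmetic:

```rust
// Hedged sketch: the size of a `calloc(count, size)` object is the product of
// both parameters (computed above via `BinOpType::IntMult` on the domains).
let (count, size) = (10u64, 4u64);
assert_eq!(count * size, 40);
```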
/// Compute a map that maps the TIDs of call instructions to the TID of the caller function.
fn compute_call_to_caller_map(project: &Project) -> HashMap<Tid, Tid> {
let mut call_to_caller_map = HashMap::new();
for (sub_tid, sub) in &project.program.term.subs {
for block in &sub.term.blocks {
for jmp in &block.term.jmps {
match &jmp.term {
Jmp::Call { .. } | Jmp::CallInd { .. } | Jmp::CallOther { .. } => {
call_to_caller_map.insert(jmp.tid.clone(), sub_tid.clone());
}
_ => (),
}
}
}
}
call_to_caller_map
}
#[cfg(test)]
pub mod tests;
use super::*;
impl<'a> Context<'a> {
/// Create a mock context.
/// Note that this function leaks memory!
pub fn mock_x64() -> Context<'static> {
let mut project = Box::new(Project::mock_x64());
project.program.term.subs = BTreeMap::from([
(Tid::new("func"), Sub::mock("func")),
(Tid::new("main"), Sub::mock("main")),
]);
let project = Box::leak(project);
let pointer_inference = Box::new(PointerInference::mock(project));
let pointer_inference = Box::leak(pointer_inference);
let analysis_results = AnalysisResults::mock_from_project(project);
let analysis_results =
Box::new(analysis_results.with_pointer_inference(Some(pointer_inference)));
let analysis_results = Box::leak(analysis_results);
let (log_collector, _) = crossbeam_channel::unbounded();
Context::new(
analysis_results.project,
analysis_results.control_flow_graph,
analysis_results.pointer_inference.unwrap(),
analysis_results.function_signatures.unwrap(),
analysis_results,
log_collector,
)
}
}
#[test]
fn test_compute_size_value_of_malloc_like_call() {
use crate::analysis::pointer_inference::State as PiState;
let project = Project::mock_x64();
let mut pi_results = PointerInference::mock(&project);
let mut malloc_state = PiState::new(&Variable::mock("RSP", 8), Tid::new("func"));
malloc_state.set_register(&Variable::mock("RDI", 8), Bitvector::from_i64(3).into());
*pi_results.get_mut_states_at_tids() = HashMap::from([(Tid::new("malloc_call"), malloc_state)]);
let malloc_symbol = ExternSymbol::mock_x64("malloc");
assert_eq!(
compute_size_value_of_malloc_like_call(
&Tid::new("malloc_call"),
&malloc_symbol,
&pi_results
)
.unwrap(),
Bitvector::from_i64(3).into()
);
assert!(compute_size_value_of_malloc_like_call(
&Tid::new("other"),
&ExternSymbol::mock_x64("other"),
&pi_results
)
.is_none());
}
#[test]
fn test_substitute_param_values_context_sensitive() {
let mut context = Context::mock_x64();
let param_id = AbstractIdentifier::mock("func", "RDI", 8);
let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
let recursive_param_id = AbstractIdentifier::mock("main", "RSI", 8);
let recursive_callsite_id = AbstractIdentifier::mock("recursive_callsite_id", "RSI", 8);
let param_value = Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(1).into());
let recursive_param_value = Data::from(Bitvector::from_i64(41));
let param_replacement_map = HashMap::from([
(callsite_id, param_value.clone()),
(recursive_callsite_id.clone(), recursive_param_value),
]);
let callee_to_callsites_map = HashMap::from([
(Tid::new("func"), HashSet::from([Tid::new("callsite_id")])),
(
Tid::new("main"),
HashSet::from([Tid::new("recursive_callsite_id")]),
),
]);
let call_to_caller_map = HashMap::from([
(Tid::new("callsite_id"), Tid::new("main")),
(
Tid::new("recursive_callsite_id"),
Tid::new("somer_other_fn_id"),
),
]);
context.param_replacement_map = param_replacement_map;
context.callee_to_callsites_map = callee_to_callsites_map;
context.call_to_caller_fn_map = call_to_caller_map;
// non-recursive substitution
let result = context.substitute_param_values_context_sensitive(
&Data::from_target(param_id.clone(), Bitvector::from_i64(5).into()),
&Tid::new("callsite_id"),
&Tid::new("func"),
);
assert_eq!(
result,
Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(6).into())
);
// recursive substitution
let result = context.recursively_substitute_param_values_context_sensitive(
&Data::from_target(param_id, Bitvector::from_i64(5).into()),
&Tid::new("func"),
&[Tid::new("callsite_id"), Tid::new("recursive_callsite_id")],
);
println!("{:#}", result.to_json_compact());
assert_eq!(result, Bitvector::from_i64(47).into());
}
#[test]
fn test_substitute_param_values() {
let mut context = Context::mock_x64();
let param_id = AbstractIdentifier::mock("func", "RDI", 8);
let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
let recursive_param_id = AbstractIdentifier::mock("main", "RSI", 8);
let recursive_callsite_id = AbstractIdentifier::mock("recursive_callsite_id", "RSI", 8);
let param_value = Data::from_target(recursive_param_id.clone(), Bitvector::from_i64(1).into());
let recursive_param_value = Data::from(Bitvector::from_i64(39));
let param_replacement_map = HashMap::from([
(callsite_id, param_value.clone()),
(recursive_callsite_id.clone(), recursive_param_value),
]);
let callee_to_callsites_map = HashMap::from([
(Tid::new("func"), HashSet::from([Tid::new("callsite_id")])),
(
Tid::new("main"),
HashSet::from([Tid::new("recursive_callsite_id")]),
),
]);
context.param_replacement_map = param_replacement_map;
context.callee_to_callsites_map = callee_to_callsites_map;
// non-recursive substitution
let (result_absolute, result) = context.substitute_param_values(&param_id);
assert!(result_absolute.is_none());
assert_eq!(result, param_value);
// recursive substitution
let result = context.recursively_substitute_param_values(&Data::from_target(
param_id,
Bitvector::from_i64(5).into(),
));
assert_eq!(result, Bitvector::from_i64(45).into());
}
use super::super::State;
use super::*;
use crate::analysis::graph::Graph;
impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
type Value = State;
/// Get the control flow graph.
fn get_graph(&self) -> &Graph<'a> {
self.graph
}
/// Merge two states.
fn merge(&self, state1: &State, state2: &State) -> State {
state1.merge(state2)
}
/// If the given [`Def`] is a load or store instruction, check whether it may access addresses
/// that are out of bounds of the corresponding memory object.
/// Generate CWE warnings accordingly.
fn update_def(&self, state: &State, def: &Term<Def>) -> Option<State> {
let mut state = state.clone();
match &def.term {
Def::Load { address: _, var } => {
let address = match self.pointer_inference.eval_address_at_def(&def.tid) {
Some(address) => address,
None => return None, // There seems to be no pointer inference state here.
};
let warnings = state.check_address_access(&address, var.size, self);
if !warnings.is_empty() {
let mut cwe_warning = CweWarning::new(
"CWE125",
super::super::CWE_MODULE.version,
format!(
"(Out-of-bounds Read) Memory read at {} may be out of bounds",
&def.tid.address
),
);
cwe_warning.tids = vec![format!("{}", def.tid)];
cwe_warning.addresses = vec![def.tid.address.to_string()];
cwe_warning.other = vec![warnings];
self.log_collector.send(cwe_warning.into()).unwrap();
}
}
Def::Store { address: _, value } => {
let address = match self.pointer_inference.eval_address_at_def(&def.tid) {
Some(address) => address,
None => return None, // There seems to be no pointer inference state here.
};
let warnings = state.check_address_access(&address, value.bytesize(), self);
if !warnings.is_empty() {
let mut cwe_warning = CweWarning::new(
"CWE787",
super::super::CWE_MODULE.version,
format!(
"(Out-of-bounds Write) Memory write at {} may be out of bounds.",
&def.tid.address
),
);
cwe_warning.tids = vec![format!("{}", def.tid)];
cwe_warning.addresses = vec![def.tid.address.to_string()];
cwe_warning.other = vec![warnings];
self.log_collector.send(cwe_warning.into()).unwrap();
}
}
Def::Assign { .. } => (),
}
Some(state)
}
/// The state does not change for intraprocedural jumps.
fn update_jump(
&self,
state: &State,
_jump: &Term<Jmp>,
_untaken_conditional: Option<&Term<Jmp>>,
_target: &Term<Blk>,
) -> Option<State> {
Some(state.clone())
}
/// Always returns `None`, since the fixpoint computation is intraprocedural
/// and the access to parameter values is checked in the callee separately.
fn update_call(
&self,
_state: &State,
_call: &Term<Jmp>,
_target: &crate::analysis::graph::Node,
_calling_convention: &Option<String>,
) -> Option<State> {
// The analysis is intraprocedural and parameters are checked not here but in the callee.
None
}
/// Just return the `state_before_call` since the fixpoint computation is intraprocedural.
fn update_return(
&self,
_state_before_return: Option<&State>,
state_before_call: Option<&State>,
_call_term: &Term<Jmp>,
_return_term: &Term<Jmp>,
_calling_convention: &Option<String>,
) -> Option<State> {
// The analysis is intraprocedural
state_before_call.cloned()
}
/// For calls to extern symbols check whether any parameter may point out of bounds of the corresponding memory object.
/// Note that we do not know whether the called function accesses memory areas of certain sizes.
/// Thus we only check that parameter pointers themselves point into the memory object
/// but not whether certain address ranges around a pointer are still inside the corresponding memory object.
fn update_call_stub(&self, state: &State, call: &Term<Jmp>) -> Option<State> {
let mut state = state.clone();
match &call.term {
Jmp::Call { target, .. } => {
if let Some(extern_symbol) = self.project.program.term.extern_symbols.get(target) {
for param in &extern_symbol.parameters {
self.check_param_at_call(
&mut state,
param,
&call.tid,
Some(&extern_symbol.name),
);
}
} else {
self.log_debug(
&call.tid,
"Call stub edge without associated extern symbol encountered.",
);
}
}
Jmp::CallInd { .. } => {
if let Some(cconv) = self.project.get_standard_calling_convention() {
for param in &cconv.integer_parameter_register {
let param_arg = Arg::from_var(param.clone(), None);
self.check_param_at_call(&mut state, &param_arg, &call.tid, None);
}
}
}
_ => (),
}
Some(state)
}
/// Just return the given state without modification.
fn specialize_conditional(
&self,
state: &State,
_condition: &Expression,
_block_before_condition: &Term<Blk>,
_is_true: bool,
) -> Option<State> {
Some(state.clone())
}
}
//! This module implements a check for CWE-119: Buffer Overflow
//! and its variants CWE-125: Out-of-bounds Read and CWE-787: Out-of-bounds Write.
//!
//! Arrays or buffers of any kind are often accessed through indices.
//! If the index of an access is outside of the bounds of the buffer, this can lead to severe consequences.
//! In the case of out-of-bounds read accesses this often leads to exposure of sensitive information to an attacker.
//! Out-of-bounds write accesses can often be used to hijack the control flow of a program
//! and thus may lead to arbitrary code execution.
//!
//! See <https://cwe.mitre.org/data/definitions/119.html> for a detailed description.
//!
//! ## How the check works
//!
//! The check uses the results of the [Pointer Inference analysis](`crate::analysis::pointer_inference`)
//! to check whether any memory accesses may point outside of the bounds of the corresponding memory objects.
//! For this the results of the Pointer Inference analysis are aggregated interprocedurally.
//! Additionally, the check uses a lightweight intraprocedural dataflow fixpoint computation
//! to ensure that for each memory object only the first access outside of its bounds is flagged as a CWE.
//!
//! ## False Positives
//!
//! - Any analysis imprecision of the Pointer Inference analysis may lead to false positive results in this check.
//! - If no exact bounds for a memory object could be inferred then the strictest bounds found are used,
//! which can lead to false positive warnings.
//!
//! ## False Negatives
//!
//! - In cases where the Pointer Inference analysis could not infer any bounds at all for the memory object or the access index,
//! this check generally assumes analysis imprecision as the culprit and will not flag them as CWEs.
//! This leads to false negatives, especially in cases where the bounds directly depend on user input.
//! - The Pointer Inference analysis cannot distinguish different objects located on the same stack frame.
//! Thus buffer overflows on the stack can only be detected if they may reach outside of the whole stack frame.
//! This leads to false negatives, especially for buffer overflows caused by off-by-one bugs.
//! - For parameters of extern function calls the check only checks whether the parameter itself may point outside of the boundaries of a memory object.
//! But since we generally do not know what size the called function expects the pointed-to object to have,
//! this check may still miss buffer overflows occurring in the called function.
//! - Right now the check only considers buffers on the stack or the heap, but not buffers in global memory.
//! Thus corresponding overflows of buffers in global memory are not detected.
use crate::analysis::pointer_inference::Data;
use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage, LogThread};
use crate::CweModule;
mod context;
use context::Context;
mod state;
use state::State;
/// The module name and version
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE119",
version: "0.3",
run: check_cwe,
};
/// Run the check for CWE-119: Buffer Overflows.
///
/// This function prepares the fixpoint computation that computes the CWE warnings by setting the start states for all function starts.
/// Then the fixpoint computation is executed.
/// Afterwards, the logs and CWE warnings are collected from a separate logging thread and returned.
pub fn check_cwe(
analysis_results: &AnalysisResults,
_config: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let log_thread = LogThread::spawn(LogThread::collect_and_deduplicate);
let context = Context::new(
analysis_results.project,
analysis_results.control_flow_graph,
analysis_results.pointer_inference.unwrap(),
analysis_results.function_signatures.unwrap(),
analysis_results,
log_thread.get_msg_sender(),
);
let mut fixpoint_computation =
crate::analysis::forward_interprocedural_fixpoint::create_computation(context, None);
for (sub_tid, entry_node_of_sub) in
crate::analysis::graph::get_entry_nodes_of_subs(analysis_results.control_flow_graph)
{
if let Some(function_sig) = analysis_results.function_signatures.unwrap().get(&sub_tid) {
let fn_start_state = State::new(&sub_tid, function_sig, analysis_results.project);
fixpoint_computation.set_node_value(
entry_node_of_sub,
crate::analysis::interprocedural_fixpoint_generic::NodeValue::Value(fn_start_state),
);
}
}
fixpoint_computation.compute_with_max_steps(100);
log_thread.collect()
}
use super::Context;
use super::Data;
use crate::abstract_domain::*;
use crate::analysis::function_signature::FunctionSignature;
use crate::intermediate_representation::Project;
use crate::prelude::*;
use std::collections::BTreeMap;
/// The state consists of the abstract identifier for the current stack frame
/// and maps of the lower and upper bounds for all known memory objects.
///
/// The bounds of a memory object are computed the first time an access to it is observed.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct State {
/// The abstract identifier of the stack frame of the function that the state belongs to.
stack_id: AbstractIdentifier,
/// The lower bounds of all memory objects for which accesses have been observed.
object_lower_bounds: DomainMap<AbstractIdentifier, BitvectorDomain, UnionMergeStrategy>,
/// The upper bounds of all memory objects for which accesses have been observed.
object_upper_bounds: DomainMap<AbstractIdentifier, BitvectorDomain, UnionMergeStrategy>,
}
impl State {
/// Create a new state representing the state at the start of the function
/// given by the `function_tid` and corresponding function signature.
///
/// Only the bounds of the current stack frame are known in this state,
/// since there are no memory accesses to observe prior to the function start.
pub fn new(function_tid: &Tid, function_sig: &FunctionSignature, project: &Project) -> State {
let stack_id =
AbstractIdentifier::from_var(function_tid.clone(), &project.stack_pointer_register);
let stack_upper_bound: i64 = match project.stack_pointer_register.name.as_str() {
"ESP" => 4,
"RSP" => 8,
_ => 0,
};
let stack_upper_bound = std::cmp::max(
stack_upper_bound,
function_sig.get_stack_params_total_size(),
);
let object_lower_bounds = BTreeMap::from([(
stack_id.clone(),
BitvectorDomain::new_top(stack_id.bytesize()),
)]);
let object_upper_bounds = BTreeMap::from([(
stack_id.clone(),
Bitvector::from_i64(stack_upper_bound)
.into_resize_signed(stack_id.bytesize())
.into(),
)]);
State {
stack_id,
object_lower_bounds: object_lower_bounds.into(),
object_upper_bounds: object_upper_bounds.into(),
}
}
/// Check whether an access to the given address would be within the bounds of all possible target memory objects.
/// Return a list of logging messages describing those cases
/// where the access may fall outside of the corresponding memory object boundaries.
pub fn check_address_access(
&mut self,
address: &Data,
value_size: ByteSize,
context: &Context,
) -> Vec<String> {
let mut out_of_bounds_access_warnings = Vec::new();
for (id, offset) in address.get_relative_values() {
if !self.object_lower_bounds.contains_key(id) {
self.compute_bounds_of_id(id, context);
}
if let Ok((lower_offset, upper_offset)) = offset.try_to_offset_interval() {
if let Ok(lower_bound) = self.object_lower_bounds.get(id).unwrap().try_to_offset() {
if lower_bound > lower_offset {
out_of_bounds_access_warnings.push(format!("For the object ID {} access to the offset {} may be smaller than the lower object bound of {}.",
id,
lower_offset,
lower_bound,
));
// Replace the bound with `Top` to prevent duplicate CWE warnings with the same root cause.
self.object_lower_bounds
.insert(id.clone(), BitvectorDomain::new_top(address.bytesize()));
}
}
if let Ok(upper_bound) = self.object_upper_bounds.get(id).unwrap().try_to_offset() {
if upper_bound < upper_offset + (u64::from(value_size) as i64) {
out_of_bounds_access_warnings.push(format!("For the object ID {} access to the offset {} may be larger than the upper object bound of {}.",
id,
upper_offset + (u64::from(value_size) as i64),
upper_bound,
));
// Replace the bound with `Top` to prevent duplicate CWE warnings with the same root cause.
self.object_upper_bounds
.insert(id.clone(), BitvectorDomain::new_top(address.bytesize()));
}
}
}
}
out_of_bounds_access_warnings
}
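A concrete instance of the upper-bound condition above (matching `test_check_address_access` further below):

```rust
// Hedged worked example: an 8-byte access at offset 4 ends at byte 12, which
// exceeds a stack upper bound of 8, so an out-of-bounds warning is generated.
let (upper_bound, upper_offset, value_size) = (8i64, 4i64, 8i64);
assert!(upper_bound < upper_offset + value_size);
```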
/// Compute the bounds of a memory object given by the provided `object_id`
/// and insert the results into `self.object_lower_bounds` and `self.object_upper_bounds`.
///
/// This function assumes that the object's bounds have not been computed prior to this function call.
/// For bounds that could not be determined (e.g. because the source for the object ID is unknown)
/// we insert `Top` bounds into the bounds maps.
fn compute_bounds_of_id(&mut self, object_id: &AbstractIdentifier, context: &Context) {
if context
.malloc_tid_to_object_size_map
.contains_key(object_id.get_tid())
{
let object_size = context.compute_size_of_heap_object(object_id);
self.object_lower_bounds.insert(
object_id.clone(),
Bitvector::zero(object_id.bytesize().into()).into(),
);
self.object_upper_bounds
.insert(object_id.clone(), object_size);
} else if *object_id == self.stack_id {
panic!("Current stack frame bounds not set.");
} else if object_id.get_tid() == self.stack_id.get_tid()
&& object_id.get_path_hints().is_empty()
{
// Handle parameter IDs
self.compute_bounds_of_param_id(object_id, context);
} else {
// The type of object is unknown, thus the size restrictions are also unknown.
self.object_lower_bounds.insert(
object_id.clone(),
BitvectorDomain::new_top(object_id.bytesize()),
);
self.object_upper_bounds.insert(
object_id.clone(),
BitvectorDomain::new_top(object_id.bytesize()),
);
}
}
/// Compute the bounds of the memory object associated with the given parameter ID
/// and add the results to the known object bounds of `self`.
///
/// Since the memory object associated with a parameter may not be unique,
/// the bounds are only approximated from those objects where exact bounds could be determined.
/// If different objects were found, the bounds are approximated by the strictest bounds among them.
fn compute_bounds_of_param_id(
&mut self,
param_object_id: &AbstractIdentifier,
context: &Context,
) {
let object_data = context.recursively_substitute_param_values(&DataDomain::from_target(
param_object_id.clone(),
Bitvector::zero(param_object_id.bytesize().into()).into(),
));
let mut lower_bound = None;
let mut upper_bound = None;
for (id, offset) in object_data.get_relative_values() {
// Right now we ignore cases where we do not know the exact offset into the object.
let offset = match offset.try_to_offset() {
Ok(offset) => offset,
Err(_) => continue,
};
if context
.malloc_tid_to_object_size_map
.contains_key(id.get_tid())
{
let object_size = context.compute_size_of_heap_object(id);
lower_bound = lower_bound
.map(|old_bound| std::cmp::max(old_bound, -offset))
.or(Some(-offset));
if let Ok(concrete_object_size) = object_size.try_to_offset() {
upper_bound = upper_bound
.map(|old_bound| std::cmp::min(old_bound, concrete_object_size - offset))
.or(Some(concrete_object_size - offset));
}
} else if context.is_stack_frame_id(id) {
let stack_frame_upper_bound = context
.function_signatures
.get(id.get_tid())
.unwrap()
.get_stack_params_total_size();
upper_bound = upper_bound
.map(|old_bound| std::cmp::min(old_bound, stack_frame_upper_bound))
.or(Some(stack_frame_upper_bound));
// We do not set a lower bound since we do not know the concrete call site for stack pointers,
// which we would need to determine a correct lower bound.
}
// FIXME: Cases not handled here include unresolved parameter IDs, unknown IDs and global pointers.
// For the first two we do not have any size information.
// For global pointers we need some kind of pre-analysis so that we do not have to assume
// that the pointer may address the complete range of global data addresses.
}
let lower_bound = match lower_bound {
Some(bound) => Bitvector::from_i64(bound)
.into_resize_signed(param_object_id.bytesize())
.into(),
None => BitvectorDomain::new_top(param_object_id.bytesize()),
};
let upper_bound = match upper_bound {
Some(bound) => Bitvector::from_i64(bound)
.into_resize_signed(param_object_id.bytesize())
.into(),
None => BitvectorDomain::new_top(param_object_id.bytesize()),
};
self.object_lower_bounds
.insert(param_object_id.clone(), lower_bound);
self.object_upper_bounds
.insert(param_object_id.clone(), upper_bound);
}
}
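A worked example for the parameter bounds (matching the logic above and `test_compute_bounds_of_param_id` in the tests):

```rust
// Hedged worked example: a parameter pointing at offset 2 into a heap object
// of size 42 gets lower bound -offset = -2 and upper bound size - offset = 40.
let (object_size, offset) = (42i64, 2i64);
assert_eq!((-offset, object_size - offset), (-2, 40));
```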
impl AbstractDomain for State {
/// Merge two states by merging the known object bounds of both.
fn merge(&self, other: &State) -> State {
State {
stack_id: self.stack_id.clone(),
object_lower_bounds: self.object_lower_bounds.merge(&other.object_lower_bounds),
object_upper_bounds: self.object_upper_bounds.merge(&other.object_upper_bounds),
}
}
/// The state has no logical `Top` element.
fn is_top(&self) -> bool {
false
}
}
impl State {
/// Get a json-representation of the state.
/// Intended for pretty printing, not usable for serialization/deserialization.
#[allow(dead_code)]
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
let mut state_map = Map::new();
state_map.insert(
"stack_id".to_string(),
Value::String(self.stack_id.to_string()),
);
let lower_bounds: Vec<_> = self
.object_lower_bounds
.iter()
.map(|(id, bound)| Value::String(format!("{}: {}", id, bound)))
.collect();
state_map.insert("lower_bounds".to_string(), Value::Array(lower_bounds));
let upper_bounds: Vec<_> = self
.object_upper_bounds
.iter()
.map(|(id, bound)| Value::String(format!("{}: {}", id, bound)))
.collect();
state_map.insert("upper_bounds".to_string(), Value::Array(upper_bounds));
Value::Object(state_map)
}
}
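// Sketch of the shape produced by `to_json_compact` (the id and bound strings
// are invented placeholders, since the exact `Display` output is defined
// elsewhere). Self-contained, assuming a `serde_json` dependency.
fn main() {
    let example = serde_json::json!({
        "stack_id": "<stack-id>",
        "lower_bounds": ["<object-id>: <lower-bound>"],
        "upper_bounds": ["<object-id>: <upper-bound>"],
    });
    println!("{}", serde_json::to_string_pretty(&example).unwrap());
}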
#[cfg(test)]
pub mod tests {
use super::*;
use crate::intermediate_representation::Variable;
use std::collections::{HashMap, HashSet};
#[test]
fn test_new() {
let context = Context::mock_x64();
let state = State::new(
&Tid::new("func"),
&FunctionSignature::mock_x64(),
context.project,
);
let stack_id = AbstractIdentifier::from_var(Tid::new("func"), &Variable::mock("RSP", 8));
assert_eq!(state.stack_id, stack_id);
assert_eq!(state.object_lower_bounds.len(), 1);
assert_eq!(state.object_upper_bounds.len(), 1);
assert_eq!(
*state.object_lower_bounds.get(&stack_id).unwrap(),
BitvectorDomain::new_top(ByteSize::new(8))
);
assert_eq!(
*state.object_upper_bounds.get(&stack_id).unwrap(),
Bitvector::from_i64(8).into()
);
}
#[test]
fn test_check_address_access() {
let context = Context::mock_x64();
let mut state = State::new(
&Tid::new("func"),
&FunctionSignature::mock_x64(),
context.project,
);
let stack_id = AbstractIdentifier::from_var(Tid::new("func"), &Variable::mock("RSP", 8));
// access in bounds
let address = Data::from_target(stack_id.clone(), Bitvector::from_i64(-12).into());
assert!(state
.check_address_access(&address, ByteSize::new(8), &context)
.is_empty());
// access out of bounds
let address = Data::from_target(stack_id.clone(), Bitvector::from_i64(4).into());
assert_eq!(
state
.check_address_access(&address, ByteSize::new(8), &context)
.len(),
1
);
// subsequent errors are suppressed
let address = Data::from_target(stack_id, Bitvector::from_i64(8).into());
assert!(state
.check_address_access(&address, ByteSize::new(8), &context)
.is_empty());
}
#[test]
fn test_compute_bounds_of_id() {
let mut context = Context::mock_x64();
context
.malloc_tid_to_object_size_map
.insert(Tid::new("malloc_call"), Data::from(Bitvector::from_i64(42)));
context
.call_to_caller_fn_map
.insert(Tid::new("malloc_call"), Tid::new("main"));
let mut state = State::new(
&Tid::new("func"),
&FunctionSignature::mock_x64(),
context.project,
);
state.compute_bounds_of_id(&AbstractIdentifier::mock("malloc_call", "RAX", 8), &context);
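// The ID points to the start of a heap object of size 42,
// so the computed bounds relative to the pointer are [0, 42].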
assert_eq!(state.object_lower_bounds.len(), 2);
assert_eq!(
state.object_lower_bounds[&AbstractIdentifier::mock("malloc_call", "RAX", 8)],
Bitvector::from_i64(0).into()
);
assert_eq!(
state.object_upper_bounds[&AbstractIdentifier::mock("malloc_call", "RAX", 8)],
Bitvector::from_i64(42).into()
);
}
#[test]
fn test_compute_bounds_of_param_id() {
let mut context = Context::mock_x64();
let param_id = AbstractIdentifier::mock("func", "RDI", 8);
let callsite_id = AbstractIdentifier::mock("callsite_id", "RDI", 8);
let malloc_call_id = AbstractIdentifier::mock("malloc_call", "RAX", 8);
let param_value = Data::from_target(malloc_call_id.clone(), Bitvector::from_i64(2).into());
let param_replacement_map = HashMap::from([(callsite_id, param_value.clone())]);
let callee_to_callsites_map =
HashMap::from([(Tid::new("func"), HashSet::from([Tid::new("callsite_id")]))]);
context.param_replacement_map = param_replacement_map;
context.callee_to_callsites_map = callee_to_callsites_map;
context
.malloc_tid_to_object_size_map
.insert(Tid::new("malloc_call"), Data::from(Bitvector::from_i64(42)));
context.call_to_caller_fn_map = HashMap::from([
(Tid::new("malloc_call"), Tid::new("main")),
(Tid::new("callsite_id"), Tid::new("main")),
]);
let mut state = State::new(
&Tid::new("func"),
&FunctionSignature::mock_x64(),
context.project,
);
state.compute_bounds_of_param_id(&param_id, &context);
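// The parameter resolves to a pointer 2 bytes into a 42-byte heap object,
// so the expected bounds relative to the parameter are [-2, 40].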
assert_eq!(state.object_lower_bounds.len(), 2);
assert_eq!(
state.object_lower_bounds[&AbstractIdentifier::mock("func", "RDI", 8)],
Bitvector::from_i64(-2).into()
);
assert_eq!(
state.object_upper_bounds[&AbstractIdentifier::mock("func", "RDI", 8)],
Bitvector::from_i64(40).into()
);
}
}
......@@ -453,7 +453,7 @@ mod tests {
assert_eq!(
context.check_parameters_for_taint(
&state,
&ExternSymbol::mock_x64(),
&ExternSymbol::mock_x64("mock_symbol"),
NodeIndex::new(0)
),
false
......@@ -466,7 +466,7 @@ mod tests {
assert_eq!(
context.check_parameters_for_taint(
&state,
&ExternSymbol::mock_x64(),
&ExternSymbol::mock_x64("mock_symbol"),
NodeIndex::new(0)
),
true
......
......@@ -349,11 +349,11 @@ mod tests {
}
impl ExternSymbol {
pub fn mock_x64() -> ExternSymbol {
pub fn mock_x64(name: impl ToString) -> ExternSymbol {
ExternSymbol {
tid: Tid::new("mock_symbol"),
tid: Tid::new(name.to_string()),
addresses: vec!["UNKNOWN".to_string()],
name: "mock_symbol".to_string(),
name: name.to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("RDI", 8)],
return_values: vec![Arg::mock_register("RAX", 8)],
......@@ -362,11 +362,11 @@ mod tests {
}
}
pub fn mock_arm32() -> ExternSymbol {
pub fn mock_arm32(name: impl ToString) -> ExternSymbol {
ExternSymbol {
tid: Tid::new("mock_symbol"),
tid: Tid::new(name.to_string()),
addresses: vec!["UNKNOWN".to_string()],
name: "mock_symbol".to_string(),
name: name.to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("r0", 4)],
return_values: vec![Arg::mock_register("r0", 4)],
......
......@@ -116,6 +116,7 @@ impl std::fmt::Display for CweModule {
pub fn get_modules() -> Vec<&'static CweModule> {
vec![
&crate::checkers::cwe_78::CWE_MODULE,
&crate::checkers::cwe_119::CWE_MODULE,
&crate::checkers::cwe_134::CWE_MODULE,
&crate::checkers::cwe_190::CWE_MODULE,
&crate::checkers::cwe_215::CWE_MODULE,
......
......@@ -248,6 +248,18 @@ pub enum LogThreadMsg {
Terminate,
}
impl From<LogMessage> for LogThreadMsg {
fn from(msg: LogMessage) -> Self {
Self::Log(msg)
}
}
impl From<CweWarning> for LogThreadMsg {
fn from(warning: CweWarning) -> Self {
Self::Cwe(warning)
}
}
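// These `From` impls are convenience conversions: message producers can write
// `sender.send(msg.into())` instead of naming the enum variant. A minimal
// self-contained sketch with stand-in types (not the crate's real ones):
#[derive(Debug)]
struct LogMessage(String);
#[derive(Debug)]
struct CweWarning(String);

#[derive(Debug)]
enum LogThreadMsg {
    Log(LogMessage),
    Cwe(CweWarning),
    Terminate,
}

impl From<LogMessage> for LogThreadMsg {
    fn from(msg: LogMessage) -> Self {
        Self::Log(msg)
    }
}
impl From<CweWarning> for LogThreadMsg {
    fn from(warning: CweWarning) -> Self {
        Self::Cwe(warning)
    }
}

fn main() {
    let (sender, receiver) = std::sync::mpsc::channel::<LogThreadMsg>();
    // `.into()` selects the right variant via the `From` impls above.
    sender.send(LogMessage("analysis started".to_string()).into()).unwrap();
    sender.send(CweWarning("CWE-119 warning".to_string()).into()).unwrap();
    sender.send(LogThreadMsg::Terminate).unwrap();
    while let Ok(msg) = receiver.recv() {
        if matches!(msg, LogThreadMsg::Terminate) {
            break;
        }
        println!("{:?}", msg);
    }
}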
/// A type for managing threads for collecting log messages.
///
/// With [`LogThread::spawn()`] one can create a new log thread
......@@ -287,6 +299,8 @@ impl LogThread {
/// I.e. the function should receive messages through the given receiver until the channel disconnects
/// or until it receives a [`LogThreadMsg::Terminate`] message.
/// After that it should return the logs collected up to that point.
///
/// See [`LogThread::collect_and_deduplicate`] for a standard collector function that can be used here.
pub fn spawn<F>(collector_func: F) -> LogThread
where
F: FnOnce(crossbeam_channel::Receiver<LogThreadMsg>) -> (Vec<LogMessage>, Vec<CweWarning>)
......@@ -323,4 +337,50 @@ impl LogThread {
(Vec::new(), Vec::new())
}
}
/// This function collects logs from the given receiver until a [`LogThreadMsg::Terminate`] signal is received.
/// All collected logs are deduplicated before being returned.
///
/// CWE warnings and log messages are deduplicated if two messages share the same origin address.
/// In such a case, only the last message received is kept.
/// If a CWE message has more than one address, only the first address is considered when deduplicating.
/// Note that this may lead to information loss if distinct, non-duplicate messages share the same origin address.
///
/// This function can be used as a standard collector function for [`LogThread::spawn`].
pub fn collect_and_deduplicate(
receiver: crossbeam_channel::Receiver<LogThreadMsg>,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let mut logs_with_address = BTreeMap::new();
let mut general_logs = Vec::new();
let mut collected_cwes = BTreeMap::new();
while let Ok(log_thread_msg) = receiver.recv() {
match log_thread_msg {
LogThreadMsg::Log(log_message) => {
if let Some(ref tid) = log_message.location {
logs_with_address.insert(tid.address.clone(), log_message);
} else {
general_logs.push(log_message);
}
}
LogThreadMsg::Cwe(cwe_warning) => match &cwe_warning.addresses[..] {
[] => panic!("Unexpected CWE warning without origin address"),
[address, ..] => {
collected_cwes.insert(address.clone(), cwe_warning);
}
},
LogThreadMsg::Terminate => break,
}
}
let logs = logs_with_address
.values()
.cloned()
.chain(general_logs)
.collect();
let cwes = collected_cwes
.into_iter()
.map(|(_key, value)| value)
.collect();
(logs, cwes)
}
}
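// The deduplication above relies on `BTreeMap::insert` replacing the previous
// value for an existing key. A minimal self-contained sketch of that rule:
use std::collections::BTreeMap;

fn main() {
    let messages = [
        ("0x1000", "first warning"),
        ("0x2000", "other warning"),
        ("0x1000", "refined warning"), // same origin address: replaces the first
    ];
    let mut deduplicated = BTreeMap::new();
    for (address, text) in messages {
        deduplicated.insert(address, text);
    }
    // One entry per address; the last message wins and output is sorted by key.
    let collected: Vec<_> = deduplicated.into_values().collect();
    assert_eq!(collected, vec!["refined warning", "other warning"]);
}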
......@@ -85,7 +85,7 @@ impl CweTestCase {
}
/// Mark test cases using the given CPU architecture as `skipped`.
pub fn mark_architecture_skipped(test_cases: &mut Vec<CweTestCase>, arch: &str) {
pub fn mark_architecture_skipped(test_cases: &mut [CweTestCase], arch: &str) {
for test in test_cases.iter_mut() {
if test.architecture == arch {
test.skipped = true;
......@@ -94,7 +94,7 @@ pub fn mark_architecture_skipped(test_cases: &mut Vec<CweTestCase>, arch: &str)
}
/// Mark test cases using the given compiler as `skipped`.
pub fn mark_compiler_skipped(test_cases: &mut Vec<CweTestCase>, comp: &str) {
pub fn mark_compiler_skipped(test_cases: &mut [CweTestCase], comp: &str) {
for test in test_cases.iter_mut() {
if test.compiler == comp {
test.skipped = true;
......@@ -103,7 +103,7 @@ pub fn mark_compiler_skipped(test_cases: &mut Vec<CweTestCase>, comp: &str) {
}
/// Mark test cases using the given CPU architecture + compiler combination as `skipped`.
pub fn mark_skipped(test_cases: &mut Vec<CweTestCase>, value1: &str, value2: &str) {
pub fn mark_skipped(test_cases: &mut [CweTestCase], value1: &str, value2: &str) {
for test in test_cases.iter_mut() {
if (test.architecture == value1 && test.compiler == value2)
|| (test.architecture == value2 && test.compiler == value1)
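// Taking `&mut [CweTestCase]` instead of `&mut Vec<CweTestCase>` is the more
// idiomatic signature: these functions only iterate and mutate elements, and a
// mutable slice accepts vectors and arrays alike. A self-contained sketch with
// an invented helper mirroring the pattern:
fn mark_all_skipped(flags: &mut [bool]) {
    for flag in flags.iter_mut() {
        *flag = true;
    }
}

fn main() {
    let mut from_vec = vec![false, false];
    mark_all_skipped(&mut from_vec); // `&mut Vec<bool>` coerces to `&mut [bool]`
    let mut from_array = [false; 3];
    mark_all_skipped(&mut from_array);
    assert!(from_vec.iter().all(|&f| f) && from_array.iter().all(|&f| f));
}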
......@@ -239,7 +239,7 @@ mod tests {
#[ignore]
fn cwe_119() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_119", "Memory");
let mut tests = all_test_cases("cwe_119", "CWE119");
mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
......@@ -264,21 +264,20 @@ mod tests {
#[ignore]
fn cwe_125() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_119", "Memory");
mark_architecture_skipped(&mut tests, "mips"); // A second unrelated instance is found in "__do_global_ctors_aux".
mark_architecture_skipped(&mut tests, "mipsel"); // A second unrelated instance is found in "__do_global_ctors_aux".
let mut tests = all_test_cases("cwe_119", "CWE119");
mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_skipped(&mut tests, "ppc", "gcc"); // Needs tracking of linear dependencies between register values.
mark_skipped(&mut tests, "x86", "gcc"); // Loss of stack register value since we do not track pointer alignment yet.
mark_skipped(&mut tests, "x86", "clang"); // A second unrelated instance is found in "__do_global_ctors_aux".
mark_skipped(&mut tests, "x86", "clang"); // Unrelated third CWE hit in `__libc_csu_init`
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
for test_case in tests {
let num_expected_occurences = 1;
let num_expected_occurences = 2;
if let Err(error) = test_case.run_test("[CWE125]", num_expected_occurences) {
error_log.push((test_case.get_filepath(), error));
}
......@@ -626,7 +625,7 @@ mod tests {
#[ignore]
fn cwe_787() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_119", "Memory");
let mut tests = all_test_cases("cwe_119", "CWE119");
mark_skipped(&mut tests, "arm", "gcc"); // Needs tracking of linear dependencies between register values.
mark_skipped(&mut tests, "mips64", "gcc"); // Needs tracking of linear dependencies between register values.
......@@ -645,7 +644,7 @@ mod tests {
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
for test_case in tests {
let num_expected_occurences = 1;
let num_expected_occurences = 2;
if let Err(error) = test_case.run_test("[CWE787]", num_expected_occurences) {
error_log.push((test_case.get_filepath(), error));
}
......