Unverified Commit 26f9844d by Enkelmann Committed by GitHub

implement CWE-119 check (#315)

parent d77159c5
......@@ -114,9 +114,9 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53"
dependencies = [
"cfg-if",
"crossbeam-utils",
......@@ -124,9 +124,9 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38"
dependencies = [
"cfg-if",
"lazy_static",
......@@ -222,9 +222,9 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "fixedbitset"
version = "0.4.0"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "398ea4fabe40b9b0d885340a2a991a44c8a645624075ad966d21f88688e2b69e"
checksum = "279fb028e20b3c4c320317955b77c5e0c9701f05a1d309905d6fc702cdc5053e"
[[package]]
name = "fnv"
......@@ -356,18 +356,18 @@ checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memoffset"
version = "0.6.4"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "nix"
version = "0.23.0"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f305c2c2e4c39a82f7bf0bf65fb557f9070ce06781d4f2454295cc34b1c43188"
checksum = "9f866317acbd3a240710c63f065ffb1e4fd466259045ccb504130b7f668f35c6"
dependencies = [
"bitflags",
"cc",
......
......@@ -178,7 +178,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
let modules_depending_on_string_abstraction = BTreeSet::from_iter(["CWE78"]);
let modules_depending_on_pointer_inference =
BTreeSet::from_iter(["CWE134", "CWE476", "Memory"]);
BTreeSet::from_iter(["CWE119", "CWE134", "CWE476", "Memory"]);
let string_abstraction_needed = modules
.iter()
......
......@@ -119,10 +119,15 @@ impl AbstractIdentifier {
&self.time
}
/// Get the location component of the abstract ID.
pub fn get_location(&self) -> &AbstractLocation {
&self.location
}
/// Return the size in bytes of the value that this abstract ID refers to.
///
/// The size is taken from the location component of the ID.
pub fn bytesize(&self) -> ByteSize {
    let location = self.get_location();
    location.bytesize()
}
}
impl std::fmt::Display for AbstractIdentifier {
......@@ -187,6 +192,14 @@ impl AbstractLocation {
let stack_pos = AbstractMemoryLocation::Location { offset, size };
AbstractLocation::Pointer(stack_register.clone(), stack_pos)
}
/// Get the bytesize of the value represented by the abstract location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Register(var) => var.size,
Self::Pointer(_pointer_var, mem_location) => mem_location.bytesize(),
}
}
}
/// An abstract memory location is either an offset from the given location, where the actual value can be found,
......@@ -212,6 +225,16 @@ pub enum AbstractMemoryLocation {
},
}
impl AbstractMemoryLocation {
/// Get the bytesize of the value represented by the abstract memory location.
pub fn bytesize(&self) -> ByteSize {
match self {
Self::Location { size, .. } => *size,
Self::Pointer { target, .. } => target.bytesize(),
}
}
}
impl std::fmt::Display for AbstractMemoryLocation {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
......@@ -222,9 +245,23 @@ impl std::fmt::Display for AbstractMemoryLocation {
}
#[cfg(test)]
mod tests {
pub mod tests {
use super::*;
impl AbstractIdentifier {
    /// Build a mock abstract identifier for tests.
    ///
    /// The identifier uses `tid` as the name of its TID
    /// and the register with the given name and size (in bytes) as its location.
    /// Panics if the mocked register is not accepted as an abstract location.
    pub fn mock(
        tid: impl ToString,
        register: impl ToString,
        size_in_bytes: u64,
    ) -> AbstractIdentifier {
        let register_var = Variable::mock(register, size_in_bytes);
        let location = AbstractLocation::from_var(&register_var).unwrap();
        AbstractIdentifier::new(Tid::new(tid.to_string()), location)
    }
}
#[test]
fn test_constraint_enforcements() {
// Test that no temporary registers are allowed as abstract locations.
......@@ -243,4 +280,12 @@ mod tests {
let id = id.with_path_hint(Tid::new("second_hint")).unwrap();
assert!(id.with_path_hint(Tid::new("first_hint")).is_err());
}
#[test]
fn test_bytesize() {
    // The bytesize of an ID for a stack position is the size of the value stored there.
    let stack_register = Variable::mock("RSP", 8);
    let value_size = ByteSize::new(4);
    let stack_location = AbstractLocation::from_stack_position(&stack_register, 10, value_size);
    let identifier = AbstractIdentifier::new(Tid::new("id"), stack_location);
    assert_eq!(identifier.bytesize(), value_size);
}
}
......@@ -143,7 +143,7 @@ fn test_eval() {
#[test]
fn test_extern_symbol_handling() {
let mut state = State::mock_arm32();
let extern_symbol = ExternSymbol::mock_arm32();
let extern_symbol = ExternSymbol::mock_arm32("mock_symbol");
let cconv = CallingConvention::mock_arm32();
let call = Term {
tid: Tid::new("call_tid"),
......
......@@ -494,6 +494,22 @@ pub fn get_program_cfg(program: &Term<Program>, extern_subs: HashSet<Tid>) -> Gr
builder.build()
}
/// Compute the entry nodes of all functions contained in the graph.
///
/// The returned map sends the TID of a function to the index of the `BlkStart` node
/// that belongs to the first block of that function.
/// Functions without blocks or without a matching node in the graph do not get an entry.
pub fn get_entry_nodes_of_subs(graph: &Graph) -> HashMap<Tid, NodeIndex> {
    graph
        .node_indices()
        .filter_map(|node_index| match graph[node_index] {
            Node::BlkStart(block, sub) => {
                let first_block = sub.term.blocks.first()?;
                (block.tid == first_block.tid).then(|| (sub.tid.clone(), node_index))
            }
            _ => None,
        })
        .collect()
}
#[cfg(test)]
mod tests {
use super::*;
......
......@@ -10,3 +10,4 @@ pub mod graph;
pub mod interprocedural_fixpoint_generic;
pub mod pointer_inference;
pub mod string_abstraction;
pub mod vsa_results;
use super::*;
use crate::analysis::vsa_results::VsaResult;
/// [`VsaResult`] implementation for the pointer inference analysis.
///
/// It gives other analyses a simple interface to the value set
/// and points-to results computed by the pointer inference.
impl<'a> VsaResult for PointerInference<'a> {
    type ValueDomain = Data;

    /// Return the address value recorded for the given read or store instruction.
    fn eval_address_at_def(&self, def_tid: &Tid) -> Option<Data> {
        let address = self.addresses_at_defs.get(def_tid)?;
        Some(address.clone())
    }

    /// Return the value recorded for the given instruction:
    /// the assigned value for store or assignment instructions and the value read for load instructions.
    fn eval_value_at_def(&self, def_tid: &Tid) -> Option<Data> {
        let value = self.values_at_defs.get(def_tid)?;
        Some(value.clone())
    }

    /// Evaluate the given expression in the state recorded for the given jump instruction.
    fn eval_at_jmp(&self, jmp_tid: &Tid, expression: &Expression) -> Option<Data> {
        self.states_at_tids
            .get(jmp_tid)
            .map(|state| state.eval(expression))
    }

    /// Evaluate the given parameter in the state recorded for the given jump instruction.
    /// Returns `None` if no state is recorded or if the parameter evaluation fails.
    fn eval_parameter_arg_at_call(&self, jmp_tid: &Tid, parameter: &Arg) -> Option<Data> {
        self.states_at_tids.get(jmp_tid).and_then(|state| {
            let pi_context = self.computation.get_context().get_context();
            state
                .eval_parameter_arg(parameter, pi_context.runtime_memory_image)
                .ok()
        })
    }
}
//! This module provides the [`VsaResult`] trait
//! which defines an interface for the results of analyses similar to a value set analysis.
use crate::intermediate_representation::{Arg, Expression};
use crate::prelude::*;
/// A trait providing an interface for accessing the results of a value set analysis.
/// Note that the returned values may be any type of information associated with values at certain program points,
/// i.e. the trait can also be used for other analyses than just value set analyses.
///
/// Every returned value is wrapped into an `Option<..>`.
/// This should mainly be used to indicate that the analysis did not compute a value at a certain point,
/// e.g. because the code point was deemed to be dead code.
/// If the analysis wants to indicate that no specific information is known about a certain value
/// then this should be encoded in the `ValueDomain` itself instead of returning `None`.
pub trait VsaResult {
/// The type of the returned values.
/// Usually this should be an [`AbstractDomain`](crate::abstract_domain::AbstractDomain),
/// although this is not strictly required.
type ValueDomain;
/// Return the value stored for write instructions, the value read for read instructions or the value assigned for assignments.
///
/// `def_tid` identifies the instruction whose value is requested.
/// `None` indicates that the analysis did not compute a value for that instruction.
fn eval_value_at_def(&self, def_tid: &Tid) -> Option<Self::ValueDomain>;
/// Return the value of the address where something is read or written for read or store instructions.
///
/// `def_tid` identifies the read or store instruction whose address is requested.
/// `None` indicates that the analysis did not compute an address value for that instruction.
fn eval_address_at_def(&self, def_tid: &Tid) -> Option<Self::ValueDomain>;
/// Return the value of a parameter at the given jump instruction.
///
/// `jmp_tid` identifies the jump (call) instruction and `param` the parameter to evaluate there.
fn eval_parameter_arg_at_call(&self, jmp_tid: &Tid, param: &Arg) -> Option<Self::ValueDomain>;
/// Evaluate the value of the given expression at the given jump instruction.
///
/// `jmp_tid` identifies the jump instruction at which `expression` is evaluated.
fn eval_at_jmp(&self, jmp_tid: &Tid, expression: &Expression) -> Option<Self::ValueDomain>;
}
......@@ -5,6 +5,7 @@
//! but directly incorporated into the [`pointer_inference`](crate::analysis::pointer_inference) module.
//! See there for detailed information about this check.
pub mod cwe_119;
pub mod cwe_134;
pub mod cwe_190;
pub mod cwe_215;
......
use super::*;
impl<'a> Context<'a> {
    /// Create a mock context for unit tests.
    ///
    /// The mocked project contains the two functions `func` and `main`.
    ///
    /// Note that this function leaks memory!
    /// The project, the pointer inference results and the analysis results are leaked
    /// to obtain the `'static` references required by the returned context.
    /// The receiving end of the log channel is leaked as well,
    /// so that sending log messages or CWE warnings through the mock context never fails.
    pub fn mock_x64() -> Context<'static> {
        let mut project = Box::new(Project::mock_x64());
        project.program.term.subs = BTreeMap::from([
            (Tid::new("func"), Sub::mock("func")),
            (Tid::new("main"), Sub::mock("main")),
        ]);
        let project = Box::leak(project);
        let pointer_inference = Box::new(PointerInference::mock(project));
        let pointer_inference = Box::leak(pointer_inference);
        let analysis_results = AnalysisResults::mock_from_project(project);
        let analysis_results =
            Box::new(analysis_results.with_pointer_inference(Some(pointer_inference)));
        let analysis_results = Box::leak(analysis_results);
        let (log_collector, log_receiver) = crossbeam_channel::unbounded();
        // Dropping the receiver would disconnect the channel and every `send(..).unwrap()`
        // on the log collector would panic. Keep the receiving end alive for the whole test run.
        std::mem::forget(log_receiver);
        Context::new(
            analysis_results.project,
            analysis_results.control_flow_graph,
            analysis_results.pointer_inference.unwrap(),
            analysis_results.function_signatures.unwrap(),
            analysis_results,
            log_collector,
        )
    }
}
#[test]
fn test_compute_size_value_of_malloc_like_call() {
    use crate::analysis::pointer_inference::State as PiState;

    let project = Project::mock_x64();
    let mut pi_results = PointerInference::mock(&project);

    // Prepare a pointer inference state at the call site where the size parameter `RDI` is 3.
    let call_tid = Tid::new("malloc_call");
    let mut state_at_call = PiState::new(&Variable::mock("RSP", 8), Tid::new("func"));
    state_at_call.set_register(&Variable::mock("RDI", 8), Bitvector::from_i64(3).into());
    *pi_results.get_mut_states_at_tids() = HashMap::from([(call_tid.clone(), state_at_call)]);

    // A call to `malloc` yields the value of the size parameter.
    let malloc_symbol = ExternSymbol::mock_x64("malloc");
    let malloc_size =
        compute_size_value_of_malloc_like_call(&call_tid, &malloc_symbol, &pi_results);
    assert_eq!(malloc_size.unwrap(), Bitvector::from_i64(3).into());

    // A call to another symbol at an unknown call site yields nothing.
    let other_symbol = ExternSymbol::mock_x64("other");
    let other_size =
        compute_size_value_of_malloc_like_call(&Tid::new("other"), &other_symbol, &pi_results);
    assert!(other_size.is_none());
}
#[test]
fn test_substitute_param_values_context_sensitive() {
    let mut context = Context::mock_x64();
    let func_param = AbstractIdentifier::mock("func", "RDI", 8);
    let main_param = AbstractIdentifier::mock("main", "RSI", 8);

    // The parameter of `func` is replaced by `main_param + 1` at the call site in `main`.
    // The parameter of `main` is replaced by the constant 41 at the call site of `main`.
    context.param_replacement_map = HashMap::from([
        (
            AbstractIdentifier::mock("callsite_id", "RDI", 8),
            Data::from_target(main_param.clone(), Bitvector::from_i64(1).into()),
        ),
        (
            AbstractIdentifier::mock("recursive_callsite_id", "RSI", 8),
            Data::from(Bitvector::from_i64(41)),
        ),
    ]);
    context.callee_to_callsites_map = HashMap::from([
        (Tid::new("func"), HashSet::from([Tid::new("callsite_id")])),
        (
            Tid::new("main"),
            HashSet::from([Tid::new("recursive_callsite_id")]),
        ),
    ]);
    context.call_to_caller_fn_map = HashMap::from([
        (Tid::new("callsite_id"), Tid::new("main")),
        (
            Tid::new("recursive_callsite_id"),
            Tid::new("somer_other_fn_id"),
        ),
    ]);

    // non-recursive substitution
    let value_in_func = Data::from_target(func_param, Bitvector::from_i64(5).into());
    let substituted = context.substitute_param_values_context_sensitive(
        &value_in_func,
        &Tid::new("callsite_id"),
        &Tid::new("func"),
    );
    let expected = Data::from_target(main_param, Bitvector::from_i64(6).into());
    assert_eq!(substituted, expected);

    // recursive substitution
    let call_path = [Tid::new("callsite_id"), Tid::new("recursive_callsite_id")];
    let result = context.recursively_substitute_param_values_context_sensitive(
        &value_in_func,
        &Tid::new("func"),
        &call_path,
    );
    println!("{:#}", result.to_json_compact());
    assert_eq!(result, Bitvector::from_i64(47).into());
}
#[test]
fn test_substitute_param_values() {
    let mut context = Context::mock_x64();
    let func_param = AbstractIdentifier::mock("func", "RDI", 8);
    let main_param = AbstractIdentifier::mock("main", "RSI", 8);

    // The parameter of `func` is replaced by `main_param + 1` at its only call site.
    // The parameter of `main` is replaced by the constant 39 at its only call site.
    let func_param_value = Data::from_target(main_param, Bitvector::from_i64(1).into());
    context.param_replacement_map = HashMap::from([
        (
            AbstractIdentifier::mock("callsite_id", "RDI", 8),
            func_param_value.clone(),
        ),
        (
            AbstractIdentifier::mock("recursive_callsite_id", "RSI", 8),
            Data::from(Bitvector::from_i64(39)),
        ),
    ]);
    context.callee_to_callsites_map = HashMap::from([
        (Tid::new("func"), HashSet::from([Tid::new("callsite_id")])),
        (
            Tid::new("main"),
            HashSet::from([Tid::new("recursive_callsite_id")]),
        ),
    ]);

    // non-recursive substitution
    let (absolute_part, relative_part) = context.substitute_param_values(&func_param);
    assert!(absolute_part.is_none());
    assert_eq!(relative_part, func_param_value);

    // recursive substitution
    let value_in_func = Data::from_target(func_param, Bitvector::from_i64(5).into());
    let result = context.recursively_substitute_param_values(&value_in_func);
    assert_eq!(result, Bitvector::from_i64(45).into());
}
use super::super::State;
use super::*;
use crate::analysis::graph::Graph;
impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
    type Value = State;

    /// Get the control flow graph.
    fn get_graph(&self) -> &Graph<'a> {
        self.graph
    }

    /// Merge two states.
    fn merge(&self, state1: &State, state2: &State) -> State {
        state1.merge(state2)
    }

    /// If the given [`Def`] is a load or store instruction, check whether it may access addresses
    /// that are out of bounds of the corresponding memory object.
    /// Generate CWE warnings accordingly.
    ///
    /// Assignments do not access memory and leave the state unchanged.
    /// Returns `None` for load and store instructions
    /// for which the pointer inference did not compute an address value.
    fn update_def(&self, state: &State, def: &Term<Def>) -> Option<State> {
        let mut state = state.clone();
        // Determine the size of the memory access and whether it is a read or a write access.
        let (access_size, is_read_access) = match &def.term {
            Def::Load { var, .. } => (var.size, true),
            Def::Store { value, .. } => (value.bytesize(), false),
            Def::Assign { .. } => return Some(state),
        };
        // If there is no address value then there seems to be no pointer inference state here.
        let address = self.pointer_inference.eval_address_at_def(&def.tid)?;
        let warnings = state.check_address_access(&address, access_size, self);
        if !warnings.is_empty() {
            let (cwe_name, description) = if is_read_access {
                (
                    "CWE125",
                    format!(
                        "(Out-of-bounds Read) Memory read at {} may be out of bounds",
                        &def.tid.address
                    ),
                )
            } else {
                (
                    "CWE787",
                    format!(
                        "(Out-of-bounds Write) Memory write at {} may be out of bounds.",
                        &def.tid.address
                    ),
                )
            };
            let mut cwe_warning =
                CweWarning::new(cwe_name, super::super::CWE_MODULE.version, description);
            cwe_warning.tids = vec![def.tid.to_string()];
            cwe_warning.addresses = vec![def.tid.address.to_string()];
            cwe_warning.other = vec![warnings];
            // Sending only fails if the receiving end of the log channel is gone.
            self.log_collector.send(cwe_warning.into()).unwrap();
        }
        Some(state)
    }

    /// The state does not change for intraprocedural jumps.
    fn update_jump(
        &self,
        state: &State,
        _jump: &Term<Jmp>,
        _untaken_conditional: Option<&Term<Jmp>>,
        _target: &Term<Blk>,
    ) -> Option<State> {
        Some(state.clone())
    }

    /// Always returns `None`, since the fixpoint computation is intraprocedural
    /// and the access to parameter values is checked in the callee separately.
    fn update_call(
        &self,
        _state: &State,
        _call: &Term<Jmp>,
        _target: &crate::analysis::graph::Node,
        _calling_convention: &Option<String>,
    ) -> Option<State> {
        // The analysis is intraprocedural and parameters are checked not here but in the callee.
        None
    }

    /// Just return the `state_before_call` since the fixpoint computation is intraprocedural.
    fn update_return(
        &self,
        _state_before_return: Option<&State>,
        state_before_call: Option<&State>,
        _call_term: &Term<Jmp>,
        _return_term: &Term<Jmp>,
        _calling_convention: &Option<String>,
    ) -> Option<State> {
        // The analysis is intraprocedural
        state_before_call.cloned()
    }

    /// For calls to extern symbols check whether any parameter may point out of bounds of the corresponding memory object.
    /// Note that we do not know whether the called function accesses memory areas of certain sizes.
    /// Thus we only check that parameter pointers themselves point into the memory object
    /// but not whether certain address ranges around a pointer are still inside the corresponding memory object.
    ///
    /// For indirect calls the integer parameter registers of the standard calling convention
    /// (if the project has one) are checked instead.
    fn update_call_stub(&self, state: &State, call: &Term<Jmp>) -> Option<State> {
        let mut state = state.clone();
        match &call.term {
            Jmp::Call { target, .. } => {
                if let Some(extern_symbol) = self.project.program.term.extern_symbols.get(target) {
                    for param in &extern_symbol.parameters {
                        self.check_param_at_call(
                            &mut state,
                            param,
                            &call.tid,
                            Some(&extern_symbol.name),
                        );
                    }
                } else {
                    self.log_debug(
                        &call.tid,
                        "Call stub edge without associated extern symbol encountered.",
                    );
                }
            }
            Jmp::CallInd { .. } => {
                if let Some(cconv) = self.project.get_standard_calling_convention() {
                    for param in &cconv.integer_parameter_register {
                        let param_arg = Arg::from_var(param.clone(), None);
                        self.check_param_at_call(&mut state, &param_arg, &call.tid, None);
                    }
                }
            }
            _ => (),
        }
        Some(state)
    }

    /// Just return the given state without modification.
    fn specialize_conditional(
        &self,
        state: &State,
        _condition: &Expression,
        _block_before_condition: &Term<Blk>,
        _is_true: bool,
    ) -> Option<State> {
        Some(state.clone())
    }
}
//! This module implements a check for CWE-119: Buffer Overflow
//! and its variants CWE-125: Out-of-bounds Read and CWE-787: Out-of-bounds Write.
//!
//! Arrays or buffers of any kind are often accessed through indices.
//! If the index of an access is outside of the bounds of the buffer this can lead to severe consequences.
//! In the case of out-of-bounds read accesses this often leads to exposure of sensitive information to an attacker.
//! Out-of-bounds write accesses can often be used to hijack the control flow of a program
//! and thus may lead to arbitrary code execution.
//!
//! See <https://cwe.mitre.org/data/definitions/119.html> for a detailed description.
//!
//! ## How the check works
//!
//! The check uses the results of the [Pointer Inference analysis](`crate::analysis::pointer_inference`)
//! to check whether any memory accesses may point outside of the bounds of the corresponding memory objects.
//! For this the results of the Pointer Inference analysis are aggregated interprocedurally.
//! Additionally, the check uses a lightweight intraprocedural dataflow fixpoint computation
//! to ensure that for each memory object only the first access outside of its bounds is flagged as a CWE.
//!
//! ## False Positives
//!
//! - Any analysis imprecision of the Pointer Inference analysis may lead to false positive results in this check.
//! - If no exact bounds for a memory object could be inferred then the strictest bounds found are used,
//! which can lead to false positive warnings.
//!
//! ## False Negatives
//!
//! - In cases where the Pointer Inference analysis could not infer any bounds at all for the memory object or the access index
//! this check generally assumes analysis imprecision as the culprit and will not flag them as CWEs.
//! This leads to false negatives, especially in cases where the bounds directly depend on user input.
//! - The Pointer Inference analysis cannot distinguish different objects located on the same stack frame.
//! Thus buffer overflows on the stack can only be detected if they may reach outside of the whole stack frame.
//! This leads to false negatives, especially for buffer overflows caused by off-by-one bugs.
//! - For parameters of extern function calls the check only checks whether the parameter itself may point outside of the boundaries of a memory object.
//! But since we generally do not know what size the called function expects the pointed-to object to have
//! this still may miss buffer overflows occurring in the called function.
//! - Right now the check only considers buffers on the stack or the heap, but not buffers in global memory.
//! Thus corresponding overflows of buffers in global memory are not detected.
use crate::analysis::pointer_inference::Data;
use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage, LogThread};
use crate::CweModule;
mod context;
use context::Context;
mod state;
use state::State;
/// The module name and version of the CWE-119 check.
///
/// `run` points to [`check_cwe`], the entry point that executes the check.
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE119",
version: "0.3",
run: check_cwe,
};
/// Run the check for CWE-119: Buffer Overflows.
///
/// This function prepares the fixpoint computation that computes the CWE warnings by setting the start states for all function starts.
/// Then the fixpoint computation is executed.
/// Afterwards, the collected logs and CWE warnings are collected from a separate logging thread and returned.
///
/// Functions without a known function signature do not get a start state.
///
/// # Panics
///
/// Panics if the given analysis results do not contain
/// pointer inference results or function signatures.
pub fn check_cwe(
    analysis_results: &AnalysisResults,
    _config: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
    use crate::analysis::interprocedural_fixpoint_generic::NodeValue;

    // Resolve the required analysis results once, so that a missing prerequisite
    // is reported with a clear message before any work is started.
    let pointer_inference = analysis_results
        .pointer_inference
        .expect("the CWE119 check requires the pointer inference results");
    let function_signatures = analysis_results
        .function_signatures
        .expect("the CWE119 check requires the function signatures");

    let log_thread = LogThread::spawn(LogThread::collect_and_deduplicate);
    let context = Context::new(
        analysis_results.project,
        analysis_results.control_flow_graph,
        pointer_inference,
        function_signatures,
        analysis_results,
        log_thread.get_msg_sender(),
    );

    let mut fixpoint_computation =
        crate::analysis::forward_interprocedural_fixpoint::create_computation(context, None);

    // Every function with a known signature starts the computation at its entry node.
    for (sub_tid, entry_node_of_sub) in
        crate::analysis::graph::get_entry_nodes_of_subs(analysis_results.control_flow_graph)
    {
        if let Some(function_sig) = function_signatures.get(&sub_tid) {
            let fn_start_state = State::new(&sub_tid, function_sig, analysis_results.project);
            fixpoint_computation
                .set_node_value(entry_node_of_sub, NodeValue::Value(fn_start_state));
        }
    }

    fixpoint_computation.compute_with_max_steps(100);

    log_thread.collect()
}
......@@ -453,7 +453,7 @@ mod tests {
assert_eq!(
context.check_parameters_for_taint(
&state,
&ExternSymbol::mock_x64(),
&ExternSymbol::mock_x64("mock_symbol"),
NodeIndex::new(0)
),
false
......@@ -466,7 +466,7 @@ mod tests {
assert_eq!(
context.check_parameters_for_taint(
&state,
&ExternSymbol::mock_x64(),
&ExternSymbol::mock_x64("mock_symbol"),
NodeIndex::new(0)
),
true
......
......@@ -349,11 +349,11 @@ mod tests {
}
impl ExternSymbol {
pub fn mock_x64() -> ExternSymbol {
pub fn mock_x64(name: impl ToString) -> ExternSymbol {
ExternSymbol {
tid: Tid::new("mock_symbol"),
tid: Tid::new(name.to_string()),
addresses: vec!["UNKNOWN".to_string()],
name: "mock_symbol".to_string(),
name: name.to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("RDI", 8)],
return_values: vec![Arg::mock_register("RAX", 8)],
......@@ -362,11 +362,11 @@ mod tests {
}
}
pub fn mock_arm32() -> ExternSymbol {
pub fn mock_arm32(name: impl ToString) -> ExternSymbol {
ExternSymbol {
tid: Tid::new("mock_symbol"),
tid: Tid::new(name.to_string()),
addresses: vec!["UNKNOWN".to_string()],
name: "mock_symbol".to_string(),
name: name.to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("r0", 4)],
return_values: vec![Arg::mock_register("r0", 4)],
......
......@@ -116,6 +116,7 @@ impl std::fmt::Display for CweModule {
pub fn get_modules() -> Vec<&'static CweModule> {
vec![
&crate::checkers::cwe_78::CWE_MODULE,
&crate::checkers::cwe_119::CWE_MODULE,
&crate::checkers::cwe_134::CWE_MODULE,
&crate::checkers::cwe_190::CWE_MODULE,
&crate::checkers::cwe_215::CWE_MODULE,
......
......@@ -248,6 +248,18 @@ pub enum LogThreadMsg {
Terminate,
}
impl From<LogMessage> for LogThreadMsg {
fn from(msg: LogMessage) -> Self {
Self::Log(msg)
}
}
impl From<CweWarning> for LogThreadMsg {
fn from(warning: CweWarning) -> Self {
Self::Cwe(warning)
}
}
/// A type for managing threads for collecting log messages.
///
/// With [`LogThread::spawn()`] one can create a new log thread
......@@ -287,6 +299,8 @@ impl LogThread {
/// I.e. the function should receive messages through the given receiver until the channel disconnects
/// or until it receives a [`LogThreadMsg::Terminate`] message.
/// After that it should return the logs collected up to that point.
///
/// See [`LogThread::collect_and_deduplicate`] for a standard collector function that can be used here.
pub fn spawn<F>(collector_func: F) -> LogThread
where
F: FnOnce(crossbeam_channel::Receiver<LogThreadMsg>) -> (Vec<LogMessage>, Vec<CweWarning>)
......@@ -323,4 +337,50 @@ impl LogThread {
(Vec::new(), Vec::new())
}
}
/// This function collects logs from the given receiver until a [`LogThreadMsg::Terminate`] signal is received
/// or until the channel disconnects.
/// All collected logs are deduplicated before being returned.
///
/// CWE warnings and log messages are deduplicated if two messages share the same address of origin.
/// In such a case only the last message received is kept.
/// If a CWE message has more than one address only the first address is considered when deduplicating.
/// Note that this may lead to information loss if log messages with the same origin address that are not duplicates are generated.
///
/// The returned log messages with an origin address are sorted by address
/// and are followed by the log messages without a location in the order they were received.
/// The returned CWE warnings are sorted by their first address.
///
/// This function can be used as a standard collector function for [`LogThread::spawn`].
///
/// # Panics
///
/// Panics if a CWE warning without any origin address is received.
pub fn collect_and_deduplicate(
    receiver: crossbeam_channel::Receiver<LogThreadMsg>,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
    let mut logs_with_address = BTreeMap::new();
    let mut general_logs = Vec::new();
    let mut collected_cwes = BTreeMap::new();

    while let Ok(log_thread_msg) = receiver.recv() {
        match log_thread_msg {
            LogThreadMsg::Log(log_message) => {
                if let Some(ref tid) = log_message.location {
                    // A later message for the same address replaces the earlier one.
                    logs_with_address.insert(tid.address.clone(), log_message);
                } else {
                    general_logs.push(log_message);
                }
            }
            LogThreadMsg::Cwe(cwe_warning) => match &cwe_warning.addresses[..] {
                [] => panic!("Unexpected CWE warning without origin address"),
                [address, ..] => {
                    // Only the first address is used as the deduplication key.
                    collected_cwes.insert(address.clone(), cwe_warning);
                }
            },
            LogThreadMsg::Terminate => break,
        }
    }

    // The maps are owned, so their values can be moved out without cloning.
    let logs = logs_with_address
        .into_values()
        .chain(general_logs)
        .collect();
    let cwes = collected_cwes.into_values().collect();
    (logs, cwes)
}
}
......@@ -85,7 +85,7 @@ impl CweTestCase {
}
/// Mark test cases using the given CPU architecture as `skipped`.
pub fn mark_architecture_skipped(test_cases: &mut Vec<CweTestCase>, arch: &str) {
pub fn mark_architecture_skipped(test_cases: &mut [CweTestCase], arch: &str) {
for test in test_cases.iter_mut() {
if test.architecture == arch {
test.skipped = true;
......@@ -94,7 +94,7 @@ pub fn mark_architecture_skipped(test_cases: &mut Vec<CweTestCase>, arch: &str)
}
/// Mark test cases using the given compiler as `skipped`.
pub fn mark_compiler_skipped(test_cases: &mut Vec<CweTestCase>, comp: &str) {
pub fn mark_compiler_skipped(test_cases: &mut [CweTestCase], comp: &str) {
for test in test_cases.iter_mut() {
if test.compiler == comp {
test.skipped = true;
......@@ -103,7 +103,7 @@ pub fn mark_compiler_skipped(test_cases: &mut Vec<CweTestCase>, comp: &str) {
}
/// Mark test cases using the given CPU architecture + compiler combination as `skipped`.
pub fn mark_skipped(test_cases: &mut Vec<CweTestCase>, value1: &str, value2: &str) {
pub fn mark_skipped(test_cases: &mut [CweTestCase], value1: &str, value2: &str) {
for test in test_cases.iter_mut() {
if (test.architecture == value1 && test.compiler == value2)
|| (test.architecture == value2 && test.compiler == value1)
......@@ -239,7 +239,7 @@ mod tests {
#[ignore]
fn cwe_119() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_119", "Memory");
let mut tests = all_test_cases("cwe_119", "CWE119");
mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
......@@ -264,21 +264,20 @@ mod tests {
#[ignore]
fn cwe_125() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_119", "Memory");
mark_architecture_skipped(&mut tests, "mips"); // A second unrelated instance is found in "__do_global_ctors_aux".
mark_architecture_skipped(&mut tests, "mipsel"); // A second unrelated instance is found in "__do_global_ctors_aux".
let mut tests = all_test_cases("cwe_119", "CWE119");
mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_skipped(&mut tests, "ppc", "gcc"); // Needs tracking of linear dependencies between register values.
mark_skipped(&mut tests, "x86", "gcc"); // Loss of stack register value since we do not track pointer alignment yet.
mark_skipped(&mut tests, "x86", "clang"); // A second unrelated instance is found in "__do_global_ctors_aux".
mark_skipped(&mut tests, "x86", "clang"); // Unrelated third CWE hit in `__libc_csu_init`
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
for test_case in tests {
let num_expected_occurences = 1;
let num_expected_occurences = 2;
if let Err(error) = test_case.run_test("[CWE125]", num_expected_occurences) {
error_log.push((test_case.get_filepath(), error));
}
......@@ -626,7 +625,7 @@ mod tests {
#[ignore]
fn cwe_787() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_119", "Memory");
let mut tests = all_test_cases("cwe_119", "CWE119");
mark_skipped(&mut tests, "arm", "gcc"); // Needs tracking of linear dependencies between register values.
mark_skipped(&mut tests, "mips64", "gcc"); // Needs tracking of linear dependencies between register values.
......@@ -645,7 +644,7 @@ mod tests {
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
for test_case in tests {
let num_expected_occurences = 1;
let num_expected_occurences = 2;
if let Err(error) = test_case.run_test("[CWE787]", num_expected_occurences) {
error_log.push((test_case.get_filepath(), error));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment