Unverified Commit 64238b18 by Enkelmann Committed by GitHub

Reimplement CWE 476 check in Rust (#111)

parent 1e367e58
use cwe_checker_rs::intermediate_representation::Project;
use cwe_checker_rs::utils::log::print_all_messages;
use cwe_checker_rs::utils::{get_ghidra_plugin_path, read_config_file};
use cwe_checker_rs::AnalysisResults;
use std::collections::HashSet;
use std::path::Path;
use std::process::Command;
......@@ -138,6 +139,17 @@ fn run_with_ghidra(args: CmdlineArgs) {
let mut project = get_project_from_ghidra(&Path::new(&args.binary.unwrap()));
// Normalize the project and gather log messages generated from it.
let mut all_logs = project.normalize();
let mut analysis_results = AnalysisResults::new(&project);
let pointer_inference_results = if modules
.iter()
.any(|module| module.name == "CWE476" || module.name == "Memory")
{
Some(analysis_results.compute_pointer_inference(&config["Memory"]))
} else {
None
};
analysis_results = analysis_results.set_pointer_inference(pointer_inference_results.as_ref());
// Print debug and then return.
// Right now there is only one debug printing function.
......@@ -154,7 +166,7 @@ fn run_with_ghidra(args: CmdlineArgs) {
// Execute the modules and collect their logs and CWE-warnings.
let mut all_cwes = Vec::new();
for module in modules {
let (mut logs, mut cwes) = (module.run)(&project, &config[&module.name]);
let (mut logs, mut cwes) = (module.run)(&analysis_results, &config[&module.name]);
all_logs.append(&mut logs);
all_cwes.append(&mut cwes);
}
......
......@@ -203,17 +203,7 @@ impl RegisterDomain for BitvectorDomain {
BitvectorDomain::new_top(self.bytesize())
}
},
_ => match op {
Piece => BitvectorDomain::new_top(self.bytesize() + rhs.bytesize()),
IntAdd | IntSub | IntMult | IntDiv | IntSDiv | IntRem | IntSRem | IntLeft
| IntRight | IntSRight | IntAnd | IntOr | IntXOr | FloatAdd | FloatSub
| FloatMult | FloatDiv => BitvectorDomain::new_top(self.bytesize()),
IntEqual | IntNotEqual | IntLess | IntLessEqual | IntSLess | IntSLessEqual
| IntCarry | IntSCarry | IntSBorrow | BoolAnd | BoolOr | BoolXOr | FloatEqual
| FloatNotEqual | FloatLess | FloatLessEqual => {
BitvectorDomain::new_top(ByteSize::new(1))
}
},
_ => BitvectorDomain::new_top(self.bin_op_bytesize(op, rhs)),
}
}
......
......@@ -114,6 +114,12 @@ impl<T: AbstractDomain + HasByteSize + RegisterDomain + std::fmt::Debug> MemRegi
pub fn add(&mut self, value: T, position: Bitvector) {
assert_eq!(ByteSize::from(position.width()), self.address_bytesize);
let position = Int::from(position).try_to_i64().unwrap();
self.insert_at_byte_index(value, position);
}
/// Insert a value into the memory region at the given position.
/// The position is the index (in bytes) in the memory region.
pub fn insert_at_byte_index(&mut self, value: T, position: i64) {
let size_in_bytes = u64::from(value.bytesize()) as i64;
assert!(size_in_bytes > 0);
......
......@@ -69,4 +69,19 @@ pub trait RegisterDomain: AbstractDomain + HasByteSize + HasTop {
/// Perform a typecast to extend a bitvector or to cast between integer and floating point types.
fn cast(&self, kind: CastOpType, width: ByteSize) -> Self;
/// Compute the bytesize of the value that results from applying the binary operation `op`
/// with `self` as the left-hand side and `rhs` as the right-hand side.
///
/// - For `Piece` the result size is the sum of the sizes of both operands.
/// - For the comparison, carry/borrow and boolean operation types the result is one byte in size.
/// - For all remaining operation types the result has the same size as `self`.
///
/// This is a generic default implementation that implementors should not override!
fn bin_op_bytesize(&self, op: BinOpType, rhs: &Self) -> ByteSize {
    use BinOpType::*;
    match op {
        IntEqual | IntNotEqual | IntLess | IntLessEqual | IntSLess | IntSLessEqual
        | IntCarry | IntSCarry | IntSBorrow | BoolAnd | BoolOr | BoolXOr | FloatEqual
        | FloatNotEqual | FloatLess | FloatLessEqual => ByteSize::new(1),
        Piece => self.bytesize() + rhs.bytesize(),
        IntAdd | IntSub | IntMult | IntDiv | IntSDiv | IntRem | IntSRem | IntLeft
        | IntRight | IntSRight | IntAnd | IntOr | IntXOr | FloatAdd | FloatSub | FloatMult
        | FloatDiv => self.bytesize(),
    }
}
}
......@@ -23,14 +23,12 @@ pub struct Context<'a> {
pub project: &'a Project,
/// Maps the TIDs of functions that shall be treated as extern symbols to the `ExternSymbol` object representing it.
pub extern_symbol_map: BTreeMap<Tid, &'a ExternSymbol>,
/// A channel where found CWE warnings should be sent to.
/// A channel where found CWE warnings and log messages should be sent to.
/// The receiver may filter or modify the warnings before presenting them to the user.
/// For example, the same CWE warning will be found several times
/// if the fixpoint computation does not instantly stabilize at the corresponding code point.
/// These duplicates need to be filtered out.
pub cwe_collector: crossbeam_channel::Sender<CweWarning>,
/// A channel where log messages should be sent to.
pub log_collector: crossbeam_channel::Sender<LogMessage>,
pub log_collector: crossbeam_channel::Sender<LogThreadMsg>,
/// Names of `malloc`-like extern functions.
pub allocation_symbols: Vec<String>,
/// Names of `free`-like extern functions.
......@@ -43,8 +41,7 @@ impl<'a> Context<'a> {
pub fn new(
project: &Project,
config: Config,
cwe_collector: crossbeam_channel::Sender<CweWarning>,
log_collector: crossbeam_channel::Sender<LogMessage>,
log_collector: crossbeam_channel::Sender<LogThreadMsg>,
) -> Context {
let mut extern_symbol_map = BTreeMap::new();
for symbol in project.program.term.extern_symbols.iter() {
......@@ -63,7 +60,6 @@ impl<'a> Context<'a> {
graph,
project,
extern_symbol_map,
cwe_collector,
log_collector,
allocation_symbols: config.allocation_symbols,
deallocation_symbols: config.deallocation_symbols,
......@@ -78,7 +74,7 @@ impl<'a> Context<'a> {
if let Some(loc) = location {
log_message = log_message.location(loc.clone());
};
self.log_collector.send(log_message).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Log(log_message));
}
}
......@@ -196,7 +192,7 @@ impl<'a> Context<'a> {
call.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
} else {
self.log_debug(
......@@ -244,7 +240,7 @@ impl<'a> Context<'a> {
extern_symbol.name, call.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
}
Err(err) => self.log_debug(
......@@ -349,12 +345,7 @@ impl<'a> Context<'a> {
/// We also assume that the function does not use any parameters saved on the stack,
/// which may greatly reduce correctness of the analysis for the x86_32 architecture.
fn handle_call_to_generic_unknown_function(&self, state_before_call: &State) -> Option<State> {
if let Some(calling_conv) = self
.project
.calling_conventions
.iter()
.find(|cconv| cconv.name == "__stdcall")
{
if let Some(calling_conv) = self.project.get_standard_calling_convention() {
let mut new_state = state_before_call.clone();
new_state.clear_non_callee_saved_register(&calling_conv.callee_saved_register[..]);
// Adjust stack register value (for x86 architecture).
......
......@@ -108,9 +108,8 @@ fn context_problem_implementation() {
use Expression::*;
let (project, config) = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, config, cwe_sender, log_sender);
let context = Context::new(&project, config, log_sender);
let mut state = State::new(&register("RSP"), Tid::new("main"));
let def = Term {
......@@ -271,9 +270,8 @@ fn update_return() {
use crate::analysis::pointer_inference::object::ObjectType;
use crate::analysis::pointer_inference::Data;
let (project, config) = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, config, cwe_sender, log_sender);
let context = Context::new(&project, config, log_sender);
let state_before_return = State::new(&register("RSP"), Tid::new("callee"));
let mut state_before_return = context
.update_def(
......
......@@ -29,7 +29,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Context<'a> for Context<'a>
def.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
match &def.term {
......
......@@ -38,11 +38,11 @@ const VERSION: &str = "0.1";
pub static CWE_MODULE: crate::CweModule = crate::CweModule {
name: "Memory",
version: VERSION,
run: run_analysis,
run: extract_pi_analysis_results,
};
/// The abstract domain type for representing register values.
type Data = DataDomain<BitvectorDomain>;
pub type Data = DataDomain<BitvectorDomain>;
/// Configurable parameters for the analysis.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
......@@ -60,7 +60,8 @@ pub struct Config {
/// A wrapper struct for the pointer inference computation object.
pub struct PointerInference<'a> {
computation: Computation<'a, Context<'a>>,
log_collector: crossbeam_channel::Sender<LogMessage>,
log_collector: crossbeam_channel::Sender<LogThreadMsg>,
pub collected_logs: (Vec<LogMessage>, Vec<CweWarning>),
}
impl<'a> PointerInference<'a> {
......@@ -68,10 +69,9 @@ impl<'a> PointerInference<'a> {
pub fn new(
project: &'a Project,
config: Config,
cwe_sender: crossbeam_channel::Sender<CweWarning>,
log_sender: crossbeam_channel::Sender<LogMessage>,
log_sender: crossbeam_channel::Sender<LogThreadMsg>,
) -> PointerInference<'a> {
let context = Context::new(project, config, cwe_sender, log_sender.clone());
let context = Context::new(project, config, log_sender.clone());
let mut entry_sub_to_entry_blocks_map = HashMap::new();
let subs: HashMap<Tid, &Term<Sub>> = project
......@@ -108,12 +108,10 @@ impl<'a> PointerInference<'a> {
.collect();
let mut fixpoint_computation =
super::interprocedural_fixpoint::Computation::new(context, None);
log_sender
.send(LogMessage::new_debug(format!(
"Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len()
)))
.unwrap();
let _ = log_sender.send(LogThreadMsg::Log(LogMessage::new_debug(format!(
"Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len()
))));
for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() {
fixpoint_computation.set_node_value(
start_node_index,
......@@ -126,6 +124,7 @@ impl<'a> PointerInference<'a> {
PointerInference {
computation: fixpoint_computation,
log_collector: log_sender,
collected_logs: (Vec::new(), Vec::new()),
}
}
......@@ -175,6 +174,14 @@ impl<'a> PointerInference<'a> {
self.computation.get_graph()
}
/// Get a reference to the context object used by the wrapped fixpoint computation.
pub fn get_context(&self) -> &Context {
self.computation.get_context()
}
/// Get the value that the wrapped fixpoint computation holds for the node `node_id`.
/// Returns `None` if the computation has no value for this node.
pub fn get_node_value(&self, node_id: NodeIndex) -> Option<&NodeValue<State>> {
self.computation.get_node_value(node_id)
}
/// Add speculative entry points to the fixpoint algorithm state.
///
/// Since indirect jumps and calls are not handled yet (TODO: change that),
......@@ -258,92 +265,107 @@ impl<'a> PointerInference<'a> {
fn log_debug(&self, msg: impl Into<String>) {
let log_msg = LogMessage::new_debug(msg.into());
self.log_collector.send(log_msg).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Log(log_msg));
}
}
/// The main entry point for executing the pointer inference analysis.
pub fn run_analysis(
project: &Project,
analysis_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let config: Config = serde_json::from_value(analysis_params.clone()).unwrap();
run(project, config, false)
/// Run the pointer inference fixpoint algorithm in three consecutive rounds,
/// adding more functions as possible entry points before each of the later rounds
/// to increase code coverage.
///
/// After each round the blocks with state are counted via `count_blocks_with_state`.
pub fn compute_with_speculative_entry_points(&mut self, project: &Project) {
    // First round: use only the entry points that are already set.
    self.compute();
    self.count_blocks_with_state();
    // Second round (`true`): compute again with speculative entry points added.
    // Third round (`false`): compute again with all missed functions as additional entry points.
    for &entry_point_flag in &[true, false] {
        self.add_speculative_entry_points(project, entry_point_flag);
        self.compute();
        self.count_blocks_with_state();
    }
}
}
/// Generate and execute the pointer inference analysis.
/// Returns a vector of all found CWE warnings and a vector of all log messages generated during analysis.
pub fn run(
project: &Project,
config: Config,
print_debug: bool,
/// The entry point for the memory analysis check.
/// Does not actually compute anything
/// but just extracts the results of the already computed pointer inference analysis.
pub fn extract_pi_analysis_results(
analysis_results: &AnalysisResults,
_analysis_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, log_receiver) = crossbeam_channel::unbounded();
let warning_collector_thread = std::thread::spawn(move || collect_cwe_warnings(cwe_receiver));
let log_collector_thread = std::thread::spawn(move || collect_logs(log_receiver));
let pi_anaylsis = analysis_results.pointer_inference.unwrap();
pi_anaylsis.collected_logs.clone()
}
{
// Scope the computation object so that it is dropped before the warning collector thread is joined.
// Else the warning collector thread will not terminate (the cwe_sender needs to be dropped for it to terminate).
let mut computation = PointerInference::new(project, config, cwe_sender, log_sender);
/// Compute the pointer inference analysis and return its results.
///
/// If `print_debug` is set to `true` print debug information to *stdout*.
/// Note that the format of the debug information is currently unstable and subject to change.
pub fn run(project: &Project, config: Config, print_debug: bool) -> PointerInference {
let logging_thread = LogThread::spawn(collect_all_logs);
computation.compute();
computation.count_blocks_with_state();
let mut computation = PointerInference::new(project, config, logging_thread.get_msg_sender());
// Now compute again with speculative entry points added
computation.add_speculative_entry_points(project, true);
computation.compute();
computation.count_blocks_with_state();
computation.compute_with_speculative_entry_points(project);
// Now compute again with all missed functions as additional entry points
computation.add_speculative_entry_points(project, false);
computation.compute();
computation.count_blocks_with_state();
if print_debug {
computation.print_compact_json();
}
if print_debug {
computation.print_compact_json();
}
// Return the CWE warnings
(
log_collector_thread.join().unwrap(),
warning_collector_thread.join().unwrap(),
)
}
/// Collect CWE warnings from the receiver until the channel is closed. Then return them.
fn collect_cwe_warnings(receiver: crossbeam_channel::Receiver<CweWarning>) -> Vec<CweWarning> {
let mut collected_warnings = HashMap::new();
while let Ok(warning) = receiver.recv() {
match &warning.addresses[..] {
[] => unimplemented!(),
[address, ..] => {
collected_warnings.insert(address.clone(), warning);
}
}
}
collected_warnings
.drain()
.map(|(_key, value)| value)
.collect()
// save the logs and CWE warnings
computation.collected_logs = logging_thread.collect();
computation
}
/// Collect log messages from the receiver until the channel is closed. Then return them.
fn collect_logs(receiver: crossbeam_channel::Receiver<LogMessage>) -> Vec<LogMessage> {
/// This function is responsible for collecting logs and CWE warnings.
/// For warnings with the same origin address only the last one is kept.
/// This prevents duplicates but may suppress some log messages
/// in the rare case that several different log messages with the same origin address are generated.
fn collect_all_logs(
receiver: crossbeam_channel::Receiver<LogThreadMsg>,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let mut logs_with_address = HashMap::new();
let mut general_logs = Vec::new();
while let Ok(log_message) = receiver.recv() {
if let Some(ref tid) = log_message.location {
logs_with_address.insert(tid.address.clone(), log_message);
} else {
general_logs.push(log_message);
let mut collected_cwes = HashMap::new();
while let Ok(log_thread_msg) = receiver.recv() {
match log_thread_msg {
LogThreadMsg::Log(log_message) => {
if let Some(ref tid) = log_message.location {
logs_with_address.insert(tid.address.clone(), log_message);
} else {
general_logs.push(log_message);
}
}
LogThreadMsg::Cwe(cwe_warning) => match &cwe_warning.addresses[..] {
[] => panic!("Unexpected CWE warning without origin address"),
[address, ..] => {
collected_cwes.insert(address.clone(), cwe_warning);
}
},
LogThreadMsg::Terminate => break,
}
}
logs_with_address
let logs = logs_with_address
.values()
.cloned()
.chain(general_logs.into_iter())
.collect()
.collect();
let cwes = collected_cwes.drain().map(|(_key, value)| value).collect();
(logs, cwes)
}
#[cfg(test)]
mod tests {
    use super::*;

    impl<'a> PointerInference<'a> {
        /// Create a `PointerInference` object for the given project for use in unit tests.
        ///
        /// The configuration treats `malloc` as the only allocation symbol
        /// and `free` as the only deallocation symbol.
        /// The receiving end of the log channel is dropped right away,
        /// so nothing sent to the log collector can be read back.
        pub fn mock(project: &'a Project) -> PointerInference<'a> {
            let (log_sender, _) = crossbeam_channel::unbounded();
            let allocation_symbols = vec![String::from("malloc")];
            let deallocation_symbols = vec![String::from("free")];
            let config = Config {
                allocation_symbols,
                deallocation_symbols,
            };
            PointerInference::new(project, config, log_sender)
        }
    }
}
......@@ -2,6 +2,7 @@ pub mod cwe_190;
pub mod cwe_332;
pub mod cwe_426;
pub mod cwe_467;
pub mod cwe_476;
pub mod cwe_560;
pub mod cwe_676;
pub mod cwe_782;
......@@ -100,9 +100,10 @@ fn generate_cwe_warning(callsite: &Tid, called_symbol: &ExternSymbol) -> CweWarn
/// For each call to one of the symbols configured in config.json
/// we check whether the block containing the call also contains a multiplication instruction.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
let symbol_map = get_symbol_map(project, &config.symbols);
......
......@@ -20,7 +20,6 @@
//!
//! - It is not checked whether the seeding function gets called before the random number generator function.
use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::find_symbol;
......@@ -56,9 +55,10 @@ fn generate_cwe_warning(secure_initializer_func: &str, rand_func: &str) -> CweWa
/// Run the CWE check. See the module-level description for more information.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
......
......@@ -70,9 +70,10 @@ fn generate_cwe_warning(sub: &Term<Sub>) -> CweWarning {
/// We check whether a function calls both `system(..)` and a privilege changing function.
/// For each such function a CWE warning is generated.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
let mut privilege_changing_symbols = HashMap::new();
......
......@@ -103,9 +103,10 @@ fn generate_cwe_warning(jmp: &Term<Jmp>, extern_symbol: &ExternSymbol) -> CweWar
/// we check whether a parameter has value `sizeof(void*)`,
/// which may indicate an instance of CWE 467.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
......
//! This module implements a check for CWE-476: NULL Pointer Dereference.
//!
//! Functions like `malloc()` may return NULL values instead of pointers to indicate
//! failed calls. If one tries to access memory through this return value without
//! checking it for being NULL first, this can crash the program.
//!
//! See <https://cwe.mitre.org/data/definitions/476.html> for a detailed description.
//!
//! ## How the check works
//!
//! Using dataflow analysis we search for an execution path where a memory access using the return value of
//! a symbol happens before the return value is checked through a conditional jump instruction.
//!
//! ### Symbols configurable in config.json
//!
//! The symbols are the functions whose return values are assumed to be potential
//! NULL pointers.
//!
//! ## False Positives
//!
//! - If a possible NULL pointer is temporarily saved in a memory location
//! that the [Pointer Inference analysis](crate::analysis::pointer_inference) could not track,
//! the analysis may miss a correct NULL pointer check and thus generate false positives.
//! - The analysis is intraprocedural.
//! If a parameter to a function is a potential NULL pointer,
//! this gets flagged as a CWE hit even if the function may expect NULL pointers in its parameters.
//! If a function returns a potential NULL pointer this gets flagged as a CWE hit,
//! although the function may be supposed to return potential NULL pointers.
//!
//! ## False Negatives
//!
//! - We do not check whether an access to a potential NULL pointer happens regardless
//! of a prior check.
//! - We do not check whether the conditional jump instruction checks specifically
//! for the return value being NULL or something else.
//! - For functions with more than one return value we do not distinguish between
//! the return values.
use crate::analysis::graph::{Edge, Node};
use crate::analysis::interprocedural_fixpoint::Computation;
use crate::analysis::interprocedural_fixpoint::Context as _;
use crate::analysis::interprocedural_fixpoint::NodeValue;
use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage};
use crate::CweModule;
use petgraph::visit::EdgeRef;
use std::collections::HashMap;
mod state;
use state::*;
mod taint;
use taint::*;
mod context;
use context::*;
/// The module descriptor of the CWE-476 check.
/// It bundles the name and version of the check with the function that runs it.
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE476",
version: "0.3",
run: check_cwe,
};
/// The configuration struct of the CWE-476 check.
/// It is deserialized from the JSON parameters handed to [`check_cwe`].
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Config {
/// The names of symbols for which the analysis should check
/// whether the return values are checked for being a Null pointer by the analysed binary.
symbols: Vec<String>,
}
/// Run the CWE check.
/// We check whether the return values of symbols configurable in the config file are being checked for Null pointers
/// before any memory access (and thus potential Null pointer dereferences) through these values happen.
///
/// For each call to one of the configured symbols a separate taint analysis is started
/// at the node that the extern call stub edge of the call leads to.
/// Each of these fixpoint computations is limited to 100 steps.
/// Of all CWE warnings sharing the same first address (the address of the taint source)
/// only the last one received is kept.
///
/// The returned list of log messages is always empty.
///
/// # Panics
///
/// Panics if
/// - the pointer inference results are not contained in `analysis_results`,
/// - `cwe_params` cannot be deserialized to a [`Config`],
/// - the target node of an extern call stub edge is not a `BlkStart` node,
/// - a generated CWE warning does not contain any address.
pub fn check_cwe(
    analysis_results: &AnalysisResults,
    cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
    let project = analysis_results.project;
    let pointer_inference_results = analysis_results
        .pointer_inference
        .expect("The CWE476 check requires the pointer inference results to be computed first");
    let config: Config = serde_json::from_value(cwe_params.clone())
        .expect("Invalid configuration parameters for the CWE476 check");
    let symbol_map = crate::utils::symbol_utils::get_symbol_map(project, &config.symbols[..]);
    let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
    // Creating a context is expensive, so it is created once and cloned for each taint source.
    let general_context = Context::new(project, &pointer_inference_results, cwe_sender);

    for edge in general_context.get_graph().edge_references() {
        // Only calls to extern symbols can be taint sources.
        let jmp = match edge.weight() {
            Edge::ExternCallStub(jmp) => jmp,
            _ => continue,
        };
        // Only calls to symbols named in the configuration are of interest.
        let configured_symbol = match &jmp.term {
            Jmp::Call { target, .. } => symbol_map.get(target),
            _ => None,
        };
        let symbol = match configured_symbol {
            Some(symbol) => symbol,
            None => continue,
        };
        let node = edge.target();
        let current_sub = match general_context.get_graph()[node] {
            Node::BlkStart(_blk, sub) => sub,
            _ => panic!("Expected a BlkStart node as target of an ExternCallStub edge"),
        };
        let mut context = general_context.clone();
        context.set_taint_source(jmp, current_sub);
        let pi_state_at_taint_source = match pointer_inference_results.get_node_value(node) {
            Some(NodeValue::Value(val)) => Some(val.clone()),
            _ => None,
        };
        let mut computation = Computation::new(context, None);
        computation.set_node_value(
            node,
            NodeValue::Value(State::new(
                symbol,
                &project.stack_pointer_register,
                pi_state_at_taint_source.as_ref(),
            )),
        );
        computation.compute_with_max_steps(100);
    }

    // All computations ran synchronously, so every warning is already queued in the channel
    // and a non-blocking iteration is sufficient.
    let mut cwe_warnings = HashMap::new();
    for cwe in cwe_receiver.try_iter() {
        let taint_source_address = cwe
            .addresses
            .first()
            .cloned()
            .expect("Unexpected CWE warning without origin address");
        cwe_warnings.insert(taint_source_address, cwe);
    }
    let cwe_warnings = cwe_warnings.into_iter().map(|(_, cwe)| cwe).collect();
    (Vec::new(), cwe_warnings)
}
use super::State;
use super::Taint;
use super::CWE_MODULE;
use crate::abstract_domain::AbstractDomain;
use crate::analysis::graph::{Graph, Node};
use crate::analysis::interprocedural_fixpoint::Context as _;
use crate::analysis::interprocedural_fixpoint::NodeValue;
use crate::analysis::pointer_inference::PointerInference as PointerInferenceComputation;
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::log::CweWarning;
use petgraph::graph::NodeIndex;
use petgraph::visit::IntoNodeReferences;
use std::collections::HashMap;
use std::sync::Arc;
/// The context object for the Null-Pointer-Dereference check.
///
/// There is always only one source of taint for the analysis.
/// On creation of a `Context` object, the taint source is not set.
/// Starting the fixpoint algorithm without
/// [setting the taint source](Context::set_taint_source()) first will lead to a panic.
/// By resetting the taint source one can reuse the context object for several fixpoint computations.
#[derive(Clone)]
pub struct Context<'a> {
/// A pointer to the corresponding project struct.
project: &'a Project,
/// A pointer to the results of the pointer inference analysis.
/// They are used to determine the targets of pointers to memory,
/// which in turn is used to keep track of taint on the stack or on the heap.
pub pointer_inference_results: &'a PointerInferenceComputation<'a>,
/// A map to get the node index of the `BlkStart` node containing a given [`Def`] as the first `Def` of the block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
block_start_node_map: Arc<HashMap<(Tid, Tid), NodeIndex>>,
/// Maps the TID of an extern symbol to the extern symbol struct.
extern_symbol_map: Arc<HashMap<Tid, &'a ExternSymbol>>,
/// A map to get the node index of the `BlkEnd` node containing a given [`Jmp`].
/// The keys are of the form `(Jmp-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
jmp_to_blk_end_node_map: Arc<HashMap<(Tid, Tid), NodeIndex>>,
/// The call whose return values are the sources for taint for the analysis.
taint_source: Option<&'a Term<Jmp>>,
/// The name of the function, whose return values are the taint sources.
taint_source_name: Option<String>,
/// The current subfunction.
/// Since the analysis is intraprocedural,
/// all nodes with state during the fixpoint algorithm should belong to this function.
current_sub: Option<&'a Term<Sub>>,
/// A channel where found CWE hits can be sent to.
cwe_collector: crossbeam_channel::Sender<CweWarning>,
}
impl<'a> Context<'a> {
/// Create a new context object.
///
/// Note that one has to set the taint source separately before starting the analysis!
///
/// If one wants to run the analysis for several sources,
/// one should clone or reuse an existing `Context` object instead of generating new ones,
/// since this function can be expensive!
pub fn new(
project: &'a Project,
pointer_inference_results: &'a PointerInferenceComputation<'a>,
cwe_collector: crossbeam_channel::Sender<CweWarning>,
) -> Self {
let mut block_start_node_map = HashMap::new();
let mut jmp_to_blk_end_node_map = HashMap::new();
let graph = pointer_inference_results.get_graph();
for (node_id, node) in graph.node_references() {
match node {
Node::BlkStart(block, sub) => {
if let Some(def) = block.term.defs.get(0) {
block_start_node_map.insert((def.tid.clone(), sub.tid.clone()), node_id);
}
}
Node::BlkEnd(block, sub) => {
for jmp in block.term.jmps.iter() {
jmp_to_blk_end_node_map.insert((jmp.tid.clone(), sub.tid.clone()), node_id);
}
}
_ => (),
}
}
let mut extern_symbol_map = HashMap::new();
for symbol in project.program.term.extern_symbols.iter() {
extern_symbol_map.insert(symbol.tid.clone(), symbol);
}
Context {
project,
pointer_inference_results,
block_start_node_map: Arc::new(block_start_node_map),
extern_symbol_map: Arc::new(extern_symbol_map),
jmp_to_blk_end_node_map: Arc::new(jmp_to_blk_end_node_map),
taint_source: None,
taint_source_name: None,
current_sub: None,
cwe_collector,
}
}
/// Set the taint source and the current function for the analysis.
///
/// The name of the taint source is looked up in the extern symbol map of the context.
/// If `taint_source` is not a call or if its target is not a known extern symbol,
/// the name is set to `"Unknown"`.
pub fn set_taint_source(&mut self, taint_source: &'a Term<Jmp>, current_sub: &'a Term<Sub>) {
    // Use the already built TID-to-symbol map instead of scanning all extern symbols linearly.
    let taint_source_name = match &taint_source.term {
        Jmp::Call { target, .. } => self
            .extern_symbol_map
            .get(target)
            .map(|symbol| symbol.name.clone()),
        _ => None,
    }
    .unwrap_or_else(|| "Unknown".to_string());
    self.taint_source = Some(taint_source);
    self.taint_source_name = Some(taint_source_name);
    self.current_sub = Some(current_sub);
}
/// Get the current pointer inference state (if one can be found) for the given taint state.
///
/// If the taint state itself carries a pointer inference state, a copy of it is returned.
/// Otherwise `tid` together with the TID of the current function is used
/// to look up a `BlkStart` node,
/// and the pointer inference state at that node (if there is one) is returned.
///
/// Panics if the lookup is necessary but the current function has not been set yet.
fn get_current_pointer_inference_state(
    &self,
    state: &State,
    tid: &Tid,
) -> Option<PointerInferenceState> {
    if let Some(pi_state) = state.get_pointer_inference_state() {
        return Some(pi_state.clone());
    }
    let lookup_key = (tid.clone(), self.current_sub.unwrap().tid.clone());
    let node_id = self.block_start_node_map.get(&lookup_key)?;
    match self.pointer_inference_results.get_node_value(*node_id) {
        Some(NodeValue::Value(pi_state)) => Some(pi_state.clone()),
        _ => None,
    }
}
/// Apply the effect of the given `Def` term
/// to the pointer inference state contained in the given taint state.
///
/// If no current pointer inference state can be found, the taint state is left untouched.
fn update_pointer_inference_state(&self, state: &mut State, def: &Term<Def>) {
    let current_pi_state = match self.get_current_pointer_inference_state(state, &def.tid) {
        Some(pi_state) => pi_state,
        None => return,
    };
    let updated_pi_state = self
        .pointer_inference_results
        .get_context()
        .update_def(&current_pi_state, def);
    state.set_pointer_inference_state(updated_pi_state);
}
/// Generate a CWE warning for the taint source of the context object
/// and send it to the CWE collector channel.
///
/// The warning contains the addresses and TIDs of both the taint source
/// and the location where the taint was accessed.
/// A failure to send the warning is ignored.
///
/// Panics if the taint source has not been set.
fn generate_cwe_warning(&self, taint_access_location: &Tid) {
    let taint_source = self.taint_source.unwrap();
    let taint_source_name = self.taint_source_name.clone().unwrap();
    let description = format!(
        "(NULL Pointer Dereference) There is no check if the return value is NULL at {} ({}).",
        taint_source.tid.address, taint_source_name
    );
    let addresses = vec![
        taint_source.tid.address.clone(),
        taint_access_location.address.clone(),
    ];
    let tids = vec![
        format!("{}", taint_source.tid),
        format!("{}", taint_access_location),
    ];
    let cwe_warning = CweWarning::new(CWE_MODULE.name, CWE_MODULE.version, description)
        .addresses(addresses)
        .tids(tids)
        .symbols(vec![taint_source_name]);
    let _ = self.cwe_collector.send(cwe_warning);
}
/// Evaluate the given function parameter on the given taint state
/// and return the resulting taint.
///
/// Register parameters are evaluated directly on the taint state.
/// For stack parameters the address `stack pointer + offset` is evaluated
/// on the pointer inference state at the node `node_id`
/// and the taint is then loaded from that address.
/// If there is no pointer inference state at the node or the address cannot be evaluated,
/// `Taint::Top` of the size of the parameter is returned.
pub fn check_parameter_arg_for_taint(
    &self,
    parameter: &Arg,
    state: &State,
    node_id: NodeIndex,
) -> Taint {
    match parameter {
        Arg::Register(var) => state.eval(&Expression::Var(var.clone())),
        Arg::Stack { offset, size } => {
            let pi_state = match self.pointer_inference_results.get_node_value(node_id) {
                Some(NodeValue::Value(pi_state)) => pi_state,
                _ => return Taint::Top(*size),
            };
            let stack_pointer = &self.project.stack_pointer_register;
            // The offset has to be truncated to the size of the stack pointer register.
            let offset_constant = Bitvector::from_i64(*offset)
                .into_truncate(apint::BitWidth::from(stack_pointer.size))
                .unwrap();
            let address_expression = Expression::BinOp {
                op: BinOpType::IntAdd,
                lhs: Box::new(Expression::Var(stack_pointer.clone())),
                rhs: Box::new(Expression::Const(offset_constant)),
            };
            match pi_state.eval(&address_expression) {
                Ok(stack_address) => state.load_taint_from_memory(&stack_address, *size),
                Err(_) => Taint::Top(*size),
            }
        }
    }
}
/// Handle a call for which no specific information about its parameters is used.
///
/// If a possible parameter register of the call contains taint,
/// a CWE warning is generated and `None` is returned.
/// Otherwise a copy of the state is returned,
/// in which all taint contained in non-callee-saved registers
/// of the standard calling convention (if the project has one) is removed.
fn handle_generic_call(&self, state: &State, call_tid: &Tid) -> Option<State> {
    // TODO: We do not yet check recursively for taint contained in objects pointed to by parameters.
    if state.check_generic_function_params_for_taint(self.project) {
        self.generate_cwe_warning(call_tid);
        None
    } else {
        let mut state_after_call = state.clone();
        if let Some(standard_cconv) = self.project.get_standard_calling_convention() {
            state_after_call.remove_non_callee_saved_taint(standard_cconv);
        }
        Some(state_after_call)
    }
}
}
impl<'a> crate::analysis::interprocedural_fixpoint::Context<'a> for Context<'a> {
type Value = State;
/// Get the underlying graph of the fixpoint computation.
///
/// The graph is the one returned by the pointer inference results held by the context.
fn get_graph(&self) -> &Graph<'a> {
self.pointer_inference_results.get_graph()
}
/// Merge two states by delegating to the `merge` method of the first state.
fn merge(&self, state1: &State, state2: &State) -> State {
state1.merge(state2)
}
/// Just returns a copy of the input state.
///
/// Neither the condition nor its truth value is used.
fn specialize_conditional(
&self,
state: &State,
_condition: &Expression,
_is_true: bool,
) -> Option<State> {
Some(state.clone())
}
/// Warn about calls whose parameters may contain taint.
///
/// A CWE warning is generated if taint may be contained in the function parameters.
/// The return value is always `None` so that the analysis stays intraprocedural.
fn update_call(&self, state: &State, call: &Term<Jmp>, _target: &Node) -> Option<Self::Value> {
    // TODO: We do not yet check recursively for taint contained in objects pointed to by parameters.
    let params_may_be_tainted = state.check_generic_function_params_for_taint(self.project);
    if params_may_be_tainted {
        self.generate_cwe_warning(&call.tid);
    }
    None
}
/// Handle a call to an extern symbol or an indirect call.
///
/// States without taint are not propagated (`None`).
/// For a direct call, every parameter of the extern symbol is checked for taint;
/// on the first tainted parameter a CWE warning is generated and `None` is returned.
/// Otherwise the taint of non-callee-saved registers is removed from a copy of the state.
/// Indirect calls are treated as generic calls.
///
/// Panics if the call target is not a known extern symbol,
/// if no block end node is registered for the call,
/// or if the jump is neither a `Call` nor a `CallInd`.
fn update_call_stub(&self, state: &State, call: &Term<Jmp>) -> Option<Self::Value> {
    // TODO: We do not yet check recursively for taint contained in objects pointed to by parameters.
    if state.is_empty() {
        return None;
    }
    match &call.term {
        Jmp::Call { target, .. } => {
            let extern_symbol = match self.extern_symbol_map.get(target) {
                Some(symbol) => symbol,
                None => panic!("Extern symbol not found."),
            };
            let blk_end_node_id = *self
                .jmp_to_blk_end_node_map
                .get(&(call.tid.clone(), self.current_sub.unwrap().tid.clone()))
                .unwrap();
            // `any` stops at the first parameter that is not `Top`.
            let some_parameter_is_tainted = extern_symbol.parameters.iter().any(|parameter| {
                !self
                    .check_parameter_arg_for_taint(parameter, state, blk_end_node_id)
                    .is_top()
            });
            if some_parameter_is_tainted {
                self.generate_cwe_warning(&call.tid);
                return None;
            }
            let mut new_state = state.clone();
            new_state
                .remove_non_callee_saved_taint(extern_symbol.get_calling_convention(self.project));
            Some(new_state)
        }
        Jmp::CallInd { .. } => self.handle_generic_call(state, &call.tid),
        _ => panic!("Malformed control flow graph encountered."),
    }
}
/// Update the taint state according to the effects of the given [`Def`].
///
/// * `Assign`: the target register receives the taint of the assigned expression.
/// * `Load`: the target register receives the taint found at the loaded address.
///   If the address cannot be resolved (no pointer inference state available
///   or the address expression cannot be evaluated), the register is marked as untainted,
///   since the load overwrites its previous content in any case.
/// * `Store`: the taint of the stored value is written to the target address.
///   If no pointer inference state is available, all memory taint is removed.
///
/// If tainted memory is accessed through a load or store instruction
/// (i.e. the address expression itself is tainted),
/// generate a CWE warning and return `None`.
/// States without any taint are not propagated and also yield `None`.
fn update_def(&self, state: &State, def: &Term<Def>) -> Option<Self::Value> {
    if state.is_empty() {
        // Without taint there is nothing to propagate.
        return None;
    }
    let mut new_state = state.clone();
    match &def.term {
        Def::Assign { var, value } => {
            new_state.set_register_taint(var, state.eval(value));
        }
        Def::Load { var, address } => {
            if state.eval(address).is_tainted() {
                self.generate_cwe_warning(&def.tid);
                return None;
            } else if let Some(pi_state) =
                self.get_current_pointer_inference_state(state, &def.tid)
            {
                // The register is overwritten by the load even if the address is unknown,
                // so its old taint must not survive a failed address evaluation.
                let taint = match pi_state.eval(address) {
                    Ok(address_data) => state.load_taint_from_memory(&address_data, var.size),
                    Err(_) => Taint::Top(var.size),
                };
                new_state.set_register_taint(var, taint);
            } else {
                new_state.set_register_taint(var, Taint::Top(var.size));
            }
        }
        Def::Store { address, value } => {
            if state.eval(address).is_tainted() {
                self.generate_cwe_warning(&def.tid);
                return None;
            } else if let Some(pi_state) =
                self.get_current_pointer_inference_state(state, &def.tid)
            {
                if let Ok(address_data) = pi_state.eval(address) {
                    let taint = state.eval(value);
                    new_state.save_taint_to_memory(&address_data, taint);
                }
            } else {
                // We lost all knowledge about memory pointers.
                // We delete all memory taint to reduce false positives.
                new_state.remove_all_memory_taints();
            }
        }
    }
    self.update_pointer_inference_state(&mut new_state, def);
    Some(new_state)
}
/// Update the state according to a jump instruction.
///
/// If the jump or the untaken conditional jump is a `CBranch` whose condition is tainted,
/// we assume that the taint source was correctly checked for being a Null pointer
/// and return `None`.
/// States without taint also yield `None`.
/// In all other cases a copy of the state is returned
/// from which only the `pointer_inference_state` has been removed.
fn update_jump(
    &self,
    state: &State,
    jump: &Term<Jmp>,
    untaken_conditional: Option<&Term<Jmp>>,
    _target: &Term<Blk>,
) -> Option<Self::Value> {
    if state.is_empty() {
        // Without taint there is nothing to propagate.
        return None;
    }
    // True if the jump is a conditional branch on a tainted value.
    let checks_tainted_value = |jmp: &Term<Jmp>| match &jmp.term {
        Jmp::CBranch { condition, .. } => state.eval(condition).is_tainted(),
        _ => false,
    };
    if checks_tainted_value(jump) || untaken_conditional.map_or(false, checks_tainted_value) {
        return None;
    }
    let mut new_state = state.clone();
    new_state.set_pointer_inference_state(None);
    Some(new_state)
}
/// Handle the return from a call.
///
/// If `state_before_return` is set and contains taint,
/// a CWE warning is generated (since the function may return a Null pointer in this case).
/// If `state_before_call` is set, it is handled like a generic extern function call
/// (see [`update_call_stub`](Context::update_call_stub()) for more);
/// otherwise `None` is returned.
fn update_return(
    &self,
    state_before_return: Option<&State>,
    state_before_call: Option<&State>,
    call_term: &Term<Jmp>,
    return_term: &Term<Jmp>,
) -> Option<State> {
    let taint_is_returned = state_before_return.map_or(false, |state| !state.is_empty());
    if taint_is_returned {
        self.generate_cwe_warning(&return_term.tid);
    }
    // No early return above: `state_before_call` may also be set (possible for recursive functions).
    state_before_call.and_then(|state| self.handle_generic_call(state, &call_term.tid))
}
}
#[cfg(test)]
mod tests {
use super::*;
impl<'a> Context<'a> {
/// Create a context for unit tests whose taint source is a mock call to `malloc`
/// inside a mock function named `current_sub`.
pub fn mock(
project: &'a Project,
pi_results: &'a PointerInferenceComputation<'a>,
) -> Context<'a> {
// The receiving end of the channel is dropped right away.
let (cwe_sender, _) = crossbeam_channel::unbounded();
let mut context = Context::new(project, pi_results, cwe_sender);
let taint_source = Box::new(Term {
tid: Tid::new("taint_source"),
term: Jmp::Call {
target: Tid::new("malloc"),
return_: None,
},
});
// Leak the boxes to obtain references that can be handed to the context.
let taint_source = Box::leak(taint_source);
let current_sub = Box::new(Sub::mock("current_sub"));
let current_sub = Box::leak(current_sub);
context.set_taint_source(taint_source, current_sub);
context
}
}
/// A register parameter is reported as tainted only if the register is tainted in the state.
#[test]
fn check_parameter_arg_for_taint() {
let project = Project::mock_empty();
let pi_results = PointerInferenceComputation::mock(&project);
let context = Context::mock(&project, &pi_results);
let (state, _pi_state) = State::mock_with_pi_state();
let arg = Arg::Register(Variable::mock("RAX", 8u64));
let param_taint = context.check_parameter_arg_for_taint(&arg, &state, NodeIndex::new(0));
assert!(param_taint.is_tainted());
let arg = Arg::Register(Variable::mock("RBX", 8u64));
let param_taint = context.check_parameter_arg_for_taint(&arg, &state, NodeIndex::new(0));
assert!(!param_taint.is_tainted());
}
/// A generic call yields a state for an untainted input
/// and `None` once a register is tainted.
#[test]
fn handle_generic_call() {
let project = Project::mock_empty();
let pi_results = PointerInferenceComputation::mock(&project);
let context = Context::mock(&project, &pi_results);
let mut state = State::mock();
assert!(context
.handle_generic_call(&state, &Tid::new("call_tid"))
.is_some());
state.set_register_taint(
&Variable::mock("RDX", 8u64),
Taint::Tainted(ByteSize::new(8)),
);
assert!(context
.handle_generic_call(&state, &Tid::new("call_tid"))
.is_none());
}
/// Check taint propagation through `Assign`, `Load` and `Store` definitions.
#[test]
fn update_def() {
let project = Project::mock_empty();
let pi_results = PointerInferenceComputation::mock(&project);
let context = Context::mock(&project, &pi_results);
let (mut state, pi_state) = State::mock_with_pi_state();
state.set_pointer_inference_state(Some(pi_state));
// Assigning the tainted RAX to RCX taints RCX.
let assign_def = Term {
tid: Tid::new("def"),
term: Def::Assign {
var: Variable::mock("RCX", 8u64),
value: Expression::Var(Variable::mock("RAX", 8u64)),
},
};
let result = context.update_def(&state, &assign_def).unwrap();
assert!(result
.eval(&Expression::Var(Variable::mock("RCX", 8u64)))
.is_tainted());
assert!(result
.eval(&Expression::Var(Variable::mock("RSP", 8u64)))
.is_top());
// Loading from the tainted stack location taints RCX.
let load_def = Term {
tid: Tid::new("def"),
term: Def::Load {
var: Variable::mock("RCX", 8u64),
address: Expression::Var(Variable::mock("RSP", 8u64)),
},
};
let result = context.update_def(&state, &load_def).unwrap();
assert!(result
.eval(&Expression::Var(Variable::mock("RCX", 8u64)))
.is_tainted());
assert!(result
.eval(&Expression::Var(Variable::mock("RSP", 8u64)))
.is_top());
// Storing the untainted RCX to the stack location and loading it back yields no taint.
let store_def = Term {
tid: Tid::new("def"),
term: Def::Store {
value: Expression::Var(Variable::mock("RCX", 8u64)),
address: Expression::Var(Variable::mock("RSP", 8u64)),
},
};
let result = context.update_def(&state, &store_def).unwrap();
let result = context.update_def(&result, &load_def).unwrap();
assert!(result
.eval(&Expression::Var(Variable::mock("RCX", 8u64)))
.is_top());
}
/// A conditional branch on a tainted value stops the propagation,
/// a branch on an untainted value does not.
#[test]
fn update_jump() {
let project = Project::mock_empty();
let pi_results = PointerInferenceComputation::mock(&project);
let context = Context::mock(&project, &pi_results);
let (state, _pi_state) = State::mock_with_pi_state();
let jump = Term {
tid: Tid::new("jmp"),
term: Jmp::CBranch {
target: Tid::new("target"),
condition: Expression::Var(Variable::mock("RAX", 8u64)),
},
};
assert!(context
.update_jump(&state, &jump, None, &Blk::mock())
.is_none());
let jump = Term {
tid: Tid::new("jmp"),
term: Jmp::CBranch {
target: Tid::new("target"),
condition: Expression::Var(Variable::mock("RBX", 8u64)),
},
};
assert!(context
.update_jump(&state, &jump, None, &Blk::mock())
.is_some());
}
}
use crate::abstract_domain::{
AbstractDomain, AbstractIdentifier, BitvectorDomain, HasByteSize, MemRegion, RegisterDomain,
};
use crate::analysis::pointer_inference::Data;
use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::intermediate_representation::*;
use crate::prelude::*;
use std::collections::HashMap;
use super::Taint;
/// The state object of the taint analysis representing all known tainted memory and register values.
#[derive(Serialize, Deserialize, Debug, Eq, Clone)]
pub struct State {
/// The set of currently tainted registers.
register_taint: HashMap<Variable, Taint>,
/// The taint contained in memory objects, indexed by the identifier of the memory object.
memory_taint: HashMap<AbstractIdentifier, MemRegion<Taint>>,
/// The state of the pointer inference analysis.
/// Used only for preventing unnecessary recomputation during handling of `Def`s in a basic block.
/// It is set when handling `Def`s (except for the first `Def` in a block)
/// provided that a corresponding pointer inference analysis state exists.
/// Otherwise the field is ignored (including in the [merge](State::merge)-function) and usually set to `None`.
/// The field is skipped when serializing the state.
#[serde(skip_serializing)]
pointer_inference_state: Option<PointerInferenceState>,
}
impl PartialEq for State {
    /// Two states compare equal if their register taints and memory taints are equal.
    ///
    /// The `pointer_inference_state` field does not take part in the comparison,
    /// since it only denotes an intermediate value.
    fn eq(&self, other: &Self) -> bool {
        let own_taints = (&self.register_taint, &self.memory_taint);
        let other_taints = (&other.register_taint, &other.memory_taint);
        own_taints == other_taints
    }
}
impl AbstractDomain for State {
    /// Merge two states.
    /// Any value tainted in at least one input state is also tainted in the merged state.
    ///
    /// Register taints present in both states are merged,
    /// all others are copied over unchanged.
    /// Memory regions present in both states are combined
    /// by inserting the taints of `other` into the region of `self`.
    /// This is unsound when merging taints that intersect only partially.
    /// However, this should not have an effect in practice,
    /// since these values are usually unsound and unused by the program anyway.
    ///
    /// The merged state never carries a pointer inference state.
    fn merge(&self, other: &Self) -> Self {
        let mut register_taint = self.register_taint.clone();
        for (var, other_taint) in other.register_taint.iter() {
            let merged_taint = match self.register_taint.get(var) {
                Some(taint) => taint.merge(other_taint),
                None => *other_taint,
            };
            register_taint.insert(var.clone(), merged_taint);
        }

        let mut memory_taint = self.memory_taint.clone();
        for (tid, other_mem_region) in other.memory_taint.iter() {
            match memory_taint.get_mut(tid) {
                Some(mem_region) => {
                    // Unsound in theory for partially intersecting taints. Should not matter in practice.
                    for (index, taint) in other_mem_region.iter() {
                        mem_region.insert_at_byte_index(*taint, *index);
                    }
                }
                None => {
                    memory_taint.insert(tid.clone(), other_mem_region.clone());
                }
            }
        }

        State {
            register_taint,
            memory_taint,
            // At nodes this intermediate value can be safely forgotten.
            pointer_inference_state: None,
        }
    }

    /// The state has no explicit Top element, so this always returns `false`.
    fn is_top(&self) -> bool {
        false
    }
}
impl State {
/// Get a new state in which only the return values of the given extern symbol are tainted.
///
/// Return registers are tainted directly.
/// Return values on the stack are only tainted if a pointer inference state is given
/// and the stack address can be evaluated with it; otherwise they are ignored.
pub fn new(
    taint_source: &ExternSymbol,
    stack_pointer_register: &Variable,
    pi_state: Option<&PointerInferenceState>,
) -> State {
    let mut state = State {
        register_taint: HashMap::new(),
        memory_taint: HashMap::new(),
        pointer_inference_state: None,
    };
    for return_arg in taint_source.return_values.iter() {
        match (return_arg, pi_state) {
            (Arg::Register(var), _) => {
                state
                    .register_taint
                    .insert(var.clone(), Taint::Tainted(var.size));
            }
            (Arg::Stack { offset, size }, Some(pi_state)) => {
                // The offset is truncated to the width of the stack pointer register.
                let offset_const = Bitvector::from_i64(*offset)
                    .into_truncate(apint::BitWidth::from(stack_pointer_register.size))
                    .unwrap();
                let address_exp = Expression::BinOp {
                    op: BinOpType::IntAdd,
                    lhs: Box::new(Expression::Var(stack_pointer_register.clone())),
                    rhs: Box::new(Expression::Const(offset_const)),
                };
                if let Ok(address) = pi_state.eval(&address_exp) {
                    state.save_taint_to_memory(&address, Taint::Tainted(*size));
                }
            }
            // Without pointer inference state the stack address cannot be determined.
            (Arg::Stack { .. }, None) => {}
        }
    }
    state
}
/// Evaluate whether the result of the given expression is tainted in the current state.
///
/// Constants and unknown expressions are never tainted.
/// A variable is tainted if it has an entry in the register taint map.
/// For all other expressions the taint is computed from the taint of the subexpressions.
pub fn eval(&self, expression: &Expression) -> Taint {
    match expression {
        Expression::Const(_) => Taint::Top(expression.bytesize()),
        Expression::Var(var) if self.register_taint.contains_key(var) => {
            Taint::Tainted(var.size)
        }
        Expression::Var(var) => Taint::Top(var.size),
        Expression::BinOp { op, lhs, rhs } => self.eval(lhs).bin_op(*op, &self.eval(rhs)),
        Expression::UnOp { op, arg } => self.eval(arg).un_op(*op),
        Expression::Unknown { size, .. } => Taint::Top(*size),
        Expression::Cast { op, size, arg } => self.eval(arg).cast(*op, *size),
        Expression::Subpiece {
            low_byte,
            size,
            arg,
        } => self.eval(arg).subpiece(*low_byte, *size),
    }
}
/// Get the current pointer inference state if it is contained as an intermediate value in the state.
/// Returns `None` if no pointer inference state is stored.
pub fn get_pointer_inference_state(&self) -> Option<&PointerInferenceState> {
self.pointer_inference_state.as_ref()
}
/// Set the current pointer inference state for `self`.
/// Passing `None` removes a previously stored pointer inference state.
pub fn set_pointer_inference_state(&mut self, pi_state: Option<PointerInferenceState>) {
self.pointer_inference_state = pi_state;
}
/// Return whether the value at the given address (with the given size) is tainted.
///
/// Only pointer addresses are considered.
/// The taints found at all targets with a known memory region and an exact offset are merged,
/// so the result is tainted if the value is tainted at any such target.
/// Everything else yields an untainted `Top` value.
pub fn load_taint_from_memory(&self, address: &Data, size: ByteSize) -> Taint {
    let pointer = match address {
        Data::Pointer(pointer) => pointer,
        _ => return Taint::Top(size),
    };
    pointer
        .targets()
        .iter()
        .fold(Taint::Top(size), |taint, (mem_id, offset)| {
            match (self.memory_taint.get(mem_id), offset) {
                (Some(mem_region), BitvectorDomain::Value(position)) => {
                    taint.merge(&mem_region.get(position.clone(), size))
                }
                _ => taint,
            }
        })
}
/// Mark the value at the given address with the given taint.
///
/// If the address points to exactly one object, the taint overwrites the value at the target.
/// If the address may point to more than one object,
/// we merge the taint object with the object at the targets,
/// possibly tainting all possible targets.
/// Targets without an exact offset and non-pointer addresses are ignored.
pub fn save_taint_to_memory(&mut self, address: &Data, taint: Taint) {
    if let Data::Pointer(pointer) = address {
        let targets = pointer.targets();
        let has_unique_target = targets.len() == 1;
        for (mem_id, offset) in targets.iter() {
            if let BitvectorDomain::Value(position) = offset {
                match self.memory_taint.get_mut(mem_id) {
                    Some(mem_region) => {
                        let new_taint = if has_unique_target {
                            taint
                        } else {
                            // The write may not happen at this target, so keep existing taint.
                            let old_taint = mem_region.get(position.clone(), taint.bytesize());
                            old_taint.merge(&taint)
                        };
                        mem_region.add(new_taint, position.clone());
                    }
                    None => {
                        let mut mem_region = MemRegion::new(address.bytesize());
                        mem_region.add(taint, position.clone());
                        self.memory_taint.insert(mem_id.clone(), mem_region);
                    }
                }
            }
        }
    }
}
/// Remove all knowledge about taints contained in memory objects.
/// Register taints are left untouched.
pub fn remove_all_memory_taints(&mut self) {
self.memory_taint = HashMap::new();
}
/// Set the taint of a register.
pub fn set_register_taint(&mut self, register: &Variable, taint: Taint) {
if taint.is_top() {
self.register_taint.remove(register);
} else {
self.register_taint.insert(register.clone(), taint);
}
}
/// Check whether a generic function call may contain tainted values in its parameters.
///
/// Since we don't know the actual calling convention of the call,
/// we approximate the parameters with all parameter registers
/// of the standard calling convention of the project.
/// If the project has no standard calling convention,
/// every register is assumed to be a possible parameter.
pub fn check_generic_function_params_for_taint(&self, project: &Project) -> bool {
    match project.get_standard_calling_convention() {
        Some(calling_conv) => self.register_taint.iter().any(|(register, taint)| {
            let is_parameter_register = calling_conv
                .parameter_register
                .iter()
                .any(|param| *param == register.name);
            is_parameter_register && !taint.is_top()
        }),
        // No standard calling convention found. Assume all registers may be parameters.
        None => self.register_taint.values().any(|taint| !taint.is_top()),
    }
}
/// Remove the taint from all registers not contained in the callee-saved register list
/// of the given calling convention.
/// Registers are matched against the list by name.
pub fn remove_non_callee_saved_taint(&mut self, calling_conv: &CallingConvention) {
    self.register_taint.retain(|register, _taint| {
        calling_conv
            .callee_saved_register
            .iter()
            .any(|callee_saved_reg| register.name == *callee_saved_reg)
    });
}
/// Check whether `self` contains any taint at all.
/// This is the case if both the memory taint map and the register taint map are empty.
pub fn is_empty(&self) -> bool {
self.memory_taint.is_empty() && self.register_taint.is_empty()
}
}
impl State {
/// Get a more compact json-representation of the state.
/// Intended for pretty printing, not useable for serialization/deserialization.
#[allow(dead_code)]
pub fn to_json_compact(&self) -> serde_json::Value {
use serde_json::*;
use std::iter::FromIterator;
let register: Vec<(String, Value)> = self
.register_taint
.iter()
.map(|(var, data)| (var.name.clone(), json!(format!("{}", data))))
.collect();
let mut memory = Vec::new();
for (tid, mem_region) in self.memory_taint.iter() {
let mut elements = Vec::new();
for (offset, elem) in mem_region.iter() {
elements.push((offset.to_string(), json!(elem.to_string())));
}
memory.push((format!("{}", tid), Value::Object(Map::from_iter(elements))));
}
let mut state_map = Vec::new();
state_map.push((
"register".to_string(),
Value::Object(Map::from_iter(register)),
));
state_map.push(("memory".to_string(), Value::Object(Map::from_iter(memory))));
Value::Object(Map::from_iter(state_map))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::abstract_domain::*;
impl State {
/// Create an empty state without any taint for unit tests.
pub fn mock() -> State {
State {
register_taint: HashMap::new(),
memory_taint: HashMap::new(),
pointer_inference_state: None,
}
}
/// Create a state (together with the pointer inference state used to build it)
/// for an extern symbol whose return values are the register `RAX`
/// and the stack value at offset 0.
pub fn mock_with_pi_state() -> (State, PointerInferenceState) {
let arg1 = Arg::Register(register("RAX"));
let arg2 = Arg::Stack {
offset: 0,
size: ByteSize::new(8),
};
let pi_state = PointerInferenceState::new(&register("RSP"), Tid::new("func"));
let symbol = ExternSymbol {
tid: Tid::new("extern_symbol".to_string()),
addresses: vec![],
name: "extern_symbol".into(),
calling_convention: None,
parameters: Vec::new(),
return_values: vec![arg1, arg2],
no_return: false,
};
let state = State::new(&symbol, &register("RSP"), Some(&pi_state));
(state, pi_state)
}
}
/// Create a non-temporary 8-byte register variable with the given name.
fn register(name: &str) -> Variable {
Variable {
name: name.into(),
size: ByteSize::new(8),
is_temp: false,
}
}
/// Create an exact bitvector value from the given integer.
fn bv(value: i64) -> BitvectorDomain {
BitvectorDomain::Value(Bitvector::from_i64(value))
}
/// Create an abstract identifier for the 8-byte register location with the given name.
fn new_id(name: &str) -> AbstractIdentifier {
AbstractIdentifier::new(
Tid::new("time0"),
AbstractLocation::Register(name.into(), ByteSize::new(8)),
)
}
/// Create a pointer to the given offset inside the object identified by `location`.
fn new_pointer_domain(location: &str, offset: i64) -> PointerDomain<BitvectorDomain> {
let id = new_id(location);
PointerDomain::new(id, bv(offset))
}
/// Merging keeps register taint from one state and memory taint from the other.
#[test]
fn merge_state() {
let taint = Taint::Tainted(ByteSize::new(8));
let top = Taint::Top(ByteSize::new(8));
let mut state = State::mock();
state.set_register_taint(&register("RAX"), taint.clone());
let mut other_state = State::mock();
let address = Data::Pointer(new_pointer_domain("mem", 10));
other_state.save_taint_to_memory(&address, taint);
let merged_state = state.merge(&other_state);
assert_eq!(
merged_state.register_taint.get(&register("RAX")),
Some(&taint)
);
assert_eq!(merged_state.register_taint.get(&register("RBX")), None);
assert_eq!(
merged_state.load_taint_from_memory(&address, ByteSize::new(8)),
taint.clone()
);
// An address that was never written to is not tainted.
let other_address = Data::Pointer(new_pointer_domain("mem", 18));
assert_eq!(
merged_state.load_taint_from_memory(&other_address, ByteSize::new(8)),
top.clone()
);
}
/// A new state taints exactly the return values of the extern symbol.
#[test]
fn new_state() {
let (state, pi_state) = State::mock_with_pi_state();
let taint = Taint::Tainted(ByteSize::new(8));
assert_eq!(state.register_taint.get(&register("RAX")), Some(&taint));
assert_eq!(state.register_taint.get(&register("RSP")), None);
let address = Expression::Var(register("RSP"));
assert_eq!(
state.load_taint_from_memory(&pi_state.eval(&address).unwrap(), ByteSize::new(8)),
taint
);
}
/// Expressions are tainted if and only if they involve a tainted register.
#[test]
fn eval_expression() {
let (state, _pi_state) = State::mock_with_pi_state();
let expr = Expression::BinOp {
lhs: Box::new(Expression::Var(register("RAX"))),
op: BinOpType::IntAdd,
rhs: Box::new(Expression::Var(register("RBX"))),
};
assert!(state.eval(&expr).is_tainted());
let expr = Expression::UnOp {
op: UnOpType::Int2Comp,
arg: Box::new(Expression::Var(register("RSP"))),
};
assert!(state.eval(&expr).is_top());
}
}
use crate::abstract_domain::{AbstractDomain, HasByteSize, HasTop, RegisterDomain};
use crate::intermediate_representation::*;
use crate::prelude::*;
use std::fmt::Display;
/// An abstract domain representing a value that is either tainted or not.
///
/// Both variants carry the size in bytes of the represented value.
///
/// Note that the [merge](Taint::merge)-function does not respect the partial order
/// that is implied by the naming scheme of the variants!
/// In fact the whole analysis does not enforce any partial order for this domain.
/// This means that in theory the fixpoint computation may not actually converge to a fixpoint,
/// but in practice the analysis can make more precise decisions
/// whether a value should be tainted or not.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Taint {
/// A tainted value of a particular bytesize.
Tainted(ByteSize),
/// An untainted value of a particular bytesize.
Top(ByteSize),
}
impl Display for Taint {
/// Print the value of a `Taint` object.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Tainted(size) => write!(f, "Tainted:{}", size),
Self::Top(size) => write!(f, "Top:{}", size),
}
}
}
impl AbstractDomain for Taint {
/// The result of merging two `Taint` values is tainted if at least one input was tainted.
fn merge(&self, other: &Self) -> Self {
use Taint::*;
match (self, other) {
(Tainted(size), _) | (_, Tainted(size)) => Tainted(*size),
_ => Top(self.bytesize()),
}
}
/// Checks whether the value is an untainted `Top`-value.
fn is_top(&self) -> bool {
matches!(self, Taint::Top(_))
}
}
impl HasByteSize for Taint {
/// The size in bytes of the `Taint` value.
fn bytesize(&self) -> ByteSize {
match self {
Self::Tainted(size) | Self::Top(size) => *size,
}
}
}
impl HasTop for Taint {
/// Get a new `Top`-value with the same bytesize as `self`.
/// The result is untainted regardless of whether `self` is tainted.
fn top(&self) -> Self {
Self::Top(self.bytesize())
}
}
impl RegisterDomain for Taint {
/// Get a new `Top`-value with the given bytesize.
fn new_top(bytesize: ByteSize) -> Self {
Self::Top(bytesize)
}
/// The result of a binary operation is tainted if at least one input value was tainted.
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self {
match (self, rhs) {
(Self::Tainted(_), _) | (_, Self::Tainted(_)) => {
Self::Tainted(self.bin_op_bytesize(op, rhs))
}
_ => Self::Top(self.bin_op_bytesize(op, rhs)),
}
}
/// The result of a unary operation is tainted if the input was tainted.
fn un_op(&self, _op: UnOpType) -> Self {
*self
}
/// A subpiece of a tainted value is again tainted.
fn subpiece(&self, _low_byte: ByteSize, size: ByteSize) -> Self {
if let Self::Tainted(_) = self {
Self::Tainted(size)
} else {
Self::Top(size)
}
}
/// The result of a cast operation is tainted if the input was tainted.
fn cast(&self, _kind: CastOpType, width: ByteSize) -> Self {
if let Self::Tainted(_) = self {
Self::Tainted(width)
} else {
Self::Top(width)
}
}
}
impl Taint {
    /// Checks whether the given value is in fact tainted,
    /// i.e. whether it is the `Tainted` variant.
    pub fn is_tainted(&self) -> bool {
        match self {
            Taint::Tainted(_) => true,
            Taint::Top(_) => false,
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// Merging is tainted as soon as one input is tainted.
#[test]
fn abstract_domain() {
let taint = Taint::Tainted(ByteSize::new(4));
let top = Taint::Top(ByteSize::new(4));
assert_eq!(taint.merge(&top), taint);
assert_eq!(top.merge(&top), top);
assert_eq!(taint.is_top(), false);
}
/// Register operations propagate taint from their inputs to their result.
#[test]
fn register_domain() {
use crate::intermediate_representation::*;
let taint = Taint::Tainted(ByteSize::new(4));
let top = Taint::Top(ByteSize::new(4));
assert_eq!(taint.bin_op(BinOpType::IntAdd, &top), taint);
assert_eq!(top.bin_op(BinOpType::IntMult, &top), top);
assert_eq!(taint.un_op(UnOpType::FloatFloor), taint);
assert_eq!(taint.subpiece(ByteSize::new(0), ByteSize::new(4)), taint);
assert_eq!(top.cast(CastOpType::IntZExt, ByteSize::new(4)), top);
}
}
......@@ -98,9 +98,10 @@ fn generate_cwe_warning(sub: &Term<Sub>, jmp: &Term<Jmp>, permission_const: u64)
///
/// Only the basic block right before the umask call is evaluated when trying to determine the parameter value of umask.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
_cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let mut cwes = Vec::new();
let mut log_messages = Vec::new();
let umask_symbol_map = get_symbol_map(project, &["umask".to_string()]);
......
......@@ -18,10 +18,11 @@ False Negatives
* None known
*/
use crate::prelude::*;
use std::collections::HashMap;
use crate::{
intermediate_representation::{ExternSymbol, Program, Project, Sub, Term, Tid},
intermediate_representation::{ExternSymbol, Program, Sub, Term, Tid},
utils::{
log::{CweWarning, LogMessage},
symbol_utils::get_calls_to_symbols,
......@@ -106,9 +107,10 @@ pub fn resolve_symbols<'a>(
}
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let prog: &Term<Program> = &project.program;
let subfunctions: &Vec<Term<Sub>> = &prog.term.subs;
......
......@@ -14,10 +14,11 @@ False Negatives:
* There are other ways to expose I/O control without access control.
*/
use crate::prelude::*;
use std::collections::HashMap;
use crate::{
intermediate_representation::{Program, Project, Sub, Term, Tid},
intermediate_representation::{Program, Sub, Term, Tid},
utils::{
log::{CweWarning, LogMessage},
symbol_utils::{find_symbol, get_calls_to_symbols},
......@@ -66,9 +67,10 @@ pub fn generate_cwe_warning(calls: &[(&str, &Tid, &str)]) -> Vec<CweWarning> {
}
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
_cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let prog: &Term<Program> = &project.program;
let mut warnings: Vec<CweWarning> = Vec::new();
if let Some((tid, name)) = find_symbol(prog, "ioctl") {
......
......@@ -17,7 +17,8 @@ fn run_pointer_inference(program_jsonbuilder_val: ocaml::Value) -> (Vec<CweWarni
let config: crate::analysis::pointer_inference::Config =
serde_json::from_value(crate::utils::read_config_file("config.json")["Memory"].clone())
.unwrap();
let (mut logs, cwes) = crate::analysis::pointer_inference::run(&project, config, false);
let pi_analysis = crate::analysis::pointer_inference::run(&project, config, false);
let (mut logs, cwes) = pi_analysis.collected_logs;
all_logs.append(&mut logs);
(
cwes,
......
......@@ -331,6 +331,13 @@ impl Project {
pub fn get_pointer_bytesize(&self) -> ByteSize {
self.stack_pointer_register.size
}
/// Try to guess a standard calling convention from the list of calling conventions in the project.
///
/// Currently this returns the first calling convention with the name `__stdcall`
/// and `None` if the project contains no calling convention of that name.
pub fn get_standard_calling_convention(&self) -> Option<&CallingConvention> {
self.calling_conventions
.iter()
.find(|cconv| cconv.name == "__stdcall")
}
}
impl Project {
......@@ -437,6 +444,54 @@ impl Project {
mod tests {
use super::*;
impl Blk {
/// Create a mock basic block with the TID `block` that contains neither `Def`s nor `Jmp`s.
pub fn mock() -> Term<Blk> {
Term {
tid: Tid::new("block"),
term: Blk {
defs: Vec::new(),
jmps: Vec::new(),
},
}
}
}
impl Sub {
/// Create a mock function without any blocks.
/// The given name is used both as the name of the function and for its TID.
pub fn mock(name: impl ToString) -> Term<Sub> {
Term {
tid: Tid::new(name.to_string()),
term: Sub {
name: name.to_string(),
blocks: Vec::new(),
},
}
}
}
impl Program {
/// Create a mock program without functions, extern symbols or entry points.
pub fn mock_empty() -> Program {
Program {
subs: Vec::new(),
extern_symbols: Vec::new(),
entry_points: Vec::new(),
}
}
}
impl Project {
/// Create a mock project around an empty program.
/// The project uses `x86_64` as CPU architecture, the 8-byte register `RSP` as stack pointer
/// and contains no calling conventions.
pub fn mock_empty() -> Project {
Project {
program: Term {
tid: Tid::new("program_tid"),
term: Program::mock_empty(),
},
cpu_architecture: "x86_64".to_string(),
stack_pointer_register: Variable::mock("RSP", 8u64),
calling_conventions: Vec::new(),
}
}
}
#[test]
fn retarget_nonexisting_jumps() {
let mut jmp_term = Term {
......
......@@ -15,3 +15,18 @@ pub struct Variable {
pub size: ByteSize,
pub is_temp: bool,
}
#[cfg(test)]
mod tests {
use super::*;
impl Variable {
/// Create a non-temporary variable with the given name and size for unit tests.
pub fn mock(name: impl ToString, size_in_bytes: impl Into<ByteSize>) -> Variable {
Variable {
name: name.to_string(),
size: size_in_bytes.into(),
is_temp: false,
}
}
}
}
......@@ -7,6 +7,7 @@ Parts of the cwe_checker that are written in Rust.
#[macro_use]
extern crate ocaml;
use crate::analysis::pointer_inference::PointerInference;
use crate::intermediate_representation::Project;
use crate::utils::log::{CweWarning, LogMessage};
......@@ -27,11 +28,13 @@ mod prelude {
pub use crate::bil::{BitSize, Bitvector};
pub use crate::intermediate_representation::ByteSize;
pub use crate::intermediate_representation::{Term, Tid};
pub use crate::AnalysisResults;
pub use anyhow::{anyhow, Error};
}
/// The generic function signature for the main function of a CWE module
pub type CweModuleFn = fn(&Project, &serde_json::Value) -> (Vec<LogMessage>, Vec<CweWarning>);
pub type CweModuleFn =
fn(&AnalysisResults, &serde_json::Value) -> (Vec<LogMessage>, Vec<CweWarning>);
/// A structure containing general information about a CWE analysis module,
/// including the function to be called to run the analysis.
......@@ -55,9 +58,51 @@ pub fn get_modules() -> Vec<&'static CweModule> {
&crate::checkers::cwe_332::CWE_MODULE,
&crate::checkers::cwe_426::CWE_MODULE,
&crate::checkers::cwe_467::CWE_MODULE,
&crate::checkers::cwe_476::CWE_MODULE,
&crate::checkers::cwe_560::CWE_MODULE,
&crate::checkers::cwe_782::CWE_MODULE,
&crate::checkers::cwe_676::CWE_MODULE,
&crate::checkers::cwe_782::CWE_MODULE,
&crate::analysis::pointer_inference::CWE_MODULE,
]
}
/// A struct containing pointers to all known analysis results
/// that may be needed as input for other analyses and CWE checks.
///
/// The struct only holds references, so copying it is cheap.
#[derive(Clone, Copy)]
pub struct AnalysisResults<'a> {
/// A pointer to the project struct.
pub project: &'a Project,
/// The result of the pointer inference analysis if already computed.
/// `None` if the analysis has not been computed or was not set.
pub pointer_inference: Option<&'a PointerInference<'a>>,
}
impl<'a> AnalysisResults<'a> {
/// Create a new `AnalysisResults` struct with only the project itself known.
/// The pointer inference results are initialized to `None`.
pub fn new(project: &'a Project) -> AnalysisResults<'a> {
AnalysisResults {
project,
pointer_inference: None,
}
}
/// Compute the pointer inference analysis.
/// The result gets returned, but not saved to the `AnalysisResults` struct itself.
///
/// The given JSON value is deserialized into the configuration of the analysis.
///
/// # Panics
///
/// Panics if `config` cannot be deserialized into the pointer inference configuration.
pub fn compute_pointer_inference(&self, config: &serde_json::Value) -> PointerInference<'a> {
crate::analysis::pointer_inference::run(
self.project,
serde_json::from_value(config.clone()).unwrap(),
false,
)
}
/// Create a new `AnalysisResults` struct containing the given pointer inference analysis results.
///
/// All other fields are taken over from `self`.
/// Passing `None` yields a struct without pointer inference results.
pub fn set_pointer_inference<'b: 'a>(
self,
pi_results: Option<&'b PointerInference<'a>>,
) -> AnalysisResults<'b> {
AnalysisResults {
pointer_inference: pi_results,
..self
}
}
}
use crate::prelude::*;
use std::thread::JoinHandle;
/// A CWE warning message.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Default)]
......@@ -180,3 +181,94 @@ pub fn print_all_messages(
print!("{}", output);
}
}
/// The message types a logging thread can receive.
/// See the [`LogThread`] type for more information.
///
/// Values of this type are what gets sent through the sender
/// returned by [`LogThread::get_msg_sender()`].
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum LogThreadMsg {
/// A normal log message.
Log(LogMessage),
/// A CWE warning.
Cwe(CweWarning),
/// If the log collector thread receives this signal,
/// it should stop receiving new messages
/// and instead terminate and return the messages collected prior to receiving the termination signal.
Terminate,
}
/// A type for managing threads for collecting log messages.
///
/// With [`LogThread::spawn()`] one can create a new log thread
/// whose handle is contained in the returned `LogThread` struct.
/// By calling the [`collect()`](LogThread::collect()) method
/// one can tell the log thread to shut down
/// and return the logs collected to this point.
/// If the `LogThread` object gets dropped before calling `collect()`,
/// the corresponding logging thread will be stopped
/// and all collected logs will be discarded.
///
/// If one deliberately wants to discard all logging messages,
/// one can simply create a sender to a disconnected channel
/// via [`LogThread::create_disconnected_sender()`].
pub struct LogThread {
// Sender used to deliver messages (including `Terminate`) to the logging thread.
msg_sender: crossbeam_channel::Sender<LogThreadMsg>,
// Join handle of the logging thread.
// It is `None` once the handle has been taken by `collect()` or `drop()`.
thread_handle: Option<JoinHandle<(Vec<LogMessage>, Vec<CweWarning>)>>,
}
impl Drop for LogThread {
/// If the logging thread still exists,
/// send it the `Terminate` signal.
/// Then wait until the logging thread stopped.
fn drop(&mut self) {
// Make sure the logging thread gets terminated when dropping this.
let _ = self.msg_sender.send(LogThreadMsg::Terminate);
if let Some(handle) = self.thread_handle.take() {
let _ = handle.join();
}
}
}
impl LogThread {
    /// Create a new `LogThread` object with a handle to a freshly spawned logging collector thread.
    ///
    /// The parameter is the function containing the actual log collection logic.
    /// I.e. the function should receive messages through the given receiver until the channel disconnects
    /// or until it receives a [`LogThreadMsg::Terminate`] message.
    /// After that it should return the logs collected up to that point.
    pub fn spawn<F>(collector_func: F) -> LogThread
    where
        F: FnOnce(crossbeam_channel::Receiver<LogThreadMsg>) -> (Vec<LogMessage>, Vec<CweWarning>)
            + Send
            + 'static,
    {
        let (sender, receiver) = crossbeam_channel::unbounded();
        // The receiving end is moved into the collector thread,
        // the sending end stays with the returned `LogThread`.
        let thread_handle = std::thread::spawn(move || collector_func(receiver));
        LogThread {
            msg_sender: sender,
            thread_handle: Some(thread_handle),
        }
    }

    /// Just create a disconnected sender to a (non-existing) logging thread.
    /// Can be used like a sender to a channel that deliberately discards all messages sent to it.
    pub fn create_disconnected_sender() -> crossbeam_channel::Sender<LogThreadMsg> {
        // The receiver is not kept, so nothing ever reads from this channel.
        let (sender, _) = crossbeam_channel::unbounded();
        sender
    }

    /// Get a sender that can be used to send messages to the logging thread corresponding to this `LogThread` instance.
    pub fn get_msg_sender(&self) -> crossbeam_channel::Sender<LogThreadMsg> {
        self.msg_sender.clone()
    }

    /// Stop the logging thread by sending it the [`LogThreadMsg::Terminate`] signal
    /// and then return all logs collected until that point.
    ///
    /// Returns two empty vectors if the thread handle is no longer available.
    ///
    /// # Panics
    ///
    /// Panics if the logging thread itself panicked.
    pub fn collect(mut self) -> (Vec<LogMessage>, Vec<CweWarning>) {
        // A failed send is ignored on purpose; joining below still yields the result.
        let _ = self.msg_sender.send(LogThreadMsg::Terminate);
        match self.thread_handle.take() {
            Some(handle) => handle.join().expect("The logging thread panicked"),
            None => (Vec::new(), Vec::new()),
        }
    }
}
......@@ -363,6 +363,35 @@ mod tests {
/// Run the CWE476 check against all `cwe_476` test binaries
/// and expect exactly one `[CWE476]` hit per binary that is not skipped.
#[test]
#[ignore]
fn cwe_476() {
    let mut tests = all_test_cases("cwe_476", "CWE476");

    // TODO: Check reason for failure!
    for arch in ["mips64", "mips64el", "mips", "mipsel"].iter() {
        mark_skipped(&mut tests, *arch, "gcc");
    }
    // Ghidra generates mangled function names here for some reason.
    mark_architecture_skipped(&mut tests, "ppc64");
    mark_architecture_skipped(&mut tests, "ppc64le");
    // TODO: Check reason for failure!
    mark_compiler_skipped(&mut tests, "mingw32-gcc");

    let expected_occurrences = 1;
    // Keep the file path together with the error for every failing test case.
    let error_log: Vec<_> = tests
        .into_iter()
        .filter_map(
            |test_case| match test_case.run_test("[CWE476]", expected_occurrences) {
                Err(error) => Some((test_case.get_filepath(), error)),
                Ok(_) => None,
            },
        )
        .collect();

    if error_log.is_empty() {
        return;
    }
    print_errors(error_log);
    panic!();
}
#[test]
#[ignore]
fn cwe_560() {
let mut error_log = Vec::new();
let mut tests = linux_test_cases("cwe_560", "CWE560");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment