Unverified Commit 64238b18 by Enkelmann Committed by GitHub

Reimplement CWE 476 check in Rust (#111)

parent 1e367e58
use cwe_checker_rs::intermediate_representation::Project;
use cwe_checker_rs::utils::log::print_all_messages;
use cwe_checker_rs::utils::{get_ghidra_plugin_path, read_config_file};
use cwe_checker_rs::AnalysisResults;
use std::collections::HashSet;
use std::path::Path;
use std::process::Command;
......@@ -138,6 +139,17 @@ fn run_with_ghidra(args: CmdlineArgs) {
let mut project = get_project_from_ghidra(&Path::new(&args.binary.unwrap()));
// Normalize the project and gather log messages generated from it.
let mut all_logs = project.normalize();
let mut analysis_results = AnalysisResults::new(&project);
let pointer_inference_results = if modules
.iter()
.any(|module| module.name == "CWE476" || module.name == "Memory")
{
Some(analysis_results.compute_pointer_inference(&config["Memory"]))
} else {
None
};
analysis_results = analysis_results.set_pointer_inference(pointer_inference_results.as_ref());
// Print debug and then return.
// Right now there is only one debug printing function.
......@@ -154,7 +166,7 @@ fn run_with_ghidra(args: CmdlineArgs) {
// Execute the modules and collect their logs and CWE-warnings.
let mut all_cwes = Vec::new();
for module in modules {
let (mut logs, mut cwes) = (module.run)(&project, &config[&module.name]);
let (mut logs, mut cwes) = (module.run)(&analysis_results, &config[&module.name]);
all_logs.append(&mut logs);
all_cwes.append(&mut cwes);
}
......
......@@ -203,17 +203,7 @@ impl RegisterDomain for BitvectorDomain {
BitvectorDomain::new_top(self.bytesize())
}
},
_ => match op {
Piece => BitvectorDomain::new_top(self.bytesize() + rhs.bytesize()),
IntAdd | IntSub | IntMult | IntDiv | IntSDiv | IntRem | IntSRem | IntLeft
| IntRight | IntSRight | IntAnd | IntOr | IntXOr | FloatAdd | FloatSub
| FloatMult | FloatDiv => BitvectorDomain::new_top(self.bytesize()),
IntEqual | IntNotEqual | IntLess | IntLessEqual | IntSLess | IntSLessEqual
| IntCarry | IntSCarry | IntSBorrow | BoolAnd | BoolOr | BoolXOr | FloatEqual
| FloatNotEqual | FloatLess | FloatLessEqual => {
BitvectorDomain::new_top(ByteSize::new(1))
}
},
_ => BitvectorDomain::new_top(self.bin_op_bytesize(op, rhs)),
}
}
......
......@@ -114,6 +114,12 @@ impl<T: AbstractDomain + HasByteSize + RegisterDomain + std::fmt::Debug> MemRegi
pub fn add(&mut self, value: T, position: Bitvector) {
    // The width of the address has to match the address size of this memory region.
    assert_eq!(ByteSize::from(position.width()), self.address_bytesize);
    // Convert the address to a signed byte index (panics if the conversion fails).
    let byte_index = Int::from(position).try_to_i64().unwrap();
    self.insert_at_byte_index(value, byte_index);
}
/// Insert a value into the memory region at the given position.
/// The position is the index (in bytes) in the memory region.
pub fn insert_at_byte_index(&mut self, value: T, position: i64) {
let size_in_bytes = u64::from(value.bytesize()) as i64;
assert!(size_in_bytes > 0);
......
......@@ -69,4 +69,19 @@ pub trait RegisterDomain: AbstractDomain + HasByteSize + HasTop {
/// Perform a typecast to extend a bitvector or to cast between integer and floating point types.
fn cast(&self, kind: CastOpType, width: ByteSize) -> Self;
/// Compute the size in bytes of the result of applying the binary operation `op`
/// with `self` as left-hand operand and `rhs` as right-hand operand.
///
/// This is a generic default implementation that should not be overwritten!
fn bin_op_bytesize(&self, op: BinOpType, rhs: &Self) -> ByteSize {
    use BinOpType::*;
    match op {
        // Comparisons, carry/borrow checks and boolean operations yield a one-byte result.
        IntEqual | IntNotEqual | IntLess | IntLessEqual | IntSLess | IntSLessEqual
        | IntCarry | IntSCarry | IntSBorrow | BoolAnd | BoolOr | BoolXOr | FloatEqual
        | FloatNotEqual | FloatLess | FloatLessEqual => ByteSize::new(1),
        // Arithmetic, shift and bitwise operations keep the size of the left-hand operand.
        IntAdd | IntSub | IntMult | IntDiv | IntSDiv | IntRem | IntSRem | IntLeft
        | IntRight | IntSRight | IntAnd | IntOr | IntXOr | FloatAdd | FloatSub | FloatMult
        | FloatDiv => self.bytesize(),
        // The size of a `Piece` result is the sum of the sizes of both operands.
        Piece => self.bytesize() + rhs.bytesize(),
    }
}
}
......@@ -23,14 +23,12 @@ pub struct Context<'a> {
pub project: &'a Project,
/// Maps the TIDs of functions that shall be treated as extern symbols to the `ExternSymbol` object representing it.
pub extern_symbol_map: BTreeMap<Tid, &'a ExternSymbol>,
/// A channel where found CWE warnings should be sent to.
/// A channel where found CWE warnings and log messages should be sent to.
/// The receiver may filter or modify the warnings before presenting them to the user.
/// For example, the same CWE warning will be found several times
/// if the fixpoint computation does not instantly stabilize at the corresponding code point.
/// These duplicates need to be filtered out.
pub cwe_collector: crossbeam_channel::Sender<CweWarning>,
/// A channel where log messages should be sent to.
pub log_collector: crossbeam_channel::Sender<LogMessage>,
pub log_collector: crossbeam_channel::Sender<LogThreadMsg>,
/// Names of `malloc`-like extern functions.
pub allocation_symbols: Vec<String>,
/// Names of `free`-like extern functions.
......@@ -43,8 +41,7 @@ impl<'a> Context<'a> {
pub fn new(
project: &Project,
config: Config,
cwe_collector: crossbeam_channel::Sender<CweWarning>,
log_collector: crossbeam_channel::Sender<LogMessage>,
log_collector: crossbeam_channel::Sender<LogThreadMsg>,
) -> Context {
let mut extern_symbol_map = BTreeMap::new();
for symbol in project.program.term.extern_symbols.iter() {
......@@ -63,7 +60,6 @@ impl<'a> Context<'a> {
graph,
project,
extern_symbol_map,
cwe_collector,
log_collector,
allocation_symbols: config.allocation_symbols,
deallocation_symbols: config.deallocation_symbols,
......@@ -78,7 +74,7 @@ impl<'a> Context<'a> {
if let Some(loc) = location {
log_message = log_message.location(loc.clone());
};
self.log_collector.send(log_message).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Log(log_message));
}
}
......@@ -196,7 +192,7 @@ impl<'a> Context<'a> {
call.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
} else {
self.log_debug(
......@@ -244,7 +240,7 @@ impl<'a> Context<'a> {
extern_symbol.name, call.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
}
Err(err) => self.log_debug(
......@@ -349,12 +345,7 @@ impl<'a> Context<'a> {
/// We also assume that the function does not use any parameters saved on the stack,
/// which may greatly reduce correctness of the analysis for the x86_32 architecture.
fn handle_call_to_generic_unknown_function(&self, state_before_call: &State) -> Option<State> {
if let Some(calling_conv) = self
.project
.calling_conventions
.iter()
.find(|cconv| cconv.name == "__stdcall")
{
if let Some(calling_conv) = self.project.get_standard_calling_convention() {
let mut new_state = state_before_call.clone();
new_state.clear_non_callee_saved_register(&calling_conv.callee_saved_register[..]);
// Adjust stack register value (for x86 architecture).
......
......@@ -108,9 +108,8 @@ fn context_problem_implementation() {
use Expression::*;
let (project, config) = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, config, cwe_sender, log_sender);
let context = Context::new(&project, config, log_sender);
let mut state = State::new(&register("RSP"), Tid::new("main"));
let def = Term {
......@@ -271,9 +270,8 @@ fn update_return() {
use crate::analysis::pointer_inference::object::ObjectType;
use crate::analysis::pointer_inference::Data;
let (project, config) = mock_project();
let (cwe_sender, _cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, _log_receiver) = crossbeam_channel::unbounded();
let context = Context::new(&project, config, cwe_sender, log_sender);
let context = Context::new(&project, config, log_sender);
let state_before_return = State::new(&register("RSP"), Tid::new("callee"));
let mut state_before_return = context
.update_def(
......
......@@ -29,7 +29,7 @@ impl<'a> crate::analysis::interprocedural_fixpoint::Context<'a> for Context<'a>
def.tid.address
),
};
self.cwe_collector.send(warning).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Cwe(warning));
}
match &def.term {
......
......@@ -38,11 +38,11 @@ const VERSION: &str = "0.1";
pub static CWE_MODULE: crate::CweModule = crate::CweModule {
name: "Memory",
version: VERSION,
run: run_analysis,
run: extract_pi_analysis_results,
};
/// The abstract domain type for representing register values.
type Data = DataDomain<BitvectorDomain>;
pub type Data = DataDomain<BitvectorDomain>;
/// Configurable parameters for the analysis.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
......@@ -60,7 +60,8 @@ pub struct Config {
/// A wrapper struct for the pointer inference computation object.
pub struct PointerInference<'a> {
computation: Computation<'a, Context<'a>>,
log_collector: crossbeam_channel::Sender<LogMessage>,
log_collector: crossbeam_channel::Sender<LogThreadMsg>,
pub collected_logs: (Vec<LogMessage>, Vec<CweWarning>),
}
impl<'a> PointerInference<'a> {
......@@ -68,10 +69,9 @@ impl<'a> PointerInference<'a> {
pub fn new(
project: &'a Project,
config: Config,
cwe_sender: crossbeam_channel::Sender<CweWarning>,
log_sender: crossbeam_channel::Sender<LogMessage>,
log_sender: crossbeam_channel::Sender<LogThreadMsg>,
) -> PointerInference<'a> {
let context = Context::new(project, config, cwe_sender, log_sender.clone());
let context = Context::new(project, config, log_sender.clone());
let mut entry_sub_to_entry_blocks_map = HashMap::new();
let subs: HashMap<Tid, &Term<Sub>> = project
......@@ -108,12 +108,10 @@ impl<'a> PointerInference<'a> {
.collect();
let mut fixpoint_computation =
super::interprocedural_fixpoint::Computation::new(context, None);
log_sender
.send(LogMessage::new_debug(format!(
let _ = log_sender.send(LogThreadMsg::Log(LogMessage::new_debug(format!(
"Pointer Inference: Adding {} entry points",
entry_sub_to_entry_node_map.len()
)))
.unwrap();
))));
for (sub_tid, start_node_index) in entry_sub_to_entry_node_map.into_iter() {
fixpoint_computation.set_node_value(
start_node_index,
......@@ -126,6 +124,7 @@ impl<'a> PointerInference<'a> {
PointerInference {
computation: fixpoint_computation,
log_collector: log_sender,
collected_logs: (Vec::new(), Vec::new()),
}
}
......@@ -175,6 +174,14 @@ impl<'a> PointerInference<'a> {
self.computation.get_graph()
}
/// Get a reference to the context object of the underlying fixpoint computation.
pub fn get_context(&self) -> &Context {
self.computation.get_context()
}
/// Look up the node value that the underlying fixpoint computation holds for the given node.
/// The lookup is delegated to the computation object; `None` is returned if it has no value for the node.
pub fn get_node_value(&self, node_id: NodeIndex) -> Option<&NodeValue<State>> {
self.computation.get_node_value(node_id)
}
/// Add speculative entry points to the fixpoint algorithm state.
///
/// Since indirect jumps and calls are not handled yet (TODO: change that),
......@@ -258,92 +265,107 @@ impl<'a> PointerInference<'a> {
fn log_debug(&self, msg: impl Into<String>) {
let log_msg = LogMessage::new_debug(msg.into());
self.log_collector.send(log_msg).unwrap();
let _ = self.log_collector.send(LogThreadMsg::Log(log_msg));
}
}
/// The main entry point for executing the pointer inference analysis.
pub fn run_analysis(
project: &Project,
analysis_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let config: Config = serde_json::from_value(analysis_params.clone()).unwrap();
run(project, config, false)
/// Run the pointer inference fixpoint algorithm in three successive rounds,
/// adding more functions as possible entry points in each round
/// to increase code coverage.
///
/// After each round the blocks with a state are counted via `count_blocks_with_state`.
pub fn compute_with_speculative_entry_points(&mut self, project: &Project) {
    // First round: compute with the entry points that are already set.
    self.compute();
    self.count_blocks_with_state();
    // Second round (flag `true`): compute again with speculative entry points added.
    // Third round (flag `false`): compute again with all missed functions as additional entry points.
    for &flag in &[true, false] {
        self.add_speculative_entry_points(project, flag);
        self.compute();
        self.count_blocks_with_state();
    }
}
}
/// Generate and execute the pointer inference analysis.
/// Returns a vector of all found CWE warnings and a vector of all log messages generated during analysis.
pub fn run(
project: &Project,
config: Config,
print_debug: bool,
/// The entry point for the memory analysis check.
/// Does not actually compute anything
/// but just extracts the results of the already computed pointer inference analysis.
pub fn extract_pi_analysis_results(
analysis_results: &AnalysisResults,
_analysis_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
let (log_sender, log_receiver) = crossbeam_channel::unbounded();
let warning_collector_thread = std::thread::spawn(move || collect_cwe_warnings(cwe_receiver));
let log_collector_thread = std::thread::spawn(move || collect_logs(log_receiver));
{
// Scope the computation object so that it is dropped before the warning collector thread is joined.
// Else the warning collector thread will not terminate (the cwe_sender needs to be dropped for it to terminate).
let mut computation = PointerInference::new(project, config, cwe_sender, log_sender);
let pi_anaylsis = analysis_results.pointer_inference.unwrap();
pi_anaylsis.collected_logs.clone()
}
computation.compute();
computation.count_blocks_with_state();
/// Compute the pointer inference analysis and return its results.
///
/// If `print_debug` is set to `true` print debug information to *stdout*.
/// Note that the format of the debug information is currently unstable and subject to change.
pub fn run(project: &Project, config: Config, print_debug: bool) -> PointerInference {
let logging_thread = LogThread::spawn(collect_all_logs);
// Now compute again with speculative entry points added
computation.add_speculative_entry_points(project, true);
computation.compute();
computation.count_blocks_with_state();
let mut computation = PointerInference::new(project, config, logging_thread.get_msg_sender());
// Now compute again with all missed functions as additional entry points
computation.add_speculative_entry_points(project, false);
computation.compute();
computation.count_blocks_with_state();
computation.compute_with_speculative_entry_points(project);
if print_debug {
computation.print_compact_json();
}
}
// Return the CWE warnings
(
log_collector_thread.join().unwrap(),
warning_collector_thread.join().unwrap(),
)
}
/// Collect CWE warnings from the receiver until the channel is closed. Then return them.
fn collect_cwe_warnings(receiver: crossbeam_channel::Receiver<CweWarning>) -> Vec<CweWarning> {
let mut collected_warnings = HashMap::new();
while let Ok(warning) = receiver.recv() {
match &warning.addresses[..] {
[] => unimplemented!(),
[address, ..] => {
collected_warnings.insert(address.clone(), warning);
}
}
}
collected_warnings
.drain()
.map(|(_key, value)| value)
.collect()
// save the logs and CWE warnings
computation.collected_logs = logging_thread.collect();
computation
}
/// Collect log messages from the receiver until the channel is closed. Then return them.
fn collect_logs(receiver: crossbeam_channel::Receiver<LogMessage>) -> Vec<LogMessage> {
/// This function is responsible for collecting logs and CWE warnings.
/// For warnings with the same origin address only the last one is kept.
/// This prevents duplicates but may suppress some log messages
/// in the rare case that several different log messages with the same origin address are generated.
fn collect_all_logs(
receiver: crossbeam_channel::Receiver<LogThreadMsg>,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let mut logs_with_address = HashMap::new();
let mut general_logs = Vec::new();
while let Ok(log_message) = receiver.recv() {
let mut collected_cwes = HashMap::new();
while let Ok(log_thread_msg) = receiver.recv() {
match log_thread_msg {
LogThreadMsg::Log(log_message) => {
if let Some(ref tid) = log_message.location {
logs_with_address.insert(tid.address.clone(), log_message);
} else {
general_logs.push(log_message);
}
}
logs_with_address
LogThreadMsg::Cwe(cwe_warning) => match &cwe_warning.addresses[..] {
[] => panic!("Unexpected CWE warning without origin address"),
[address, ..] => {
collected_cwes.insert(address.clone(), cwe_warning);
}
},
LogThreadMsg::Terminate => break,
}
}
let logs = logs_with_address
.values()
.cloned()
.chain(general_logs.into_iter())
.collect()
.collect();
let cwes = collected_cwes.drain().map(|(_key, value)| value).collect();
(logs, cwes)
}
#[cfg(test)]
mod tests {
use super::*;
impl<'a> PointerInference<'a> {
    /// Create a `PointerInference` object for tests.
    /// The configuration contains `malloc` as the only allocation symbol
    /// and `free` as the only deallocation symbol.
    pub fn mock(project: &'a Project) -> PointerInference<'a> {
        // The receiving end of the channel is not kept; it is dropped right away.
        let (log_sender, _) = crossbeam_channel::unbounded();
        let allocation_symbols = vec![String::from("malloc")];
        let deallocation_symbols = vec![String::from("free")];
        PointerInference::new(
            project,
            Config {
                allocation_symbols,
                deallocation_symbols,
            },
            log_sender,
        )
    }
}
}
......@@ -2,6 +2,7 @@ pub mod cwe_190;
pub mod cwe_332;
pub mod cwe_426;
pub mod cwe_467;
pub mod cwe_476;
pub mod cwe_560;
pub mod cwe_676;
pub mod cwe_782;
......@@ -100,9 +100,10 @@ fn generate_cwe_warning(callsite: &Tid, called_symbol: &ExternSymbol) -> CweWarn
/// For each call to one of the symbols configured in config.json
/// we check whether the block containing the call also contains a multiplication instruction.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
let symbol_map = get_symbol_map(project, &config.symbols);
......
......@@ -20,7 +20,6 @@
//!
//! - It is not checked whether the seeding function gets called before the random number generator function.
use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage};
use crate::utils::symbol_utils::find_symbol;
......@@ -56,9 +55,10 @@ fn generate_cwe_warning(secure_initializer_func: &str, rand_func: &str) -> CweWa
/// Run the CWE check. See the module-level description for more information.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
......
......@@ -70,9 +70,10 @@ fn generate_cwe_warning(sub: &Term<Sub>) -> CweWarning {
/// We check whether a function calls both `system(..)` and a privilege changing function.
/// For each such function a CWE warning is generated.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
let mut privilege_changing_symbols = HashMap::new();
......
......@@ -103,9 +103,10 @@ fn generate_cwe_warning(jmp: &Term<Jmp>, extern_symbol: &ExternSymbol) -> CweWar
/// we check whether a parameter has value `sizeof(void*)`,
/// which may indicate an instance of CWE 467.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
......
//! This module implements a check for CWE-476: NULL Pointer Dereference.
//!
//! Functions like `malloc()` may return NULL values instead of pointers to indicate
//! failed calls. If one tries to access memory through this return value without
//! checking it for being NULL first, this can crash the program.
//!
//! See <https://cwe.mitre.org/data/definitions/476.html> for a detailed description.
//!
//! ## How the check works
//!
//! Using dataflow analysis we search for an execution path where a memory access using the return value of
//! a symbol happens before the return value is checked through a conditional jump instruction.
//!
//! ### Symbols configurable in config.json
//!
//! The symbols are the functions whose return values are assumed to be potential
//! NULL pointers.
//!
//! ## False Positives
//!
//! - If a possible NULL pointer is temporarily saved in a memory location
//! that the [Pointer Inference analysis](crate::analysis::pointer_inference) could not track,
//! the analysis may miss a correct NULL pointer check and thus generate false positives.
//! - The analysis is intraprocedural.
//! If a parameter to a function is a potential NULL pointer,
//! this gets flagged as a CWE hit even if the function may expect NULL pointers in its parameters.
//! If a function returns a potential NULL pointer this gets flagged as a CWE hit,
//! although the function may be supposed to return potential NULL pointers.
//!
//! ## False Negatives
//!
//! - We do not check whether an access to a potential NULL pointer happens regardless
//! of a prior check.
//! - We do not check whether the conditional jump instruction checks specifically
//! for the return value being NULL or something else.
//! - For functions with more than one return value we do not distinguish between
//! the return values.
use crate::analysis::graph::{Edge, Node};
use crate::analysis::interprocedural_fixpoint::Computation;
use crate::analysis::interprocedural_fixpoint::Context as _;
use crate::analysis::interprocedural_fixpoint::NodeValue;
use crate::intermediate_representation::*;
use crate::prelude::*;
use crate::utils::log::{CweWarning, LogMessage};
use crate::CweModule;
use petgraph::visit::EdgeRef;
use std::collections::HashMap;
mod state;
use state::*;
mod taint;
use taint::*;
mod context;
use context::*;
/// The module description of the CWE-476 check:
/// its name, its version and the function (`check_cwe`) that runs the check.
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE476",
version: "0.3",
run: check_cwe,
};
/// The configuration struct of the CWE-476 check.
/// It is deserialized from the JSON parameters passed to `check_cwe`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Config {
/// The names of symbols for which the analysis should check
/// whether the return values are checked for being a Null pointer by the analysed binary.
symbols: Vec<String>,
}
/// Run the CWE check.
///
/// We check whether the return values of symbols configurable in the config file are being checked for Null pointers
/// before any memory access (and thus potential Null pointer dereferences) through these values happen.
///
/// For every call to one of the configured symbols a separate fixpoint computation is started
/// with that call set as the taint source. Each computation is run via `compute_with_max_steps(100)`.
/// The generated CWE warnings are deduplicated by their first address:
/// for warnings with the same first address only the last received one is kept.
/// The returned list of log messages is always empty.
///
/// # Panics
///
/// Panics if
/// - `analysis_results` does not contain the results of the pointer inference analysis,
/// - `cwe_params` cannot be deserialized into a [`Config`],
/// - the target node of an extern call stub edge is not a block start node,
/// - a CWE warning without any address was generated.
pub fn check_cwe(
    analysis_results: &AnalysisResults,
    cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
    let project = analysis_results.project;
    let pointer_inference_results = analysis_results
        .pointer_inference
        .expect("CWE476: The pointer inference results are required but were not computed");
    let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
    let config: Config = serde_json::from_value(cwe_params.clone())
        .expect("CWE476: Could not parse the configuration parameters of the check");
    let symbol_map = crate::utils::symbol_utils::get_symbol_map(project, &config.symbols[..]);
    let general_context = Context::new(project, &pointer_inference_results, cwe_sender);

    for edge in general_context.get_graph().edge_references() {
        if let Edge::ExternCallStub(jmp) = edge.weight() {
            if let Jmp::Call { target, .. } = &jmp.term {
                if let Some(symbol) = symbol_map.get(target) {
                    // The analysis starts at the target node of the extern call stub edge.
                    let node = edge.target();
                    let current_sub = match general_context.get_graph()[node] {
                        Node::BlkStart(_blk, sub) => sub,
                        _ => panic!(
                            "CWE476: The target of an extern call stub edge is not a block start node"
                        ),
                    };
                    // Each taint source gets its own copy of the context and its own computation.
                    let mut context = general_context.clone();
                    context.set_taint_source(jmp, current_sub);
                    let pi_state_at_taint_source =
                        match pointer_inference_results.get_node_value(node) {
                            Some(NodeValue::Value(val)) => Some(val.clone()),
                            _ => None,
                        };
                    let mut computation = Computation::new(context, None);
                    computation.set_node_value(
                        node,
                        NodeValue::Value(State::new(
                            symbol,
                            &project.stack_pointer_register,
                            pi_state_at_taint_source.as_ref(),
                        )),
                    );
                    computation.compute_with_max_steps(100);
                }
            }
        }
    }

    // `try_iter` does not block, so only the warnings already sent through the channel are collected.
    // Keying the map by the first address of a warning filters out duplicates.
    let mut cwe_warnings = HashMap::new();
    for cwe in cwe_receiver.try_iter() {
        match &cwe.addresses[..] {
            [taint_source_address, ..] => cwe_warnings.insert(taint_source_address.clone(), cwe),
            [] => panic!("Unexpected CWE warning without origin address"),
        };
    }
    let cwe_warnings = cwe_warnings.into_iter().map(|(_, cwe)| cwe).collect();

    (Vec::new(), cwe_warnings)
}
use crate::abstract_domain::{AbstractDomain, HasByteSize, HasTop, RegisterDomain};
use crate::intermediate_representation::*;
use crate::prelude::*;
use std::fmt::Display;
/// An abstract domain representing a value that is either tainted or not.
///
/// Note that the [merge](Taint::merge)-function does not respect the partial order
/// that is implied by the naming scheme of the variants!
/// In fact the whole analysis does not enforce any partial order for this domain.
/// This means that in theory the fixpoint computation may not actually converge to a fixpoint,
/// but in practice the analysis can make more precise decisions
/// whether a value should be tainted or not.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Taint {
/// A tainted value of a particular bytesize.
Tainted(ByteSize),
/// An untainted value of a particular bytesize.
Top(ByteSize),
}
impl Display for Taint {
    /// Format a `Taint` object as `Tainted:<size>` or `Top:<size>`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (label, size) = match self {
            Self::Tainted(size) => ("Tainted", size),
            Self::Top(size) => ("Top", size),
        };
        write!(formatter, "{}:{}", label, size)
    }
}
impl AbstractDomain for Taint {
/// The result of merging two `Taint` values is tainted if at least one input was tainted.
fn merge(&self, other: &Self) -> Self {
use Taint::*;
match (self, other) {
(Tainted(size), _) | (_, Tainted(size)) => Tainted(*size),
_ => Top(self.bytesize()),
}
}
/// Checks whether the value is an untainted `Top`-value.
fn is_top(&self) -> bool {
matches!(self, Taint::Top(_))
}
}
impl HasByteSize for Taint {
/// The size in bytes of the `Taint` value.
fn bytesize(&self) -> ByteSize {
match self {
Self::Tainted(size) | Self::Top(size) => *size,
}
}
}
impl HasTop for Taint {
    /// Create a `Top`-value that has the same bytesize as `self`.
    fn top(&self) -> Self {
        match self {
            Self::Tainted(size) | Self::Top(size) => Self::Top(*size),
        }
    }
}
impl RegisterDomain for Taint {
/// Get a new `Top`-value with the given bytesize.
fn new_top(bytesize: ByteSize) -> Self {
Self::Top(bytesize)
}
/// The result of a binary operation is tainted if at least one input value was tainted.
fn bin_op(&self, op: BinOpType, rhs: &Self) -> Self {
match (self, rhs) {
(Self::Tainted(_), _) | (_, Self::Tainted(_)) => {
Self::Tainted(self.bin_op_bytesize(op, rhs))
}
_ => Self::Top(self.bin_op_bytesize(op, rhs)),
}
}
/// The result of a unary operation is tainted if the input was tainted.
fn un_op(&self, _op: UnOpType) -> Self {
*self
}
/// A subpiece of a tainted value is again tainted.
fn subpiece(&self, _low_byte: ByteSize, size: ByteSize) -> Self {
if let Self::Tainted(_) = self {
Self::Tainted(size)
} else {
Self::Top(size)
}
}
/// The result of a cast operation is tainted if the input was tainted.
fn cast(&self, _kind: CastOpType, width: ByteSize) -> Self {
if let Self::Tainted(_) = self {
Self::Tainted(width)
} else {
Self::Top(width)
}
}
}
impl Taint {
    /// Returns `true` exactly if the value is a `Tainted` value.
    pub fn is_tainted(&self) -> bool {
        match self {
            Taint::Tainted(_) => true,
            Taint::Top(_) => false,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Check the `AbstractDomain` implementation of `Taint`.
    #[test]
    fn abstract_domain() {
        let size = ByteSize::new(4);
        let tainted_value = Taint::Tainted(size);
        let untainted_value = Taint::Top(size);
        // Merging with a tainted value yields a tainted value.
        assert_eq!(tainted_value.merge(&untainted_value), tainted_value);
        assert_eq!(untainted_value.merge(&untainted_value), untainted_value);
        assert!(!tainted_value.is_top());
    }

    /// Check the `RegisterDomain` implementation of `Taint`.
    #[test]
    fn register_domain() {
        use crate::intermediate_representation::*;

        let size = ByteSize::new(4);
        let tainted_value = Taint::Tainted(size);
        let untainted_value = Taint::Top(size);
        assert_eq!(
            tainted_value.bin_op(BinOpType::IntAdd, &untainted_value),
            tainted_value
        );
        assert_eq!(
            untainted_value.bin_op(BinOpType::IntMult, &untainted_value),
            untainted_value
        );
        assert_eq!(tainted_value.un_op(UnOpType::FloatFloor), tainted_value);
        assert_eq!(
            tainted_value.subpiece(ByteSize::new(0), ByteSize::new(4)),
            tainted_value
        );
        assert_eq!(
            untainted_value.cast(CastOpType::IntZExt, ByteSize::new(4)),
            untainted_value
        );
    }
}
......@@ -98,9 +98,10 @@ fn generate_cwe_warning(sub: &Term<Sub>, jmp: &Term<Jmp>, permission_const: u64)
///
/// Only the basic block right before the umask call is evaluated when trying to determine the parameter value of umask.
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
_cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let mut cwes = Vec::new();
let mut log_messages = Vec::new();
let umask_symbol_map = get_symbol_map(project, &["umask".to_string()]);
......
......@@ -18,10 +18,11 @@ False Negatives
* None known
*/
use crate::prelude::*;
use std::collections::HashMap;
use crate::{
intermediate_representation::{ExternSymbol, Program, Project, Sub, Term, Tid},
intermediate_representation::{ExternSymbol, Program, Sub, Term, Tid},
utils::{
log::{CweWarning, LogMessage},
symbol_utils::get_calls_to_symbols,
......@@ -106,9 +107,10 @@ pub fn resolve_symbols<'a>(
}
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let prog: &Term<Program> = &project.program;
let subfunctions: &Vec<Term<Sub>> = &prog.term.subs;
......
......@@ -14,10 +14,11 @@ False Negatives:
* There are other ways to expose I/O control without access control.
*/
use crate::prelude::*;
use std::collections::HashMap;
use crate::{
intermediate_representation::{Program, Project, Sub, Term, Tid},
intermediate_representation::{Program, Sub, Term, Tid},
utils::{
log::{CweWarning, LogMessage},
symbol_utils::{find_symbol, get_calls_to_symbols},
......@@ -66,9 +67,10 @@ pub fn generate_cwe_warning(calls: &[(&str, &Tid, &str)]) -> Vec<CweWarning> {
}
pub fn check_cwe(
project: &Project,
analysis_results: &AnalysisResults,
_cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
let project = analysis_results.project;
let prog: &Term<Program> = &project.program;
let mut warnings: Vec<CweWarning> = Vec::new();
if let Some((tid, name)) = find_symbol(prog, "ioctl") {
......
......@@ -17,7 +17,8 @@ fn run_pointer_inference(program_jsonbuilder_val: ocaml::Value) -> (Vec<CweWarni
let config: crate::analysis::pointer_inference::Config =
serde_json::from_value(crate::utils::read_config_file("config.json")["Memory"].clone())
.unwrap();
let (mut logs, cwes) = crate::analysis::pointer_inference::run(&project, config, false);
let pi_analysis = crate::analysis::pointer_inference::run(&project, config, false);
let (mut logs, cwes) = pi_analysis.collected_logs;
all_logs.append(&mut logs);
(
cwes,
......
......@@ -331,6 +331,13 @@ impl Project {
pub fn get_pointer_bytesize(&self) -> ByteSize {
self.stack_pointer_register.size
}
/// Try to guess a standard calling convention from the list of calling conventions in the project.
///
/// Returns the first calling convention with the name `__stdcall`
/// or `None` if the project contains no calling convention with this name.
pub fn get_standard_calling_convention(&self) -> Option<&CallingConvention> {
    for cconv in self.calling_conventions.iter() {
        if cconv.name == "__stdcall" {
            return Some(cconv);
        }
    }
    None
}
}
impl Project {
......@@ -437,6 +444,54 @@ impl Project {
mod tests {
use super::*;
impl Blk {
    /// Create a mock basic block term with the TID `block`
    /// that contains neither `Def` nor `Jmp` instructions.
    pub fn mock() -> Term<Blk> {
        let tid = Tid::new("block");
        let empty_block = Blk {
            defs: Vec::new(),
            jmps: Vec::new(),
        };
        Term {
            tid,
            term: empty_block,
        }
    }
}
impl Sub {
    /// Create a mock function term without any blocks.
    /// The given name is used both for the TID and for the name of the function.
    pub fn mock(name: impl ToString) -> Term<Sub> {
        let tid = Tid::new(name.to_string());
        let empty_sub = Sub {
            name: name.to_string(),
            blocks: Vec::new(),
        };
        Term {
            tid,
            term: empty_sub,
        }
    }
}
impl Program {
    /// Create a mock program without any functions, extern symbols or entry points.
    pub fn mock_empty() -> Program {
        let (subs, extern_symbols, entry_points) = (Vec::new(), Vec::new(), Vec::new());
        Program {
            subs,
            extern_symbols,
            entry_points,
        }
    }
}
impl Project {
    /// Create a mock project for tests.
    /// It contains an empty program with the TID `program_tid`,
    /// the CPU architecture `x86_64`, the 8-byte register `RSP` as stack pointer register
    /// and no calling conventions.
    pub fn mock_empty() -> Project {
        let program = Term {
            tid: Tid::new("program_tid"),
            term: Program::mock_empty(),
        };
        let cpu_architecture = "x86_64".to_string();
        let stack_pointer_register = Variable::mock("RSP", 8u64);
        Project {
            program,
            cpu_architecture,
            stack_pointer_register,
            calling_conventions: Vec::new(),
        }
    }
}
#[test]
fn retarget_nonexisting_jumps() {
let mut jmp_term = Term {
......
......@@ -15,3 +15,18 @@ pub struct Variable {
pub size: ByteSize,
pub is_temp: bool,
}
#[cfg(test)]
mod tests {
    use super::*;

    impl Variable {
        /// Create a mock variable with the given name and size in bytes.
        /// The variable is not marked as temporary.
        pub fn mock(name: impl ToString, size_in_bytes: impl Into<ByteSize>) -> Variable {
            let name = name.to_string();
            let size = size_in_bytes.into();
            Variable {
                name,
                size,
                is_temp: false,
            }
        }
    }
}
......@@ -7,6 +7,7 @@ Parts of the cwe_checker that are written in Rust.
#[macro_use]
extern crate ocaml;
use crate::analysis::pointer_inference::PointerInference;
use crate::intermediate_representation::Project;
use crate::utils::log::{CweWarning, LogMessage};
......@@ -27,11 +28,13 @@ mod prelude {
pub use crate::bil::{BitSize, Bitvector};
pub use crate::intermediate_representation::ByteSize;
pub use crate::intermediate_representation::{Term, Tid};
pub use crate::AnalysisResults;
pub use anyhow::{anyhow, Error};
}
/// The generic function signature for the main function of a CWE module.
///
/// The function receives the [`AnalysisResults`] struct
/// (which gives access to the project and to already computed analyses)
/// together with the JSON configuration of the module.
/// It returns the log messages and the CWE warnings generated by the module.
pub type CweModuleFn =
    fn(&AnalysisResults, &serde_json::Value) -> (Vec<LogMessage>, Vec<CweWarning>);
/// A structure containing general information about a CWE analysis module,
/// including the function to be called to run the analysis.
......@@ -55,9 +58,51 @@ pub fn get_modules() -> Vec<&'static CweModule> {
&crate::checkers::cwe_332::CWE_MODULE,
&crate::checkers::cwe_426::CWE_MODULE,
&crate::checkers::cwe_467::CWE_MODULE,
&crate::checkers::cwe_476::CWE_MODULE,
&crate::checkers::cwe_560::CWE_MODULE,
&crate::checkers::cwe_782::CWE_MODULE,
&crate::checkers::cwe_676::CWE_MODULE,
&crate::checkers::cwe_782::CWE_MODULE,
&crate::analysis::pointer_inference::CWE_MODULE,
]
}
/// A struct containing pointers to all known analysis results
/// that may be needed as input for other analyses and CWE checks.
///
/// The struct only holds references, which is why it can derive `Clone` and `Copy`.
/// The lifetime `'a` is the lifetime of the referenced project
/// and of the referenced pointer inference results (if present).
#[derive(Clone, Copy)]
pub struct AnalysisResults<'a> {
/// A pointer to the project struct
pub project: &'a Project,
/// The result of the pointer inference analysis if already computed.
/// It is `None` as long as no pointer inference results were attached to the struct.
pub pointer_inference: Option<&'a PointerInference<'a>>,
}
impl<'a> AnalysisResults<'a> {
    /// Construct an `AnalysisResults` struct that only knows the project itself.
    /// No pointer inference results are attached to it.
    pub fn new(project: &'a Project) -> AnalysisResults<'a> {
        let pointer_inference = None;
        AnalysisResults {
            project,
            pointer_inference,
        }
    }

    /// Run the pointer inference analysis on the project.
    ///
    /// The analysis results are handed back to the caller.
    /// They are *not* stored in the `AnalysisResults` struct itself;
    /// use [`set_pointer_inference`](AnalysisResults::set_pointer_inference) for that.
    ///
    /// # Panics
    ///
    /// Panics if `config` cannot be deserialized
    /// into the configuration type expected by the pointer inference analysis.
    pub fn compute_pointer_inference(&self, config: &serde_json::Value) -> PointerInference<'a> {
        let pi_config = serde_json::from_value(config.clone()).unwrap();
        crate::analysis::pointer_inference::run(self.project, pi_config, false)
    }

    /// Consume `self` and build a new `AnalysisResults` struct
    /// that references the same project
    /// but uses the given pointer inference analysis results
    /// (or none, if `None` is given).
    pub fn set_pointer_inference<'b: 'a>(
        self,
        pi_results: Option<&'b PointerInference<'a>>,
    ) -> AnalysisResults<'b> {
        AnalysisResults {
            project: self.project,
            pointer_inference: pi_results,
        }
    }
}
use crate::prelude::*;
use std::thread::JoinHandle;
/// A CWE warning message.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord, Default)]
......@@ -180,3 +181,94 @@ pub fn print_all_messages(
print!("{}", output);
}
}
/// The message types a logging thread can receive.
/// See the [`LogThread`] type for more information.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum LogThreadMsg {
/// A normal log message.
Log(LogMessage),
/// A CWE warning.
Cwe(CweWarning),
/// If the log collector thread receives this signal,
/// it should stop receiving new messages
/// and instead terminate and return the messages
/// it collected prior to receiving the termination signal.
Terminate,
}
/// A type for managing threads for collecting log messages.
///
/// With [`LogThread::spawn()`] one can create a new log thread
/// whose handle is contained in the returned `LogThread` struct.
/// By calling the [`collect()`](LogThread::collect()) method
/// one can tell the log thread to shut down
/// and return the logs collected to this point.
/// If the `LogThread` object gets dropped before calling `collect()`,
/// the corresponding logging thread will be stopped
/// and all collected logs will be discarded.
///
/// If one deliberately wants to discard all logging messages,
/// one can simply create a sender to a disconnected channel
/// via [`LogThread::create_disconnected_sender()`].
pub struct LogThread {
// The sending end of the channel whose receiving end was handed to the logging thread.
msg_sender: crossbeam_channel::Sender<LogThreadMsg>,
// The join handle of the logging thread.
// It is wrapped in an `Option` so that it can be taken out (leaving `None`) when the thread gets joined.
thread_handle: Option<JoinHandle<(Vec<LogMessage>, Vec<CweWarning>)>>,
}
impl Drop for LogThread {
/// If the logging thread still exists,
/// send it the `Terminate` signal.
/// Then wait until the logging thread stopped.
fn drop(&mut self) {
// Make sure the logging thread gets terminated when dropping this.
let _ = self.msg_sender.send(LogThreadMsg::Terminate);
if let Some(handle) = self.thread_handle.take() {
let _ = handle.join();
}
}
}
impl LogThread {
    /// Spawn a fresh logging collector thread and return a `LogThread` object holding its handle.
    ///
    /// `collector_func` contains the actual log collection logic.
    /// It is run on the new thread and gets the receiving end of the message channel.
    /// It should receive messages through that receiver until the channel disconnects
    /// or until it receives a [`LogThreadMsg::Terminate`] message.
    /// After that it should return the logs collected up to that point.
    pub fn spawn<F>(collector_func: F) -> LogThread
    where
        F: FnOnce(crossbeam_channel::Receiver<LogThreadMsg>) -> (Vec<LogMessage>, Vec<CweWarning>)
            + Send
            + 'static,
    {
        let (msg_sender, msg_receiver) = crossbeam_channel::unbounded();
        let collector_thread = std::thread::spawn(move || collector_func(msg_receiver));
        LogThread {
            msg_sender,
            thread_handle: Some(collector_thread),
        }
    }

    /// Create a sender that is not connected to any logging thread.
    ///
    /// The receiving end of the channel is dropped right away,
    /// so the sender can be used like a sender to a channel
    /// that deliberately discards all messages sent to it.
    pub fn create_disconnected_sender() -> crossbeam_channel::Sender<LogThreadMsg> {
        let (sender, receiver) = crossbeam_channel::unbounded();
        drop(receiver);
        sender
    }

    /// Return a new sender for sending messages
    /// to the logging thread that belongs to this `LogThread` instance.
    pub fn get_msg_sender(&self) -> crossbeam_channel::Sender<LogThreadMsg> {
        crossbeam_channel::Sender::clone(&self.msg_sender)
    }

    /// Send the `Terminate` signal to the logging thread,
    /// wait for it to stop
    /// and return all logs and CWE warnings it collected until that point.
    ///
    /// If the thread handle is no longer present, two empty vectors are returned.
    ///
    /// # Panics
    ///
    /// Panics if the logging thread itself panicked.
    pub fn collect(mut self) -> (Vec<LogMessage>, Vec<CweWarning>) {
        // A failed send is ignored on purpose: the thread may already have stopped listening.
        let _ = self.msg_sender.send(LogThreadMsg::Terminate);
        match self.thread_handle.take() {
            Some(collector_thread) => collector_thread.join().unwrap(),
            None => (Vec::new(), Vec::new()),
        }
    }
}
......@@ -363,6 +363,35 @@ mod tests {
#[test]
#[ignore]
fn cwe_476() {
    // Acceptance test for the CWE476 check: each test binary that is not skipped
    // is expected to yield exactly one `[CWE476]` occurrence.
    let mut tests = all_test_cases("cwe_476", "CWE476");

    // TODO: Check reason for failure!
    for architecture in ["mips64", "mips64el", "mips", "mipsel"].iter().copied() {
        mark_skipped(&mut tests, architecture, "gcc");
    }
    // Ghidra generates mangled function names here for some reason.
    for architecture in ["ppc64", "ppc64le"].iter().copied() {
        mark_architecture_skipped(&mut tests, architecture);
    }
    mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!

    let num_expected_occurences = 1;
    // Run every test case and keep the file path and error of each failing one.
    let error_log: Vec<_> = tests
        .into_iter()
        .filter_map(|test_case| {
            test_case
                .run_test("[CWE476]", num_expected_occurences)
                .err()
                .map(|error| (test_case.get_filepath(), error))
        })
        .collect();

    if !error_log.is_empty() {
        print_errors(error_log);
        panic!();
    }
}
#[test]
#[ignore]
fn cwe_560() {
let mut error_log = Vec::new();
let mut tests = linux_test_cases("cwe_560", "CWE560");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment