Unverified Commit d926369b by Melvin Klimke Committed by GitHub

add check for CWE 78: OS Command Injection (#130)

parent 9a414bab
......@@ -149,7 +149,8 @@ fn run_with_ghidra(args: CmdlineArgs) {
let mut analysis_results = AnalysisResults::new(&binary, &runtime_memory_image, &project);
let modules_depending_on_pointer_inference = vec!["CWE243", "CWE367", "CWE476", "Memory"];
let modules_depending_on_pointer_inference =
vec!["CWE78", "CWE243", "CWE367", "CWE476", "Memory"];
let pointer_inference_results = if modules
.iter()
.any(|module| modules_depending_on_pointer_inference.contains(&module.name))
......
......@@ -200,6 +200,11 @@ impl<T: AbstractDomain + SizedDomain + HasTop + std::fmt::Debug> MemRegionData<T
self.values.values()
}
/// Get the map of all elements including their offset into the memory region.
pub fn entry_map(&self) -> &BTreeMap<i64, T> {
&self.values
}
/// Get an iterator over all values in the memory region for in-place manipulation.
/// Note that one can changes values to *Top* using the iterator.
/// These values should be removed from the memory region using `clear_top_values()`.
......
......@@ -88,6 +88,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
&self,
target_value: Option<&u64>,
return_value: Option<&u64>,
_caller_sub: &Term<Sub>,
_call: &Term<Jmp>,
_return_: &Term<Jmp>,
) -> Option<u64> {
......@@ -105,7 +106,11 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
}
/// Simply copy the value
fn split_return_stub(&self, combined_value: &u64) -> Option<u64> {
fn split_return_stub(
&self,
combined_value: &u64,
_returned_from_sub: &Term<Sub>,
) -> Option<u64> {
Some(*combined_value)
}
......
......@@ -59,6 +59,7 @@ pub trait Context<'a> {
&self,
target_value: Option<&Self::Value>,
return_value: Option<&Self::Value>,
caller_sub: &Term<Sub>,
call: &Term<Jmp>,
return_: &Term<Jmp>,
) -> Option<Self::Value>;
......@@ -71,7 +72,11 @@ pub trait Context<'a> {
/// Transition function for return stub split.
/// Has access to the value at the ReturnCombine node and
/// decides which data is transferred along the Return Stub Edge.
fn split_return_stub(&self, combined_value: &Self::Value) -> Option<Self::Value>;
fn split_return_stub(
&self,
combined_value: &Self::Value,
returned_from_sub: &Term<Sub>,
) -> Option<Self::Value>;
/// Transition function for calls to functions not contained in the binary.
/// The corresponding edge goes from the callsite to the returned-to block.
......@@ -165,10 +170,16 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
}),
// The user has the ability to split the node value at the BlkStart return node
// to only send specific data along the ReturnStub Edge to the last BlkEnd node called subroutine
Edge::CRReturnStub => self
.context
.split_return_stub(node_value.unwrap_value())
.map(NodeValue::Value),
Edge::CRReturnStub => {
// The subroutine term from which the program returns
let returned_from_sub = match graph.node_weight(end_node) {
Some(Node::BlkEnd { 0: _, 1: sub_term }) => sub_term,
_ => panic!("Malformed Control flow graph"),
};
self.context
.split_return_stub(node_value.unwrap_value(), returned_from_sub)
.map(NodeValue::Value)
}
// The CallCombine Edge merges the values coming in from the CallStub Edge and Call Edge
// It also gives the user access to the call and return term.
......@@ -178,17 +189,18 @@ impl<'a, T: Context<'a>> GeneralFPContext for GeneralizedContext<'a, T> {
call_stub,
interprocedural_flow,
} => {
let call_block = match graph.node_weight(start_node) {
let (call_block, caller_sub) = match graph.node_weight(start_node) {
Some(Node::CallSource {
source: (call_block, ..),
source: (call_block, call_sub),
target: _,
}) => call_block,
}) => (call_block, call_sub),
_ => panic!("Malformed Control flow graph"),
};
let call_term = &call_block.term.jmps[0];
match self.context.update_callsite(
interprocedural_flow.as_ref(),
call_stub.as_ref(),
caller_sub,
call_term,
return_term,
) {
......
......@@ -17,7 +17,7 @@ pub struct State {
/// The list of all known memory objects.
pub memory: AbstractObjectList,
/// The abstract identifier of the current stack frame.
/// It points to the to the base of the stack frame, i.e. only negative offsets point into the current stack frame.
/// It points to the base of the stack frame, i.e. only negative offsets point into the current stack frame.
pub stack_id: AbstractIdentifier,
/// All known IDs of caller stack frames.
/// Note that these IDs are named after the callsite,
......
......@@ -8,4 +8,5 @@ pub mod cwe_467;
pub mod cwe_476;
pub mod cwe_560;
pub mod cwe_676;
pub mod cwe_78;
pub mod cwe_782;
......@@ -50,8 +50,8 @@ use std::collections::HashMap;
mod state;
use state::*;
mod taint;
use taint::*;
pub mod taint;
pub use taint::*;
mod context;
use context::*;
......
//! This module implements a check for CWE-78: Improper Neutralization of Special Elements used in an OS Command ('OS Command Injection').
//!
//! The software constructs all or part of an OS command using externally-influenced input from an upstream component,
//! but it does not neutralize or incorrectly neutralizes special elements that could modify the intended OS command
//! when it is sent to a downstream component.
//!
//! See <https://cwe.mitre.org/data/definitions/78.html> for a detailed description.
//!
//! ## How the check works
//!
//! Using backward dataflow analysis we search for an execution path from a system call parameter (string) to a user input
//! to identify possible command injections.
//!
//! To find relevant string related functions, such as sprintf, it is assumed that the first input parameter points
//! to the memory position that will be used as the return location. (e.g. char *strcat(char *dest, const char *src)
//! where 'char *dest' will contain the return value)
//!
//! For instance:
//! ...
//! MOV RAX, qword ptr [RBP + local_10]
//! MOV RDI, RAX // RDI is the first input parameter for the strcat call and it points to [RBP + local_10]
//! CALL strcat
//! MOV RAX, qword ptr [RBP + local_10] // In the backwards analysis [RBP + local_10] will be tainted and it contains the return value
//! ...
//!
//! ### Symbols configurable in config.json
//!
//! The symbols are the functions which
//! 1. make system calls (e.g. system)
//! 2. manipulate strings (e.g. sprintf, strcat, memcpy, etc.)
//! 3. take user input (e.g. scanf)
//!
//! ## False Positives
//!
//! - The input comes from the user but proper sanitization was not detected by the analysis even though it exists.
//! - The input comes from the user but the format string's input format could not be distinguished as non-string input.
//!
//! ## False Negatives
//!
//! - Missing taints because the analysis lost track of pointer targets
//! - Untracked function parameters cause incomplete taints, so that possibly dangerous inputs can be missed
use std::collections::HashMap;
use crate::{
analysis::{
backward_interprocedural_fixpoint::{create_computation, Context as _},
graph::{self, Edge, Node},
interprocedural_fixpoint_generic::NodeValue,
},
intermediate_representation::{Jmp, Project, Sub},
prelude::*,
utils::log::{CweWarning, LogMessage},
AnalysisResults, CweModule,
};
use petgraph::{graph::NodeIndex, visit::EdgeRef};
mod state;
use state::*;
mod context;
use context::*;
/// The module descriptor of this check.
/// It bundles the name and version of the check with [`check_cwe`] as the function that runs it.
pub static CWE_MODULE: CweModule = CweModule {
name: "CWE78",
version: "0.1",
run: check_cwe,
};
/// The configuration struct of the check.
///
/// [`check_cwe`] deserializes it from the JSON parameters it is called with.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Config {
/// The names of the system call symbols.
/// Calls to these symbols are used as taint sources by [`check_cwe`].
system_symbols: Vec<String>,
/// The names of the string manipulating symbols.
string_symbols: Vec<String>,
/// The names of the user input symbols.
user_input_symbols: Vec<String>,
}
/// This check searches for system calls and sets their parameters as taint source if available.
/// Then the fixpoint computation is executed and its result may generate cwe warnings if
/// the parameters can be tracked back to user inputs.
///
/// For every call to one of the configured system symbols a separate backward fixpoint computation
/// is started at the `BlkEnd` node of the call.
/// A warning is generated for each entry point subroutine whose entry node still holds
/// a non-empty taint state after the computation.
///
/// At most one warning per taint source address is returned.
/// The returned warnings are sorted by the address of their taint source,
/// so that the output is identical between runs on the same input.
///
/// # Panics
///
/// Panics if the pointer inference results are missing in `analysis_results`,
/// if `cwe_params` cannot be deserialized into a [`Config`],
/// or if the control flow graph is malformed.
pub fn check_cwe(
    analysis_results: &AnalysisResults,
    cwe_params: &serde_json::Value,
) -> (Vec<LogMessage>, Vec<CweWarning>) {
    let project = analysis_results.project;
    let pointer_inference_results = analysis_results
        .pointer_inference
        .expect("The CWE78 check requires the results of the pointer inference analysis.");
    let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded();
    let config: Config = serde_json::from_value(cwe_params.clone())
        .expect("Invalid configuration parameters for the CWE78 check.");
    let system_symbols =
        crate::utils::symbol_utils::get_symbol_map(project, &config.system_symbols[..]);
    let string_symbols =
        crate::utils::symbol_utils::get_symbol_map(project, &config.string_symbols[..]);
    let user_input_symbols =
        crate::utils::symbol_utils::get_symbol_map(project, &config.user_input_symbols[..]);
    let general_context = Context::new(
        project,
        analysis_results.runtime_memory_image,
        pointer_inference_results,
        string_symbols,
        user_input_symbols,
        cwe_sender,
    );
    let entry_sub_to_entry_node_map = get_entry_sub_to_entry_node_map(project, &general_context);

    for edge in general_context.get_pi_graph().edge_references() {
        // Only direct calls to one of the configured system symbols are taint sources.
        let jmp = match edge.weight() {
            Edge::ExternCallStub(jmp) => jmp,
            _ => continue,
        };
        let target = match &jmp.term {
            Jmp::Call { target, .. } => target,
            _ => continue,
        };
        let symbol = match system_symbols.get(target) {
            Some(symbol) => symbol,
            None => continue,
        };
        let node = edge.source();
        let current_sub = match general_context.get_pi_graph()[node] {
            Node::BlkEnd(_blk, sub) => sub,
            _ => panic!("Malformed control flow graph: Extern call does not start at a BlkEnd node."),
        };
        // Each taint source gets its own copy of the context and its own fixpoint computation.
        let mut context = general_context.clone();
        context.set_taint_source(jmp, &symbol.name, current_sub);
        let pi_state_at_taint_source = match pointer_inference_results.get_node_value(node) {
            Some(NodeValue::Value(val)) => Some(val.clone()),
            _ => None,
        };
        let mut computation = create_computation(context.clone(), None);
        computation.set_node_value(
            node,
            NodeValue::Value(State::new(
                symbol,
                &project.stack_pointer_register,
                pi_state_at_taint_source.as_ref(),
                current_sub,
            )),
        );
        computation.compute_with_max_steps(100);
        // Taint that reaches the entry node of an entry point could not be traced to a sanitized origin.
        for (sub_name, node_index) in entry_sub_to_entry_node_map.iter() {
            if let Some(node_weight) = computation.get_node_value(*node_index) {
                let state = node_weight.unwrap_value();
                if !state.is_empty() {
                    context.generate_cwe_warning(sub_name);
                }
            }
        }
    }

    // Deduplicate the warnings by the address of their taint source.
    // An ordered map is used so that the order of the returned warnings is deterministic.
    let mut cwe_warnings = std::collections::BTreeMap::new();
    for cwe in cwe_receiver.try_iter() {
        let taint_source_address = cwe
            .addresses
            .first()
            .cloned()
            .expect("CWE78 warning without a taint source address.");
        cwe_warnings.insert(taint_source_address, cwe);
    }
    let cwe_warnings = cwe_warnings.into_iter().map(|(_, cwe)| cwe).collect();
    (Vec::new(), cwe_warnings)
}
/// Maps the name of each entry point subroutine to the index of the `BlkStart` node
/// of its first block.
///
/// Entry points without a known subroutine, without blocks,
/// or without a matching `BlkStart` node in the graph of the context are left out.
fn get_entry_sub_to_entry_node_map(
    project: &Project,
    context: &Context,
) -> HashMap<String, NodeIndex> {
    let program = &project.program.term;

    // Index all subroutines by their TID to resolve the entry points.
    let subs_by_tid: HashMap<Tid, &Term<Sub>> = program
        .subs
        .iter()
        .map(|sub| (sub.tid.clone(), sub))
        .collect();

    // For each resolvable entry point remember the TID of its first block.
    let entry_blocks: HashMap<(Tid, String), Tid> = program
        .entry_points
        .iter()
        .filter_map(|sub_tid| {
            let sub = subs_by_tid.get(sub_tid)?;
            let first_block = sub.term.blocks.first()?;
            Some((
                (sub_tid.clone(), sub.term.name.clone()),
                first_block.tid.clone(),
            ))
        })
        .collect();

    // Index all `BlkStart` nodes by the TIDs of their block and subroutine.
    let mut blk_start_nodes = HashMap::new();
    for node in context.get_graph().node_indices() {
        if let graph::Node::BlkStart(block, sub) = context.get_graph()[node] {
            blk_start_nodes.insert((block.tid.clone(), sub.tid.clone()), node);
        }
    }

    entry_blocks
        .into_iter()
        .filter_map(|((sub_tid, sub_name), block_tid)| {
            blk_start_nodes
                .get(&(block_tid, sub_tid))
                .map(|start_node| (sub_name, *start_node))
        })
        .collect()
}
use std::{
collections::{HashMap, HashSet},
sync::Arc,
};
use petgraph::{graph::NodeIndex, visit::IntoNodeReferences};
use super::{state::State, CWE_MODULE};
use crate::{
abstract_domain::AbstractDomain,
analysis::{
forward_interprocedural_fixpoint::Context as PiContext,
graph::{Graph, Node},
interprocedural_fixpoint_generic::NodeValue,
pointer_inference::PointerInference as PointerInferenceComputation,
pointer_inference::State as PointerInferenceState,
},
checkers::cwe_476::Taint,
intermediate_representation::*,
utils::{binary::RuntimeMemoryImage, log::CweWarning},
};
/// The context object of the CWE 78 taint analysis.
///
/// It holds the reversed control flow graph, the pointer inference results,
/// several lookup maps built once in `Context::new` and the currently analysed taint source.
/// The lookup maps are wrapped in `Arc`s, so cloning the context does not copy their contents.
#[derive(Clone)]
pub struct Context<'a> {
/// A pointer to the corresponding project struct.
project: &'a Project,
/// A pointer to the representation of the runtime memory image.
runtime_memory_image: &'a RuntimeMemoryImage,
/// The reversed control flow graph for the analysis
graph: Graph<'a>,
/// A pointer to the results of the pointer inference analysis.
/// They are used to determine the targets of pointers to memory,
/// which in turn is used to keep track of taint on the stack or on the heap.
pub pointer_inference_results: &'a PointerInferenceComputation<'a>,
/// A map to get the node index of the `BlkStart` node containing a given [`Def`] as the last `Def` of the block.
/// The keys are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
block_start_last_def_map: Arc<HashMap<(Tid, Tid), NodeIndex>>,
/// A set containing each [`Def`] that is the first `Def` of its block.
/// The entries are of the form `(Def-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
block_first_def_set: Arc<HashSet<(Tid, Tid)>>,
/// Maps the TID of an extern symbol to the extern symbol struct.
extern_symbol_map: Arc<HashMap<Tid, &'a ExternSymbol>>,
/// Maps the TID of an extern string related symbol to the corresponding extern symbol struct.
string_symbol_map: Arc<HashMap<Tid, &'a ExternSymbol>>,
/// Maps the TID of an extern symbol that takes input from the user to the corresponding extern symbol struct.
user_input_symbol_map: Arc<HashMap<Tid, &'a ExternSymbol>>,
/// A map to get the node index of the `BlkEnd` node containing a given [`Jmp`].
/// The keys are of the form `(Jmp-TID, Current-Sub-TID)`
/// to distinguish the nodes for blocks contained in more than one function.
jmp_to_blk_end_node_map: Arc<HashMap<(Tid, Tid), NodeIndex>>,
/// The call whose parameter values are the sources for taint for the analysis.
pub taint_source: Option<&'a Term<Jmp>>,
/// The subroutine from which the taint source originates
pub taint_source_sub: Option<&'a Term<Sub>>,
/// The name of the function, whose parameter values are the taint sources.
pub taint_source_name: Option<String>,
/// A channel where found CWE hits can be sent to.
cwe_collector: crossbeam_channel::Sender<CweWarning>,
}
impl<'a> Context<'a> {
/// Creates a new context for the CWE 78 taint analysis.
///
/// The lookup maps are computed from the graph of the pointer inference results.
/// The graph used for the taint analysis itself is a reversed copy of that graph.
/// No taint source is set for the returned context.
pub fn new(
    project: &'a Project,
    runtime_memory_image: &'a RuntimeMemoryImage,
    pointer_inference_results: &'a PointerInferenceComputation<'a>,
    string_symbols: HashMap<Tid, &'a ExternSymbol>,
    user_input_symbols: HashMap<Tid, &'a ExternSymbol>,
    cwe_collector: crossbeam_channel::Sender<CweWarning>,
) -> Self {
    let extern_symbol_map: HashMap<Tid, &'a ExternSymbol> = project
        .program
        .term
        .extern_symbols
        .iter()
        .map(|symbol| (symbol.tid.clone(), symbol))
        .collect();

    let pi_graph = pointer_inference_results.get_graph();
    let mut block_first_def_set = HashSet::new();
    let mut block_start_last_def_map = HashMap::new();
    let mut jmp_to_blk_end_node_map = HashMap::new();
    for (node_id, node) in pi_graph.node_references() {
        match node {
            Node::BlkStart(block, sub) => {
                // Blocks without any `Def` contribute nothing to the def lookup tables.
                let defs = &block.term.defs;
                if let (Some(first_def), Some(last_def)) = (defs.first(), defs.last()) {
                    block_first_def_set.insert((first_def.tid.clone(), sub.tid.clone()));
                    block_start_last_def_map
                        .insert((last_def.tid.clone(), sub.tid.clone()), node_id);
                }
            }
            Node::BlkEnd(block, sub) => {
                jmp_to_blk_end_node_map.extend(
                    block
                        .term
                        .jmps
                        .iter()
                        .map(|jmp| ((jmp.tid.clone(), sub.tid.clone()), node_id)),
                );
            }
            _ => (),
        }
    }

    // The taint analysis runs backwards, so it operates on the reversed graph.
    let mut reversed_graph = pi_graph.clone();
    reversed_graph.reverse();

    Context {
        project,
        runtime_memory_image,
        graph: reversed_graph,
        pointer_inference_results,
        block_start_last_def_map: Arc::new(block_start_last_def_map),
        block_first_def_set: Arc::new(block_first_def_set),
        extern_symbol_map: Arc::new(extern_symbol_map),
        string_symbol_map: Arc::new(string_symbols),
        user_input_symbol_map: Arc::new(user_input_symbols),
        jmp_to_blk_end_node_map: Arc::new(jmp_to_blk_end_node_map),
        taint_source: None,
        taint_source_sub: None,
        taint_source_name: None,
        cwe_collector,
    }
}
/// Generates the CWE warning for the CWE 78 check and sends it to the CWE collector channel.
///
/// `sub_name` is the name of the function that gets reported in the warning.
/// An error while sending the warning is ignored.
///
/// # Panics
///
/// Panics if no taint source or no taint source name is set for the context.
pub fn generate_cwe_warning(&self, sub_name: &str) {
    let source = self.taint_source.unwrap();
    let name = self.taint_source_name.clone().unwrap();
    let source_address = &source.tid.address;
    let description = format!(
        "(Potential OS Command Injection) Input for call to {} is not properly sanitized in function {} ({})",
        name, sub_name, source_address
    );
    let cwe_warning = CweWarning::new(
        CWE_MODULE.name.to_string(),
        CWE_MODULE.version.to_string(),
        description,
    )
    .addresses(vec![source_address.clone()])
    .tids(vec![source.tid.to_string()])
    .symbols(vec![sub_name.to_string()])
    .other(vec![vec!["OS Command Injection".to_string(), name]]);
    let _ = self.cwe_collector.send(cwe_warning);
}
/// Registers the taint source of the analysis:
/// the call term, the name of the called symbol and the subroutine containing the call.
pub fn set_taint_source(
    &mut self,
    taint_source: &'a Term<Jmp>,
    taint_source_symbol_name: &str,
    taint_source_sub: &'a Term<Sub>,
) {
    self.taint_source_name = Some(String::from(taint_source_symbol_name));
    self.taint_source_sub = Some(taint_source_sub);
    self.taint_source = Some(taint_source);
}
/// Returns the graph of the pointer inference results.
///
/// Note that this is not the reversed copy stored in the `graph` field of the context.
pub fn get_pi_graph(&self) -> &Graph<'a> {
self.pointer_inference_results.get_graph()
}
/// This function taints the registers and stack positions of the parameter pointers for string functions
/// such as sprintf, snprintf, etc.
/// The size parameter is ignored if available (e.g. snprintf, strncat etc.)
///
/// The parameters are only tainted if the first parameter points to tainted memory.
/// In that case the taint at the target of the first parameter is removed.
/// If no pointer inference state is available for `call_source_node`, an unchanged copy of `state` is returned.
///
/// # Panics
///
/// Panics if `string_symbol` has no parameters.
pub fn taint_string_function_parameters(
    &self,
    state: &State,
    string_symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut new_state = state.clone();
    let pi_state = match self
        .pointer_inference_results
        .get_node_value(call_source_node)
    {
        Some(NodeValue::Value(pi_state)) => pi_state,
        _ => return new_state,
    };
    // Since the first parameter of these string functions is also the return parameter,
    // it indicates whether the function call is relevant to the taint analysis.
    let first_parameter = match string_symbol.parameters.get(0) {
        Some(param) => param,
        None => panic!("Missing parameters for string related function!"),
    };
    if !self.first_param_points_to_memory_taint(pi_state, &mut new_state, first_parameter) {
        return new_state;
    }
    for parameter in string_symbol.parameters.iter() {
        match parameter {
            Arg::Register(var) => new_state.set_register_taint(var, Taint::Tainted(var.size)),
            Arg::Stack { size, .. } => {
                let evaluated_parameter = pi_state.eval_parameter_arg(
                    parameter,
                    &self.project.stack_pointer_register,
                    self.runtime_memory_image,
                );
                if let Ok(address) = evaluated_parameter {
                    new_state.save_taint_to_memory(&address, Taint::Tainted(*size))
                }
            }
        }
    }
    new_state
}
/// Checks whether the first parameter of a string related function points to a taint.
/// If so, removes the taint at the target memory and returns `true`.
///
/// Returns `false` if the parameter could not be evaluated or does not point to a taint.
pub fn first_param_points_to_memory_taint(
    &self,
    pi_state: &PointerInferenceState,
    state: &mut State,
    parameter: &Arg,
) -> bool {
    let address = match pi_state.eval_parameter_arg(
        parameter,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    ) {
        Ok(address) => address,
        Err(_) => return false,
    };
    let points_to_taint = state.check_if_address_points_to_taint(address.clone(), pi_state);
    if points_to_taint {
        state.remove_mem_taint_at_target(&address);
    }
    points_to_taint
}
/// This function taints the registers and stack positions of the parameter pointers of external functions.
/// If the function is one of the specified string functions, the processing of the call is transferred to
/// the string function processor.
///
/// For all other functions the parameters are only tainted if a return register of the function is tainted.
/// In that case a CWE warning is generated if the function is one of the user input symbols.
/// Otherwise an unchanged copy of `state` is returned.
pub fn taint_generic_function_parameters_and_remove_non_callee_saved(
    &self,
    state: &State,
    symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut new_state = state.clone();
    // The return register is not tainted for string symbols.
    // Instead, it has to be checked whether the first function parameter points to a tainted memory address.
    if self.string_symbol_map.contains_key(&symbol.tid) {
        new_state.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
        return self.taint_string_function_parameters(&new_state, symbol, call_source_node);
    }

    // Collect the names of all return values that are passed in registers.
    let mut return_registers: Vec<String> = Vec::new();
    for return_value in symbol.return_values.iter() {
        if let Arg::Register(var) = return_value {
            return_registers.push(var.name.clone());
        }
    }
    // Without taint in a return register the call has no influence on the taint state.
    if !new_state.check_return_registers_for_taint(return_registers) {
        return new_state;
    }

    new_state.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
    // TODO: Parameter detection since targets of input parameters are the return locations
    // Taint memory for string inputs
    if self.user_input_symbol_map.contains_key(&symbol.tid) {
        self.generate_cwe_warning(&new_state.get_current_sub().as_ref().unwrap().term.name);
    }
    self.taint_parameters(&new_state, symbol.parameters.clone(), call_source_node)
}
/// Taints a stack parameter given a size and an offset to the stack pointer register.
///
/// The address of the parameter is evaluated with the pointer inference state at `call_source_node`.
/// The state is returned unchanged if no such pointer inference state exists
/// or if the address could not be evaluated.
pub fn taint_stack_parameters(
    &self,
    state: State,
    call_source_node: NodeIndex,
    offset: i64,
    size: ByteSize,
) -> State {
    let mut new_state = state;
    let pi_state = match self
        .pointer_inference_results
        .get_node_value(call_source_node)
    {
        Some(NodeValue::Value(pi_state)) => pi_state,
        _ => return new_state,
    };
    let stack_pointer = Expression::Var(self.project.stack_pointer_register.clone());
    let address_exp = stack_pointer.plus_const(offset);
    if let Ok(address) = pi_state.eval(&address_exp) {
        new_state.save_taint_to_memory(&address, Taint::Tainted(size));
    }
    new_state
}
/// Iterates over the given parameters of a function and returns an updated state.
///
/// Register parameters get tainted directly.
/// Stack parameters are tainted through `taint_stack_parameters`,
/// using the pointer inference state at `call_source_node`.
/// The input `state` is not modified, the taints are added to a copy of it.
pub fn taint_parameters(
    &self,
    state: &State,
    parameters: Vec<Arg>,
    call_source_node: NodeIndex,
) -> State {
    let mut new_state = state.clone();
    for parameter in parameters {
        match parameter {
            Arg::Register(param) => {
                new_state.set_register_taint(&param, Taint::Tainted(param.size))
            }
            Arg::Stack { offset, size } => {
                // The state is moved into the call and handed back,
                // so no copy of the whole state is needed per stack parameter.
                new_state =
                    self.taint_stack_parameters(new_state, call_source_node, offset, size);
            }
        }
    }
    new_state
}
/// Returns the node index of the `BlkStart` node of a block
/// if the given def term is the last def term of that block in the current subroutine of the state.
/// Otherwise `None` is returned.
///
/// # Panics
///
/// Panics if the state has no current subroutine.
pub fn get_blk_start_node_if_last_def(
    &self,
    state: &State,
    def: &Term<Def>,
) -> Option<NodeIndex> {
    match state.get_current_sub() {
        Some(sub) => self
            .block_start_last_def_map
            .get(&(def.tid.clone(), sub.tid.clone()))
            .copied(),
        None => panic!("Missing current Sub."),
    }
}
/// Creates a map from def terms to their corresponding pointer inference states
/// by taking the pointer inference state of the BlkStart node and updating it
/// for each def term in the block.
///
/// The state stored for a def term is the state after the def was executed.
/// If the pointer inference yields no state for a def term,
/// this def term and all following ones are missing in the map.
/// Returns `None` if no pointer inference value exists for `block_start_node`.
///
/// # Panics
///
/// Panics if `block_start_node` is not a node of the pointer inference graph.
pub fn create_pi_def_map(
    &self,
    block_start_node: NodeIndex,
) -> Option<HashMap<Tid, PointerInferenceState>> {
    let block_node = match self.get_pi_graph().node_weight(block_start_node) {
        Some(block_node) => block_node,
        None => panic!("Unexpected node index for BlkStart Node."),
    };
    let pi_value = self
        .pointer_inference_results
        .get_node_value(block_start_node)?;

    let mut pi_def_map = HashMap::new();
    let pi_context = self.pointer_inference_results.get_context();
    let mut current_pi_state = pi_value.unwrap_value().clone();
    for def in block_node.get_block().term.defs.iter() {
        match pi_context.update_def(&current_pi_state, def) {
            Some(state_after_def) => {
                pi_def_map.insert(def.tid.clone(), state_after_def.clone());
                current_pi_state = state_after_def;
            }
            // Without a state after this def there is nothing to compute for the remaining defs.
            None => break,
        }
    }
    Some(pi_def_map)
}
/// Handles assignment and load definition updates.
///
/// If the register written by the definition is tainted,
/// the taint is transferred to the input expression of the definition.
/// Otherwise the state is returned unchanged.
pub fn handle_assign_and_load(
    &self,
    state: State,
    def: &Term<Def>,
    var: &Variable,
    input: &Expression,
) -> State {
    let mut new_state = state;
    let target_is_tainted = match new_state.get_register_taint(var) {
        Some(taint) => taint.is_tainted(),
        None => false,
    };
    if target_is_tainted {
        new_state.set_expression_taint_and_store_constants(
            &def.tid,
            var,
            input,
            &self.project.stack_pointer_register,
        );
    }
    new_state
}
/// Gets the `BlkEnd` node containing the given call inside the current subroutine of the state.
///
/// # Panics
///
/// Panics if the state has no current subroutine
/// or if no `BlkEnd` node is known for the call in the current subroutine.
pub fn get_source_node(&self, state: &State, call_source: &Tid) -> NodeIndex {
    let current_sub_tid = state.get_current_sub().as_ref().unwrap().tid.clone();
    let key = (call_source.clone(), current_sub_tid);
    match self.jmp_to_blk_end_node_map.get(&key) {
        Some(blk_end_node) => *blk_end_node,
        None => panic!("Malformed Control Flow Graph."),
    }
}
/// Updates the target state at the callsite by removing non parameter register taints
/// and by merging callee saved register taints from the return state if available.
/// The current subroutine of the resulting state is set to `caller_sub`.
///
/// Returns `None` if there is no target state.
pub fn update_target_state_for_callsite(
    &self,
    return_state: Option<&State>,
    target_state: Option<&State>,
    caller_sub: &Term<Sub>,
) -> Option<State> {
    let mut new_state = target_state?.clone();
    new_state.remove_non_parameter_taints_for_generic_function(self.project);
    new_state.set_current_sub(caller_sub);
    if let Some(return_) = return_state {
        let calling_convention = self.project.get_standard_calling_convention();
        new_state.merge_callee_saved_taints_from_return_state(return_, calling_convention);
    }
    Some(new_state)
}
}
impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Context<'a> {
type Value = State;
/// Get the underlying graph of the fixpoint computation.
/// This is the graph stored in the `graph` field of the context.
fn get_graph(&self) -> &Graph<'a> {
&self.graph
}
/// Merge two states by delegating to the `merge` method of the state.
fn merge(&self, state1: &State, state2: &State) -> State {
state1.merge(state2)
}
/// Updates the state according to the side effects of the definition.
///
/// Returns `None` for an empty state.
/// The map of pointer inference states per def term is created when the last def of a block is handled
/// and removed again after the first def of the block was handled.
///
/// # Panics
///
/// Panics if the state has no current subroutine.
fn update_def(&self, state: &State, def: &Term<Def>) -> Option<State> {
    if state.is_empty() {
        // Without taint there is nothing to propagate.
        return None;
    }
    let mut new_state = state.clone();
    // The analysis runs backwards, so the last def of a block is the first one to be handled.
    // This is the point where the Def Pi Map of the block gets created.
    if let Some(blk_start_node) = self.get_blk_start_node_if_last_def(&new_state, def) {
        new_state.set_pi_def_map(self.create_pi_def_map(blk_start_node));
    }
    new_state = match &def.term {
        Def::Assign { var, value: input }
        | Def::Load {
            var,
            address: input,
        } => self.handle_assign_and_load(new_state, def, var, input),
        Def::Store { address, value } => {
            new_state.taint_value_to_be_stored(
                &def.tid,
                address,
                value,
                &self.project.stack_pointer_register,
            );
            new_state
        }
    };
    // After the first def of the block was handled the pi_def_map is not needed anymore.
    // It gets removed from the state to save memory.
    let current_sub_tid = new_state.get_current_sub().as_ref().unwrap().tid.clone();
    if self
        .block_first_def_set
        .contains(&(def.tid.clone(), current_sub_tid))
    {
        new_state.set_pi_def_map(None);
    }
    Some(new_state)
}
/// Returns a copy of the state after the jump.
/// The jump term, a possibly untaken conditional jump and the jumpsite block are ignored.
fn update_jumpsite(
&self,
state_after_jump: &State,
_jump: &Term<Jmp>,
_untaken_conditional: Option<&Term<Jmp>>,
_jumpsite: &Term<Blk>,
) -> Option<State> {
Some(state_after_jump.clone())
}
/// Computes the state at the callsite from the target state and the return state.
///
/// - If there is a return state and a target state, non parameter taints are removed from the target state
///   and the remaining taints plus the callee saved taints from the return state are combined in a new state.
/// - If there is a return state and no target state, the non callee saved register taints are removed
///   from a copy of the return state if a standard calling convention is known. The copy is returned.
/// - If there is no return state, the target state is only updated and returned
///   if the subroutine of the taint source has the same TID as `caller_sub`.
///   In all other cases `None` is returned.
///
/// NOTE(review): the comparison is made against `caller_sub`; confirm that this is the
/// intended subroutine, since earlier comments spoke of the called subroutine.
fn update_callsite(
    &self,
    target_state: Option<&State>,
    return_state: Option<&State>,
    caller_sub: &Term<Sub>,
    _call: &Term<Jmp>,
    _return_: &Term<Jmp>,
) -> Option<State> {
    match return_state {
        Some(return_) => self
            .update_target_state_for_callsite(return_state, target_state, caller_sub)
            .or_else(|| {
                // No target state: let the return state through without its volatile register taints.
                let mut new_state = return_.clone();
                if let Some(calling_conv) = self.project.get_standard_calling_convention() {
                    new_state.remove_non_callee_saved_taint(calling_conv);
                }
                Some(new_state)
            }),
        None => {
            let taint_source_matches_caller = self
                .taint_source_sub
                .map_or(false, |source_sub| source_sub.tid == caller_sub.tid);
            if taint_source_matches_caller {
                self.update_target_state_for_callsite(return_state, target_state, caller_sub)
            } else {
                None
            }
        }
    }
}
/// Simply returns a copy of the combined state, so that the whole state is sent along the call stub edge.
/// Will be used at the callsite to restore non-volatile registers
fn split_call_stub(&self, combined_state: &State) -> Option<State> {
Some(combined_state.clone())
}
/// Removes all register taints except for possible return register taints
/// and sets the current subroutine of the state to the subroutine that is returned from.
///
/// The register taints are left untouched if the project has no standard calling convention.
fn split_return_stub(
    &self,
    combined_state: &State,
    returned_from_sub: &Term<Sub>,
) -> Option<State> {
    let mut new_state = combined_state.clone();
    if let Some(calling_conv) = self.project.get_standard_calling_convention() {
        let mut return_registers = HashSet::new();
        for register in calling_conv.return_register.iter() {
            return_registers.insert(register.clone());
        }
        new_state.remove_all_except_return_register_taints(return_registers);
    }
    new_state.set_current_sub(returned_from_sub);
    Some(new_state)
}
/// Handles a call to an extern symbol.
/// The parameters of the extern symbol get tainted and the non callee saved register taints get removed
/// as implemented in `taint_generic_function_parameters_and_remove_non_callee_saved`.
///
/// Returns `None` for an empty state.
///
/// # Panics
///
/// Panics if the call is not a direct call, if no `BlkEnd` node is known for the call
/// or if the call target is not a known extern symbol.
fn update_call_stub(&self, state_after_call: &State, call: &Term<Jmp>) -> Option<State> {
    if state_after_call.is_empty() {
        return None;
    }
    let target = match &call.term {
        Jmp::Call { target, .. } => target,
        _ => panic!("Malformed control flow graph encountered."),
    };
    let source_node = self.get_source_node(state_after_call, &call.tid);
    let extern_symbol = match self.extern_symbol_map.get(target) {
        Some(extern_symbol) => extern_symbol,
        None => panic!("Extern symbol not found."),
    };
    Some(self.taint_generic_function_parameters_and_remove_non_callee_saved(
        state_after_call,
        extern_symbol,
        source_node,
    ))
}
/// Just returns a copy of the input state.
/// The condition and its truth value are ignored.
fn specialize_conditional(
&self,
state: &State,
_condition: &Expression,
_is_true: bool,
) -> Option<State> {
Some(state.clone())
}
}
#[cfg(test)]
mod tests;
use super::*;
use crate::analysis::backward_interprocedural_fixpoint::Context as BackwardContext;
use crate::{
abstract_domain::{BitvectorDomain, DataDomain, PointerDomain, SizedDomain},
analysis::pointer_inference::{Data, State as PointerInferenceState},
bil::Bitvector,
intermediate_representation::{Expression, Variable},
};
// TODO: change actual mock function for blocks to receive a TID parameter and then remove this function
/// Creates a block with the given TID that contains neither defs nor jumps.
fn mock_block(tid: &str) -> Term<Blk> {
    let empty_block = Blk {
        defs: Vec::new(),
        jmps: Vec::new(),
    };
    Term {
        tid: Tid::new(tid),
        term: empty_block,
    }
}
/// Wraps the given integer into a bitvector domain value.
fn bv(value: i64) -> BitvectorDomain {
    let bitvector = Bitvector::from_i64(value);
    BitvectorDomain::Value(bitvector)
}
impl ExternSymbol {
    /// Creates a mock of the extern symbol `sprintf`
    /// with the parameter registers `RDI` and `RSI` and the return register `RAX`.
    fn mock_string() -> Self {
        let parameters = vec![Arg::mock_register("RDI"), Arg::mock_register("RSI")];
        let return_values = vec![Arg::mock_register("RAX")];
        ExternSymbol {
            tid: Tid::new("sprintf"),
            addresses: vec![String::from("UNKNOWN")],
            name: String::from("sprintf"),
            calling_convention: Some(String::from("__stdcall")),
            parameters,
            return_values,
            no_return: false,
        }
    }
}
/// A collection of mock objects shared by the tests of this module.
struct Setup {
// The mock project built by `Setup::new`.
project: Project,
// The taint state and the pointer inference state returned by `State::mock_with_pi_state`.
state: State,
pi_state: PointerInferenceState,
// The extern symbol returned by `ExternSymbol::mock_string`.
string_sym: ExternSymbol,
// A call to `system` without a return target.
taint_source: Term<Jmp>,
// Pointers into the stack frame of `pi_state` with the offsets -8 and -16.
base_eight_offset: DataDomain<BitvectorDomain>,
base_sixteen_offset: DataDomain<BitvectorDomain>,
}
impl Setup {
    /// Creates the mock objects for the tests.
    ///
    /// The mock project contains one entry point subroutine `func` with two blocks.
    /// The first block assigns `RSP` to `RBP` and `RBP - 8` to `RDI`
    /// and ends with a call to `sprintf` that returns to the second, empty block.
    fn new() -> Self {
        let (state, pi_state) = State::mock_with_pi_state();
        let stack_id = pi_state.stack_id.clone();
        let taint_source = Term {
            tid: Tid::new("taint_source"),
            term: Jmp::Call {
                target: Tid::new("system"),
                return_: None,
            },
        };
        let mut project = Project::mock_empty();
        let mut sub = Sub::mock("func");
        let mut block1 = mock_block("block1");
        let block2 = mock_block("block2");
        let def1 = Def::assign(
            "def1",
            Variable::mock("RBP", 8_u64),
            Expression::var("RSP"),
        );
        let def2 = Def::assign(
            "def2",
            Variable::mock("RDI", 8_u64),
            Expression::var("RBP").plus_const(-8),
        );
        let jump = Jmp::call("call_string", "sprintf", Some("block2"));
        block1.term.defs.push(def1);
        block1.term.defs.push(def2);
        block1.term.jmps.push(jump);
        sub.term.blocks.push(block1);
        sub.term.blocks.push(block2);
        project
            .program
            .term
            .extern_symbols
            .push(ExternSymbol::mock_string());
        project
            .program
            .term
            .extern_symbols
            .push(ExternSymbol::mock());
        project.program.term.subs.push(sub);
        project.program.term.entry_points.push(Tid::new("func"));
        project.calling_conventions.push(CallingConvention::mock());
        Setup {
            project,
            state,
            pi_state,
            string_sym: ExternSymbol::mock_string(),
            taint_source,
            base_eight_offset: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-8))),
            base_sixteen_offset: Data::Pointer(PointerDomain::new(stack_id, bv(-16))),
        }
    }
}
impl<'a> Context<'a> {
    /// Creates a context for tests.
    ///
    /// The fifth argument of `Context::new` is an empty map
    /// and the receiving end of the CWE channel is discarded right away.
    fn mock(
        project: &'a Project,
        string_symbols: HashMap<Tid, &'a ExternSymbol>,
        pi_results: &'a PointerInferenceComputation<'a>,
        mem_image: &'a RuntimeMemoryImage,
    ) -> Self {
        let empty_map = HashMap::new();
        let (cwe_sender, _) = crossbeam_channel::unbounded();
        Context::new(
            project,
            mem_image,
            pi_results,
            string_symbols,
            empty_map,
            cwe_sender,
        )
    }
}
/// `set_taint_source` must record the taint source jump, its name and the sub containing it.
#[test]
fn setting_taint_source() {
    let setup = Setup::new();
    let current_sub = Sub::mock("func");
    // A single memory image is shared by the pointer inference mock and the context.
    // (Previously a second, shadowing image was created for the context.)
    let mem_image = RuntimeMemoryImage::mock();
    let pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    let mut context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    context.set_taint_source(&setup.taint_source, &String::from("system"), &current_sub);

    assert_eq!(context.taint_source, Some(&setup.taint_source));
    assert_eq!(context.taint_source_name, Some(String::from("system")));
    assert_eq!(context.taint_source_sub, Some(&current_sub));
}
/// After tainting the parameters of the string function, the test expects the memory taint
/// at stack offset -8 to be gone, the taint at offset -16 to be kept
/// and the registers RDI, RSI and RBP to be tainted.
#[test]
fn tainting_string_function_parameters() {
    let mut setup = Setup::new();
    let rbp_reg = Variable::mock("RBP", 8_u64); // callee saved -> will point to RSP
    let rdi_reg = Variable::mock("RDI", 8_u64); // parameter 1 -> will point to RBP - 8
    let rsi_reg = Variable::mock("RSI", 8_u64); // parameter 2

    let eight_bytes = Taint::Tainted(ByteSize::new(8));
    setup
        .state
        .set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, eight_bytes);
    setup
        .state
        .save_taint_to_memory(&setup.base_sixteen_offset, eight_bytes);

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let call_key = (Tid::new("call_string"), Tid::new("func"));
    let call_node = *context.jmp_to_blk_end_node_map.get(&call_key).unwrap();
    let new_state =
        context.taint_string_function_parameters(&setup.state, &setup.string_sym, call_node);

    assert!(new_state.check_if_address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
    assert!(!new_state.check_if_address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
    for reg in [&rdi_reg, &rsi_reg, &rbp_reg].iter() {
        assert_eq!(
            new_state.get_register_taint(reg),
            Some(&Taint::Tainted(reg.size))
        );
    }
}
/// If RDI points to tainted memory, the check is expected to succeed
/// and the memory taint at the pointed-to address is expected to be removed afterwards.
#[test]
fn first_param_pointing_to_memory_taint() {
    let mut setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", 8_u64);

    let eight_bytes = Taint::Tainted(ByteSize::new(8));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, eight_bytes);
    let rdi_target = setup.base_eight_offset.clone();
    setup.pi_state.set_register(&rdi_reg, rdi_target);

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let first_param = Arg::Register(rdi_reg);
    assert!(context.first_param_points_to_memory_taint(
        &setup.pi_state,
        &mut setup.state,
        &first_param
    ));
    assert!(!setup
        .state
        .check_if_address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
}
/// Exercises `taint_generic_function_parameters_and_remove_non_callee_saved`
/// once with a registered string symbol and once with a generic extern symbol.
#[test]
fn tainting_generic_function_parameters_and_removing_non_callee_saved() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    let rbp_reg = Variable::mock("RBP", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let rsi_reg = Variable::mock("RSI", 8_u64);
    let rax_reg = Variable::mock("RAX", 8_u64);

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();

    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    for reg in [&rbp_reg, &r9_reg].iter() {
        setup.state.set_register_taint(reg, Taint::Tainted(reg.size));
    }

    let mut string_syms: HashMap<Tid, &ExternSymbol> = HashMap::new();
    string_syms.insert(Tid::new("sprintf"), &setup.string_sym);
    let context = Context::mock(&setup.project, string_syms, &pi_results, &mem_image);
    let call_key = (Tid::new("call_string"), Tid::new("func"));
    let call_node = *context.jmp_to_blk_end_node_map.get(&call_key).unwrap();

    // Test Case 1: String Symbol
    let mut new_state = context.taint_generic_function_parameters_and_remove_non_callee_saved(
        &setup.state,
        &ExternSymbol::mock_string(),
        call_node,
    );
    // Parameters (RDI, RSI) and the callee saved register (RBP) are expected to be tainted
    for reg in [&rdi_reg, &rsi_reg, &rbp_reg].iter() {
        assert_eq!(
            new_state.get_register_taint(reg),
            Some(&Taint::Tainted(reg.size))
        );
    }
    // Non Callee Saved
    for reg in [&r9_reg, &rax_reg].iter() {
        assert_eq!(new_state.get_register_taint(reg), None);
    }

    new_state.remove_all_register_taints();
    for reg in [&rbp_reg, &r9_reg, &rax_reg].iter() {
        new_state.set_register_taint(reg, Taint::Tainted(reg.size));
    }

    // Test Case 2: Other Extern Symbol
    new_state = context.taint_generic_function_parameters_and_remove_non_callee_saved(
        &new_state,
        &ExternSymbol::mock(),
        call_node,
    );
    // Parameter (RDI) and the callee saved register (RBP) are expected to be tainted
    for reg in [&rdi_reg, &rbp_reg].iter() {
        assert_eq!(
            new_state.get_register_taint(reg),
            Some(&Taint::Tainted(reg.size))
        );
    }
    // Non Callee Saved
    for reg in [&r9_reg, &rax_reg, &rsi_reg].iter() {
        assert_eq!(new_state.get_register_taint(reg), None);
    }

    // TODO: add test for scanf when parameter detection is implemented
}
/// Tainting a stack parameter at offset 4 is expected to taint the stack object at offset 4.
#[test]
fn tainting_stack_parameters() {
    let setup = Setup::new();
    let offset: i64 = 4;
    let size = ByteSize::new(8);
    let stack_id = setup.pi_state.stack_id.clone();

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let call_key = (Tid::new("call_string"), Tid::new("func"));
    let call_source_node = context.jmp_to_blk_end_node_map.get(&call_key).unwrap();
    let new_state =
        context.taint_stack_parameters(setup.state, call_source_node.clone(), offset, size);

    let parameter_address = Data::Pointer(PointerDomain::new(stack_id.clone(), bv(4)));
    assert!(new_state.check_if_address_points_to_taint(parameter_address, &setup.pi_state));
}
/// Register parameters are expected to end up as register taint
/// and the stack parameter as memory taint at stack offset 4.
#[test]
fn tainting_parameters() {
    let setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let rsi_reg = Variable::mock("RSI", 8_u64);
    let stack_param = Arg::Stack {
        offset: 4,
        size: ByteSize::new(8),
    };
    let params = vec![
        Arg::Register(rdi_reg.clone()),
        Arg::Register(rsi_reg.clone()),
        stack_param,
    ];
    let stack_id = setup.pi_state.stack_id.clone();

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let call_key = (Tid::new("call_string"), Tid::new("func"));
    let call_source_node = context.jmp_to_blk_end_node_map.get(&call_key).unwrap();
    let new_state = context.taint_parameters(&setup.state, params, call_source_node.clone());

    for reg in [&rdi_reg, &rsi_reg].iter() {
        assert_eq!(
            new_state.get_register_taint(reg),
            Some(&Taint::Tainted(reg.size))
        );
    }
    let parameter_address = Data::Pointer(PointerDomain::new(stack_id.clone(), bv(4)));
    assert!(new_state.check_if_address_points_to_taint(parameter_address, &setup.pi_state));
}
/// Checks the value of RDI in the pointer inference states recorded for `def1` and `def2`.
///
/// Entries for other defs are ignored. Note that nothing is asserted for a def
/// that is missing from the map.
#[test]
fn creating_pi_def_map() {
    let setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let def1 = Tid::new("def1");
    let def2 = Tid::new("def2");
    let stack_id = setup.pi_state.stack_id.clone();

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let current_sub = setup.project.program.term.subs.get(0).unwrap();
    let last_def_key = (def2.clone(), current_sub.tid.clone());
    let start_node = context.block_start_last_def_map.get(&last_def_key).unwrap();
    let pi_def_map = context.create_pi_def_map(start_node.clone()).unwrap();

    for (def_tid, pi_state) in pi_def_map.iter() {
        let expected_rdi = if *def_tid == def1 {
            // RDI has not been assigned yet
            Data::new_top(rdi_reg.size)
        } else if *def_tid == def2 {
            // RDI = RBP - 8 with RBP = RSP
            Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-8)))
        } else {
            continue;
        };
        assert_eq!(pi_state.get_register(&rdi_reg).unwrap(), expected_rdi);
    }
}
/// Only `def2`, which is registered in `block_start_last_def_map`,
/// is expected to yield the block start node; `def1` is expected to yield `None`.
#[test]
fn getting_blk_start_node_if_last_def() {
    let mut setup = Setup::new();
    let first_def = Def::assign(
        "def1",
        Variable::mock("RBP", 8_u64),
        Expression::var("RSP"),
    );
    let last_def = Def::assign(
        "def2",
        Variable::mock("RDI", 8_u64),
        Expression::var("RBP").plus_const(-8),
    );

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let current_sub = setup.project.program.term.subs.get(0).unwrap();
    setup.state.set_current_sub(current_sub);
    let last_def_key = (last_def.tid.clone(), current_sub.tid.clone());
    let start_node = context.block_start_last_def_map.get(&last_def_key).unwrap();

    let node_for_first_def = context.get_blk_start_node_if_last_def(&setup.state, &first_def);
    assert_eq!(node_for_first_def, None);
    let node_for_last_def = context.get_blk_start_node_if_last_def(&setup.state, &last_def);
    assert_eq!(node_for_last_def, Some(start_node.clone()));
}
/// The source node of the call `call_string` is expected to be
/// the node stored in `jmp_to_blk_end_node_map` for that call in the current sub.
#[test]
fn getting_source_node() {
    let mut setup = Setup::new();
    let call_tid = Tid::new("call_string");

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let current_sub = setup.project.program.term.subs.get(0).unwrap();
    setup.state.set_current_sub(current_sub);

    let call_key = (call_tid.clone(), current_sub.tid.clone());
    let expected_node = context.jmp_to_blk_end_node_map.get(&call_key).unwrap();
    let source_node = context.get_source_node(&setup.state, &call_tid);
    assert_eq!(source_node, *expected_node);
}
/// Covers `update_target_state_for_callsite` without any state,
/// with only a target state, and with both a return and a target state.
#[test]
fn updating_target_state_for_callsite() {
    let mut setup = Setup::new();
    let caller_sub = Sub::mock("caller");
    let r9_reg = Variable::mock("R9", 8_u64);
    let rbp_reg = Variable::mock("RBP", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
    let mut return_state = setup.state.clone();

    // Test Case 1: No target state
    let without_states = context.update_target_state_for_callsite(None, None, &caller_sub);
    assert_eq!(without_states, None);

    // Test Case 2: Target state but no return state
    for reg in [&r9_reg, &rdi_reg].iter() {
        setup.state.set_register_taint(reg, Taint::Tainted(reg.size));
    }
    let new_state = context
        .update_target_state_for_callsite(None, Some(&setup.state), &caller_sub)
        .unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
    assert_eq!(new_state.get_current_sub().as_ref(), Some(&caller_sub));

    // Test Case 3: Target state and return state
    return_state.set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));
    let new_state = context
        .update_target_state_for_callsite(Some(&return_state), Some(&setup.state), &caller_sub)
        .unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    for reg in [&rdi_reg, &rbp_reg].iter() {
        assert_eq!(
            new_state.get_register_taint(reg),
            Some(&Taint::Tainted(reg.size))
        );
    }
    assert_eq!(new_state.get_current_sub().as_ref(), Some(&caller_sub));
}
/// Runs `update_def` on a register assignment, an assignment of the stack pointer and a load,
/// each time starting with a tainted R9 as the definition target.
///
/// The test cases build on each other: `new_state` is carried over and modified between them.
#[test]
fn handling_assign_and_load() {
let mut setup = Setup::new();
let r9_reg = Variable::mock("R9", 8 as u64);
let rdi_reg = Variable::mock("RDI", 8 as u64);
// R9 = RDI
let mock_assign_register = Def::assign(
"assign",
Variable::mock("R9", 8 as u64),
Expression::var("RDI"),
);
// R9 = RSP
let mock_assign_stack = Def::assign(
"stack_assign",
Variable::mock("R9", 8 as u64),
Expression::var("RSP"),
);
// R9 = load from the address in RDI
let mock_load = Def::load(
"load",
Variable::mock("R9", 8 as u64),
Expression::var("RDI"),
);
let mut pi_map: HashMap<Tid, PointerInferenceState> = HashMap::new();
let stack_id = setup.pi_state.stack_id.clone();
let mem_image = RuntimeMemoryImage::mock();
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let current_sub = setup.project.program.term.subs.get(0).unwrap();
setup.state.set_current_sub(current_sub);
// Test Case: Assign R9 Register. The taint is expected to move from R9 to RDI.
setup
.state
.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
let mut new_state = context
.update_def(&setup.state, &mock_assign_register)
.unwrap();
assert_eq!(new_state.get_register_taint(&r9_reg), None);
assert_eq!(
new_state.get_register_taint(&rdi_reg),
Some(&Taint::Tainted(rdi_reg.size))
);
// Test Case: None State
// With all register taints removed, `update_def` is expected to return `None`.
new_state.remove_all_register_taints();
assert_eq!(context.update_def(&new_state, &mock_assign_register), None);
// Test Case: Assign RSP Register
// The pointer inference state for the def is provided through the map.
pi_map.insert(Tid::new("stack_assign"), setup.pi_state.clone());
new_state.set_pointer_inference_map(pi_map.clone());
new_state.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
new_state = context.update_def(&new_state, &mock_assign_stack).unwrap();
assert_eq!(new_state.get_register_taint(&r9_reg), None);
// The taint is expected at offset 0 of the stack object.
assert_eq!(
new_state.check_if_address_points_to_taint(
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(0))),
&setup.pi_state
),
true
);
// Test Case: Load
new_state.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
new_state = context.update_def(&new_state, &mock_load).unwrap();
assert_eq!(new_state.get_register_taint(&r9_reg), None);
assert_eq!(
new_state.get_register_taint(&rdi_reg),
Some(&Taint::Tainted(rdi_reg.size))
);
}
/// Runs `update_def` on a register assignment, an assignment of the stack pointer,
/// a load and a store.
///
/// The test cases build on each other: `new_state` is carried over and modified between them.
#[test]
fn updating_def() {
let mut setup = Setup::new();
let r9_reg = Variable::mock("R9", 8 as u64);
let rdi_reg = Variable::mock("RDI", 8 as u64);
// R9 = RDI
let mock_assign_register = Def::assign(
"assign",
Variable::mock("R9", 8 as u64),
Expression::var("RDI"),
);
// R9 = RSP
let mock_assign_stack = Def::assign(
"stack_assign",
Variable::mock("R9", 8 as u64),
Expression::var("RSP"),
);
// R9 = load from the address in RDI
let mock_load = Def::load(
"load",
Variable::mock("R9", 8 as u64),
Expression::var("RDI"),
);
// Store built from the expressions R9 and RDI; the store case below lets R9 point to the stack.
let mock_store = Def::store("store", Expression::var("R9"), Expression::var("RDI"));
let mut pi_map: HashMap<Tid, PointerInferenceState> = HashMap::new();
let stack_id = setup.pi_state.stack_id.clone();
let mem_image = RuntimeMemoryImage::mock();
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let current_sub = setup.project.program.term.subs.get(0).unwrap();
setup.state.set_current_sub(current_sub);
// Test Case: Assign R9 Register. The taint is expected to move from R9 to RDI.
setup
.state
.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
let mut new_state = context
.update_def(&setup.state, &mock_assign_register)
.unwrap();
assert_eq!(new_state.get_register_taint(&r9_reg), None);
assert_eq!(
new_state.get_register_taint(&rdi_reg),
Some(&Taint::Tainted(rdi_reg.size))
);
// Test Case: None State
// With all register taints removed, `update_def` is expected to return `None`.
new_state.remove_all_register_taints();
assert_eq!(context.update_def(&new_state, &mock_assign_register), None);
// Test Case: Assign RSP Register
// The pointer inference state for the def is provided through the map.
pi_map.insert(Tid::new("stack_assign"), setup.pi_state.clone());
new_state.set_pointer_inference_map(pi_map.clone());
new_state.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
new_state = context.update_def(&new_state, &mock_assign_stack).unwrap();
assert_eq!(new_state.get_register_taint(&r9_reg), None);
// The taint is expected at offset 0 of the stack object.
assert_eq!(
new_state.check_if_address_points_to_taint(
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(0))),
&setup.pi_state
),
true
);
// Test Case: Load
new_state.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
new_state = context.update_def(&new_state, &mock_load).unwrap();
assert_eq!(new_state.get_register_taint(&r9_reg), None);
assert_eq!(
new_state.get_register_taint(&rdi_reg),
Some(&Taint::Tainted(rdi_reg.size))
);
// Test Case: Store
// The memory at stack offset -8 is tainted and R9 is made to point to it.
new_state.save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
setup
.pi_state
.set_register(&r9_reg, setup.base_eight_offset.clone());
new_state.set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &Tid::new("store"));
new_state = context.update_def(&new_state, &mock_store).unwrap();
// RDI is expected to be tainted and the memory taint at the store target to be removed.
assert_eq!(
new_state.get_register_taint(&rdi_reg),
Some(&Taint::Tainted(rdi_reg.size))
);
assert_eq!(
new_state.check_if_address_points_to_taint(setup.base_eight_offset, &setup.pi_state,),
false
);
}
/// Register and memory taint are both expected to survive `update_jumpsite`.
#[test]
fn updating_jumpsite() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let taken_jump = Jmp::branch("jump", "block2");
    let untaken_jump = Jmp::branch("jump", "block2");
    let target_block = Blk::mock();
    let mut new_state = context
        .update_jumpsite(&setup.state, &taken_jump, Some(&untaken_jump), &target_block)
        .unwrap();

    // Provide a pointer inference state so that the memory taint can be looked up
    let mut pi_map: HashMap<Tid, PointerInferenceState> = HashMap::new();
    pi_map.insert(Tid::new("initial"), setup.pi_state);
    new_state.set_pointer_inference_map(pi_map);

    assert_eq!(
        new_state.get_register_taint(&r9_reg),
        Some(&Taint::Tainted(r9_reg.size))
    );
    let initial_pi_state = new_state
        .get_pointer_inference_state_at_def(&Tid::new("initial"))
        .unwrap();
    assert!(new_state.check_if_address_points_to_taint(setup.base_eight_offset, initial_pi_state));
}
/// Covers `update_callsite` without any state, with only a return state,
/// and with both a return and a target state.
#[test]
fn updating_callsite() {
    let mut setup = Setup::new();
    let mut return_state: Option<&State> = None;
    let mut target_state: Option<&State> = None;
    let jump_term = Jmp::call("call_string", "sprintf", Some("block2"));
    let r9_reg = Variable::mock("R9", 8 as u64);
    let rbp_reg = Variable::mock("RBP", 8 as u64);
    let rdi_reg = Variable::mock("RDI", 8 as u64);
    let rax_reg = Variable::mock("RAX", 8 as u64);
    let caller_sub = Sub::mock("caller");
    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    // Test Case: No return state
    assert_eq!(
        context.update_callsite(
            target_state,
            return_state,
            &caller_sub,
            &jump_term,
            &jump_term
        ),
        None
    );

    // Test Case: Return state but no target state
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));
    // The return state is cloned since `setup.state` gets modified for the next test case.
    let cloned_state = setup.state.clone();
    return_state = Some(&cloned_state);
    let mut new_state = context
        .update_callsite(
            target_state,
            return_state,
            &caller_sub,
            &jump_term,
            &jump_term,
        )
        .unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );

    // Test Case: Return and target state
    setup.state.remove_all_register_taints();
    setup
        .state
        .set_register_taint(&rdi_reg, Taint::Tainted(rdi_reg.size));
    setup
        .state
        .set_register_taint(&rax_reg, Taint::Tainted(rax_reg.size));
    target_state = Some(&setup.state);
    new_state = context
        .update_callsite(
            target_state,
            return_state,
            &caller_sub,
            &jump_term,
            &jump_term,
        )
        .unwrap();
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(new_state.get_register_taint(&rax_reg), None);
    // Each register is compared against a taint of its own size.
    // (The sizes of RBP and RDI were swapped here before, which only passed because both are 8 bytes.)
    assert_eq!(
        new_state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
    assert_eq!(
        new_state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
}
/// Register and memory taint are both expected to survive `split_call_stub`.
#[test]
fn splitting_call_stub() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let mut new_state = context.split_call_stub(&setup.state).unwrap();

    // Set pi_state to check for memory pointers
    let mut pi_map: HashMap<Tid, PointerInferenceState> = HashMap::new();
    pi_map.insert(Tid::new("initial"), setup.pi_state);
    new_state.set_pointer_inference_map(pi_map);

    assert_eq!(
        new_state.get_register_taint(&r9_reg),
        Some(&Taint::Tainted(r9_reg.size))
    );
    let initial_pi_state = new_state
        .get_pointer_inference_state_at_def(&Tid::new("initial"))
        .unwrap();
    assert!(new_state.check_if_address_points_to_taint(setup.base_eight_offset, initial_pi_state));
}
/// After `split_return_stub` the taint of R9 is expected to be gone,
/// while the taint of RAX and the memory taint are expected to be kept.
#[test]
fn splitting_return_stub() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64);
    let rax_reg = Variable::mock("RAX", 8_u64);
    let called_sub = Sub::mock("called");
    for reg in [&r9_reg, &rax_reg].iter() {
        setup.state.set_register_taint(reg, Taint::Tainted(reg.size));
    }
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    let mut new_state = context
        .split_return_stub(&setup.state, &called_sub)
        .unwrap();

    // Set pi_state to check for memory pointers
    let mut pi_map: HashMap<Tid, PointerInferenceState> = HashMap::new();
    pi_map.insert(Tid::new("initial"), setup.pi_state);
    new_state.set_pointer_inference_map(pi_map);

    assert_eq!(new_state.get_register_taint(&r9_reg), None);
    assert_eq!(
        new_state.get_register_taint(&rax_reg),
        Some(&Taint::Tainted(rax_reg.size))
    );
    let initial_pi_state = new_state
        .get_pointer_inference_state_at_def(&Tid::new("initial"))
        .unwrap();
    assert!(new_state.check_if_address_points_to_taint(setup.base_eight_offset, initial_pi_state));
}
/// Runs `update_call_stub` for a call to `sprintf`, which is registered as a string symbol.
#[test]
fn updating_call_stub() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8_u64); // non callee saved
    let rbp_reg = Variable::mock("RBP", 8_u64);
    let rdi_reg = Variable::mock("RDI", 8_u64);
    let rsi_reg = Variable::mock("RSI", 8_u64);
    let mock_call = Jmp::call("call_string", "sprintf", Some("block2"));

    for reg in [&r9_reg, &rbp_reg].iter() {
        setup.state.set_register_taint(reg, Taint::Tainted(reg.size));
    }
    let eight_bytes = Taint::Tainted(ByteSize::new(8));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, eight_bytes);
    setup
        .state
        .save_taint_to_memory(&setup.base_sixteen_offset, eight_bytes);

    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();

    let sprintf = ExternSymbol::mock_string();
    let mut string_symbols: HashMap<Tid, &ExternSymbol> = HashMap::new();
    string_symbols.insert(Tid::new("sprintf"), &sprintf);
    let context = Context::mock(&setup.project, string_symbols, &pi_results, &mem_image);

    let current_sub = Sub::mock("func");
    setup.state.set_current_sub(&current_sub);
    let new_state = context.update_call_stub(&setup.state, &mock_call).unwrap();

    assert!(new_state.check_if_address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
    assert!(!new_state.check_if_address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
    for reg in [&rdi_reg, &rsi_reg, &rbp_reg].iter() {
        assert_eq!(
            new_state.get_register_taint(reg),
            Some(&Taint::Tainted(reg.size))
        );
    }
    assert_eq!(new_state.get_register_taint(&r9_reg), None);
}
/// `specialize_conditional` returns a copy of the input state,
/// so register and memory taint must both be preserved.
#[test]
fn specializing_conditional() {
    let mut setup = Setup::new();
    let r9_reg = Variable::mock("R9", 8 as u64);
    setup
        .state
        .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    // Call the function under test.
    // (This test used to call `split_call_stub`, leaving `specialize_conditional` untested.)
    // The condition and its truth value are ignored by the implementation.
    let mut new_state = context
        .specialize_conditional(&setup.state, &Expression::var("RDI"), true)
        .unwrap();

    // Set pi_state to check for memory pointers
    let mut pi_map: HashMap<Tid, PointerInferenceState> = HashMap::new();
    pi_map.insert(Tid::new("initial"), setup.pi_state);
    new_state.set_pointer_inference_map(pi_map);
    assert_eq!(
        new_state.get_register_taint(&r9_reg),
        Some(&Taint::Tainted(r9_reg.size))
    );
    assert_eq!(
        new_state.check_if_address_points_to_taint(
            setup.base_eight_offset,
            new_state
                .get_pointer_inference_state_at_def(&Tid::new("initial"))
                .unwrap()
        ),
        true
    );
}
use std::collections::{HashMap, HashSet};
use crate::{
abstract_domain::{
AbstractDomain, AbstractIdentifier, BitvectorDomain, MemRegion, SizedDomain,
},
analysis::pointer_inference::{Data, State as PointerInferenceState},
checkers::cwe_476::Taint,
intermediate_representation::{
Arg, CallingConvention, Expression, ExternSymbol, Project, Sub, Variable,
},
prelude::*,
};
/// The taint state tracked by the analysis:
/// tainted registers, tainted memory and collected constants,
/// plus bookkeeping data (`pi_def_map`, `current_sub`) that is not part of state equality.
#[derive(Serialize, Deserialize, Debug, Eq, Clone)]
pub struct State {
/// The set of currently tainted registers.
register_taint: HashMap<Variable, Taint>,
/// The Taint contained in memory objects
memory_taint: HashMap<AbstractIdentifier, MemRegion<Taint>>,
/// The set of addresses in the binary where string constants reside.
/// Filled through `evaluate_constant`, which currently stores every constant it is given.
string_constants: HashSet<Bitvector>,
/// A map from Def Tids to their corresponding pointer inference state.
/// The pointer inference states are calculated in a forward manner
/// from the BlkStart node when entering a BlkEnd node through a jump.
///
/// The map is skipped on serialization and ignored by the equality operator.
#[serde(skip_serializing)]
pi_def_map: Option<HashMap<Tid, PointerInferenceState>>,
/// Holds the currently analyzed subroutine term.
/// This field is also ignored by the equality operator.
current_sub: Option<Term<Sub>>,
}
impl PartialEq for State {
/// Two states are equal if the same values are tainted in both states.
///
/// The equality operator ignores the `pi_def_map` field,
/// since it only denotes an intermediate value.
fn eq(&self, other: &Self) -> bool {
self.register_taint == other.register_taint
&& self.memory_taint == other.memory_taint
&& self.string_constants == other.string_constants
}
}
impl AbstractDomain for State {
/// Merge two states.
/// Any value tainted in at least one input state is also tainted in the merged state.
///
/// The used algorithm for merging the taints contained in memory regions is unsound
/// when merging taints that intersect only partially.
/// However, this should not have an effect in practice,
/// since these values are usually unsound and unused by the program anyway.
fn merge(&self, other: &Self) -> Self {
let mut register_taint = self.register_taint.clone();
for (var, other_taint) in other.register_taint.iter() {
if let Some(taint) = self.register_taint.get(var) {
register_taint.insert(var.clone(), taint.merge(other_taint));
} else {
register_taint.insert(var.clone(), *other_taint);
}
}
let mut memory_taint = self.memory_taint.clone();
for (tid, other_mem_region) in other.memory_taint.iter() {
if let Some(mem_region) = memory_taint.get_mut(tid) {
for (index, taint) in other_mem_region.iter() {
mem_region.insert_at_byte_index(*taint, *index);
// Unsound in theory for partially intersecting taints. Should not matter in practice.
}
} else {
memory_taint.insert(tid.clone(), other_mem_region.clone());
}
}
let constants = self.string_constants.clone();
constants.union(&other.string_constants);
State {
register_taint,
memory_taint,
string_constants: constants,
pi_def_map: None, // At nodes this intermediate value can be safely forgotten.
current_sub: self.current_sub.clone(),
}
}
/// The state has no explicit Top element.
fn is_top(&self) -> bool {
false
}
}
impl State {
/// Creates a state in which exactly the parameters of the given extern symbol are tainted.
///
/// Register parameters are tainted directly.
/// Stack parameters are only tainted if a pointer inference state is provided
/// and the parameter address (stack pointer plus offset) can be evaluated in it.
pub fn new(
    taint_source: &ExternSymbol,
    stack_pointer_register: &Variable,
    pi_state: Option<&PointerInferenceState>,
    current_sub: &Term<Sub>,
) -> State {
    let mut state = State {
        register_taint: HashMap::new(),
        memory_taint: HashMap::new(),
        string_constants: HashSet::new(),
        pi_def_map: None,
        current_sub: Some(current_sub.clone()),
    };
    for parameter in &taint_source.parameters {
        match (parameter, pi_state) {
            (Arg::Register(var), _) => {
                state
                    .register_taint
                    .insert(var.clone(), Taint::Tainted(var.size));
            }
            (Arg::Stack { offset, size }, Some(pi_state)) => {
                let stack_pointer = Expression::Var(stack_pointer_register.clone());
                let address_exp = stack_pointer.plus_const(*offset);
                if let Ok(address) = pi_state.eval(&address_exp) {
                    state.save_taint_to_memory(&address, Taint::Tainted(*size));
                }
            }
            // Without pointer inference state the address of a stack parameter is unknown.
            (Arg::Stack { .. }, None) => {}
        }
    }
    state
}
/// Mark the value at the given address with the given taint.
///
/// If the address has exactly one target, the taint is written to that target as is.
/// If the address points to more than one object,
/// the taint is merged with the taint already present at each target,
/// possibly tainting all possible targets.
///
/// Addresses that are not pointers and targets without an exact offset are skipped.
pub fn save_taint_to_memory(&mut self, address: &Data, taint: Taint) {
    let pointer = match address {
        Data::Pointer(pointer) => pointer,
        _ => return,
    };
    let has_unique_target = pointer.targets().len() == 1;
    for (mem_id, offset) in pointer.targets().iter() {
        let position = match offset {
            BitvectorDomain::Value(position) => position,
            _ => continue,
        };
        if let Some(mem_region) = self.memory_taint.get_mut(mem_id) {
            let new_taint = if has_unique_target {
                taint
            } else {
                let old_taint = mem_region.get(position.clone(), taint.bytesize());
                old_taint.merge(&taint)
            };
            mem_region.add(new_taint, position.clone());
        } else {
            // First taint for this memory object
            let mut mem_region = MemRegion::new(address.bytesize());
            mem_region.add(taint, position.clone());
            self.memory_taint.insert(mem_id.clone(), mem_region);
        }
    }
}
/// Returns the sub of the currently analysed nodes.
pub fn get_current_sub(&self) -> &Option<Term<Sub>> {
&self.current_sub
}
/// Stores a copy of the given subroutine as the current one to locate the analysis.
pub fn set_current_sub(&mut self, current_sub: &Term<Sub>) {
    let sub = current_sub.clone();
    self.current_sub = Some(sub);
}
/// Replace the map from def TIDs to pointer inference states used by this state.
pub fn set_pi_def_map(&mut self, pi_def_map: Option<HashMap<Tid, PointerInferenceState>>) {
    // The previously stored map (if any) is simply discarded.
    drop(std::mem::replace(&mut self.pi_def_map, pi_def_map));
}
/// Look up the taint recorded for `var`; returns `None` if the register has no entry.
pub fn get_register_taint(&self, var: &Variable) -> Option<&Taint> {
    let taint_map = &self.register_taint;
    taint_map.get(var)
}
/// Iterate over all `(register, taint)` entries currently stored in the state.
pub fn get_register_taints(&self) -> std::collections::hash_map::Iter<Variable, Taint> {
    let taint_map = &self.register_taint;
    taint_map.iter()
}
/// Record `constant` in the `string_constants` field of the state.
///
/// Resolving the constant to an actual string in the binary is not implemented yet (see TODO).
pub fn evaluate_constant(&mut self, constant: Bitvector) {
    // TODO: check whether the constant is a valid memory address in the binary
    // If so, get the string constant at that memory address and save it in the state
    let known_constants = &mut self.string_constants;
    known_constants.insert(constant);
}
/// Propagate taint through an assignment `result = expression`.
///
/// The taint on `result` is always removed first. Afterwards, depending on the expression:
/// - constants are recorded via `evaluate_constant`,
/// - a plain variable is tainted via `taint_variable_input`,
/// - for binary operations the value of `result` is looked up in the pointer inference state
///   of `def_tid` and, if available, the memory it points to is tainted,
/// - unary operations, casts and subpieces taint the inputs of their argument,
/// - all other expressions are ignored.
pub fn set_expression_taint_and_store_constants(
    &mut self,
    def_tid: &Tid,
    result: &Variable,
    expression: &Expression,
    stack_pointer_register: &Variable,
) {
    self.remove_register_taint(result);
    match expression {
        Expression::Const(constant) => self.evaluate_constant(constant.clone()),
        Expression::Var(var) => self.taint_variable_input(var, stack_pointer_register, def_tid),
        Expression::UnOp { arg, .. }
        | Expression::Cast { arg, .. }
        | Expression::Subpiece { arg, .. } => {
            self.taint_def_input_register(arg, stack_pointer_register, def_tid)
        }
        Expression::BinOp { .. } => {
            // Value of the result register according to the pointer inference state of this def.
            let result_value = self
                .pi_def_map
                .as_ref()
                .and_then(|pid_map| pid_map.get(def_tid))
                .and_then(|pi_state| pi_state.get_register(result).ok());
            if let Some(address) = result_value {
                self.save_taint_to_memory(&address, Taint::Tainted(result.size));
            }
        }
        _ => (),
    }
}
/// Handle a store of `value` to the address `target`.
///
/// If the pointer inference state of `def_tid` can evaluate the target address
/// and that address points to taint, the inputs of `value` get tainted
/// and the memory taint at the target address is removed.
/// In all other cases the state is left unchanged.
pub fn taint_value_to_be_stored(
    &mut self,
    def_tid: &Tid,
    target: &Expression,
    value: &Expression,
    stack_pointer_register: &Variable,
) {
    let pi_state = match self
        .pi_def_map
        .as_ref()
        .and_then(|pid_map| pid_map.get(def_tid))
    {
        Some(pi_state) => pi_state,
        None => return,
    };
    let address = match pi_state.eval(target) {
        Ok(address) => address,
        Err(_) => return,
    };
    if !self.check_if_address_points_to_taint(address.clone(), pi_state) {
        return;
    }
    self.taint_def_input_register(value, stack_pointer_register, def_tid);
    self.remove_mem_taint_at_target(&address);
}
/// Taint every input of `expr`.
///
/// The expression tree is walked down to its leaves: variables are handled by
/// `taint_variable_input`, constants by `evaluate_constant`.
/// Expression kinds not listed in the match are ignored.
pub fn taint_def_input_register(
    &mut self,
    expr: &Expression,
    stack_pointer_register: &Variable,
    def_tid: &Tid,
) {
    let mut current = expr;
    loop {
        match current {
            // TODO: Distinguish integer constants from global addresses in evaluate constant
            Expression::Const(constant) => {
                self.evaluate_constant(constant.clone());
                return;
            }
            Expression::Var(var) => {
                self.taint_variable_input(var, stack_pointer_register, def_tid);
                return;
            }
            Expression::BinOp { lhs, rhs, .. } => {
                // Left operand first (recursively), then continue with the right operand.
                self.taint_def_input_register(lhs, stack_pointer_register, def_tid);
                current = &**rhs;
            }
            Expression::UnOp { arg, .. }
            | Expression::Cast { arg, .. }
            | Expression::Subpiece { arg, .. } => current = &**arg,
            _ => return,
        }
    }
}
/// Taint the input variable `var`.
///
/// A variable whose name differs from the stack pointer register is tainted as a register.
/// For the stack pointer itself, the memory it points to (according to the pointer inference
/// state of `def_tid`, if there is one) is tainted instead.
pub fn taint_variable_input(
    &mut self,
    var: &Variable,
    stack_pointer_register: &Variable,
    def_tid: &Tid,
) {
    if var.name != stack_pointer_register.name {
        self.set_register_taint(var, Taint::Tainted(var.size));
        return;
    }
    let stack_address = self
        .pi_def_map
        .as_ref()
        .and_then(|pid_map| pid_map.get(def_tid))
        .and_then(|pi_state| pi_state.get_register(stack_pointer_register).ok());
    if let Some(address) = stack_address {
        self.save_taint_to_memory(&address, Taint::Tainted(stack_pointer_register.size));
    }
}
/// Remove the memory taint at all exactly known target positions of the pointer `address`.
///
/// Targets without a taint region or without an exact offset are skipped.
/// The number of removed bytes equals the size of the taint found at the position.
pub fn remove_mem_taint_at_target(&mut self, address: &Data) {
    let pointer = match address {
        Data::Pointer(pointer) => pointer,
        _ => return,
    };
    for (mem_id, offset) in pointer.targets().iter() {
        let position = match offset {
            BitvectorDomain::Value(position) => position.clone(),
            _ => continue,
        };
        let mem_region = match self.memory_taint.get_mut(mem_id) {
            Some(mem_region) => mem_region,
            None => continue,
        };
        if let Some(taint) = mem_region.get_unsized(position.clone()) {
            let taint_size = Bitvector::from_u64(u64::from(taint.bytesize()));
            mem_region.remove(position, taint_size);
        }
    }
}
/// Store `taint` for `register`.
///
/// A *Top* taint is not stored; instead the entry of the register is removed.
pub fn set_register_taint(&mut self, register: &Variable, taint: Taint) {
    if !taint.is_top() {
        self.register_taint.insert(register.clone(), taint);
        return;
    }
    self.register_taint.remove(register);
}
/// Delete the taint entry of `register` (no-op if the register has no entry).
pub fn remove_register_taint(&mut self, register: &Variable) {
    let taint_map = &mut self.register_taint;
    taint_map.remove(register);
}
/// Check whether the taint region of the memory object `id` holds at least one tainted value.
/// Returns `false` if there is no taint region for the ID.
pub fn check_mem_id_for_taint(&self, id: &AbstractIdentifier) -> bool {
    match self.memory_taint.get(id) {
        Some(mem_object) => mem_object.values().any(|elem| elem.is_tainted()),
        None => false,
    }
}
/// Check whether `address` points to tainted memory.
///
/// For targets that the pointer inference state classifies as stack objects,
/// only the value at the exact target offset is inspected.
/// For every other target it suffices that the memory object contains
/// a tainted value at any position.
/// Returns `false` if `address` is not a pointer.
pub fn check_if_address_points_to_taint(
    &self,
    address: Data,
    pi_state: &PointerInferenceState,
) -> bool {
    use crate::analysis::pointer_inference::object::ObjectType;
    let pointer = match address {
        Data::Pointer(pointer) => pointer,
        _ => return false,
    };
    pointer.targets().iter().any(|(target, offset)| {
        match pi_state.memory.get_object_type(target) {
            // Only check if the value at the address is tainted
            Ok(Some(ObjectType::Stack)) => match (self.memory_taint.get(target), offset) {
                (Some(mem_object), BitvectorDomain::Value(target_offset)) => mem_object
                    .get_unsized(target_offset.clone())
                    .map_or(false, |taint| taint.is_tainted()),
                _ => false,
            },
            // Check whether the memory object contains any taint.
            _ => self.check_mem_id_for_taint(target),
        }
    })
}
/// Drop the taint of every register that is not a parameter register
/// of the standard calling convention of the project.
///
/// The actual calling convention of the call is unknown,
/// so the standard one serves as an approximation.
/// If the project has no standard calling convention, nothing is removed.
pub fn remove_non_parameter_taints_for_generic_function(&mut self, project: &Project) {
    if let Some(calling_conv) = project.get_standard_calling_convention() {
        let parameter_registers = &calling_conv.parameter_register;
        self.register_taint
            .retain(|register, _| parameter_registers.contains(&register.name));
    }
}
/// Keep only the taints of registers listed as callee-saved in `calling_conv`;
/// the taint of every other register is dropped.
pub fn remove_non_callee_saved_taint(&mut self, calling_conv: &CallingConvention) {
    let callee_saved = &calling_conv.callee_saved_register;
    self.register_taint
        .retain(|register, _| callee_saved.contains(&register.name));
}
/// Drop the taint of every register whose name is not contained in `return_registers`.
/// This clears the state on the return stub edge.
pub fn remove_all_except_return_register_taints(&mut self, return_registers: HashSet<String>) {
    self.register_taint
        .retain(|register, _| return_registers.contains(&register.name));
}
/// Returns `true` if the state holds neither memory taint regions nor register taints.
pub fn is_empty(&self) -> bool {
    let has_memory_taint = !self.memory_taint.is_empty();
    let has_register_taint = !self.register_taint.is_empty();
    !(has_memory_taint || has_register_taint)
}
/// Returns `true` if at least one register named in `register_list`
/// has a taint entry that is not *Top*.
pub fn check_return_registers_for_taint(&self, register_list: Vec<String>) -> bool {
    self.register_taint
        .iter()
        .any(|(register, taint)| !taint.is_top() && register_list.contains(&register.name))
}
/// Copy the taints of callee-saved registers from `return_state` into `self`.
///
/// Only registers listed as callee-saved in the given calling convention are considered.
/// Without a calling convention nothing is copied.
pub fn merge_callee_saved_taints_from_return_state(
    &mut self,
    return_state: &State,
    calling_convention: Option<&CallingConvention>,
) {
    let calling_conv = match calling_convention {
        Some(calling_conv) => calling_conv,
        None => return,
    };
    let callee_saved = &calling_conv.callee_saved_register;
    return_state
        .get_register_taints()
        .filter(|(variable, _)| callee_saved.contains(&variable.name))
        .for_each(|(variable, taint)| self.set_register_taint(variable, *taint));
}
}
#[cfg(test)]
mod tests;
use crate::{
abstract_domain::{DataDomain, PointerDomain},
intermediate_representation::CastOpType,
};
use super::*;
/// Build an extern symbol mock with the given name and return values.
/// It has no addresses, no parameters and no calling convention, and is marked as returning.
fn extern_symbol(name: &str, return_args: Vec<Arg>) -> ExternSymbol {
    let symbol_name = name.to_string();
    ExternSymbol {
        tid: Tid::new(symbol_name.clone()),
        addresses: Vec::new(),
        name: symbol_name,
        calling_convention: None,
        parameters: Vec::new(),
        return_values: return_args,
        no_return: false,
    }
}
/// Shortcut for an exact bitvector domain value built from an `i64`.
fn bv(value: i64) -> BitvectorDomain {
    let bitvector = Bitvector::from_i64(value);
    BitvectorDomain::Value(bitvector)
}
/// Test-only helpers for constructing and manipulating taint states.
impl State {
    /// Create a taint state for a mocked `system` symbol together with the
    /// pointer inference state it was created from.
    /// The returned state starts with an empty def-to-pointer-inference map.
    pub fn mock_with_pi_state() -> (State, PointerInferenceState) {
        let stack_register = Variable::mock("RSP", 8 as u64);
        let pi_state = PointerInferenceState::new(&stack_register, Tid::new("func"));
        let symbol = extern_symbol("system", vec![Arg::Register(Variable::mock("RAX", 8 as u64))]);
        let current_sub = Sub::mock("current");
        let mut state = State::new(&symbol, &stack_register, Some(&pi_state), &current_sub);
        state.pi_def_map = Some(HashMap::new());
        (state, pi_state)
    }

    /// Register `pi_state` for `def_tid`.
    /// Does nothing if no state is given or if the taint state has no def map.
    pub fn set_pointer_inference_state_for_def(
        &mut self,
        pi_state: Option<PointerInferenceState>,
        def_tid: &Tid,
    ) {
        if let (Some(pi_state), Some(pid_map)) = (pi_state, self.pi_def_map.as_mut()) {
            pid_map.insert(def_tid.clone(), pi_state);
        }
    }

    /// Replace the complete def-to-pointer-inference map.
    pub fn set_pointer_inference_map(&mut self, pi_state_map: HashMap<Tid, PointerInferenceState>) {
        self.pi_def_map = Some(pi_state_map);
    }

    /// Look up the pointer inference state registered for `def_tid`.
    pub fn get_pointer_inference_state_at_def(
        &self,
        def_tid: &Tid,
    ) -> Option<&PointerInferenceState> {
        self.pi_def_map
            .as_ref()
            .and_then(|pid_map| pid_map.get(def_tid))
    }

    /// Forget all register taints.
    pub fn remove_all_register_taints(&mut self) {
        self.register_taint.clear();
    }
}
/// Shared fixture for the unit tests of the taint state.
struct Setup {
/// Taint state under test, created by `State::mock_with_pi_state`.
state: State,
/// Pointer inference state returned alongside the taint state.
pi_state: PointerInferenceState,
/// Mock of the 8-byte register `RDI`.
rdi: Variable,
/// Mock of the 8-byte register `RSI`.
rsi: Variable,
/// Mock of the 8-byte register `RSP`, used as stack pointer register in the tests.
rsp: Variable,
/// Constant value (hex `ffcc00`) used for the constant assignment test case.
constant: Bitvector,
/// TID (`def`) under which pointer inference states are registered in the tests.
def_tid: Tid,
/// Pointer to the stack object of `pi_state` at offset 0.
stack_pointer: DataDomain<BitvectorDomain>,
/// Pointer to the stack object of `pi_state` at offset -8.
base_eight_offset: DataDomain<BitvectorDomain>,
/// Pointer to the stack object of `pi_state` at offset -16.
base_sixteen_offset: DataDomain<BitvectorDomain>,
}
impl Setup {
    /// Create the fixture: a mocked taint state, its pointer inference state,
    /// three 8-byte register mocks and three pointers into the stack object
    /// (offsets 0, -8 and -16).
    fn new() -> Self {
        let (state, pi_state) = State::mock_with_pi_state();
        let stack_id = pi_state.stack_id.clone();
        let register = |name: &str| Variable::mock(name, 8 as u64);
        let stack_position =
            |offset: i64| Data::Pointer(PointerDomain::new(stack_id.clone(), bv(offset)));
        Setup {
            rdi: register("RDI"),
            rsi: register("RSI"),
            rsp: register("RSP"),
            constant: Bitvector::from_str_radix(16, "ffcc00").unwrap(),
            def_tid: Tid::new("def"),
            stack_pointer: stack_position(0),
            base_eight_offset: stack_position(-8),
            base_sixteen_offset: stack_position(-16),
            state,
            pi_state,
        }
    }
}
/// Checks `set_expression_taint_and_store_constants` for constant, variable,
/// stack pointer, binary operation and cast/subpiece expressions.
#[test]
fn setting_expression_and_constants() {
    let mut setup = Setup::new();
    let rdi_taint = Taint::Tainted(setup.rdi.size);
    // In the pointer inference state of the def, RDI points to the stack at offset -8.
    setup
        .pi_state
        .set_register(&setup.rdi, setup.base_eight_offset.clone());
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);

    // Test Case 1: Constants
    let copy_const_expr = Expression::const_from_apint(setup.constant.clone());
    setup.state.set_register_taint(&setup.rdi, rdi_taint);
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &copy_const_expr,
        &setup.rsp,
    );
    assert!(setup.state.get_register_taint(&setup.rdi).is_none());
    assert_eq!(setup.state.string_constants.len(), 1);
    assert_eq!(
        setup.state.string_constants.get(&setup.constant),
        Some(&setup.constant)
    );

    // Test Case 2: Variables
    let copy_var_expr = Expression::var("RSI");
    setup.state.set_register_taint(&setup.rdi, rdi_taint);
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &copy_var_expr,
        &setup.rsp,
    );
    assert!(setup.state.get_register_taint(&setup.rdi).is_none());
    assert_eq!(
        setup.state.get_register_taint(&setup.rsi),
        Some(&Taint::Tainted(setup.rsi.size))
    );

    // Test Case 2.5: Stack Pointer Assignment
    let stack_expression = Expression::var("RSP");
    setup.state.set_register_taint(&setup.rdi, rdi_taint);
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &stack_expression,
        &setup.rsp,
    );
    assert!(setup.state.get_register_taint(&setup.rdi).is_none());
    assert!(setup
        .state
        .check_if_address_points_to_taint(setup.stack_pointer, &setup.pi_state));

    // Test Case 3: Bin Ops
    let bin_op_expr = Expression::var("RBP").plus_const(-8);
    setup.state.set_register_taint(&setup.rdi, rdi_taint);
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &bin_op_expr,
        &setup.rsp,
    );
    assert!(setup.state.get_register_taint(&setup.rdi).is_none());
    assert!(setup
        .state
        .check_if_address_points_to_taint(setup.base_eight_offset, &setup.pi_state));

    // Test Case 4: Any other Expression
    let cast_expr = Expression::var("RDI")
        .subpiece(ByteSize::new(0), ByteSize::new(4))
        .cast(CastOpType::IntZExt);
    setup.state.set_register_taint(&setup.rdi, rdi_taint);
    setup.state.set_expression_taint_and_store_constants(
        &setup.def_tid,
        &setup.rdi,
        &cast_expr,
        &setup.rsp,
    );
    assert_eq!(
        setup.state.get_register_taint(&setup.rdi),
        Some(&rdi_taint)
    );
}
/// Checks that a store taints its value register only if the target memory is tainted.
#[test]
fn tainting_values_to_be_stored() {
    let mut setup = Setup::new();
    let rsp = Variable::mock("RSP", 8 as u64);
    let target_expr = Expression::var("RDI");
    let value_expr = Expression::var("RSI");

    // Test Case: Memory target is tainted. --> Taint the input register
    setup
        .pi_state
        .set_register(&setup.rdi, setup.base_eight_offset.clone());
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    setup
        .state
        .taint_value_to_be_stored(&setup.def_tid, &target_expr, &value_expr, &rsp);
    // The memory taint at the target must be gone, the stored register must be tainted.
    assert!(!setup
        .state
        .check_if_address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
    assert_eq!(
        setup.state.get_register_taint(&setup.rsi),
        Some(&Taint::Tainted(setup.rsi.size))
    );

    // Test Case: Memory target is not tainted. --> Do nothing
    setup.state.register_taint.remove(&setup.rsi);
    setup
        .pi_state
        .set_register(&setup.rdi, setup.base_sixteen_offset.clone());
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);
    setup
        .state
        .taint_value_to_be_stored(&setup.def_tid, &target_expr, &value_expr, &rsp);
    assert!(setup.state.get_register_taint(&setup.rsi).is_none());
}
/// Checks `taint_def_input_register` for variable, stack pointer, binary operation and cast inputs.
#[test]
fn tainting_def_input_register() {
    let mut setup = Setup::new();
    let rdi = Variable::mock("RDI", 8 as u64);
    let rsp = Variable::mock("RSP", 8 as u64);
    let expected_rdi_taint = Taint::Tainted(rdi.size);
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);

    // Test Case 1: Variable input
    let variable_input = Expression::var("RDI");
    setup
        .state
        .taint_def_input_register(&variable_input, &rsp, &setup.def_tid);
    assert_eq!(
        setup.state.get_register_taint(&rdi),
        Some(&expected_rdi_taint)
    );

    // Test Case 2: Stack Pointer input
    let stack_pointer_input = Expression::var("RSP");
    setup
        .state
        .taint_def_input_register(&stack_pointer_input, &rsp, &setup.def_tid);
    assert!(setup
        .state
        .check_if_address_points_to_taint(setup.stack_pointer.clone(), &setup.pi_state));
    setup.state.remove_all_register_taints();

    // Test Case 3: Bin Op Input
    let bin_op_input = Expression::var("RDI").plus_const(8);
    setup
        .state
        .taint_def_input_register(&bin_op_input, &rsp, &setup.def_tid);
    assert_eq!(
        setup.state.get_register_taint(&rdi),
        Some(&expected_rdi_taint)
    );
    setup.state.remove_all_register_taints();

    // Test Case 4: Cast Op Input
    let cast_input = Expression::var("RDI").cast(CastOpType::IntZExt);
    setup
        .state
        .taint_def_input_register(&cast_input, &rsp, &setup.def_tid);
    assert_eq!(
        setup.state.get_register_taint(&rdi),
        Some(&expected_rdi_taint)
    );
}
/// Checks that `taint_variable_input` taints registers directly
/// and taints memory when the variable is the stack pointer.
#[test]
fn tainting_variable_input() {
    let mut setup = Setup::new();
    let rdi = Variable::mock("RDI", 8 as u64);
    let rsp = Variable::mock("RSP", 8 as u64);
    setup
        .state
        .set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);

    // Test Case 1: Register input
    setup
        .state
        .taint_variable_input(&rdi, &rsp, &setup.def_tid);
    let expected_taint = Taint::Tainted(rdi.size);
    assert_eq!(setup.state.get_register_taint(&rdi), Some(&expected_taint));

    // Test Case 2: Stack Pointer input
    setup
        .state
        .taint_variable_input(&rsp, &rsp, &setup.def_tid);
    assert!(setup
        .state
        .check_if_address_points_to_taint(setup.stack_pointer.clone(), &setup.pi_state));
}
/// Checks that `remove_mem_taint_at_target` clears existing taint
/// and leaves untainted memory untouched.
#[test]
fn removing_memory_taint_at_target() {
    let mut setup = Setup::new();

    // Test Case: Memory was tainted and taint is removed
    let eight_byte_taint = Taint::Tainted(ByteSize::new(8));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, eight_byte_taint);
    assert!(setup
        .state
        .check_if_address_points_to_taint(setup.base_eight_offset.clone(), &setup.pi_state));
    setup
        .state
        .remove_mem_taint_at_target(&setup.base_eight_offset);
    assert!(!setup
        .state
        .check_if_address_points_to_taint(setup.base_eight_offset, &setup.pi_state));

    // Test Case: Memory was not tainted and nothing happens
    assert!(!setup
        .state
        .check_if_address_points_to_taint(setup.base_sixteen_offset.clone(), &setup.pi_state));
    setup
        .state
        .remove_mem_taint_at_target(&setup.base_sixteen_offset);
    assert!(!setup
        .state
        .check_if_address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
}
/// Checks that `save_taint_to_memory` makes a previously untainted stack position tainted.
#[test]
fn saving_taint_to_memory() {
    let mut setup = Setup::new();
    let target = setup.base_eight_offset.clone();

    // Before saving, the position holds no taint.
    assert!(!setup
        .state
        .check_if_address_points_to_taint(target.clone(), &setup.pi_state));

    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    // Afterwards the same position is reported as tainted.
    assert!(setup
        .state
        .check_if_address_points_to_taint(target, &setup.pi_state));
}
/// Checks that only the taint of RDI survives
/// `remove_non_parameter_taints_for_generic_function` with the mocked calling convention.
#[test]
fn removing_non_parameter_taints_for_generic_function() {
    let mut setup = Setup::new();
    let mut mock_project = Project::mock_empty();
    mock_project
        .calling_conventions
        .push(CallingConvention::mock());

    let rdi_taint = Taint::Tainted(setup.rdi.size);
    let rsi_taint = Taint::Tainted(setup.rsi.size);
    setup.state.set_register_taint(&setup.rdi, rdi_taint);
    setup.state.set_register_taint(&setup.rsi, rsi_taint);

    setup
        .state
        .remove_non_parameter_taints_for_generic_function(&mock_project);

    assert_eq!(setup.state.get_register_taint(&setup.rdi), Some(&rdi_taint));
    assert!(setup.state.get_register_taint(&setup.rsi).is_none());
}
/// Checks that `remove_non_callee_saved_taint` drops the taint of RDI
/// but keeps the taint of RBP with the mocked calling convention.
#[test]
fn removing_non_callee_saved_taint() {
    let mut setup = Setup::new();
    let cconv = CallingConvention::mock();
    let rbp_reg = Variable::mock("RBP", 8 as u64);
    // RDI is tainted with its own size (the original used the size of RSI here).
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup
        .state
        .set_register_taint(&rbp_reg, Taint::Tainted(rbp_reg.size));
    setup.state.remove_non_callee_saved_taint(&cconv);
    assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
    assert_eq!(
        setup.state.get_register_taint(&rbp_reg),
        Some(&Taint::Tainted(rbp_reg.size))
    );
}
/// Checks that `remove_all_except_return_register_taints` keeps only the taint of RAX
/// when RAX is the sole return register.
#[test]
fn removing_all_but_return() {
    let mut setup = Setup::new();
    let mut return_regs: HashSet<String> = HashSet::new();
    return_regs.insert("RAX".to_string());
    let rax_reg = Variable::mock("RAX", 8 as u64);
    // RDI is tainted with its own size (the original used the size of RSI here).
    setup
        .state
        .set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
    setup
        .state
        .set_register_taint(&rax_reg, Taint::Tainted(rax_reg.size));
    setup
        .state
        .remove_all_except_return_register_taints(return_regs);
    assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
    assert_eq!(
        setup.state.get_register_taint(&rax_reg),
        Some(&Taint::Tainted(rax_reg.size))
    );
}
/// Checks that only the stack position that was tainted is reported as pointing to taint.
#[test]
fn checking_if_address_points_to_taint() {
    let mut setup = Setup::new();
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let tainted_position_found = setup
        .state
        .check_if_address_points_to_taint(setup.base_eight_offset, &setup.pi_state);
    assert!(tainted_position_found);

    let untainted_position_found = setup
        .state
        .check_if_address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state);
    assert!(!untainted_position_found);
}
/// Checks `check_return_registers_for_taint` on an untainted state,
/// with only a non-return register tainted, and with the return register tainted.
#[test]
fn checking_return_registers_for_taint() {
    let mut setup = Setup::new();
    let rax = Variable::mock("RAX", 8 as u64);
    let rdi = Variable::mock("RDI", 8 as u64);
    let return_registers = || vec!["RAX".to_string()];

    // Test Case: Empty Taint
    assert!(!setup
        .state
        .check_return_registers_for_taint(return_registers()));

    // Test Case: No return register tainted
    setup
        .state
        .set_register_taint(&rdi, Taint::Tainted(rdi.size));
    assert!(!setup
        .state
        .check_return_registers_for_taint(return_registers()));

    // Test Case: Return register tainted
    setup
        .state
        .set_register_taint(&rax, Taint::Tainted(rax.size));
    assert!(setup
        .state
        .check_return_registers_for_taint(return_registers()));
}
#[cfg(test)]
use apint::ApInt;
#[cfg(test)]
use super::{CastOpType, Variable};
use super::{BinOpType, Expression};
use crate::prelude::*;
/// ## Helper functions for building expressions
impl Expression {
/// Build a constant expression holding the bitvector created from the given `i64`.
#[cfg(test)]
pub fn const_from_i64(value: i64) -> Expression {
    let bitvector = Bitvector::from_i64(value);
    Self::Const(bitvector)
}
/// Build a constant expression directly from an `ApInt` value (e.g. copy of global address).
#[cfg(test)]
pub fn const_from_apint(value: ApInt) -> Expression {
    Self::Const(value)
}
/// Build a variable expression for a non-temporary, 8-byte variable with the given name.
#[cfg(test)]
pub fn var(name: &str) -> Expression {
    let variable = Variable {
        name: name.to_string(),
        size: ByteSize::new(8),
        is_temp: false,
    };
    Self::Var(variable)
}
/// Wrap `self` into a cast expression with the given cast type and a result size of 8 bytes.
#[cfg(test)]
pub fn cast(self, op: CastOpType) -> Expression {
    let size = ByteSize::new(8);
    let arg = Box::new(self);
    Self::Cast { op, size, arg }
}
/// Wrap `self` into a subpiece expression with the given lowest byte and size.
#[cfg(test)]
pub fn subpiece(self, low_byte: ByteSize, size: ByteSize) -> Expression {
    let arg = Box::new(self);
    Self::Subpiece {
        low_byte,
        size,
        arg,
    }
}
/// Shortcut for creating an `IntAdd`-expression
pub fn plus(self, rhs: Expression) -> Expression {
Expression::BinOp {
......
......@@ -3,6 +3,8 @@ use crate::prelude::*;
use crate::utils::log::LogMessage;
use std::collections::HashSet;
pub mod builder;
/// A term identifier consisting of an ID string (which is required to be unique)
/// and an address to indicate where the term is located.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
......
#[cfg(test)]
use crate::intermediate_representation::{Expression, Variable};
#[cfg(test)]
use super::{Def, Jmp, Term, Tid};
/// ## Helper functions for building defs
#[cfg(test)]
impl Def {
    /// Build an assignment `var = value` wrapped in a term with the given TID name.
    pub fn assign(tid: &str, var: Variable, value: Expression) -> Term<Def> {
        let term = Def::Assign { var, value };
        Term {
            tid: Tid::new(tid),
            term,
        }
    }

    /// Build a load of `address` into `var` wrapped in a term with the given TID name.
    pub fn load(tid: &str, var: Variable, address: Expression) -> Term<Def> {
        let term = Def::Load { var, address };
        Term {
            tid: Tid::new(tid),
            term,
        }
    }

    /// Build a store of `value` to `address` wrapped in a term with the given TID name.
    pub fn store(tid: &str, address: Expression, value: Expression) -> Term<Def> {
        let term = Def::Store { address, value };
        Term {
            tid: Tid::new(tid),
            term,
        }
    }
}
/// ## Helper functions for building jmps
#[cfg(test)]
impl Jmp {
    /// Build a call to `target_tid` with an optional return target,
    /// wrapped in a term with the given TID name.
    pub fn call(tid: &str, target_tid: &str, return_tid: Option<&str>) -> Term<Jmp> {
        let term = Jmp::Call {
            target: Tid::new(target_tid),
            return_: return_tid.map(Tid::new),
        };
        Term {
            tid: Tid::new(tid),
            term,
        }
    }

    /// Build an unconditional branch to `target_tid`, wrapped in a term with the given TID name.
    pub fn branch(tid: &str, target_tid: &str) -> Term<Jmp> {
        let term = Jmp::Branch(Tid::new(target_tid));
        Term {
            tid: Tid::new(tid),
            term,
        }
    }
}
......@@ -55,6 +55,7 @@ impl std::fmt::Display for CweModule {
/// Get a list of all known analysis modules.
pub fn get_modules() -> Vec<&'static CweModule> {
vec![
&crate::checkers::cwe_78::CWE_MODULE,
&crate::checkers::cwe_190::CWE_MODULE,
&crate::checkers::cwe_215::CWE_MODULE,
&crate::checkers::cwe_243::CWE_MODULE,
......
{
"CWE78": {
"system_symbols": [
"system",
"execl"
],
"string_symbols": [
"sprintf",
"snprintf",
"strcat",
"strncat"
],
"user_input_symbols": [
"scanf",
"__isoc99_scanf"
]
},
"CWE190": {
"symbols": [
"xmalloc",
......
#include <string.h>
#include <stdlib.h>
/* Calls system() with the constant command string "ls". */
/* NOTE(review): declared as returning int but has no return statement;
 * the only visible caller ignores the result - confirm this is intended. */
int constant_system() {
system("ls");
}
/* Appends the first command line argument to a command prefix, passes the result
 * to system() and afterwards calls constant_system(). */
/* NOTE(review): presumably a deliberately vulnerable fixture for the CWE78 check
 * (user-controlled argv[1] reaches system()) - confirm before changing anything. */
int main(int argc, char **argv) {
/* NOTE(review): dest points to a string literal, so strcat() below writes into it,
 * and argv[1] is used without checking argc - verify this is acceptable for the fixture. */
char *dest = "usr/bin/cat ";
strcat(dest, argv[1]);
/* The command string now contains the unchecked program argument. */
system(dest);
/* Additional system() call with a constant command. */
constant_system();
return 0;
}
......@@ -170,6 +170,38 @@ mod tests {
/// Acceptance test for the CWE78 check: every non-skipped test case has to
/// produce exactly one `[CWE78]` hit.
#[test]
#[ignore]
fn cwe_78() {
    let mut tests = all_test_cases("cwe_78", "CWE78");

    // Ghidra does not recognize all extern function calls in the disassembly step for MIPS.
    // Needs own control flow graph analysis to be fixed.
    for mips_arch in &["mips64", "mips64el", "mips", "mipsel"] {
        mark_architecture_skipped(&mut tests, mips_arch);
    }
    // Ghidra generates mangled function names here for some reason.
    for ppc_arch in &["ppc64", "ppc64le"] {
        mark_architecture_skipped(&mut tests, ppc_arch);
    }
    mark_skipped(&mut tests, "x86", "clang"); // Return value detection insufficient for x86
    mark_compiler_skipped(&mut tests, "mingw32-gcc"); // Pointer Inference returns insufficient results for PE

    let expected_hits = 1;
    let error_log: Vec<_> = tests
        .into_iter()
        .filter_map(|test_case| {
            test_case
                .run_test("[CWE78]", expected_hits)
                .err()
                .map(|error| (test_case.get_filepath(), error))
        })
        .collect();

    if !error_log.is_empty() {
        print_errors(error_log);
        panic!();
    }
}
#[test]
#[ignore]
fn cwe_190() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_190", "CWE190");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment