Unverified Commit 60f934d6 by Melvin Klimke Committed by GitHub

Variable parameter detection for CWE 78 check (#182)

parent 257d5f2b
......@@ -12,8 +12,16 @@
],
"user_input_symbols": [
"scanf",
"__isoc99_scanf"
]
"__isoc99_scanf",
"sscanf"
],
"format_string_index": {
"sprintf": 1,
"snprintf": 2,
"scanf": 0,
"__isoc99_scanf": 0,
"sscanf": 1
}
},
"CWE190": {
"symbols": [
......
......@@ -6,6 +6,7 @@ edition = "2018"
[dependencies]
apint = "0.2"
regex = "1.4.5"
serde = {version = "1.0", features = ["derive", "rc"]}
serde_json = "1.0"
serde_yaml = "0.8"
......
......@@ -72,14 +72,17 @@ pub static CWE_MODULE: CweModule = CweModule {
};
/// The configuration struct of this check.
///
/// Besides the lists of symbol names it stores, per symbol name,
/// the position of the format string parameter.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct Config {
    /// The names of the system call symbols
    system_symbols: Vec<String>,
    /// The names of the string manipulating symbols
    string_symbols: Vec<String>,
    /// The names of the user input symbols
    user_input_symbols: Vec<String>,
    /// Maps the name of an external symbol to the zero-based index
    /// of its format string parameter in the symbol's parameter list.
    format_string_index: HashMap<String, usize>,
}
/// This check searches for system calls and sets their parameters as taint source if available.
......@@ -215,10 +218,13 @@ fn get_entry_sub_to_entry_node_map(
/// - Maps the TID of an extern symbol that take input from the user to the corresponding extern symbol struct.
/// - extern_symbol_map:
/// - Maps the TID of an extern symbol to the extern symbol struct.
/// - format_string_index:
/// - Maps a symbol name to the index of its format string parameter.
pub struct SymbolMaps<'a> {
    /// Maps the TID of a string related extern symbol to the extern symbol struct.
    string_symbol_map: HashMap<Tid, &'a ExternSymbol>,
    /// Maps the TID of a user input extern symbol to the extern symbol struct.
    user_input_symbol_map: HashMap<Tid, &'a ExternSymbol>,
    /// Same content as above under an underscore-prefixed name
    /// (presumably because the field is currently not read; verify against callers).
    _user_input_symbol_map: HashMap<Tid, &'a ExternSymbol>,
    /// Maps the TID of every extern symbol to the extern symbol struct.
    extern_symbol_map: HashMap<Tid, &'a ExternSymbol>,
    /// Maps a symbol name to the index of its format string parameter.
    format_string_index: HashMap<String, usize>,
}
impl<'a> SymbolMaps<'a> {
......@@ -233,11 +239,12 @@ impl<'a> SymbolMaps<'a> {
project,
&config.string_symbols[..],
),
user_input_symbol_map: crate::utils::symbol_utils::get_symbol_map(
_user_input_symbol_map: crate::utils::symbol_utils::get_symbol_map(
project,
&config.user_input_symbols[..],
),
extern_symbol_map,
format_string_index: config.format_string_index.clone(),
}
}
}
......
......@@ -10,7 +10,6 @@ use crate::{
abstract_domain::{AbstractDomain, DataDomain, IntervalDomain},
analysis::{
forward_interprocedural_fixpoint::Context as PiContext, graph::Graph,
interprocedural_fixpoint_generic::NodeValue,
pointer_inference::PointerInference as PointerInferenceComputation,
pointer_inference::State as PointerInferenceState,
},
......@@ -19,6 +18,8 @@ use crate::{
utils::{binary::RuntimeMemoryImage, log::CweWarning},
};
pub mod parameter_detection;
#[derive(Clone)]
pub struct Context<'a> {
/// A pointer to the corresponding project struct.
......@@ -123,63 +124,6 @@ impl<'a> Context<'a> {
self.pointer_inference_results.get_graph()
}
/// Taints the parameters of a string function (sprintf, snprintf, ...) if the call is
/// relevant to the taint analysis.
///
/// The call is relevant if the first parameter, which doubles as the return parameter
/// of these functions, points to tainted memory.
/// Size parameters (e.g. of snprintf or strncat) are not treated specially.
/// Returns a modified copy of `state`; the input state is left untouched.
///
/// # Panics
/// Panics if the symbol has no parameters at all.
pub fn taint_string_function_parameters(
    &self,
    state: &State,
    string_symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut tainted_state = state.clone();
    // Without pointer inference results for the call site nothing can be evaluated.
    let pi_state = match self
        .pointer_inference_results
        .get_node_value(call_source_node)
    {
        Some(NodeValue::Value(pi_state)) => pi_state,
        _ => return tainted_state,
    };
    let destination = match string_symbol.parameters.get(0) {
        Some(param) => param,
        None => panic!("Missing parameters for string related function!"),
    };
    if self.first_param_points_to_memory_taint(pi_state, &mut tainted_state, destination) {
        self.taint_function_arguments(
            &mut tainted_state,
            pi_state,
            string_symbol.parameters.clone(),
        );
    }
    tainted_state
}
/// Taints every given argument.
///
/// Register arguments are tainted directly with the size of the register.
/// Stack arguments are evaluated with the pointer inference state and the resulting
/// address is tainted in memory; stack arguments that cannot be evaluated are skipped.
pub fn taint_function_arguments(
    &self,
    state: &mut State,
    pi_state: &PointerInferenceState,
    parameters: Vec<Arg>,
) {
    for argument in parameters.iter() {
        if let Arg::Register(register) = argument {
            state.set_register_taint(register, Taint::Tainted(register.size));
            continue;
        }
        if let Arg::Stack { size, .. } = argument {
            let evaluated = pi_state.eval_parameter_arg(
                argument,
                &self.project.stack_pointer_register,
                self.runtime_memory_image,
            );
            if let Ok(address) = evaluated {
                state.save_taint_to_memory(&address, Taint::Tainted(*size));
            }
        }
    }
}
/// Checks whether the firt parameter of a string related function points to a taint.
/// If so, removes the taint at the target memory.
pub fn first_param_points_to_memory_taint(
......@@ -198,6 +142,13 @@ impl<'a> Context<'a> {
self.add_temporary_callee_saved_register_taints_to_mem_taints(pi_state, state);
if state.address_points_to_taint(address.clone(), pi_state) {
if let Some(standard_cconv) = self.project.get_standard_calling_convention() {
state.remove_callee_saved_taint_if_destination_parameter(
&address,
pi_state,
standard_cconv,
);
}
state.remove_mem_taint_at_target(&address);
points_to_memory_taint = true;
}
......@@ -234,68 +185,6 @@ impl<'a> Context<'a> {
temp_mem_taints
}
/// Handles the taint transfer for a call to an external function.
///
/// - String symbols: the non callee saved taint is removed and the processing is
///   handed over to `taint_string_function_parameters`, since for these symbols the
///   first parameter (and not the return register) decides about the relevance of the call.
/// - All other symbols: if a return register is tainted before the call, the non callee
///   saved taint is removed, a CWE warning is generated for user input symbols and all
///   parameters of the symbol are tainted.
///
/// Returns a modified copy of `state`.
pub fn taint_generic_function_parameters_and_remove_non_callee_saved(
    &self,
    state: &State,
    symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut new_state = state.clone();

    if self
        .symbol_maps
        .string_symbol_map
        .contains_key(&symbol.tid)
    {
        new_state.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
        return self.taint_string_function_parameters(&new_state, symbol, call_source_node);
    }

    // Collect the names of all return values that are passed in registers.
    let mut return_registers: Vec<String> = Vec::new();
    for return_value in symbol.return_values.iter() {
        if let Arg::Register(var) = return_value {
            return_registers.push(var.name.clone());
        }
    }
    if !new_state.check_return_registers_for_taint(return_registers) {
        return new_state;
    }

    new_state.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
    // TODO: Parameter detection since targets of input parameters are the return locations
    // Taint memory for string inputs
    if self
        .symbol_maps
        .user_input_symbol_map
        .contains_key(&symbol.tid)
    {
        self.generate_cwe_warning(&new_state.get_current_sub().as_ref().unwrap().term.name);
    }
    let node_value = self
        .pointer_inference_results
        .get_node_value(call_source_node);
    if let Some(NodeValue::Value(pi_state)) = node_value {
        self.taint_function_arguments(&mut new_state, pi_state, symbol.parameters.clone());
    }
    new_state
}
/// Checks whether the current def term is the last def term
/// of its corresponding block and if so, returns the node index of the BlkStart node.
pub fn get_blk_start_node_if_last_def(
......@@ -372,6 +261,7 @@ impl<'a> Context<'a> {
var,
input,
&self.project.stack_pointer_register,
self.runtime_memory_image,
)
}
}
......@@ -456,6 +346,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
address,
value,
&self.project.stack_pointer_register,
self.runtime_memory_image,
),
}
......@@ -572,7 +463,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
Jmp::Call { target, .. } => {
let source_node = self.get_source_node(&new_state, &call.tid);
if let Some(extern_symbol) = self.symbol_maps.extern_symbol_map.get(target) {
new_state = self.taint_generic_function_parameters_and_remove_non_callee_saved(
new_state = self.taint_generic_extern_symbol_parameters(
&new_state,
extern_symbol,
source_node,
......
use petgraph::graph::NodeIndex;
use regex::Regex;
use crate::{
abstract_domain::{DataDomain, IntervalDomain, TryToBitvec},
analysis::interprocedural_fixpoint_generic::NodeValue,
intermediate_representation::{Arg, ByteSize, CallingConvention, ExternSymbol, Variable},
};
use crate::{
analysis::pointer_inference::State as PointerInferenceState, checkers::cwe_476::Taint,
};
use super::{Context, State};
impl<'a> Context<'a> {
/// Dispatches the taint handling of an extern symbol call.
///
/// String symbols (as decided by `is_string_symbol`) are processed by
/// `taint_extern_string_symbol_parameters`, every other symbol by
/// `taint_other_extern_symbol_parameters`.
pub fn taint_generic_extern_symbol_parameters(
    &self,
    state: &State,
    symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    if self.is_string_symbol(symbol) {
        self.taint_extern_string_symbol_parameters(state, symbol, call_source_node)
    } else {
        self.taint_other_extern_symbol_parameters(state, call_source_node, symbol)
    }
}
/// Returns `true` if the TID of the symbol is a key of the string symbol map,
/// i.e. if the symbol is one of the string symbols of the symbol configuration.
pub fn is_string_symbol(&self, symbol: &ExternSymbol) -> bool {
    let string_symbols = &self.symbol_maps.string_symbol_map;
    string_symbols.contains_key(&symbol.tid)
}
/// Taints the parameters of a non string related extern symbol if the call is relevant
/// to the taint analysis.
///
/// The call is relevant if at least one return register of the symbol is tainted.
/// In that case the non callee saved taint is removed and, if pointer inference results
/// exist for the call site, all parameters of the symbol are tainted.
/// Returns a modified copy of `state`.
pub fn taint_other_extern_symbol_parameters(
    &self,
    state: &State,
    call_source_node: NodeIndex,
    symbol: &ExternSymbol,
) -> State {
    let mut tainted_state = state.clone();
    let return_registers = Context::get_return_registers_from_symbol(symbol);
    if !tainted_state.check_return_registers_for_taint(return_registers) {
        // No tainted return register: the call does not influence the taint.
        return tainted_state;
    }
    tainted_state.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
    let node_value = self
        .pointer_inference_results
        .get_node_value(call_source_node);
    if let Some(NodeValue::Value(pi_state)) = node_value {
        self.taint_function_parameters(&mut tainted_state, pi_state, symbol.parameters.clone());
    }
    tainted_state
}
/// Collects the names of all return values of the symbol that are passed in registers.
/// Return values that are not registers are ignored.
fn get_return_registers_from_symbol(symbol: &ExternSymbol) -> Vec<String> {
    let mut register_names: Vec<String> = Vec::new();
    for return_value in symbol.return_values.iter() {
        if let Arg::Register(register) = return_value {
            register_names.push(register.name.clone());
        }
    }
    register_names
}
/// Taints the parameters of a string function such as sprintf or snprintf.
///
/// The non callee saved taint is always removed. The parameters are only tainted if
/// pointer inference results exist for the call site and the call is relevant
/// (see `is_relevant_string_function_call`).
/// For symbols with a variable number of parameters the fixed parameters are replaced by
/// the parameters detected from the format string, since the fixed parameters only
/// describe the destination of the incoming variable parameters.
/// Size parameters (e.g. of snprintf or strncat) are not treated specially.
pub fn taint_extern_string_symbol_parameters(
    &self,
    state: &State,
    string_symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut tainted_state = state.clone();
    tainted_state
        .remove_non_callee_saved_taint(string_symbol.get_calling_convention(self.project));
    let pi_state = match self
        .pointer_inference_results
        .get_node_value(call_source_node)
    {
        Some(NodeValue::Value(pi_state)) => pi_state,
        _ => return tainted_state,
    };
    if !self.is_relevant_string_function_call(string_symbol, pi_state, &mut tainted_state) {
        return tainted_state;
    }
    let parameters = if string_symbol.has_var_args {
        self.get_variable_number_parameters(pi_state, string_symbol)
    } else {
        string_symbol.parameters.clone()
    };
    self.taint_function_parameters(&mut tainted_state, pi_state, parameters);
    tainted_state
}
/// Decides whether a string function call is relevant to the taint analysis.
///
/// The first parameter of these string functions is also the return parameter,
/// so the call is relevant if that parameter points to tainted memory.
///
/// # Panics
/// Panics if the symbol has no parameters.
pub fn is_relevant_string_function_call(
    &self,
    string_symbol: &ExternSymbol,
    pi_state: &PointerInferenceState,
    state: &mut State,
) -> bool {
    match string_symbol.parameters.get(0) {
        Some(destination) => {
            self.first_param_points_to_memory_taint(pi_state, state, destination)
        }
        None => panic!("Missing parameters for string related function!"),
    }
}
/// Taints all given parameters.
///
/// A register parameter gets a register taint of the size of the register.
/// A stack parameter is evaluated with the pointer inference state and the resulting
/// address gets a memory taint of the size of the parameter.
/// Stack parameters whose evaluation fails are silently skipped.
pub fn taint_function_parameters(
    &self,
    state: &mut State,
    pi_state: &PointerInferenceState,
    parameters: Vec<Arg>,
) {
    for parameter in &parameters {
        match parameter {
            Arg::Register(register) => {
                state.set_register_taint(register, Taint::Tainted(register.size));
            }
            Arg::Stack { size, .. } => {
                let evaluated = pi_state.eval_parameter_arg(
                    parameter,
                    &self.project.stack_pointer_register,
                    self.runtime_memory_image,
                );
                if let Ok(address) = evaluated {
                    state.save_taint_to_memory(&address, Taint::Tainted(*size));
                }
            }
        }
    }
}
/// Returns the content of the format string that is passed to the given extern symbol.
///
/// The parameter at `format_string_index` is evaluated with the pointer inference state
/// and the string at the resulting address is read from the runtime memory image.
///
/// # Panics
/// Panics if the symbol has no parameter at `format_string_index`, if the parameter
/// cannot be evaluated, or if reading the string fails.
pub fn get_input_format_string(
    &self,
    pi_state: &PointerInferenceState,
    extern_symbol: &ExternSymbol,
    format_string_index: usize,
) -> String {
    let format_string = match extern_symbol.parameters.get(format_string_index) {
        Some(parameter) => parameter,
        None => panic!(
            "No format string parameter at specified index {} for function {}",
            format_string_index, extern_symbol.name
        ),
    };
    let evaluated = pi_state.eval_parameter_arg(
        format_string,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    );
    match evaluated {
        Ok(address) => self.parse_format_string_destination_and_return_content(address),
        Err(_) => panic!("Could not parse target address of format string pointer."),
    }
}
/// Reads the format string located at the given address.
///
/// The address is converted to a bitvector and the null terminated string
/// at that address is read from the runtime memory image.
///
/// # Panics
/// Panics if the address cannot be converted to a bitvector
/// or if the string cannot be read from the memory image.
pub fn parse_format_string_destination_and_return_content(
    &self,
    address: DataDomain<IntervalDomain>,
) -> String {
    let address_vector = match address.try_to_bitvec() {
        Ok(bitvector) => bitvector,
        Err(_) => panic!("Could not translate format string address to bitvector."),
    };
    let read_result = self
        .runtime_memory_image
        .read_string_until_null_terminator(&address_vector);
    match read_result {
        Ok(format_string) => format_string.to_string(),
        Err(e) => panic!("{}", e),
    }
}
/// Extracts the conversion specifiers of a format string together with the byte size
/// of their corresponding data types.
///
/// A specifier is recognized as `%`, followed by up to two digits, followed by one of the
/// conversion characters `c C d i o u x X e E f F g G a A n p s S`.
/// An escaped percent sign (`%%`) is consumed as a whole and yields no parameter,
/// so that e.g. `"100%%d"` is not misread as containing a `%d` conversion.
///
/// The specifiers are returned in the order in which they occur in the format string.
pub fn parse_format_string_parameters(&self, format_string: &str) -> Vec<(String, ByteSize)> {
    // The character class must not contain separators: inside `[...]` a comma is a
    // literal character, so `%,` would be captured as a (then unknown) specifier.
    // The leading `%%` alternative has no capture group and only exists to skip escapes.
    let re = Regex::new(r#"%%|%\d{0,2}([cCdiouxXeEfFgGaAnpsS])"#).expect("No valid regex!");
    re.captures_iter(format_string)
        .filter_map(|cap| cap.get(1))
        .map(|specifier| {
            let specifier = specifier.as_str().to_string();
            let size = self.map_format_specifier_to_bytesize(specifier.clone());
            (specifier, size)
        })
        .collect()
}
/// Returns the byte size of the data type that belongs to a format specifier.
///
/// Integer specifiers map to the integer size, float specifiers to the double size and
/// pointer specifiers to the pointer size of the project's datatype properties.
///
/// # Panics
/// Panics if the specifier is neither an integer, a float nor a pointer specifier.
pub fn map_format_specifier_to_bytesize(&self, specifier: String) -> ByteSize {
    let properties = &self.project.datatype_properties;
    if Context::is_integer(&specifier) {
        properties.integer_size
    } else if Context::is_float(&specifier) {
        properties.double_size
    } else if Context::is_pointer(&specifier) {
        properties.pointer_size
    } else {
        panic!("Unknown format specifier.")
    }
}
/// Detects the variable parameters of a call from its format string.
///
/// The format string index of the symbol is looked up by the symbol name, the format string
/// is read and its specifiers are parsed. If at least one specifier is a string specifier,
/// the locations of the string parameters are returned; otherwise the result is empty.
///
/// # Panics
/// Panics if no format string index is configured for the symbol
/// or if the format string cannot be read.
pub fn get_variable_number_parameters(
    &self,
    pi_state: &PointerInferenceState,
    extern_symbol: &ExternSymbol,
) -> Vec<Arg> {
    let format_string_index = self
        .symbol_maps
        .format_string_index
        .get(&extern_symbol.name)
        .copied()
        .unwrap_or_else(|| panic!("External Symbol does not contain a format string parameter."));
    let format_string =
        self.get_input_format_string(pi_state, extern_symbol, format_string_index);
    let parameters = self.parse_format_string_parameters(&format_string);
    let has_string_parameter = parameters
        .iter()
        .any(|(specifier, _)| Context::is_string(specifier));
    if !has_string_parameter {
        return Vec::new();
    }
    self.calculate_parameter_locations(
        parameters,
        extern_symbol.get_calling_convention(self.project),
        format_string_index,
    )
}
/// Calculates the register and stack positions of format string parameters.
///
/// The parameters are processed in order. Integer and pointer parameters consume the
/// integer parameter registers that follow the format string parameter (the last fixed
/// parameter); float parameters consume the float parameter registers. As soon as the
/// registers of a kind are used up, further parameters of that kind are placed on the
/// stack, starting at offset 0 and advancing by the size of each stack parameter.
///
/// Only the locations of string parameters are returned, as an argument vector for
/// later tainting. Register arguments get the pointer size of the project as size.
///
/// If the calling convention has no integer parameter register after the format string
/// parameter (e.g. a convention with fewer registers than fixed parameters),
/// all integer and pointer parameters are placed on the stack.
pub fn calculate_parameter_locations(
    &self,
    parameters: Vec<(String, ByteSize)>,
    calling_convention: &CallingConvention,
    format_string_index: usize,
) -> Vec<Arg> {
    let mut var_args: Vec<Arg> = Vec::new();
    let integer_registers = &calling_convention.integer_parameter_register;
    // The first variable parameter uses the register right after the format string.
    // Tracking the index (instead of a remaining count computed by subtraction)
    // cannot underflow when there are fewer registers than fixed parameters.
    let mut next_integer_register = format_string_index + 1;
    let mut float_arg_register_count = calling_convention.float_parameter_register.len();
    let mut stack_offset: i64 = 0;

    for (type_name, size) in parameters.iter() {
        if Context::is_integer(type_name) || Context::is_pointer(type_name) {
            match integer_registers.get(next_integer_register) {
                Some(register_name) => {
                    if Context::is_string(type_name) {
                        var_args.push(Context::create_string_register_arg(
                            self.project.get_pointer_bytesize(),
                            register_name.clone(),
                        ));
                    }
                    next_integer_register += 1;
                }
                None => {
                    if Context::is_string(type_name) {
                        var_args.push(Context::create_string_stack_arg(*size, stack_offset));
                    }
                    stack_offset += u64::from(*size) as i64;
                }
            }
        } else if float_arg_register_count > 0 {
            float_arg_register_count -= 1;
        } else {
            stack_offset += u64::from(*size) as i64;
        }
    }

    var_args
}
/// Builds a stack argument with the given size located at the given stack offset.
pub fn create_string_stack_arg(size: ByteSize, stack_offset: i64) -> Arg {
    let offset = stack_offset;
    Arg::Stack { offset, size }
}
/// Builds a register argument for the non temporary register
/// with the given name and size.
pub fn create_string_register_arg(size: ByteSize, register_name: String) -> Arg {
    let register = Variable {
        name: register_name,
        size,
        is_temp: false,
    };
    Arg::Register(register)
}
/// Returns `true` for the integer and character conversion specifiers
/// `d`, `i`, `o`, `x`, `X`, `u`, `c` and `C`.
pub fn is_integer(specifier: &str) -> bool {
    match specifier {
        "d" | "i" | "o" | "x" | "X" | "u" | "c" | "C" => true,
        _ => false,
    }
}
/// Returns `true` for the conversion specifiers whose argument is a pointer:
/// `s`, `S`, `n` and `p`.
pub fn is_pointer(specifier: &str) -> bool {
    ["s", "S", "n", "p"].contains(&specifier)
}
/// Returns `true` for the floating point conversion specifiers
/// `f`, `F`, `e`, `E`, `a`, `A`, `g` and `G`.
pub fn is_float(specifier: &str) -> bool {
    match specifier {
        "f" | "F" => true,
        "e" | "E" => true,
        "a" | "A" => true,
        "g" | "G" => true,
        _ => false,
    }
}
/// Returns `true` for the string conversion specifiers `s` and `S`.
pub fn is_string(specifier: &str) -> bool {
    specifier == "s" || specifier == "S"
}
}
#[cfg(test)]
mod tests;
use crate::checkers::cwe_476::Taint;
use crate::intermediate_representation::{
Arg, BinOpType, Bitvector, ByteSize, Expression, ExternSymbol, Tid, Variable,
};
use crate::utils::binary::RuntimeMemoryImage;
use crate::{
abstract_domain::{DataDomain, IntervalDomain, PointerDomain},
intermediate_representation::CallingConvention,
};
use crate::{
analysis::pointer_inference::{Data, PointerInference as PointerInferenceComputation},
intermediate_representation::DatatypeProperties,
};
use super::super::tests::{bv, Setup};
use super::Context;
use std::collections::{HashMap, HashSet};
#[test]
fn tainting_generic_extern_symbol_parameters() {
    let mut setup = Setup::new();
    let rax = Variable::mock("RAX", 8u64);
    let rbp = Variable::mock("RBP", 8u64);
    let rdi = Variable::mock("RDI", 8u64);
    let rsi = Variable::mock("RSI", 8u64);
    let r9 = Variable::mock("R9", 8u64);

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    // Initial taint: one memory location, RBP and R9.
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    setup.state.set_register_taint(&rbp, Taint::Tainted(rbp.size));
    setup.state.set_register_taint(&r9, Taint::Tainted(r9.size));

    let mut string_symbols: HashMap<Tid, &ExternSymbol> = HashMap::new();
    string_symbols.insert(Tid::new("sprintf"), &setup.string_sym);
    let format_string_index: HashMap<String, usize> =
        vec![("sprintf".to_string(), 1)].into_iter().collect();

    let context = Context::mock(
        &setup.project,
        string_symbols,
        HashMap::new(),
        format_string_index,
        &pi_results,
        &mem_image,
    );
    let call_node = *context
        .block_maps
        .jmp_to_blk_end_node_map
        .get(&(Tid::new("call_string"), Tid::new("func")))
        .unwrap();

    // Test Case 1: String Symbol
    let mut new_state = context.taint_generic_extern_symbol_parameters(
        &setup.state,
        &ExternSymbol::mock_string(),
        call_node,
    );
    // Parameter
    assert!(new_state.get_register_taint(&rdi).is_none());
    assert!(new_state.get_register_taint(&rsi).is_none());
    // Callee Saved
    assert_eq!(
        new_state.get_register_taint(&rbp),
        Some(&Taint::Tainted(rbp.size))
    );
    // Non Callee Saved
    assert!(new_state.get_register_taint(&r9).is_none());
    assert!(new_state.get_register_taint(&rax).is_none());

    // Reset the register taints; this time the return register RAX is tainted as well.
    new_state.remove_all_register_taints();
    for register in vec![&rbp, &r9, &rax] {
        new_state.set_register_taint(register, Taint::Tainted(register.size));
    }

    // Test Case 2: Other Extern Symbol
    new_state = context.taint_generic_extern_symbol_parameters(
        &new_state,
        &ExternSymbol::mock(),
        call_node,
    );
    // Parameter
    assert_eq!(
        new_state.get_register_taint(&rdi),
        Some(&Taint::Tainted(rdi.size))
    );
    // Callee Saved
    assert_eq!(
        new_state.get_register_taint(&rbp),
        Some(&Taint::Tainted(rbp.size))
    );
    // Non Callee Saved
    assert!(new_state.get_register_taint(&r9).is_none());
    assert!(new_state.get_register_taint(&rax).is_none());
    assert!(new_state.get_register_taint(&rsi).is_none());

    // TODO: add test for scanf when parameter detection is implemented
}
#[test]
fn tainting_extern_string_symbol_parameters() {
    let mut setup = Setup::new();
    let rbp = Variable::mock("RBP", 8u64); // callee saved -> will point to RSP
    let rdi = Variable::mock("RDI", 8u64); // parameter 1 -> will point to RBP - 8
    let rsi = Variable::mock("RSI", 8u64); // parameter 2

    // Initial taint: RBP and two memory locations.
    setup.state.set_register_taint(&rbp, Taint::Tainted(rbp.size));
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    setup
        .state
        .save_taint_to_memory(&setup.base_sixteen_offset, Taint::Tainted(ByteSize::new(8)));

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    let format_string_index: HashMap<String, usize> =
        vec![("sprintf".to_string(), 1)].into_iter().collect();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        format_string_index,
        &pi_results,
        &mem_image,
    );
    let call_node = *context
        .block_maps
        .jmp_to_blk_end_node_map
        .get(&(Tid::new("call_string"), Tid::new("func")))
        .unwrap();

    let new_state =
        context.taint_extern_string_symbol_parameters(&setup.state, &setup.string_sym, call_node);

    // The taint at offset 16 is kept, the taint at offset 8 is removed.
    assert!(new_state.address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
    assert!(!new_state.address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
    // The parameter registers are not tainted, the callee saved register stays tainted.
    assert!(new_state.get_register_taint(&rdi).is_none());
    assert!(new_state.get_register_taint(&rsi).is_none());
    assert_eq!(
        new_state.get_register_taint(&rbp),
        Some(&Taint::Tainted(rbp.size))
    );
}
#[test]
fn tainting_function_arguments() {
    let mut setup = Setup::new();
    let rdi = Variable::mock("RDI", 8);
    let args = vec![
        Arg::Register(rdi.clone()),
        Arg::Stack {
            offset: 24,
            size: ByteSize::from(8),
        },
    ];

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // Store a pointer to stack offset 32 at the position of the stack argument (RSP + 24).
    let stack_argument_position = Expression::BinOp {
        op: BinOpType::IntAdd,
        lhs: Box::new(Expression::Var(Variable {
            name: String::from("RSP"),
            size: ByteSize::new(8),
            is_temp: false,
        })),
        rhs: Box::new(Expression::Const(Bitvector::from_u64(24))),
    };
    let stack_target = Data::Pointer(PointerDomain::new(
        setup.pi_state.stack_id.clone(),
        bv(32),
    ));
    setup
        .pi_state
        .write_to_address(
            &stack_argument_position,
            &stack_target,
            context.runtime_memory_image,
        )
        .expect("Failed to write to address.");

    context.taint_function_parameters(&mut setup.state, &setup.pi_state, args);

    assert_eq!(
        setup.state.get_register_taint(&rdi),
        Some(&Taint::Tainted(rdi.size))
    );
    assert!(setup
        .state
        .address_points_to_taint(stack_target, &setup.pi_state));
}
#[test]
fn test_is_string_symbol() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    // Only sprintf is registered as a string symbol.
    let sprintf_symbol = ExternSymbol::mock_string();
    let memcpy_symbol = {
        let mut symbol = ExternSymbol::mock();
        symbol.tid = Tid::new("memcpy");
        symbol
    };
    let mut string_symbol_map: HashMap<Tid, &ExternSymbol> = HashMap::new();
    string_symbol_map.insert(Tid::new("sprintf"), &sprintf_symbol);

    let context = Context::mock(
        &setup.project,
        string_symbol_map,
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    assert!(context.is_string_symbol(&sprintf_symbol));
    assert!(!context.is_string_symbol(&memcpy_symbol));
}
#[test]
fn test_get_return_registers_from_symbol() {
    let string_symbol = ExternSymbol::mock_string();
    let return_registers = Context::get_return_registers_from_symbol(&string_symbol);
    assert_eq!(return_registers, vec!["RAX".to_string()]);
}
#[test]
fn test_get_input_format_string() {
    let mut setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // Let RSI hold the global address 0x3002.
    let string_address = Bitvector::from_str_radix(16, "3002").unwrap();
    let rsi = Variable::mock("RSI", 8u64);
    setup.pi_state.set_register(
        &rsi,
        DataDomain::Value(IntervalDomain::new(string_address.clone(), string_address)),
    );

    let sprintf_symbol = ExternSymbol::mock_string();
    let format_string = context.get_input_format_string(&setup.pi_state, &sprintf_symbol, 1);
    assert_eq!("Hello World", format_string);
}
#[test]
fn test_parse_format_string_destination_and_return_content() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // The global memory location 0x3002 contains the string itself.
    let address_vector = Bitvector::from_str_radix(16, "3002").unwrap();
    let address = DataDomain::Value(IntervalDomain::new(
        address_vector.clone(),
        address_vector,
    ));

    let content = context.parse_format_string_destination_and_return_content(address);
    assert_eq!("Hello World", content);
}
#[test]
fn test_parse_format_string_parameter() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let properties = DatatypeProperties::mock();
    // Shorthands for the expected (specifier, size) pairs.
    let pointer = |specifier: &str| (specifier.to_string(), properties.pointer_size);
    let integer = |specifier: &str| (specifier.to_string(), properties.integer_size);

    // Each format string is paired with the specifiers expected from it.
    let test_cases: Vec<(&str, Vec<(String, ByteSize)>)> = vec![
        (
            "%s \"%s\" %s",
            vec![pointer("s"), pointer("s"), pointer("s")],
        ),
        (
            "ifconfig eth0 add 3ffe:501:ffff:101:2%02x:%02xff:fe%02x:%02x%02x/64",
            vec![
                integer("x"),
                integer("x"),
                integer("x"),
                integer("x"),
                integer("x"),
            ],
        ),
        ("/dev/sd%c%d", vec![integer("c"), integer("d")]),
        (
            "%s: Unable to open \'%s\', errno=%d\n",
            vec![pointer("s"), pointer("s"), integer("d")],
        ),
    ];

    for (format_string, expected) in test_cases {
        assert_eq!(
            expected,
            context.parse_format_string_parameters(format_string)
        );
    }
}
#[test]
fn test_map_format_specifier_to_bytesize() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // One pointer, one float and one integer specifier with their expected sizes.
    let expectations = vec![("s", 4), ("f", 8), ("d", 4)];
    for (specifier, expected_size) in expectations {
        assert_eq!(
            ByteSize::new(expected_size),
            context.map_format_specifier_to_bytesize(specifier.to_string())
        );
    }
}
#[test]
#[should_panic]
fn test_map_invalid_format_specifier_to_bytesize() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // "w" is neither an integer, a float nor a pointer specifier.
    let unknown_specifier = String::from("w");
    context.map_format_specifier_to_bytesize(unknown_specifier);
}
#[test]
fn test_get_variable_number_parameters() {
    let mut setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let sprintf_symbol = ExternSymbol::mock_string();
    let format_string_index: HashMap<String, usize> =
        vec![("sprintf".to_string(), 0)].into_iter().collect();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        format_string_index,
        &pi_results,
        &mem_image,
    );
    let rdi = Variable::mock("RDI", 8u64);

    // Format string at 0x5000: no parameters are expected.
    let first_address = Bitvector::from_str_radix(16, "5000").unwrap();
    setup.pi_state.set_register(
        &rdi,
        DataDomain::Value(IntervalDomain::new(first_address.clone(), first_address)),
    );
    assert_eq!(
        Vec::<Arg>::new(),
        context.get_variable_number_parameters(&setup.pi_state, &sprintf_symbol)
    );

    // Format string at 0x500c: one stack parameter is expected.
    let second_address = Bitvector::from_str_radix(16, "500c").unwrap();
    setup.pi_state.set_register(
        &rdi,
        DataDomain::Value(IntervalDomain::new(second_address.clone(), second_address)),
    );
    let expected = vec![Arg::Stack {
        offset: 0,
        size: ByteSize::new(4),
    }];
    assert_eq!(
        expected,
        context.get_variable_number_parameters(&setup.pi_state, &sprintf_symbol)
    );
}
#[test]
fn test_calculate_parameter_locations() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    let integer_registers: Vec<String> = vec!["RDI", "RSI", "R8", "R9"]
        .into_iter()
        .map(String::from)
        .collect();
    let float_registers = vec!["XMM0".to_string()];
    let cconv =
        CallingConvention::mock_with_parameter_registers(integer_registers, float_registers);
    let format_string_index: usize = 1;

    let mut parameters: Vec<(String, ByteSize)> = vec![
        ("d".to_string(), ByteSize::new(4)),
        ("f".to_string(), ByteSize::new(8)),
        ("s".to_string(), ByteSize::new(4)),
    ];
    let register_arg = Arg::Register(Variable::mock("R9", ByteSize::new(8)));
    let stack_arg = Arg::Stack {
        offset: 0,
        size: ByteSize::new(4),
    };

    // Test Case 1: The string parameter is still written in the R9 register since 'f' is contained in the float register.
    assert_eq!(
        vec![register_arg.clone()],
        context.calculate_parameter_locations(parameters.clone(), &cconv, format_string_index)
    );

    // Test Case 2: A second string parameter does not fit into the registers anymore and is written into the stack.
    parameters.push(("s".to_string(), ByteSize::new(4)));
    assert_eq!(
        vec![register_arg, stack_arg],
        context.calculate_parameter_locations(parameters, &cconv, format_string_index)
    );
}
#[test]
fn test_create_string_stack_arg() {
    // A size of 8 bytes and an offset of 8 must be carried over unchanged into the stack argument.
    let expected = Arg::Stack {
        size: ByteSize::new(8),
        offset: 8,
    };
    let created = Context::create_string_stack_arg(ByteSize::new(8), 8);
    assert_eq!(expected, created);
}
#[test]
fn test_create_string_register_arg() {
    // The created argument must equal a register argument for an 8 byte wide R9.
    let expected = Arg::Register(Variable::mock("R9", ByteSize::new(8)));
    let created = Context::create_string_register_arg(ByteSize::new(8), "R9".to_string());
    assert_eq!(expected, created);
}
#[test]
fn test_is_integer() {
    // Specifiers that must be classified as integers.
    for &specifier in ["d", "i"].iter() {
        assert!(Context::is_integer(specifier));
    }
    // A float specifier must be rejected.
    assert!(!Context::is_integer("f"));
}
#[test]
fn test_is_pointer() {
    // Specifiers that must be classified as pointers.
    for &specifier in ["s", "S", "n", "p"].iter() {
        assert!(Context::is_pointer(specifier));
    }
    // 'g' must not be classified as a pointer.
    assert!(!Context::is_pointer("g"));
}
#[test]
fn test_is_float() {
    // Specifiers that must be classified as floats.
    for &specifier in ["f", "A"].iter() {
        assert!(Context::is_float(specifier));
    }
    // A string specifier must be rejected.
    assert!(!Context::is_float("s"));
}
#[test]
fn test_is_string() {
    // Specifiers that must be classified as strings.
    for &specifier in ["s", "S"].iter() {
        assert!(Context::is_string(specifier));
    }
    // 'g' must not be classified as a string.
    assert!(!Context::is_string("g"));
}
......@@ -21,12 +21,12 @@ fn mock_block(tid: &str) -> Term<Blk> {
}
}
fn bv(value: i64) -> ValueDomain {
/// Test helper: wraps the signed integer `value` into a `ValueDomain`.
pub fn bv(value: i64) -> ValueDomain {
    let bitvector = Bitvector::from_i64(value);
    bitvector.into()
}
impl ExternSymbol {
fn mock_string() -> Self {
pub fn mock_string() -> Self {
ExternSymbol {
tid: Tid::new("sprintf"),
addresses: vec!["UNKNOWN".to_string()],
......@@ -39,18 +39,18 @@ impl ExternSymbol {
}
}
}
struct Setup {
project: Project,
state: State,
pi_state: PointerInferenceState,
string_sym: ExternSymbol,
taint_source: Term<Jmp>,
base_eight_offset: DataDomain<ValueDomain>,
base_sixteen_offset: DataDomain<ValueDomain>,
/// Shared fixture bundling the mock objects that the tests of this module operate on.
pub struct Setup {
/// The mock project from which the tests build the control flow graph and the `Context`.
pub project: Project,
/// The taint state under test; created together with `pi_state` in `Setup::new`.
pub state: State,
/// The pointer inference state belonging to `state`.
pub pi_state: PointerInferenceState,
/// The extern symbol that tests register as string symbol under the `sprintf` TID.
pub string_sym: ExternSymbol,
/// The jump term that tests install as taint source of the context.
pub taint_source: Term<Jmp>,
/// NOTE(review): presumably a pointer into the stack frame at offset -8 - confirm in `Setup::new`.
pub base_eight_offset: DataDomain<ValueDomain>,
/// NOTE(review): presumably a pointer into the stack frame at offset -16 - confirm in `Setup::new`.
pub base_sixteen_offset: DataDomain<ValueDomain>,
}
impl Setup {
fn new() -> Self {
pub fn new() -> Self {
let (state, pi_state) = State::mock_with_pi_state();
let stack_id = pi_state.stack_id.clone();
let taint_source = Term {
......@@ -74,9 +74,15 @@ impl Setup {
Variable::mock("RDI", 8 as u64),
Expression::var("RBP").plus_const(-8),
);
let def3 = Def::assign(
"def3",
Variable::mock("RSI", 8 as u64),
Expression::Const(Bitvector::from_str_radix(16, "3002").unwrap()),
);
let jump = Jmp::call("call_string", "sprintf", Some("block2"));
block1.term.defs.push(def1);
block1.term.defs.push(def2);
block1.term.defs.push(def3);
block1.term.jmps.push(jump.clone());
sub.term.blocks.push(block1);
sub.term.blocks.push(block2);
......@@ -107,9 +113,11 @@ impl Setup {
}
impl<'a> Context<'a> {
fn mock(
pub fn mock(
project: &'a Project,
string_symbols: HashMap<Tid, &'a ExternSymbol>,
user_input_symbols: HashMap<Tid, &'a ExternSymbol>,
format_string_index: HashMap<String, usize>,
pi_results: &'a PointerInferenceComputation<'a>,
mem_image: &'a RuntimeMemoryImage,
) -> Self {
......@@ -154,8 +162,9 @@ impl<'a> Context<'a> {
let symbol_maps: SymbolMaps = SymbolMaps {
string_symbol_map: string_symbols,
user_input_symbol_map: HashMap::new(),
_user_input_symbol_map: user_input_symbols,
extern_symbol_map,
format_string_index,
};
Context::new(
......@@ -179,7 +188,14 @@ fn setting_taint_source() {
let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
let pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
let mem_image = RuntimeMemoryImage::mock();
let mut context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let mut context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
context.set_taint_source(&setup.taint_source, &String::from("system"), &current_sub);
assert_eq!(context.taint_source, Some(&setup.taint_source));
......@@ -188,110 +204,6 @@ fn setting_taint_source() {
}
#[test]
fn tainting_string_function_parameters() {
    let mut setup = Setup::new();
    let callee_saved = Variable::mock("RBP", 8 as u64); // callee saved -> will point to RSP
    let first_param = Variable::mock("RDI", 8 as u64); // parameter 1 -> will point to RBP - 8
    let second_param = Variable::mock("RSI", 8 as u64); // parameter 2

    // Initial taint: the callee saved register and both stack offsets.
    setup
        .state
        .set_register_taint(&callee_saved, Taint::Tainted(callee_saved.size));
    for offset in [&setup.base_eight_offset, &setup.base_sixteen_offset].iter() {
        setup
            .state
            .save_taint_to_memory(offset, Taint::Tainted(ByteSize::new(8)));
    }

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    // Block end node of the block containing the `sprintf` call.
    let call_key = (Tid::new("call_string"), Tid::new("func"));
    let node_id = context
        .block_maps
        .jmp_to_blk_end_node_map
        .get(&call_key)
        .unwrap();

    let new_state =
        context.taint_string_function_parameters(&setup.state, &setup.string_sym, *node_id);

    // The taint at offset -16 must survive, the taint at offset -8 must be gone.
    assert!(new_state.address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
    assert!(!new_state.address_points_to_taint(setup.base_eight_offset, &setup.pi_state));

    // Both parameter registers and the callee saved register must be tainted afterwards.
    for &register in [&first_param, &second_param, &callee_saved].iter() {
        assert_eq!(
            new_state.get_register_taint(register),
            Some(&Taint::Tainted(register.size))
        );
    }
}
#[test]
fn tainting_function_arguments() {
    let mut setup = Setup::new();
    let rdi_reg = Variable::mock("RDI", 8);

    // One register argument and one stack argument at offset 24.
    let register_arg = Arg::Register(rdi_reg.clone());
    let stack_arg = Arg::Stack {
        offset: 24,
        size: ByteSize::from(8),
    };
    let args = vec![register_arg, stack_arg];

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);

    // Store a stack pointer with offset 32 at the address RSP + 24,
    // i.e. at the location of the stack argument.
    let stack_pointer_var = Variable {
        name: String::from("RSP"),
        size: ByteSize::new(8),
        is_temp: false,
    };
    let stack_arg_address = Expression::BinOp {
        op: BinOpType::IntAdd,
        lhs: Box::new(Expression::Var(stack_pointer_var)),
        rhs: Box::new(Expression::Const(Bitvector::from_u64(24))),
    };
    let pointer_target =
        Data::Pointer(PointerDomain::new(setup.pi_state.stack_id.clone(), bv(32)));
    setup
        .pi_state
        .write_to_address(
            &stack_arg_address,
            &pointer_target,
            context.runtime_memory_image,
        )
        .expect("Failed to write to address.");

    context.taint_function_arguments(&mut setup.state, &setup.pi_state, args);

    // The register argument itself must be tainted ...
    assert_eq!(
        setup.state.get_register_taint(&rdi_reg),
        Some(&Taint::Tainted(rdi_reg.size))
    );
    // ... and so must the memory that the stack argument points to.
    assert!(setup
        .state
        .address_points_to_taint(pointer_target, &setup.pi_state));
}
#[test]
fn adding_temporary_callee_saved_register_taints_to_mem_taints() {
let mut setup = Setup::new();
let rbp_reg = Variable::mock("RBP", 8 as u64);
......@@ -314,7 +226,14 @@ fn adding_temporary_callee_saved_register_taints_to_mem_taints() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let result = context.add_temporary_callee_saved_register_taints_to_mem_taints(
&setup.pi_state,
......@@ -344,7 +263,14 @@ fn first_param_pointing_to_memory_taint() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let arg = Arg::Register(rdi_reg);
assert_eq!(
......@@ -360,99 +286,13 @@ fn first_param_pointing_to_memory_taint() {
}
#[test]
fn tainting_generic_function_parameters_and_removing_non_callee_saved() {
    let mut setup = Setup::new();
    let r9 = Variable::mock("R9", 8 as u64);
    let rbp = Variable::mock("RBP", 8 as u64);
    let rdi = Variable::mock("RDI", 8 as u64);
    let rsi = Variable::mock("RSI", 8 as u64);
    let rax = Variable::mock("RAX", 8 as u64);

    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    // Initial taint: memory at the eight byte offset plus the registers RBP and R9.
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    for &register in [&rbp, &r9].iter() {
        setup
            .state
            .set_register_taint(register, Taint::Tainted(register.size));
    }

    // Register `sprintf` as string symbol of the context.
    let mut string_syms: HashMap<Tid, &ExternSymbol> = HashMap::new();
    string_syms.insert(Tid::new("sprintf"), &setup.string_sym);
    let context = Context::mock(&setup.project, string_syms, &pi_results, &mem_image);
    let node_id = *context
        .block_maps
        .jmp_to_blk_end_node_map
        .get(&(Tid::new("call_string"), Tid::new("func")))
        .unwrap();

    // Test Case 1: String Symbol
    let mut new_state = context.taint_generic_function_parameters_and_remove_non_callee_saved(
        &setup.state,
        &ExternSymbol::mock_string(),
        node_id,
    );
    // The parameters (RDI, RSI) and the callee saved register (RBP) are tainted.
    for &tainted in [&rdi, &rsi, &rbp].iter() {
        assert_eq!(
            new_state.get_register_taint(tainted),
            Some(&Taint::Tainted(tainted.size))
        );
    }
    // Non callee saved registers carry no taint.
    for &untainted in [&r9, &rax].iter() {
        assert_eq!(new_state.get_register_taint(untainted), None);
    }

    // Reset the register taints for the second test case.
    new_state.remove_all_register_taints();
    for &register in [&rbp, &r9, &rax].iter() {
        new_state.set_register_taint(register, Taint::Tainted(register.size));
    }

    // Test Case 2: Other Extern Symbol
    let new_state = context.taint_generic_function_parameters_and_remove_non_callee_saved(
        &new_state,
        &ExternSymbol::mock(),
        node_id,
    );
    // The parameter (RDI) and the callee saved register (RBP) are tainted.
    for &tainted in [&rdi, &rbp].iter() {
        assert_eq!(
            new_state.get_register_taint(tainted),
            Some(&Taint::Tainted(tainted.size))
        );
    }
    // Non callee saved registers carry no taint.
    for &untainted in [&r9, &rax, &rsi].iter() {
        assert_eq!(new_state.get_register_taint(untainted), None);
    }

    // TODO: add test for scanf when parameter detection is implemented
}
#[test]
fn creating_pi_def_map() {
let setup = Setup::new();
let rsi_reg = Variable::mock("RSI", 8 as u64);
let rdi_reg = Variable::mock("RDI", 8 as u64);
let def1 = Tid::new("def1");
let def2 = Tid::new("def2");
let def3 = Tid::new("def3");
let stack_id = setup.pi_state.stack_id.clone();
......@@ -461,23 +301,30 @@ fn creating_pi_def_map() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let current_sub = setup.project.program.term.subs.get(0).unwrap();
let start_node = context
.block_maps
.block_start_last_def_map
.get(&(def2.clone(), current_sub.tid.clone()))
.get(&(def3.clone(), current_sub.tid.clone()))
.unwrap();
let pi_def_map = context.create_pi_def_map(start_node.clone()).unwrap();
for (def_tid, pi_state) in pi_def_map.iter() {
if *def_tid == def1 {
assert_eq!(pi_state.get_register(&rdi_reg), Data::new_top(rdi_reg.size));
assert_eq!(pi_state.get_register(&rsi_reg), Data::new_top(rsi_reg.size));
} else if *def_tid == def2 {
assert_eq!(
pi_state.get_register(&rdi_reg),
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-8)))
Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-8))),
);
}
}
......@@ -497,19 +344,32 @@ fn getting_blk_start_node_if_last_def() {
Expression::var("RBP").plus_const(-8),
);
let def3 = Def::assign(
"def3",
Variable::mock("RDI", 8 as u64),
Expression::Const(Bitvector::from_str_radix(16, "3002").unwrap()),
);
let mem_image = RuntimeMemoryImage::mock();
let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let current_sub = setup.project.program.term.subs.get(0).unwrap();
setup.state.set_current_sub(current_sub);
let start_node = context
.block_maps
.block_start_last_def_map
.get(&(def2.tid.clone(), current_sub.tid.clone()))
.get(&(def3.tid.clone(), current_sub.tid.clone()))
.unwrap();
assert_eq!(
......@@ -518,6 +378,10 @@ fn getting_blk_start_node_if_last_def() {
);
assert_eq!(
context.get_blk_start_node_if_last_def(&setup.state, &def2),
None
);
assert_eq!(
context.get_blk_start_node_if_last_def(&setup.state, &def3),
Some(start_node.clone())
);
}
......@@ -532,7 +396,14 @@ fn getting_source_node() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let current_sub = setup.project.program.term.subs.get(0).unwrap();
setup.state.set_current_sub(current_sub);
......@@ -561,7 +432,14 @@ fn updating_target_state_for_callsite() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let mut return_state = setup.state.clone();
......@@ -638,7 +516,14 @@ fn handling_assign_and_load() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let current_sub = setup.project.program.term.subs.get(0).unwrap();
setup.state.set_current_sub(current_sub);
......@@ -715,7 +600,14 @@ fn updating_def() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let current_sub = setup.project.program.term.subs.get(0).unwrap();
setup.state.set_current_sub(current_sub);
......@@ -797,7 +689,14 @@ fn updating_jumpsite() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let mut new_state = context
.update_jumpsite(
......@@ -844,7 +743,14 @@ fn updating_callsite() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
// Test Case: No return state
assert_eq!(
......@@ -936,7 +842,14 @@ fn splitting_call_stub() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let mut new_state = context.split_call_stub(&setup.state).unwrap();
......@@ -982,7 +895,14 @@ fn splitting_return_stub() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
// Set pi_state to check for memory pointers
let mut new_state = context
......@@ -1040,8 +960,17 @@ fn updating_call_stub() {
let mut string_symbols: HashMap<Tid, &ExternSymbol> = HashMap::new();
let sprintf = &ExternSymbol::mock_string();
string_symbols.insert(Tid::new("sprintf"), sprintf);
let context = Context::mock(&setup.project, string_symbols, &pi_results, &mem_image);
let mut format_string_index: HashMap<String, usize> = HashMap::new();
format_string_index.insert("sprintf".to_string(), 1);
let context = Context::mock(
&setup.project,
string_symbols,
HashMap::new(),
format_string_index,
&pi_results,
&mem_image,
);
let current_sub = Sub::mock("func");
setup.state.set_current_sub(&current_sub);
......@@ -1055,14 +984,8 @@ fn updating_call_stub() {
new_state.address_points_to_taint(setup.base_eight_offset, &setup.pi_state),
false
);
assert_eq!(
new_state.get_register_taint(&rdi_reg),
Some(&Taint::Tainted(rdi_reg.size))
);
assert_eq!(
new_state.get_register_taint(&rsi_reg),
Some(&Taint::Tainted(rsi_reg.size))
);
assert_eq!(new_state.get_register_taint(&rdi_reg), None,);
assert_eq!(new_state.get_register_taint(&rsi_reg), None,);
assert_eq!(
new_state.get_register_taint(&rbp_reg),
Some(&Taint::Tainted(rbp_reg.size))
......@@ -1087,7 +1010,14 @@ fn specializing_conditional() {
let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
pi_results.compute();
let context = Context::mock(&setup.project, HashMap::new(), &pi_results, &mem_image);
let context = Context::mock(
&setup.project,
HashMap::new(),
HashMap::new(),
HashMap::new(),
&pi_results,
&mem_image,
);
let mut new_state = context.split_call_stub(&setup.state).unwrap();
......
use std::collections::{HashMap, HashSet};
use crate::{
abstract_domain::{AbstractDomain, AbstractIdentifier, MemRegion, SizedDomain, TryToBitvec},
abstract_domain::{
AbstractDomain, AbstractIdentifier, DataDomain, IntervalDomain, MemRegion, SizedDomain,
TryToBitvec,
},
analysis::pointer_inference::{Data, State as PointerInferenceState},
checkers::cwe_476::Taint,
intermediate_representation::{
Arg, CallingConvention, Expression, ExternSymbol, Project, Sub, Variable,
},
prelude::*,
utils::binary::RuntimeMemoryImage,
};
#[derive(Serialize, Deserialize, Debug, Eq, Clone)]
......@@ -17,7 +21,7 @@ pub struct State {
/// The Taint contained in memory objects
memory_taint: HashMap<AbstractIdentifier, MemRegion<Taint>>,
/// The set of addresses in the binary where string constants reside
string_constants: HashSet<Bitvector>,
string_constants: HashSet<String>,
/// A map from Def Tids to their corresponding pointer inference state.
/// The pointer inference states are calculated in a forward manner
/// from the BlkStart node when entering a BlkEnd node through a jump.
......@@ -206,10 +210,20 @@ impl State {
}
/// Gets the string constant saved at the given address and saves it to the string constants field.
pub fn evaluate_constant(&mut self, constant: Bitvector) {
// TODO: check whether the constant is a valid memory address in the binary
// If so, get the string constant at that memory address and save it in the state
self.string_constants.insert(constant);
/// Saves the string constant stored at the given address to the string constants field.
///
/// Nothing is saved if `constant` is not a global memory address of the binary
/// or if no valid string can be read at that address.
pub fn evaluate_constant(
    &mut self,
    runtime_memory_image: &RuntimeMemoryImage,
    constant: Bitvector,
) {
    if !runtime_memory_image.is_global_memory_address(&constant) {
        return;
    }
    // A constant may lie inside global memory without pointing to a string
    // (the read fails in that case), so a failed read must not abort the analysis.
    // TODO: Log the error once a logging facility is available here.
    if let Ok(format_string) = runtime_memory_image.read_string_until_null_terminator(&constant)
    {
        self.string_constants.insert(format_string.to_string());
    }
}
/// Taints input registers and evaluates constant memory addresses for simple assignments
......@@ -221,10 +235,13 @@ impl State {
result: &Variable,
expression: &Expression,
stack_pointer_register: &Variable,
runtime_memory_image: &RuntimeMemoryImage,
) {
self.remove_register_taint(result);
match expression {
Expression::Const(constant) => self.evaluate_constant(constant.clone()),
Expression::Const(constant) => {
self.evaluate_constant(runtime_memory_image, constant.clone())
}
Expression::Var(var) => self.taint_variable_input(var, stack_pointer_register, def_tid),
Expression::BinOp { .. } => {
if let Some(pid_map) = self.pi_def_map.as_ref() {
......@@ -236,9 +253,12 @@ impl State {
}
Expression::UnOp { arg, .. }
| Expression::Cast { arg, .. }
| Expression::Subpiece { arg, .. } => {
self.taint_def_input_register(arg, stack_pointer_register, def_tid)
}
| Expression::Subpiece { arg, .. } => self.taint_def_input_register(
arg,
stack_pointer_register,
def_tid,
runtime_memory_image,
),
_ => (),
}
}
......@@ -250,12 +270,18 @@ impl State {
target: &Expression,
value: &Expression,
stack_pointer_register: &Variable,
runtime_memory_image: &RuntimeMemoryImage,
) {
if let Some(pid_map) = self.pi_def_map.as_ref() {
if let Some(pi_state) = pid_map.get(def_tid) {
let address = pi_state.eval(target);
if self.address_points_to_taint(address.clone(), &pi_state) {
self.taint_def_input_register(value, stack_pointer_register, def_tid);
self.taint_def_input_register(
value,
stack_pointer_register,
def_tid,
runtime_memory_image,
);
self.remove_mem_taint_at_target(&address);
}
}
......@@ -268,20 +294,35 @@ impl State {
expr: &Expression,
stack_pointer_register: &Variable,
def_tid: &Tid,
runtime_memory_image: &RuntimeMemoryImage,
) {
match expr {
// TODO: Distinguish integer constants from global addresses in evaluate constant
Expression::Const(constant) => self.evaluate_constant(constant.clone()),
Expression::Const(constant) => {
self.evaluate_constant(runtime_memory_image, constant.clone())
}
Expression::Var(var) => self.taint_variable_input(var, stack_pointer_register, def_tid),
Expression::BinOp { lhs, rhs, .. } => {
self.taint_def_input_register(lhs, stack_pointer_register, def_tid);
self.taint_def_input_register(rhs, stack_pointer_register, def_tid);
self.taint_def_input_register(
lhs,
stack_pointer_register,
def_tid,
runtime_memory_image,
);
self.taint_def_input_register(
rhs,
stack_pointer_register,
def_tid,
runtime_memory_image,
);
}
Expression::UnOp { arg, .. }
| Expression::Cast { arg, .. }
| Expression::Subpiece { arg, .. } => {
self.taint_def_input_register(arg, stack_pointer_register, def_tid)
}
| Expression::Subpiece { arg, .. } => self.taint_def_input_register(
arg,
stack_pointer_register,
def_tid,
runtime_memory_image,
),
_ => (),
}
}
......@@ -400,6 +441,22 @@ impl State {
}
}
/// Untaints every tainted callee saved register whose value in `pi_state`
/// equals `destination_address`, i.e. the register was identified as
/// the return target of a string symbol.
pub fn remove_callee_saved_taint_if_destination_parameter(
    &mut self,
    destination_address: &DataDomain<IntervalDomain>,
    pi_state: &PointerInferenceState,
    standard_cconv: &CallingConvention,
) {
    // Snapshot of the tainted callee saved registers, taken before any taint is removed.
    let callee_saved_taints = self.get_callee_saved_register_taints(standard_cconv);
    for (register, _) in callee_saved_taints.iter() {
        let register_value = pi_state.eval(&Expression::Var(register.clone()));
        if register_value != *destination_address {
            continue;
        }
        self.remove_register_taint(register);
    }
}
/// Remove the taint from all registers not contained in the callee-saved register list of the given calling convention.
pub fn remove_non_callee_saved_taint(&mut self, calling_conv: &CallingConvention) {
self.register_taint = self
......
......@@ -78,7 +78,8 @@ struct Setup {
rdi: Variable,
rsi: Variable,
rsp: Variable,
constant: Bitvector,
constant: String,
constant_address: Bitvector,
def_tid: Tid,
stack_pointer: DataDomain<ValueDomain>,
base_eight_offset: DataDomain<ValueDomain>,
......@@ -95,7 +96,8 @@ impl Setup {
rdi: Variable::mock("RDI", 8 as u64),
rsi: Variable::mock("RSI", 8 as u64),
rsp: Variable::mock("RSP", 8 as u64),
constant: Bitvector::from_str_radix(16, "ffcc00").unwrap(),
constant: String::from("Hello World"),
constant_address: Bitvector::from_u32(12290),
def_tid: Tid::new("def"),
stack_pointer: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(0))),
base_eight_offset: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-8))),
......@@ -116,7 +118,7 @@ fn setting_expression_and_constants() {
.set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);
// Test Case 1: Constants
let copy_const_expr = Expression::const_from_apint(setup.constant.clone());
let copy_const_expr = Expression::const_from_apint(setup.constant_address);
setup
.state
.set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
......@@ -126,6 +128,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&copy_const_expr,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
assert_eq!(setup.state.string_constants.len(), 1);
......@@ -145,6 +148,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&copy_var_expr,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
assert_eq!(
......@@ -162,6 +166,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&stack_expression,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
assert_eq!(
......@@ -182,6 +187,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&bin_op_expr,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
assert_eq!(
......@@ -204,6 +210,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&cast_expr,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup.state.get_register_taint(&setup.rdi),
......@@ -231,6 +238,7 @@ fn tainting_values_to_be_stored() {
&Expression::var("RDI"),
&Expression::var("RSI"),
&stack_pointer,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup
......@@ -256,6 +264,7 @@ fn tainting_values_to_be_stored() {
&Expression::var("RDI"),
&Expression::var("RSI"),
&stack_pointer,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rsi), None);
}
......@@ -271,18 +280,24 @@ fn tainting_def_input_register() {
.set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);
// Test Case 1: Variable input
setup
.state
.taint_def_input_register(&Expression::var("RDI"), &stack_pointer, &setup.def_tid);
setup.state.taint_def_input_register(
&Expression::var("RDI"),
&stack_pointer,
&setup.def_tid,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup.state.get_register_taint(&rdi_reg),
Some(&Taint::Tainted(rdi_reg.size))
);
// Test Case 2: Stack Pointer input
setup
.state
.taint_def_input_register(&Expression::var("RSP"), &stack_pointer, &setup.def_tid);
setup.state.taint_def_input_register(
&Expression::var("RSP"),
&stack_pointer,
&setup.def_tid,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup
......@@ -298,6 +313,7 @@ fn tainting_def_input_register() {
&Expression::var("RDI").plus_const(8),
&stack_pointer,
&setup.def_tid,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup.state.get_register_taint(&rdi_reg),
......@@ -311,6 +327,7 @@ fn tainting_def_input_register() {
&Expression::var("RDI").cast(CastOpType::IntZExt),
&stack_pointer,
&setup.def_tid,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup.state.get_register_taint(&rdi_reg),
......
......@@ -630,6 +630,19 @@ mod tests {
callee_saved_register: vec!["RBP".to_string()],
}
}
/// Creates a mock calling convention that uses the given integer and float parameter registers.
///
/// The return register is fixed to `RAX` and the only callee saved register is `RBP`.
pub fn mock_with_parameter_registers(
    integer_parameter_register: Vec<String>,
    float_parameter_register: Vec<String>,
) -> CallingConvention {
    // so that the mock is useable as standard calling convention in tests
    let name = "__stdcall".to_string();
    let return_register = vec!["RAX".to_string()];
    let callee_saved_register = vec!["RBP".to_string()];
    CallingConvention {
        name,
        integer_parameter_register,
        float_parameter_register,
        return_register,
        callee_saved_register,
    }
}
}
impl Arg {
......
......@@ -175,15 +175,29 @@ impl RuntimeMemoryImage {
if address >= segment.base_address
&& address <= segment.base_address + segment.bytes.len() as u64
{
let index = (address - segment.base_address) as usize;
let c_str = std::ffi::CStr::from_bytes_with_nul(&segment.bytes[index..])?;
let start_index = (address - segment.base_address) as usize;
if let Some(end_index) = segment.bytes[start_index..].iter().position(|&b| b == 0) {
let c_str = std::ffi::CStr::from_bytes_with_nul(
&segment.bytes[start_index..start_index + end_index + 1],
)?;
return Ok(c_str.to_str()?);
} else {
return Err(anyhow!("Not a valid string in memory."));
}
}
}
Err(anyhow!("Address is not a valid global memory address."))
}
/// Checks whether the constant is a global memory address.
///
/// The constant counts as a global memory address if reading as many bytes
/// as the constant itself is wide from the address it denotes succeeds.
pub fn is_global_memory_address(&self, constant: &Bitvector) -> bool {
    self.read(constant, constant.bytesize()).is_ok()
}
/// Check whether all addresses in the given interval point to a readable segment in the runtime memory image.
///
/// Returns an error if the address interval intersects more than one memory segment
......@@ -304,6 +318,24 @@ pub mod tests {
write_flag: false,
execute_flag: false,
},
MemorySegment {
bytes: [0x02, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00].to_vec(),
base_address: 0x4000,
read_flag: true,
write_flag: false,
execute_flag: false,
},
MemorySegment {
bytes: [
0x2f, 0x64, 0x65, 0x76, 0x2f, 0x73, 0x64, 0x25, 0x63, 0x25, 0x64, 0x00,
0x63, 0x61, 0x74, 0x20, 0x25, 0x73, 0x00,
]
.to_vec(),
base_address: 0x5000,
read_flag: true,
write_flag: false,
execute_flag: false,
},
],
is_little_endian: true,
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment