Unverified Commit 60f934d6 by Melvin Klimke Committed by GitHub

Variable parameter detection for CWE 78 check (#182)

parent 257d5f2b
......@@ -12,8 +12,16 @@
],
"user_input_symbols": [
"scanf",
"__isoc99_scanf"
]
"__isoc99_scanf",
"sscanf"
],
"format_string_index": {
"sprintf": 1,
"snprintf": 2,
"scanf": 0,
"__isoc99_scanf": 0,
"sscanf": 1
}
},
"CWE190": {
"symbols": [
......
......@@ -6,6 +6,7 @@ edition = "2018"
[dependencies]
apint = "0.2"
regex = "1.4.5"
serde = {version = "1.0", features = ["derive", "rc"]}
serde_json = "1.0"
serde_yaml = "0.8"
......
......@@ -72,14 +72,17 @@ pub static CWE_MODULE: CweModule = CweModule {
};
/// The configuration struct
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct Config {
/// The names of the system call symbols
system_symbols: Vec<String>,
/// The names of the string manipulating symbols
string_symbols: Vec<String>,
/// The name of the user input symbols
/// The names of the user input symbols
user_input_symbols: Vec<String>,
/// Contains the index of the format string parameter
/// for external symbols.
format_string_index: HashMap<String, usize>,
}
/// This check searches for system calls and sets their parameters as taint source if available.
......@@ -215,10 +218,13 @@ fn get_entry_sub_to_entry_node_map(
/// - Maps the TID of an extern symbol that take input from the user to the corresponding extern symbol struct.
/// - extern_symbol_map:
/// - Maps the TID of an extern symbol to the extern symbol struct.
/// - format_string_index:
/// - Maps a symbol name to the index of its format string parameter.
pub struct SymbolMaps<'a> {
string_symbol_map: HashMap<Tid, &'a ExternSymbol>,
user_input_symbol_map: HashMap<Tid, &'a ExternSymbol>,
_user_input_symbol_map: HashMap<Tid, &'a ExternSymbol>,
extern_symbol_map: HashMap<Tid, &'a ExternSymbol>,
format_string_index: HashMap<String, usize>,
}
impl<'a> SymbolMaps<'a> {
......@@ -233,11 +239,12 @@ impl<'a> SymbolMaps<'a> {
project,
&config.string_symbols[..],
),
user_input_symbol_map: crate::utils::symbol_utils::get_symbol_map(
_user_input_symbol_map: crate::utils::symbol_utils::get_symbol_map(
project,
&config.user_input_symbols[..],
),
extern_symbol_map,
format_string_index: config.format_string_index.clone(),
}
}
}
......
......@@ -10,7 +10,6 @@ use crate::{
abstract_domain::{AbstractDomain, DataDomain, IntervalDomain},
analysis::{
forward_interprocedural_fixpoint::Context as PiContext, graph::Graph,
interprocedural_fixpoint_generic::NodeValue,
pointer_inference::PointerInference as PointerInferenceComputation,
pointer_inference::State as PointerInferenceState,
},
......@@ -19,6 +18,8 @@ use crate::{
utils::{binary::RuntimeMemoryImage, log::CweWarning},
};
pub mod parameter_detection;
#[derive(Clone)]
pub struct Context<'a> {
/// A pointer to the corresponding project struct.
......@@ -123,63 +124,6 @@ impl<'a> Context<'a> {
self.pointer_inference_results.get_graph()
}
/// This function taints the registers and stack positions of the parameter pointers for string functions
/// such as sprintf, snprintf, etc.
/// The size parameter is ignored if available (e.g. snprintf, strncat etc.)
pub fn taint_string_function_parameters(
&self,
state: &State,
string_symbol: &ExternSymbol,
call_source_node: NodeIndex,
) -> State {
let mut new_state = state.clone();
if let Some(NodeValue::Value(pi_state)) = self
.pointer_inference_results
.get_node_value(call_source_node)
{
// Check whether the parameter points to a tainted memory target
// Since the first parameter of these string functions is also the return parameter,
// this will serve as an indicator whether the function call is relevant to the taint analysis.
let relevant_fuction_call = if let Some(param) = string_symbol.parameters.get(0) {
self.first_param_points_to_memory_taint(pi_state, &mut new_state, param)
} else {
panic!("Missing parameters for string related function!");
};
if relevant_fuction_call {
self.taint_function_arguments(
&mut new_state,
pi_state,
string_symbol.parameters.clone(),
);
}
}
new_state
}
/// Taints register and stack function arguments.
pub fn taint_function_arguments(
&self,
state: &mut State,
pi_state: &PointerInferenceState,
parameters: Vec<Arg>,
) {
for parameter in parameters.iter() {
match parameter {
Arg::Register(var) => state.set_register_taint(var, Taint::Tainted(var.size)),
Arg::Stack { size, .. } => {
if let Ok(address) = pi_state.eval_parameter_arg(
parameter,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
state.save_taint_to_memory(&address, Taint::Tainted(*size))
}
}
}
}
}
/// Checks whether the firt parameter of a string related function points to a taint.
/// If so, removes the taint at the target memory.
pub fn first_param_points_to_memory_taint(
......@@ -198,6 +142,13 @@ impl<'a> Context<'a> {
self.add_temporary_callee_saved_register_taints_to_mem_taints(pi_state, state);
if state.address_points_to_taint(address.clone(), pi_state) {
if let Some(standard_cconv) = self.project.get_standard_calling_convention() {
state.remove_callee_saved_taint_if_destination_parameter(
&address,
pi_state,
standard_cconv,
);
}
state.remove_mem_taint_at_target(&address);
points_to_memory_taint = true;
}
......@@ -234,68 +185,6 @@ impl<'a> Context<'a> {
temp_mem_taints
}
/// This function taints the registers and stack positions of the parameter pointers of external functions
/// If the function is one of the specified string functions, the processing of the call is transferred to
/// the string function processor
pub fn taint_generic_function_parameters_and_remove_non_callee_saved(
&self,
state: &State,
symbol: &ExternSymbol,
call_source_node: NodeIndex,
) -> State {
let mut new_state = state.clone();
// Check if the extern symbol is a string symbol, since the return register is not tainted for these.
// Instead, is has to be checked whether the first function parameter points to a tainted memory address
if self
.symbol_maps
.string_symbol_map
.get(&symbol.tid)
.is_some()
{
new_state.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
new_state = self.taint_string_function_parameters(&new_state, symbol, call_source_node);
} else {
// Check whether the return register is tainted before the call
// If so, taint the parameter registers and memory addresses of possible stack parameters
let return_registers = symbol
.return_values
.iter()
.filter_map(|ret| match ret {
Arg::Register(var) => Some(var.name.clone()),
_ => None,
})
.collect::<Vec<String>>();
if new_state.check_return_registers_for_taint(return_registers) {
new_state
.remove_non_callee_saved_taint(symbol.get_calling_convention(self.project));
// TODO: Parameter detection since targets of input parameters are the return locations
// Taint memory for string inputs
if self
.symbol_maps
.user_input_symbol_map
.get(&symbol.tid)
.is_some()
{
self.generate_cwe_warning(
&new_state.get_current_sub().as_ref().unwrap().term.name,
);
}
if let Some(NodeValue::Value(pi_state)) = self
.pointer_inference_results
.get_node_value(call_source_node)
{
self.taint_function_arguments(
&mut new_state,
pi_state,
symbol.parameters.clone(),
);
}
}
}
new_state
}
/// Checks whether the current def term is the last def term
/// of its corresponding block and if so, returns the node index of the BlkStart node.
pub fn get_blk_start_node_if_last_def(
......@@ -372,6 +261,7 @@ impl<'a> Context<'a> {
var,
input,
&self.project.stack_pointer_register,
self.runtime_memory_image,
)
}
}
......@@ -456,6 +346,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
address,
value,
&self.project.stack_pointer_register,
self.runtime_memory_image,
),
}
......@@ -572,7 +463,7 @@ impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Con
Jmp::Call { target, .. } => {
let source_node = self.get_source_node(&new_state, &call.tid);
if let Some(extern_symbol) = self.symbol_maps.extern_symbol_map.get(target) {
new_state = self.taint_generic_function_parameters_and_remove_non_callee_saved(
new_state = self.taint_generic_extern_symbol_parameters(
&new_state,
extern_symbol,
source_node,
......
use std::collections::{HashMap, HashSet};
use crate::{
abstract_domain::{AbstractDomain, AbstractIdentifier, MemRegion, SizedDomain, TryToBitvec},
abstract_domain::{
AbstractDomain, AbstractIdentifier, DataDomain, IntervalDomain, MemRegion, SizedDomain,
TryToBitvec,
},
analysis::pointer_inference::{Data, State as PointerInferenceState},
checkers::cwe_476::Taint,
intermediate_representation::{
Arg, CallingConvention, Expression, ExternSymbol, Project, Sub, Variable,
},
prelude::*,
utils::binary::RuntimeMemoryImage,
};
#[derive(Serialize, Deserialize, Debug, Eq, Clone)]
......@@ -17,7 +21,7 @@ pub struct State {
/// The Taint contained in memory objects
memory_taint: HashMap<AbstractIdentifier, MemRegion<Taint>>,
/// The set of addresses in the binary where string constants reside
string_constants: HashSet<Bitvector>,
string_constants: HashSet<String>,
/// A map from Def Tids to their corresponding pointer inference state.
/// The pointer inference states are calculated in a forward manner
/// from the BlkStart node when entering a BlkEnd node through a jump.
......@@ -206,10 +210,20 @@ impl State {
}
/// Gets the string constant saved at the given address and saves it to the string constants field.
pub fn evaluate_constant(&mut self, constant: Bitvector) {
// TODO: check whether the constant is a valid memory address in the binary
// If so, get the string constant at that memory address and save it in the state
self.string_constants.insert(constant);
pub fn evaluate_constant(
&mut self,
runtime_memory_image: &RuntimeMemoryImage,
constant: Bitvector,
) {
if runtime_memory_image.is_global_memory_address(&constant) {
match runtime_memory_image.read_string_until_null_terminator(&constant) {
Ok(format_string) => {
self.string_constants.insert(format_string.to_string());
}
// TODO: Change to log
Err(e) => panic!("{}", e),
}
}
}
/// Taints input registers and evaluates constant memory addresses for simple assignments
......@@ -221,10 +235,13 @@ impl State {
result: &Variable,
expression: &Expression,
stack_pointer_register: &Variable,
runtime_memory_image: &RuntimeMemoryImage,
) {
self.remove_register_taint(result);
match expression {
Expression::Const(constant) => self.evaluate_constant(constant.clone()),
Expression::Const(constant) => {
self.evaluate_constant(runtime_memory_image, constant.clone())
}
Expression::Var(var) => self.taint_variable_input(var, stack_pointer_register, def_tid),
Expression::BinOp { .. } => {
if let Some(pid_map) = self.pi_def_map.as_ref() {
......@@ -236,9 +253,12 @@ impl State {
}
Expression::UnOp { arg, .. }
| Expression::Cast { arg, .. }
| Expression::Subpiece { arg, .. } => {
self.taint_def_input_register(arg, stack_pointer_register, def_tid)
}
| Expression::Subpiece { arg, .. } => self.taint_def_input_register(
arg,
stack_pointer_register,
def_tid,
runtime_memory_image,
),
_ => (),
}
}
......@@ -250,12 +270,18 @@ impl State {
target: &Expression,
value: &Expression,
stack_pointer_register: &Variable,
runtime_memory_image: &RuntimeMemoryImage,
) {
if let Some(pid_map) = self.pi_def_map.as_ref() {
if let Some(pi_state) = pid_map.get(def_tid) {
let address = pi_state.eval(target);
if self.address_points_to_taint(address.clone(), &pi_state) {
self.taint_def_input_register(value, stack_pointer_register, def_tid);
self.taint_def_input_register(
value,
stack_pointer_register,
def_tid,
runtime_memory_image,
);
self.remove_mem_taint_at_target(&address);
}
}
......@@ -268,20 +294,35 @@ impl State {
expr: &Expression,
stack_pointer_register: &Variable,
def_tid: &Tid,
runtime_memory_image: &RuntimeMemoryImage,
) {
match expr {
// TODO: Distinguish integer constants from global addresses in evaluate constant
Expression::Const(constant) => self.evaluate_constant(constant.clone()),
Expression::Const(constant) => {
self.evaluate_constant(runtime_memory_image, constant.clone())
}
Expression::Var(var) => self.taint_variable_input(var, stack_pointer_register, def_tid),
Expression::BinOp { lhs, rhs, .. } => {
self.taint_def_input_register(lhs, stack_pointer_register, def_tid);
self.taint_def_input_register(rhs, stack_pointer_register, def_tid);
self.taint_def_input_register(
lhs,
stack_pointer_register,
def_tid,
runtime_memory_image,
);
self.taint_def_input_register(
rhs,
stack_pointer_register,
def_tid,
runtime_memory_image,
);
}
Expression::UnOp { arg, .. }
| Expression::Cast { arg, .. }
| Expression::Subpiece { arg, .. } => {
self.taint_def_input_register(arg, stack_pointer_register, def_tid)
}
| Expression::Subpiece { arg, .. } => self.taint_def_input_register(
arg,
stack_pointer_register,
def_tid,
runtime_memory_image,
),
_ => (),
}
}
......@@ -400,6 +441,22 @@ impl State {
}
}
/// Removes the taint of a callee saved register if it was identified as the return target of
/// a string symbol.
pub fn remove_callee_saved_taint_if_destination_parameter(
&mut self,
destination_address: &DataDomain<IntervalDomain>,
pi_state: &PointerInferenceState,
standard_cconv: &CallingConvention,
) {
for (var, _) in self.get_callee_saved_register_taints(standard_cconv).iter() {
let callee_saved_address = pi_state.eval(&Expression::Var(var.clone()));
if callee_saved_address == *destination_address {
self.remove_register_taint(var);
}
}
}
/// Remove the taint from all registers not contained in the callee-saved register list of the given calling convention.
pub fn remove_non_callee_saved_taint(&mut self, calling_conv: &CallingConvention) {
self.register_taint = self
......
......@@ -78,7 +78,8 @@ struct Setup {
rdi: Variable,
rsi: Variable,
rsp: Variable,
constant: Bitvector,
constant: String,
constant_address: Bitvector,
def_tid: Tid,
stack_pointer: DataDomain<ValueDomain>,
base_eight_offset: DataDomain<ValueDomain>,
......@@ -95,7 +96,8 @@ impl Setup {
rdi: Variable::mock("RDI", 8 as u64),
rsi: Variable::mock("RSI", 8 as u64),
rsp: Variable::mock("RSP", 8 as u64),
constant: Bitvector::from_str_radix(16, "ffcc00").unwrap(),
constant: String::from("Hello World"),
constant_address: Bitvector::from_u32(12290),
def_tid: Tid::new("def"),
stack_pointer: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(0))),
base_eight_offset: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-8))),
......@@ -116,7 +118,7 @@ fn setting_expression_and_constants() {
.set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);
// Test Case 1: Constants
let copy_const_expr = Expression::const_from_apint(setup.constant.clone());
let copy_const_expr = Expression::const_from_apint(setup.constant_address);
setup
.state
.set_register_taint(&setup.rdi, Taint::Tainted(setup.rdi.size));
......@@ -126,6 +128,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&copy_const_expr,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
assert_eq!(setup.state.string_constants.len(), 1);
......@@ -145,6 +148,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&copy_var_expr,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
assert_eq!(
......@@ -162,6 +166,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&stack_expression,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
assert_eq!(
......@@ -182,6 +187,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&bin_op_expr,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rdi), None);
assert_eq!(
......@@ -204,6 +210,7 @@ fn setting_expression_and_constants() {
&setup.rdi,
&cast_expr,
&setup.rsp,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup.state.get_register_taint(&setup.rdi),
......@@ -231,6 +238,7 @@ fn tainting_values_to_be_stored() {
&Expression::var("RDI"),
&Expression::var("RSI"),
&stack_pointer,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup
......@@ -256,6 +264,7 @@ fn tainting_values_to_be_stored() {
&Expression::var("RDI"),
&Expression::var("RSI"),
&stack_pointer,
&RuntimeMemoryImage::mock(),
);
assert_eq!(setup.state.get_register_taint(&setup.rsi), None);
}
......@@ -271,18 +280,24 @@ fn tainting_def_input_register() {
.set_pointer_inference_state_for_def(Some(setup.pi_state.clone()), &setup.def_tid);
// Test Case 1: Variable input
setup
.state
.taint_def_input_register(&Expression::var("RDI"), &stack_pointer, &setup.def_tid);
setup.state.taint_def_input_register(
&Expression::var("RDI"),
&stack_pointer,
&setup.def_tid,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup.state.get_register_taint(&rdi_reg),
Some(&Taint::Tainted(rdi_reg.size))
);
// Test Case 2: Stack Pointer input
setup
.state
.taint_def_input_register(&Expression::var("RSP"), &stack_pointer, &setup.def_tid);
setup.state.taint_def_input_register(
&Expression::var("RSP"),
&stack_pointer,
&setup.def_tid,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup
......@@ -298,6 +313,7 @@ fn tainting_def_input_register() {
&Expression::var("RDI").plus_const(8),
&stack_pointer,
&setup.def_tid,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup.state.get_register_taint(&rdi_reg),
......@@ -311,6 +327,7 @@ fn tainting_def_input_register() {
&Expression::var("RDI").cast(CastOpType::IntZExt),
&stack_pointer,
&setup.def_tid,
&RuntimeMemoryImage::mock(),
);
assert_eq!(
setup.state.get_register_taint(&rdi_reg),
......
......@@ -630,6 +630,19 @@ mod tests {
callee_saved_register: vec!["RBP".to_string()],
}
}
pub fn mock_with_parameter_registers(
integer_parameter_register: Vec<String>,
float_parameter_register: Vec<String>,
) -> CallingConvention {
CallingConvention {
name: "__stdcall".to_string(), // so that the mock is useable as standard calling convention in tests
integer_parameter_register,
float_parameter_register,
return_register: vec!["RAX".to_string()],
callee_saved_register: vec!["RBP".to_string()],
}
}
}
impl Arg {
......
......@@ -175,15 +175,29 @@ impl RuntimeMemoryImage {
if address >= segment.base_address
&& address <= segment.base_address + segment.bytes.len() as u64
{
let index = (address - segment.base_address) as usize;
let c_str = std::ffi::CStr::from_bytes_with_nul(&segment.bytes[index..])?;
let start_index = (address - segment.base_address) as usize;
if let Some(end_index) = segment.bytes[start_index..].iter().position(|&b| b == 0) {
let c_str = std::ffi::CStr::from_bytes_with_nul(
&segment.bytes[start_index..start_index + end_index + 1],
)?;
return Ok(c_str.to_str()?);
} else {
return Err(anyhow!("Not a valid string in memory."));
}
}
}
Err(anyhow!("Address is not a valid global memory address."))
}
/// Checks whether the constant is a global memory address.
pub fn is_global_memory_address(&self, constant: &Bitvector) -> bool {
if self.read(constant, constant.bytesize()).is_ok() {
return true;
}
false
}
/// Check whether all addresses in the given interval point to a readable segment in the runtime memory image.
///
/// Returns an error if the address interval intersects more than one memory segment
......@@ -304,6 +318,24 @@ pub mod tests {
write_flag: false,
execute_flag: false,
},
MemorySegment {
bytes: [0x02, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00].to_vec(),
base_address: 0x4000,
read_flag: true,
write_flag: false,
execute_flag: false,
},
MemorySegment {
bytes: [
0x2f, 0x64, 0x65, 0x76, 0x2f, 0x73, 0x64, 0x25, 0x63, 0x25, 0x64, 0x00,
0x63, 0x61, 0x74, 0x20, 0x25, 0x73, 0x00,
]
.to_vec(),
base_address: 0x5000,
read_flag: true,
write_flag: false,
execute_flag: false,
},
],
is_little_endian: true,
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment