Unverified Commit c6a2741b by Melvin Klimke Committed by GitHub

Detect scanf and sscanf calls in CWE-78 check (#184)

parent 60f934d6
...@@ -13,14 +13,16 @@ ...@@ -13,14 +13,16 @@
"user_input_symbols": [ "user_input_symbols": [
"scanf", "scanf",
"__isoc99_scanf", "__isoc99_scanf",
"sscanf" "sscanf",
"__isoc99_sscanf"
], ],
"format_string_index": { "format_string_index": {
"sprintf": 1, "sprintf": 1,
"snprintf": 2, "snprintf": 2,
"scanf": 0, "scanf": 0,
"__isoc99_scanf": 0, "__isoc99_scanf": 0,
"sscanf": 1 "sscanf": 1,
"__isoc99_sscanf": 1
} }
}, },
"CWE190": { "CWE190": {
......
...@@ -100,6 +100,18 @@ impl<'a> Node<'a> { ...@@ -100,6 +100,18 @@ impl<'a> Node<'a> {
} }
} }
} }
/// Get the sub corresponding to the node for `BlkStart` and `BlkEnd` nodes.
/// panics if called on a `CallReturn` node.
pub fn get_sub(&self) -> &'a Term<Sub> {
use Node::*;
match self {
BlkStart(_blk, sub) | BlkEnd(_blk, sub) => sub,
CallSource { .. } | CallReturn { .. } => {
panic!("get_sub() is undefined for CallReturn and CallSource nodes")
}
}
}
} }
impl<'a> std::fmt::Display for Node<'a> { impl<'a> std::fmt::Display for Node<'a> {
......
...@@ -515,5 +515,10 @@ mod tests { ...@@ -515,5 +515,10 @@ mod tests {
let (log_sender, _) = crossbeam_channel::unbounded(); let (log_sender, _) = crossbeam_channel::unbounded();
PointerInference::new(project, mem_image, graph, config, log_sender) PointerInference::new(project, mem_image, graph, config, log_sender)
} }
/// Test helper: store `node_value` as the value of the node `node_index`
/// in the wrapped fixpoint computation.
pub fn set_node_value(&mut self, node_value: State, node_index: NodeIndex) {
    let wrapped_value = NodeValue::Value(node_value);
    self.computation.set_node_value(node_index, wrapped_value);
}
} }
} }
...@@ -222,7 +222,7 @@ fn get_entry_sub_to_entry_node_map( ...@@ -222,7 +222,7 @@ fn get_entry_sub_to_entry_node_map(
/// - Maps a symbol name to the index of its format string parameter. /// - Maps a symbol name to the index of its format string parameter.
pub struct SymbolMaps<'a> { pub struct SymbolMaps<'a> {
string_symbol_map: HashMap<Tid, &'a ExternSymbol>, string_symbol_map: HashMap<Tid, &'a ExternSymbol>,
_user_input_symbol_map: HashMap<Tid, &'a ExternSymbol>, user_input_symbol_map: HashMap<Tid, &'a ExternSymbol>,
extern_symbol_map: HashMap<Tid, &'a ExternSymbol>, extern_symbol_map: HashMap<Tid, &'a ExternSymbol>,
format_string_index: HashMap<String, usize>, format_string_index: HashMap<String, usize>,
} }
...@@ -239,7 +239,7 @@ impl<'a> SymbolMaps<'a> { ...@@ -239,7 +239,7 @@ impl<'a> SymbolMaps<'a> {
project, project,
&config.string_symbols[..], &config.string_symbols[..],
), ),
_user_input_symbol_map: crate::utils::symbol_utils::get_symbol_map( user_input_symbol_map: crate::utils::symbol_utils::get_symbol_map(
project, project,
&config.user_input_symbols[..], &config.user_input_symbols[..],
), ),
......
...@@ -3,7 +3,10 @@ use regex::Regex; ...@@ -3,7 +3,10 @@ use regex::Regex;
use crate::{ use crate::{
abstract_domain::{DataDomain, IntervalDomain, TryToBitvec}, abstract_domain::{DataDomain, IntervalDomain, TryToBitvec},
analysis::interprocedural_fixpoint_generic::NodeValue, analysis::{
backward_interprocedural_fixpoint::Context as _,
interprocedural_fixpoint_generic::NodeValue,
},
intermediate_representation::{Arg, ByteSize, CallingConvention, ExternSymbol, Variable}, intermediate_representation::{Arg, ByteSize, CallingConvention, ExternSymbol, Variable},
}; };
use crate::{ use crate::{
...@@ -24,8 +27,11 @@ impl<'a> Context<'a> { ...@@ -24,8 +27,11 @@ impl<'a> Context<'a> {
if self.is_string_symbol(symbol) { if self.is_string_symbol(symbol) {
return self.taint_extern_string_symbol_parameters(state, symbol, call_source_node); return self.taint_extern_string_symbol_parameters(state, symbol, call_source_node);
} }
if self.is_user_input_symbol(symbol) {
return self.taint_user_input_symbol_parameters(state, symbol, call_source_node);
}
self.taint_other_extern_symbol_parameters(state, call_source_node, symbol) self.taint_other_extern_symbol_parameters(state, symbol, call_source_node)
} }
/// Checks whether the current symbol is a string symbol as defined in the symbol configuration. /// Checks whether the current symbol is a string symbol as defined in the symbol configuration.
...@@ -36,13 +42,141 @@ impl<'a> Context<'a> { ...@@ -36,13 +42,141 @@ impl<'a> Context<'a> {
.is_some() .is_some()
} }
/// Checks whether the given symbol is a user input symbol as defined in the symbol configuration.
///
/// Returns `true` if the TID of `symbol` is a key of the user input symbol map.
pub fn is_user_input_symbol(&self, symbol: &ExternSymbol) -> bool {
    // `contains_key` states the membership test directly instead of
    // fetching the value only to throw it away again.
    self.symbol_maps
        .user_input_symbol_map
        .contains_key(&symbol.tid)
}
/// Compute the taint state before a call to a user input symbol (`scanf`- or `sscanf`-like).
///
/// The taint of all non-callee-saved registers is removed first.
/// Afterwards the variable parameters of the call are parsed from the format string:
/// - For a *scanf* call a CWE warning is generated and all taints are removed
///   if one of the parameters points to tainted memory, as the input can be arbitrary.
///   Analysing the format string avoids false positives
///   (e.g. pure integer input does not trigger a CWE warning).
/// - For a *sscanf* call the source string parameter gets tainted
///   if one of the tainted return locations is a string.
///
/// Since the format parameters of a (s)scanf call are also its return locations,
/// the relevance of the call is only checked after the parameters have been parsed.
/// The call is of no relevance if the parameter list is empty (no string parameters)
/// or if none of the parameters points to a tainted memory position.
///
/// # Panics
///
/// Panics if the parameter list is non-empty and the symbol name is none of
/// `scanf`, `__isoc99_scanf`, `sscanf` or `__isoc99_sscanf`,
/// or if a `sscanf` symbol has no first parameter.
pub fn taint_user_input_symbol_parameters(
    &self,
    state: &State,
    user_input_symbol: &ExternSymbol,
    call_source_node: NodeIndex,
) -> State {
    let mut tainted_state = state.clone();
    tainted_state
        .remove_non_callee_saved_taint(user_input_symbol.get_calling_convention(self.project));

    // Without pointer inference results for the call site nothing more can be determined.
    let pi_state = match self
        .pointer_inference_results
        .get_node_value(call_source_node)
    {
        Some(NodeValue::Value(pi_state)) => pi_state,
        _ => return tainted_state,
    };

    let parameters = self.get_variable_number_parameters(pi_state, user_input_symbol);
    if parameters.is_empty() {
        return tainted_state;
    }

    match user_input_symbol.name.as_str() {
        "scanf" | "__isoc99_scanf" => {
            self.process_scanf(call_source_node, &mut tainted_state, pi_state, parameters);
        }
        "sscanf" | "__isoc99_sscanf" => {
            // The first parameter of sscanf is the source string.
            let source_string_register = user_input_symbol.parameters.get(0).unwrap();
            self.process_sscanf(
                &mut tainted_state,
                pi_state,
                parameters,
                source_string_register,
            );
        }
        _ => panic!("Invalid user input symbol."),
    }
    tainted_state
}
/// Iterates over the scanf string parameters and generates a CWE warning
/// if one of them points to a tainted memory position.
///
/// If the call is relevant, all register and memory taints are deleted,
/// since it cannot be determined anymore where the whole input originates from.
/// Parameters whose address cannot be evaluated are skipped.
///
/// # Panics
///
/// Panics if `call_source_node` is not a node of the graph
/// or if the node is a `CallSource` or `CallReturn` node (see `Node::get_sub`).
pub fn process_scanf(
    &self,
    call_source_node: NodeIndex,
    new_state: &mut State,
    pi_state: &PointerInferenceState,
    parameters: Vec<Arg>,
) {
    for param in parameters.iter() {
        if let Ok(address) = pi_state.eval_parameter_arg(
            param,
            &self.project.stack_pointer_register,
            self.runtime_memory_image,
        ) {
            // `address` is not used after this check, so it is moved instead of cloned.
            if new_state.address_points_to_taint(address, pi_state) {
                // The warning is reported for the function containing the call.
                self.generate_cwe_warning(
                    &self
                        .get_graph()
                        .node_weight(call_source_node)
                        .unwrap()
                        .get_sub()
                        .term
                        .name,
                );
                new_state.remove_all_register_taints();
                new_state.remove_all_memory_taints();
                // One warning per call is enough and no taint is left to check.
                break;
            }
        }
    }
}
/// Handles a sscanf call: if at least one return parameter points to tainted memory,
/// the taint is moved from these return locations to the source string.
///
/// Note that the return parameters of sscanf are identical to the
/// parameters parsed from the format string.
/// Parameters whose address cannot be evaluated are skipped.
/// The taint written for the source string has the size of the stack pointer register.
pub fn process_sscanf(
    &self,
    new_state: &mut State,
    pi_state: &PointerInferenceState,
    format_string_parameters: Vec<Arg>,
    source_string_parameter: &Arg,
) {
    // Lazily evaluate the target address of each return parameter.
    let return_targets = format_string_parameters.iter().filter_map(|param| {
        pi_state
            .eval_parameter_arg(
                param,
                &self.project.stack_pointer_register,
                self.runtime_memory_image,
            )
            .ok()
    });

    let mut tainted_target_found = false;
    for target in return_targets {
        if new_state.address_points_to_taint(target.clone(), pi_state) {
            // The return parameter points to tainted memory, so the taint is removed there.
            tainted_target_found = true;
            new_state.remove_mem_taint_at_target(&target);
        }
    }
    if !tainted_target_found {
        return;
    }

    if let Ok(source_address) = pi_state.eval_parameter_arg(
        source_string_parameter,
        &self.project.stack_pointer_register,
        self.runtime_memory_image,
    ) {
        let pointer_sized_taint = Taint::Tainted(self.project.stack_pointer_register.size);
        new_state.save_taint_to_memory(&source_address, pointer_sized_taint);
    }
}
/// Taints the parameters of a non string related extern symbol if it is relevant to the taint analysis. /// Taints the parameters of a non string related extern symbol if it is relevant to the taint analysis.
/// To determine whether the symbol is relevant, it is checked if either the arch's return registers are tainted /// To determine whether the symbol is relevant, it is checked if either the arch's return registers are tainted
pub fn taint_other_extern_symbol_parameters( pub fn taint_other_extern_symbol_parameters(
&self, &self,
state: &State, state: &State,
call_source_node: NodeIndex,
symbol: &ExternSymbol, symbol: &ExternSymbol,
call_source_node: NodeIndex,
) -> State { ) -> State {
let mut new_state = state.clone(); let mut new_state = state.clone();
// Check whether the return register is tainted before the call // Check whether the return register is tainted before the call
...@@ -107,11 +241,11 @@ impl<'a> Context<'a> { ...@@ -107,11 +241,11 @@ impl<'a> Context<'a> {
/// it is checked whether it points to a tainted memory address. /// it is checked whether it points to a tainted memory address.
pub fn is_relevant_string_function_call( pub fn is_relevant_string_function_call(
&self, &self,
string_symbol: &ExternSymbol, symbol: &ExternSymbol,
pi_state: &PointerInferenceState, pi_state: &PointerInferenceState,
state: &mut State, state: &mut State,
) -> bool { ) -> bool {
if let Some(param) = string_symbol.parameters.get(0) { if let Some(param) = symbol.parameters.get(0) {
self.first_param_points_to_memory_taint(pi_state, state, param) self.first_param_points_to_memory_taint(pi_state, state, param)
} else { } else {
panic!("Missing parameters for string related function!"); panic!("Missing parameters for string related function!");
......
use crate::checkers::cwe_476::Taint; use petgraph::graph::NodeIndex;
use crate::intermediate_representation::{ use crate::intermediate_representation::{
Arg, BinOpType, Bitvector, ByteSize, Expression, ExternSymbol, Tid, Variable, Arg, BinOpType, Bitvector, ByteSize, Expression, ExternSymbol, Tid, Variable,
}; };
...@@ -11,12 +12,19 @@ use crate::{ ...@@ -11,12 +12,19 @@ use crate::{
analysis::pointer_inference::{Data, PointerInference as PointerInferenceComputation}, analysis::pointer_inference::{Data, PointerInference as PointerInferenceComputation},
intermediate_representation::DatatypeProperties, intermediate_representation::DatatypeProperties,
}; };
use crate::{checkers::cwe_476::Taint, utils::log::CweWarning};
use super::super::tests::{bv, Setup}; use super::super::tests::{bv, Setup};
use super::Context; use super::Context;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
// Test-only helper methods for the taint analysis context.
impl<'a> Context<'a> {
/// Replace the channel sender through which the context sends its CWE warnings,
/// so that a test can hold the matching receiver and inspect the warnings.
pub fn set_cwe_collector(&mut self, collector: crossbeam_channel::Sender<CweWarning>) {
self.cwe_collector = collector;
}
}
#[test] #[test]
fn tainting_generic_extern_symbol_parameters() { fn tainting_generic_extern_symbol_parameters() {
let mut setup = Setup::new(); let mut setup = Setup::new();
...@@ -42,7 +50,8 @@ fn tainting_generic_extern_symbol_parameters() { ...@@ -42,7 +50,8 @@ fn tainting_generic_extern_symbol_parameters() {
.set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size)); .set_register_taint(&r9_reg, Taint::Tainted(r9_reg.size));
let mut string_syms: HashMap<Tid, &ExternSymbol> = HashMap::new(); let mut string_syms: HashMap<Tid, &ExternSymbol> = HashMap::new();
string_syms.insert(Tid::new("sprintf"), &setup.string_sym); let string_sym = ExternSymbol::mock_string();
string_syms.insert(Tid::new("sprintf"), &string_sym);
let mut format_string_index: HashMap<String, usize> = HashMap::new(); let mut format_string_index: HashMap<String, usize> = HashMap::new();
format_string_index.insert("sprintf".to_string(), 1); format_string_index.insert("sprintf".to_string(), 1);
let context = Context::mock( let context = Context::mock(
...@@ -104,8 +113,6 @@ fn tainting_generic_extern_symbol_parameters() { ...@@ -104,8 +113,6 @@ fn tainting_generic_extern_symbol_parameters() {
assert_eq!(new_state.get_register_taint(&r9_reg), None); assert_eq!(new_state.get_register_taint(&r9_reg), None);
assert_eq!(new_state.get_register_taint(&rax_reg), None); assert_eq!(new_state.get_register_taint(&rax_reg), None);
assert_eq!(new_state.get_register_taint(&rsi_reg), None); assert_eq!(new_state.get_register_taint(&rsi_reg), None);
// TODO: add test for scanf when parameter detection is implemented
} }
#[test] #[test]
...@@ -147,8 +154,11 @@ fn tainting_extern_string_symbol_parameters() { ...@@ -147,8 +154,11 @@ fn tainting_extern_string_symbol_parameters() {
.get(&(Tid::new("call_string"), Tid::new("func"))) .get(&(Tid::new("call_string"), Tid::new("func")))
.unwrap(); .unwrap();
let new_state = let new_state = context.taint_extern_string_symbol_parameters(
context.taint_extern_string_symbol_parameters(&setup.state, &setup.string_sym, *node_id); &setup.state,
&ExternSymbol::mock_string(),
*node_id,
);
assert_eq!( assert_eq!(
new_state.address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state), new_state.address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state),
...@@ -167,6 +177,199 @@ fn tainting_extern_string_symbol_parameters() { ...@@ -167,6 +177,199 @@ fn tainting_extern_string_symbol_parameters() {
} }
/// A scanf call whose parsed string parameter points to tainted memory
/// must produce a CWE warning.
#[test]
fn tainting_user_input_symbol_parameters() {
    let mut setup = Setup::new();
    let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded::<CweWarning>();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let call_source_node: NodeIndex = graph.node_indices().next().unwrap();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();
    let mut format_string_index: HashMap<String, usize> = HashMap::new();
    format_string_index.insert("scanf".to_string(), 0);
    // NOTE(review): 0x500c is presumably the address of a format string
    // in the mocked memory image -- confirm against `RuntimeMemoryImage::mock`.
    let global_address = Bitvector::from_str_radix(16, "500c").unwrap();
    let string_address =
        DataDomain::Value(IntervalDomain::new(global_address.clone(), global_address));
    let mut pi_result_state = pi_results
        .get_node_value(call_source_node)
        .unwrap()
        .unwrap_value()
        .clone();
    // Taint the stack memory at offset -8 ...
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));
    // ... and let the first stack parameter (at RSP + 0) point to it.
    pi_result_state
        .write_to_address(
            &Expression::BinOp {
                op: BinOpType::IntAdd,
                lhs: Box::new(Expression::Var(Variable {
                    name: String::from("RSP"),
                    size: ByteSize::new(8),
                    is_temp: false,
                })),
                rhs: Box::new(Expression::Const(Bitvector::from_u64(0))),
            },
            &Data::Pointer(PointerDomain::new(setup.pi_state.stack_id.clone(), bv(-8))),
            &mem_image,
        )
        .expect("Failed to write to address.");
    // The format string parameter of scanf is passed in RDI.
    pi_result_state.set_register(&Variable::mock("RDI", ByteSize::new(8)), string_address);
    pi_results.set_node_value(pi_result_state, call_source_node);
    let mut context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        format_string_index,
        &pi_results,
        &mem_image,
    );
    context.set_cwe_collector(cwe_sender);
    context.taint_source = Some(&setup.taint_source);
    context.taint_source_name = Some("system".to_string());
    context.taint_user_input_symbol_parameters(
        &setup.state,
        &ExternSymbol::mock_scanf(),
        call_source_node,
    );
    assert!(!cwe_receiver.is_empty());
}
/// `process_scanf` must send a CWE warning if a scanf parameter points to tainted memory.
#[test]
fn processing_scanf() {
    let mut setup = Setup::new();
    let (cwe_sender, cwe_receiver) = crossbeam_channel::unbounded::<CweWarning>();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let call_source_node: NodeIndex = graph.node_indices().next().unwrap();
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    let mut context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );
    context.set_cwe_collector(cwe_sender);
    context.taint_source = Some(&setup.taint_source);
    context.taint_source_name = Some("system".to_string());

    // Taint the stack memory at offset -8.
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    // Store a pointer to the tainted memory at the address `RSP + 0`,
    // which is where the stack parameter below is read from.
    let stack_pointer = Variable {
        name: String::from("RSP"),
        size: ByteSize::new(8),
        is_temp: false,
    };
    let stack_top = Expression::BinOp {
        op: BinOpType::IntAdd,
        lhs: Box::new(Expression::Var(stack_pointer)),
        rhs: Box::new(Expression::Const(Bitvector::from_u64(0))),
    };
    let pointer_to_tainted_memory =
        Data::Pointer(PointerDomain::new(setup.pi_state.stack_id.clone(), bv(-8)));
    setup
        .pi_state
        .write_to_address(
            &stack_top,
            &pointer_to_tainted_memory,
            context.runtime_memory_image,
        )
        .expect("Failed to write to address.");

    let scanf_parameters = vec![Arg::Stack {
        offset: 0,
        size: ByteSize::new(8),
    }];
    context.process_scanf(
        call_source_node,
        &mut setup.state,
        &setup.pi_state,
        scanf_parameters,
    );

    assert!(!cwe_receiver.is_empty());
}
/// `process_sscanf` must move the taint from the return location to the source string.
#[test]
fn processing_sscanf() {
    let mut setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        HashMap::new(),
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    // The source string register RDI points to the stack memory at offset -16.
    let source_register = Variable::mock("RDI", ByteSize::new(8));
    setup
        .pi_state
        .set_register(&source_register, setup.base_sixteen_offset.clone());

    // The stack parameter at `RSP + 0` points to the stack memory at offset -8 ...
    let stack_pointer = Variable {
        name: String::from("RSP"),
        size: ByteSize::new(8),
        is_temp: false,
    };
    let stack_top = Expression::BinOp {
        op: BinOpType::IntAdd,
        lhs: Box::new(Expression::Var(stack_pointer)),
        rhs: Box::new(Expression::Const(Bitvector::from_u64(0))),
    };
    let pointer_to_return_location =
        Data::Pointer(PointerDomain::new(setup.pi_state.stack_id.clone(), bv(-8)));
    setup
        .pi_state
        .write_to_address(
            &stack_top,
            &pointer_to_return_location,
            context.runtime_memory_image,
        )
        .expect("Failed to write to address.");

    // ... which is tainted before the call.
    setup
        .state
        .save_taint_to_memory(&setup.base_eight_offset, Taint::Tainted(ByteSize::new(8)));

    let return_parameters = vec![Arg::Stack {
        offset: 0,
        size: ByteSize::new(8),
    }];
    context.process_sscanf(
        &mut setup.state,
        &setup.pi_state,
        return_parameters,
        &Arg::Register(source_register),
    );

    // The source string is tainted now, the return location is not anymore.
    assert!(setup
        .state
        .address_points_to_taint(setup.base_sixteen_offset, &setup.pi_state));
    assert!(!setup
        .state
        .address_points_to_taint(setup.base_eight_offset, &setup.pi_state));
}
#[test]
fn tainting_function_arguments() { fn tainting_function_arguments() {
let mut setup = Setup::new(); let mut setup = Setup::new();
let rdi_reg = Variable::mock("RDI", 8); let rdi_reg = Variable::mock("RDI", 8);
...@@ -248,6 +451,32 @@ fn test_is_string_symbol() { ...@@ -248,6 +451,32 @@ fn test_is_string_symbol() {
} }
/// Only symbols contained in the user input symbol map are recognized as user input symbols.
#[test]
fn test_is_user_input_symbol() {
    let setup = Setup::new();
    let mem_image = RuntimeMemoryImage::mock();
    let graph = crate::analysis::graph::get_program_cfg(&setup.project.program, HashSet::new());
    let mut pi_results = PointerInferenceComputation::mock(&setup.project, &mem_image, &graph);
    pi_results.compute();

    let scanf_symbol = ExternSymbol {
        tid: Tid::new("scanf"),
        ..ExternSymbol::mock()
    };
    let memcpy_symbol = ExternSymbol {
        tid: Tid::new("memcpy"),
        ..ExternSymbol::mock()
    };
    // Only scanf is registered as a user input symbol.
    let user_input_symbol_map: HashMap<Tid, &ExternSymbol> =
        std::iter::once((Tid::new("scanf"), &scanf_symbol)).collect();

    let context = Context::mock(
        &setup.project,
        HashMap::new(),
        user_input_symbol_map,
        HashMap::new(),
        &pi_results,
        &mem_image,
    );

    assert!(context.is_user_input_symbol(&scanf_symbol));
    assert!(!context.is_user_input_symbol(&memcpy_symbol));
}
#[test]
fn test_get_return_registers_from_symbol() { fn test_get_return_registers_from_symbol() {
assert_eq!( assert_eq!(
vec!["RAX"], vec!["RAX"],
...@@ -381,7 +610,7 @@ fn test_map_format_specifier_to_bytesize() { ...@@ -381,7 +610,7 @@ fn test_map_format_specifier_to_bytesize() {
); );
assert_eq!( assert_eq!(
ByteSize::new(4), ByteSize::new(8),
context.map_format_specifier_to_bytesize("s".to_string()) context.map_format_specifier_to_bytesize("s".to_string())
); );
assert_eq!( assert_eq!(
...@@ -447,7 +676,7 @@ fn test_get_variable_number_parameters() { ...@@ -447,7 +676,7 @@ fn test_get_variable_number_parameters() {
output.push(Arg::Stack { output.push(Arg::Stack {
offset: 0, offset: 0,
size: ByteSize::new(4), size: ByteSize::new(8),
}); });
let global_address = Bitvector::from_str_radix(16, "500c").unwrap(); let global_address = Bitvector::from_str_radix(16, "500c").unwrap();
......
...@@ -38,12 +38,37 @@ impl ExternSymbol { ...@@ -38,12 +38,37 @@ impl ExternSymbol {
has_var_args: true, has_var_args: true,
} }
} }
pub fn mock_scanf() -> Self {
ExternSymbol {
tid: Tid::new("scanf"),
addresses: vec!["UNKNOWN".to_string()],
name: "scanf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("RDI")],
return_values: vec![Arg::mock_register("RAX")],
no_return: false,
has_var_args: true,
}
}
pub fn mock_sscanf() -> Self {
ExternSymbol {
tid: Tid::new("sscanf"),
addresses: vec!["UNKNOWN".to_string()],
name: "sscanf".to_string(),
calling_convention: Some("__stdcall".to_string()),
parameters: vec![Arg::mock_register("RDI"), Arg::mock_register("RSI")],
return_values: vec![Arg::mock_register("RAX")],
no_return: false,
has_var_args: true,
}
}
} }
pub struct Setup { pub struct Setup {
pub project: Project, pub project: Project,
pub state: State, pub state: State,
pub pi_state: PointerInferenceState, pub pi_state: PointerInferenceState,
pub string_sym: ExternSymbol,
pub taint_source: Term<Jmp>, pub taint_source: Term<Jmp>,
pub base_eight_offset: DataDomain<ValueDomain>, pub base_eight_offset: DataDomain<ValueDomain>,
pub base_sixteen_offset: DataDomain<ValueDomain>, pub base_sixteen_offset: DataDomain<ValueDomain>,
...@@ -104,7 +129,6 @@ impl Setup { ...@@ -104,7 +129,6 @@ impl Setup {
project, project,
state, state,
pi_state, pi_state,
string_sym: ExternSymbol::mock_string(),
taint_source, taint_source,
base_eight_offset: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-8))), base_eight_offset: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-8))),
base_sixteen_offset: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-16))), base_sixteen_offset: Data::Pointer(PointerDomain::new(stack_id.clone(), bv(-16))),
...@@ -162,7 +186,7 @@ impl<'a> Context<'a> { ...@@ -162,7 +186,7 @@ impl<'a> Context<'a> {
let symbol_maps: SymbolMaps = SymbolMaps { let symbol_maps: SymbolMaps = SymbolMaps {
string_symbol_map: string_symbols, string_symbol_map: string_symbols,
_user_input_symbol_map: user_input_symbols, user_input_symbol_map: user_input_symbols,
extern_symbol_map, extern_symbol_map,
format_string_index, format_string_index,
}; };
......
...@@ -187,6 +187,16 @@ impl State { ...@@ -187,6 +187,16 @@ impl State {
self.register_taint.iter() self.register_taint.iter()
} }
/// Remove all memory taints
pub fn remove_all_memory_taints(&mut self) {
self.memory_taint = HashMap::new();
}
/// Remove all register taints
pub fn remove_all_register_taints(&mut self) {
self.register_taint = HashMap::new();
}
/// Gets the callee saved taints from the register taints. /// Gets the callee saved taints from the register taints.
pub fn get_callee_saved_register_taints( pub fn get_callee_saved_register_taints(
&self, &self,
......
...@@ -66,10 +66,6 @@ impl State { ...@@ -66,10 +66,6 @@ impl State {
None None
} }
pub fn remove_all_register_taints(&mut self) {
self.register_taint = HashMap::new();
}
} }
struct Setup { struct Setup {
......
...@@ -676,7 +676,7 @@ mod tests { ...@@ -676,7 +676,7 @@ mod tests {
long_double_size: ByteSize::new(8), long_double_size: ByteSize::new(8),
long_long_size: ByteSize::new(8), long_long_size: ByteSize::new(8),
long_size: ByteSize::new(4), long_size: ByteSize::new(4),
pointer_size: ByteSize::new(4), pointer_size: ByteSize::new(8),
short_size: ByteSize::new(2), short_size: ByteSize::new(2),
} }
} }
......
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::usize;
use super::{Expression, ExpressionType, RegisterProperties, Variable}; use super::{Expression, ExpressionType, RegisterProperties, Variable};
use crate::intermediate_representation::Arg as IrArg; use crate::intermediate_representation::Arg as IrArg;
...@@ -392,15 +393,139 @@ pub struct ExternSymbol { ...@@ -392,15 +393,139 @@ pub struct ExternSymbol {
pub has_var_args: bool, pub has_var_args: bool,
} }
impl From<ExternSymbol> for IrExternSymbol { impl ExternSymbol {
/// Artificially creates format string arguments as they are not detected by Ghidra.
///
/// - For scanf calls, the format string parameter is added to the function signature.
/// - For sscanf calls, the source and format string parameters are added to the function signature.
///
/// On `x86_32` the parameters are created as stack arguments,
/// on all other architectures as register arguments taken from the
/// integer parameter registers of the symbol's calling convention.
/// The created arguments are appended to `self.arguments`.
fn create_format_string_args_for_scanf_and_sscanf(
    &mut self,
    conventions: &[CallingConvention],
    stack_pointer: &Variable,
    cpu_arch: &str,
) {
    let is_sscanf = matches!(self.name.as_str(), "sscanf" | "__isoc99_sscanf");
    let mut args: Vec<Arg> = Vec::new();
    if cpu_arch == "x86_32" {
        args.push(ExternSymbol::create_stack_arg(stack_pointer, 0));
        if is_sscanf {
            // Stack offsets are measured in bytes: the second parameter lies exactly
            // one pointer-sized slot (not one pointer *bit length*) above the first one.
            let pointer_size_in_bytes = stack_pointer.size.as_bit_length() / 8;
            args.push(ExternSymbol::create_stack_arg(
                stack_pointer,
                pointer_size_in_bytes,
            ));
        }
    } else {
        args.push(self.create_register_arg(0, conventions, stack_pointer));
        if is_sscanf {
            args.push(self.create_register_arg(1, conventions, stack_pointer));
        }
    }
    self.arguments.append(&mut args);
}
/// Look up the calling convention of the symbol by name and return the name of its
/// integer parameter register at the given index.
///
/// Returns `None` if the symbol has no calling convention
/// or if no convention with a matching name is contained in `conventions`.
///
/// # Panics
///
/// Panics if the matching convention has no integer parameter register at `index`.
fn get_symbol_parameter_by_index(
    &self,
    conventions: &[CallingConvention],
    index: usize,
) -> Option<String> {
    let cconv_name = self.calling_convention.as_ref()?;
    conventions
        .iter()
        .find(|convention| convention.name == *cconv_name)
        .map(|convention| {
            convention
                .integer_parameter_register
                .get(index)
                .unwrap()
                .clone()
        })
}
/// Creates an input stack argument for scanf or sscanf calls.
///
/// The argument is a `LOAD` expression whose only input is a variable with the given
/// `address`, formatted as a zero-padded hexadecimal string.
/// The address differs for both calls, since the format string parameter
/// is at a different position.
fn create_stack_arg(stack_pointer: &Variable, address: usize) -> Arg {
    let pad_width = stack_pointer.size.as_bit_length();
    let hex_address = format!("{:0width$x}", address, width = pad_width);
    let stack_slot = Variable {
        name: None,
        value: None,
        address: Some(hex_address),
        size: stack_pointer.size,
        is_virtual: false,
    };
    let load_from_stack = Expression {
        mnemonic: ExpressionType::LOAD,
        input0: Some(stack_slot),
        input1: None,
        input2: None,
    };
    Arg {
        var: None,
        location: Some(load_from_stack),
        intent: ArgIntent::INPUT,
    }
}
/// Creates an input register argument for scanf and sscanf calls.
///
/// The register is the integer parameter register with the given `index`
/// in the calling convention of the symbol; its size is the size of the stack pointer.
/// The index of the format string differs between both calls.
fn create_register_arg(
    &self,
    index: usize,
    conventions: &[CallingConvention],
    stack_pointer: &Variable,
) -> Arg {
    let register_name = self.get_symbol_parameter_by_index(conventions, index);
    let parameter_register = Variable {
        name: register_name,
        value: None,
        address: None,
        size: stack_pointer.size,
        is_virtual: false,
    };
    Arg {
        var: Some(parameter_register),
        location: None,
        intent: ArgIntent::INPUT,
    }
}
/// Returns `true` if the name of the symbol is one of the known
/// scanf or sscanf variants.
fn is_scanf_or_sscanf(&self) -> bool {
    const SCANF_LIKE_SYMBOLS: [&str; 4] =
        ["scanf", "sscanf", "__isoc99_scanf", "__isoc99_sscanf"];
    SCANF_LIKE_SYMBOLS.contains(&self.name.as_str())
}
/// Convert an extern symbol parsed from Ghidra to the internally used IR. /// Convert an extern symbol parsed from Ghidra to the internally used IR.
fn from(symbol: ExternSymbol) -> IrExternSymbol { fn into_ir_symbol(
self,
conventions: &[CallingConvention],
stack_pointer: &Variable,
cpu_arch: &str,
) -> IrExternSymbol {
let mut symbol = self.clone();
let mut parameters = Vec::new(); let mut parameters = Vec::new();
let mut return_values = Vec::new(); let mut return_values = Vec::new();
for arg in symbol.arguments { let input_args: Vec<&Arg> = symbol
let ir_arg = if let Some(var) = arg.var { .arguments
.iter()
.filter(|arg| matches!(arg.intent, ArgIntent::INPUT))
.collect();
if symbol.is_scanf_or_sscanf() && input_args.is_empty() {
symbol.create_format_string_args_for_scanf_and_sscanf(
conventions,
stack_pointer,
cpu_arch,
);
}
for arg in symbol.arguments.iter() {
let ir_arg = if let Some(var) = arg.var.clone() {
IrArg::Register(var.into()) IrArg::Register(var.into())
} else if let Some(expr) = arg.location { } else if let Some(expr) = arg.location.clone() {
if expr.mnemonic == ExpressionType::LOAD { if expr.mnemonic == ExpressionType::LOAD {
IrArg::Stack { IrArg::Stack {
offset: i64::from_str_radix( offset: i64::from_str_radix(
...@@ -427,14 +552,14 @@ impl From<ExternSymbol> for IrExternSymbol { ...@@ -427,14 +552,14 @@ impl From<ExternSymbol> for IrExternSymbol {
} }
} }
IrExternSymbol { IrExternSymbol {
tid: symbol.tid, tid: self.tid,
addresses: symbol.addresses, addresses: self.addresses,
name: symbol.name, name: self.name,
calling_convention: symbol.calling_convention, calling_convention: self.calling_convention,
parameters, parameters,
return_values, return_values,
no_return: symbol.no_return, no_return: self.no_return,
has_var_args: symbol.has_var_args, has_var_args: self.has_var_args,
} }
} }
} }
...@@ -466,17 +591,19 @@ impl Program { ...@@ -466,17 +591,19 @@ impl Program {
pub fn into_ir_program( pub fn into_ir_program(
self, self,
binary_base_address: u64, binary_base_address: u64,
generic_pointer_size: ByteSize, conventions: &[CallingConvention],
stack_pointer: &Variable,
cpu_arch: &str,
) -> IrProgram { ) -> IrProgram {
let subs = self let subs = self
.subs .subs
.into_iter() .into_iter()
.map(|sub| sub.into_ir_sub_term(generic_pointer_size)) .map(|sub| sub.into_ir_sub_term(stack_pointer.size))
.collect(); .collect();
let extern_symbols = self let extern_symbols = self
.extern_symbols .extern_symbols
.into_iter() .into_iter()
.map(|symbol| symbol.into()) .map(|symbol| symbol.into_ir_symbol(conventions, stack_pointer, cpu_arch))
.collect(); .collect();
let address_base_offset = let address_base_offset =
u64::from_str_radix(&self.image_base, 16).unwrap() - binary_base_address; u64::from_str_radix(&self.image_base, 16).unwrap() - binary_base_address;
...@@ -544,10 +671,12 @@ impl Project { ...@@ -544,10 +671,12 @@ impl Project {
pub fn into_ir_project(self, binary_base_address: u64) -> IrProject { pub fn into_ir_project(self, binary_base_address: u64) -> IrProject {
let mut program: Term<IrProgram> = Term { let mut program: Term<IrProgram> = Term {
tid: self.program.tid, tid: self.program.tid,
term: self term: self.program.term.into_ir_program(
.program binary_base_address,
.term &self.register_calling_convention,
.into_ir_program(binary_base_address, self.stack_pointer_register.size), &self.stack_pointer_register,
&self.cpu_architecture,
),
}; };
let register_map: HashMap<&String, &RegisterProperties> = self let register_map: HashMap<&String, &RegisterProperties> = self
.register_properties .register_properties
...@@ -659,11 +788,12 @@ impl Project { ...@@ -659,11 +788,12 @@ impl Project {
stack_pointer_register: self.stack_pointer_register.into(), stack_pointer_register: self.stack_pointer_register.into(),
calling_conventions: self calling_conventions: self
.register_calling_convention .register_calling_convention
.clone()
.into_iter() .into_iter()
.map(|cconv| cconv.into()) .map(|cconv| cconv.into())
.collect(), .collect(),
register_list, register_list,
datatype_properties: self.datatype_properties, datatype_properties: self.datatype_properties.clone(),
} }
} }
} }
......
...@@ -560,6 +560,7 @@ fn sub_deserialization() { ...@@ -560,6 +560,7 @@ fn sub_deserialization() {
#[test] #[test]
fn extern_symbol_deserialization() { fn extern_symbol_deserialization() {
let setup = Setup::new();
let symbol: ExternSymbol = serde_json::from_str( let symbol: ExternSymbol = serde_json::from_str(
r#" r#"
{ {
...@@ -599,11 +600,16 @@ fn extern_symbol_deserialization() { ...@@ -599,11 +600,16 @@ fn extern_symbol_deserialization() {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrExternSymbol = symbol.into(); let _: IrExternSymbol = symbol.into_ir_symbol(
&setup.project.register_calling_convention,
&setup.project.stack_pointer_register,
&setup.project.cpu_architecture,
);
} }
#[test] #[test]
fn program_deserialization() { fn program_deserialization() {
let setup = Setup::new();
let program_term: Term<Program> = serde_json::from_str( let program_term: Term<Program> = serde_json::from_str(
r#" r#"
{ {
...@@ -621,7 +627,12 @@ fn program_deserialization() { ...@@ -621,7 +627,12 @@ fn program_deserialization() {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrProgram = program_term.term.into_ir_program(10000, ByteSize::new(8)); let _: IrProgram = program_term.term.into_ir_program(
10000,
&setup.project.register_calling_convention,
&setup.project.stack_pointer_register,
&setup.project.cpu_architecture,
);
} }
#[test] #[test]
......
...@@ -325,6 +325,7 @@ pub mod tests { ...@@ -325,6 +325,7 @@ pub mod tests {
write_flag: false, write_flag: false,
execute_flag: false, execute_flag: false,
}, },
// Contains strings: '/dev/sd%c%d' and 'cat %s'
MemorySegment { MemorySegment {
bytes: [ bytes: [
0x2f, 0x64, 0x65, 0x76, 0x2f, 0x73, 0x64, 0x25, 0x63, 0x25, 0x64, 0x00, 0x2f, 0x64, 0x65, 0x76, 0x2f, 0x73, 0x64, 0x25, 0x63, 0x25, 0x64, 0x00,
......
...@@ -46,11 +46,11 @@ public class PcodeExtractor extends GhidraScript { ...@@ -46,11 +46,11 @@ public class PcodeExtractor extends GhidraScript {
SimpleBlockModel simpleBM = new SimpleBlockModel(currentProgram); SimpleBlockModel simpleBM = new SimpleBlockModel(currentProgram);
Listing listing = currentProgram.getListing(); Listing listing = currentProgram.getListing();
TermCreator.symTab = currentProgram.getSymbolTable();
ExternSymbolCreator.createExternalSymbolMap(TermCreator.symTab);
setFunctionEntryPoints(); setFunctionEntryPoints();
TermCreator.symTab = currentProgram.getSymbolTable();
Term<Program> program = TermCreator.createProgramTerm(); Term<Program> program = TermCreator.createProgramTerm();
Project project = createProject(program); Project project = createProject(program);
ExternSymbolCreator.createExternalSymbolMap(TermCreator.symTab);
program = iterateFunctions(simpleBM, listing, program); program = iterateFunctions(simpleBM, listing, program);
program.getTerm().setExternSymbols(new ArrayList<ExternSymbol>(ExternSymbolCreator.externalSymbolMap.values())); program.getTerm().setExternSymbols(new ArrayList<ExternSymbol>(ExternSymbolCreator.externalSymbolMap.values()));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment