Unverified Commit 4b1e5bc5 by Enkelmann Committed by GitHub

Project struct refactoring (#246)

parent 785736f3
FROM rust:1.53 AS builder
FROM rust:1.54 AS builder
WORKDIR /cwe_checker
......
......@@ -44,7 +44,7 @@ If you want to build the docker image yourself, just run `docker build -t cwe_ch
### Local installation ###
The following dependencies must be installed in order to build and install the *cwe_checker* locally:
- [Rust](https://www.rust-lang.org) >= 1.53
- [Rust](https://www.rust-lang.org) >= 1.54
- [Ghidra](https://ghidra-sre.org/) >= 9.2
Run `make all GHIDRA_PATH=/path/to/ghidra_folder` (with the correct path to the local Ghidra installation inserted) to compile and install the cwe_checker.
......
use super::{create_computation, mock_context, NodeValue};
use crate::intermediate_representation::*;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::iter::FromIterator;
use mock_context::Context;
use mock_context::StartEnd;
......@@ -127,9 +129,9 @@ fn mock_program() -> Term<Program> {
let program = Term {
tid: Tid::new("program"),
term: Program {
subs: vec![sub1, sub2],
subs: BTreeMap::from_iter([(sub1.tid.clone(), sub1), (sub2.tid.clone(), sub2)]),
extern_symbols: BTreeMap::new(),
entry_points: Vec::new(),
entry_points: BTreeSet::new(),
address_base_offset: 0,
},
};
......@@ -146,8 +148,8 @@ fn backward_fixpoint() {
size: ByteSize::new(8),
is_temp: false,
},
calling_conventions: Vec::new(),
register_list: Vec::new(),
calling_conventions: BTreeMap::new(),
register_set: BTreeSet::new(),
datatype_properties: DatatypeProperties::mock(),
};
......
use std::collections::HashSet;
use crate::analysis::graph::Graph;
use crate::intermediate_representation::*;
use std::collections::BTreeSet;
/// Given the variables that are alive after execution of the given `Def` term,
/// modify the set of variables to the ones that are alive before the execution of the `Def` term.
pub fn update_alive_vars_by_def(alive_variables: &mut HashSet<Variable>, def: &Term<Def>) {
pub fn update_alive_vars_by_def(alive_variables: &mut BTreeSet<Variable>, def: &Term<Def>) {
match &def.term {
Def::Assign { var, value } => {
if alive_variables.contains(var) {
......@@ -44,23 +43,22 @@ pub struct Context<'a> {
/// This is the set of registers that are assumed to be alive at call/return instructions
/// and all other places in the control flow graph,
/// where the next instruction to be executed may not be known.
pub all_physical_registers: HashSet<Variable>,
pub all_physical_registers: &'a BTreeSet<Variable>,
}
impl<'a> Context<'a> {
/// Create a new context object for the given project and reversed control flow graph.
pub fn new(project: &'a Project, graph: &'a Graph) -> Context<'a> {
let all_physical_registers = project.register_list.iter().cloned().collect();
Context {
graph,
all_physical_registers,
all_physical_registers: &project.register_set,
}
}
}
impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Context<'a> {
/// The value at each node is the set of variables that are known to be alive.
type Value = HashSet<Variable>;
type Value = BTreeSet<Variable>;
/// Get the reversed control flow graph on which the fixpoint computation operates.
fn get_graph(&self) -> &Graph<'a> {
......
......@@ -5,7 +5,7 @@ use crate::analysis::backward_interprocedural_fixpoint::create_computation;
use crate::analysis::graph::Node;
use crate::analysis::interprocedural_fixpoint_generic::NodeValue;
use crate::intermediate_representation::*;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeSet, HashMap};
mod alive_vars_computation;
use alive_vars_computation::*;
......@@ -13,7 +13,7 @@ use alive_vars_computation::*;
/// Compute alive variables by means of an intraprocedural fixpoint computation.
/// Returns a map that assigns to each basic block `Tid` the set of all variables
/// that are alive at the end of the basic block.
pub fn compute_alive_vars(project: &Project) -> HashMap<Tid, HashSet<Variable>> {
pub fn compute_alive_vars(project: &Project) -> HashMap<Tid, BTreeSet<Variable>> {
let extern_subs = project
.program
.term
......@@ -59,18 +59,18 @@ pub fn compute_alive_vars(project: &Project) -> HashMap<Tid, HashSet<Variable>>
}
computation.set_node_value(node, NodeValue::Value(alive_vars));
} else {
computation.set_node_value(node, NodeValue::Value(HashSet::new()))
computation.set_node_value(node, NodeValue::Value(BTreeSet::new()))
}
}
Node::CallReturn { .. } => {
computation.set_node_value(node, NodeValue::Value(HashSet::new()));
computation.set_node_value(node, NodeValue::Value(BTreeSet::new()));
}
Node::CallSource { .. } => {
computation.set_node_value(
node,
NodeValue::CallFlowCombinator {
call_stub: Some(HashSet::new()),
interprocedural_flow: Some(HashSet::new()),
call_stub: Some(BTreeSet::new()),
interprocedural_flow: Some(BTreeSet::new()),
},
);
}
......@@ -100,7 +100,7 @@ pub fn compute_alive_vars(project: &Project) -> HashMap<Tid, HashSet<Variable>>
/// An assignment is considered dead if the register is not read before its value is overwritten by another assignment.
fn remove_dead_var_assignments_of_block(
block: &mut Term<Blk>,
alive_vars_map: &HashMap<Tid, HashSet<Variable>>,
alive_vars_map: &HashMap<Tid, BTreeSet<Variable>>,
) {
let mut alive_vars = alive_vars_map.get(&block.tid).unwrap().clone();
let mut cleaned_defs = Vec::new();
......@@ -117,7 +117,7 @@ fn remove_dead_var_assignments_of_block(
/// Remove all dead assignments from all basic blocks in the given `project`.
pub fn remove_dead_var_assignments(project: &mut Project) {
let alive_vars_map = compute_alive_vars(project);
for sub in project.program.term.subs.iter_mut() {
for sub in project.program.term.subs.values_mut() {
for block in sub.term.blocks.iter_mut() {
remove_dead_var_assignments_of_block(block, &alive_vars_map);
}
......@@ -163,7 +163,7 @@ mod tests {
},
};
let mut project = Project::mock_empty();
project.program.term.subs.push(sub);
project.program.term.subs.insert(sub.tid.clone(), sub);
remove_dead_var_assignments(&mut project);
let cleaned_defs = vec![
......@@ -173,7 +173,9 @@ mod tests {
def_assign_term(5, "C", "RBX"),
];
assert_eq!(
&project.program.term.subs[0].term.blocks[0].term.defs,
&project.program.term.subs[&Tid::new("sub")].term.blocks[0]
.term
.defs,
&cleaned_defs
);
}
......
......@@ -222,7 +222,7 @@ impl<'a> GraphBuilder<'a> {
/// i.e. for blocks contained in more than one function the extra nodes have to be added separately later.
/// The `sub` a block is associated with is the `sub` that contains the block in the `program` struct.
fn add_program_blocks(&mut self) {
let subs = self.program.term.subs.iter();
let subs = self.program.term.subs.values();
for sub in subs {
for block in sub.term.blocks.iter() {
self.add_block(block, sub);
......@@ -232,7 +232,7 @@ impl<'a> GraphBuilder<'a> {
/// Add all subs to the call targets so that call instructions can be linked to the starting block of the corresponding sub.
fn add_subs_to_call_targets(&mut self) {
for sub in self.program.term.subs.iter() {
for sub in self.program.term.subs.values() {
if !sub.term.blocks.is_empty() {
let start_block = &sub.term.blocks[0];
let target_index = self.jump_targets[&(start_block.tid.clone(), sub.tid.clone())];
......@@ -497,7 +497,8 @@ pub fn get_program_cfg(program: &Term<Program>, extern_subs: HashSet<Tid>) -> Gr
#[cfg(test)]
mod tests {
use super::*;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::iter::FromIterator;
fn mock_program() -> Term<Program> {
let call_term = Term {
......@@ -577,9 +578,9 @@ mod tests {
let program = Term {
tid: Tid::new("program"),
term: Program {
subs: vec![sub1, sub2],
subs: BTreeMap::from_iter([(sub1.tid.clone(), sub1), (sub2.tid.clone(), sub2)]),
extern_symbols: BTreeMap::new(),
entry_points: Vec::new(),
entry_points: BTreeSet::new(),
address_base_offset: 0,
},
};
......@@ -619,7 +620,7 @@ mod tests {
},
};
let mut program = Program::mock_empty();
program.subs.push(sub_term);
program.subs.insert(sub_term.tid.clone(), sub_term);
let program_term = Term {
tid: Tid::new("program".to_string()),
term: program,
......
......@@ -134,39 +134,23 @@ impl<'a> Context<'a> {
"malloc" => {
let size_parameter = extern_symbol.parameters.get(0).unwrap();
state
.eval_parameter_arg(
size_parameter,
&self.project.stack_pointer_register,
self.runtime_memory_image,
)
.eval_parameter_arg(size_parameter, self.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(address_bytesize))
}
"realloc" => {
let size_parameter = extern_symbol.parameters.get(1).unwrap();
state
.eval_parameter_arg(
size_parameter,
&self.project.stack_pointer_register,
self.runtime_memory_image,
)
.eval_parameter_arg(size_parameter, self.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(address_bytesize))
}
"calloc" => {
let size_param1 = extern_symbol.parameters.get(0).unwrap();
let size_param2 = extern_symbol.parameters.get(1).unwrap();
let param1_value = state
.eval_parameter_arg(
size_param1,
&self.project.stack_pointer_register,
self.runtime_memory_image,
)
.eval_parameter_arg(size_param1, self.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(address_bytesize));
let param2_value = state
.eval_parameter_arg(
size_param2,
&self.project.stack_pointer_register,
self.runtime_memory_image,
)
.eval_parameter_arg(size_param2, self.runtime_memory_image)
.unwrap_or_else(|_| Data::new_top(address_bytesize));
param1_value.bin_op(BinOpType::IntMult, &param2_value)
}
......@@ -238,11 +222,8 @@ impl<'a> Context<'a> {
) -> State {
match extern_symbol.get_unique_parameter() {
Ok(parameter) => {
let parameter_value = state.eval_parameter_arg(
parameter,
&self.project.stack_pointer_register,
self.runtime_memory_image,
);
let parameter_value =
state.eval_parameter_arg(parameter, self.runtime_memory_image);
match parameter_value {
Ok(memory_object_pointer) => {
if let Err(possible_double_frees) =
......@@ -290,11 +271,7 @@ impl<'a> Context<'a> {
extern_symbol: &ExternSymbol,
) {
for parameter in extern_symbol.parameters.iter() {
match state.eval_parameter_arg(
parameter,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
match state.eval_parameter_arg(parameter, self.runtime_memory_image) {
Ok(value) => {
if state.memory.is_dangling_pointer(&value, true) {
state
......@@ -336,11 +313,7 @@ impl<'a> Context<'a> {
extern_symbol: &ExternSymbol,
) {
for parameter in extern_symbol.parameters.iter() {
match state.eval_parameter_arg(
parameter,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
match state.eval_parameter_arg(parameter, self.runtime_memory_image) {
Ok(data) => {
if state.pointer_contains_out_of_bounds_target(&data, self.runtime_memory_image)
{
......@@ -417,35 +390,26 @@ impl<'a> Context<'a> {
extern_symbol: &ExternSymbol,
) -> State {
self.log_debug(
new_state.clear_stack_parameter(
extern_symbol,
&self.project.stack_pointer_register,
self.runtime_memory_image,
),
new_state.clear_stack_parameter(extern_symbol, self.runtime_memory_image),
Some(&call.tid),
);
let calling_conv = extern_symbol.get_calling_convention(self.project);
let calling_conv = self.project.get_calling_convention(extern_symbol);
let mut possible_referenced_ids = BTreeSet::new();
if extern_symbol.parameters.is_empty() && extern_symbol.return_values.is_empty() {
// We assume here that we do not know the parameters and approximate them by all possible parameter registers.
// This approximation is wrong if the function is known but has neither parameters nor return values.
// We cannot distinguish these two cases yet.
for parameter_register_name in calling_conv
.integer_parameter_register
.iter()
.chain(calling_conv.float_parameter_register.iter())
{
if let Some(register_value) = state.get_register_by_name(parameter_register_name) {
possible_referenced_ids.extend(register_value.referenced_ids().cloned());
}
for parameter_register in calling_conv.integer_parameter_register.iter() {
let register_value = state.get_register(parameter_register);
possible_referenced_ids.extend(register_value.referenced_ids().cloned());
}
for float_parameter_expression in calling_conv.float_parameter_register.iter() {
let register_value = state.eval(float_parameter_expression);
possible_referenced_ids.extend(register_value.referenced_ids().cloned());
}
} else {
for parameter in extern_symbol.parameters.iter() {
if let Ok(data) = state.eval_parameter_arg(
parameter,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(data) = state.eval_parameter_arg(parameter, self.runtime_memory_image) {
possible_referenced_ids.extend(data.referenced_ids().cloned());
}
}
......@@ -475,16 +439,13 @@ impl<'a> Context<'a> {
self.adjust_stack_register_on_extern_call(state_before_call, &mut new_state);
let mut possible_referenced_ids = BTreeSet::new();
for parameter_register_name in calling_conv
.integer_parameter_register
.iter()
.chain(calling_conv.float_parameter_register.iter())
{
if let Some(register_value) =
state_before_call.get_register_by_name(parameter_register_name)
{
possible_referenced_ids.extend(register_value.referenced_ids().cloned());
}
for parameter_register in calling_conv.integer_parameter_register.iter() {
let register_value = state_before_call.get_register(parameter_register);
possible_referenced_ids.extend(register_value.referenced_ids().cloned());
}
for float_parameter_expression in calling_conv.float_parameter_register.iter() {
let register_value = state_before_call.eval(float_parameter_expression);
possible_referenced_ids.extend(register_value.referenced_ids().cloned());
}
possible_referenced_ids =
state_before_call.add_recursively_referenced_ids_to_id_set(possible_referenced_ids);
......
use crate::intermediate_representation::DatatypeProperties;
use super::*;
use std::collections::HashSet;
use std::{collections::HashSet, iter::FromIterator};
fn bv(value: i64) -> ValueDomain {
ValueDomain::from(Bitvector::from_i64(value))
......@@ -16,7 +16,7 @@ fn new_id(time: &str, reg_name: &str) -> AbstractIdentifier {
fn mock_extern_symbol(name: &str) -> (Tid, ExternSymbol) {
let arg = Arg::Register {
var: register("RDX"),
expr: Expression::Var(register("RDX")),
data_type: None,
};
let tid = Tid::new("extern_".to_string() + name);
......@@ -76,7 +76,7 @@ fn return_term(target_name: &str) -> Term<Jmp> {
fn mock_project() -> (Project, Config) {
let program = Program {
subs: Vec::new(),
subs: BTreeMap::new(),
extern_symbols: vec![
mock_extern_symbol("malloc"),
mock_extern_symbol("free"),
......@@ -84,7 +84,7 @@ fn mock_project() -> (Project, Config) {
]
.into_iter()
.collect(),
entry_points: Vec::new(),
entry_points: BTreeSet::new(),
address_base_offset: 0,
};
let program_term = Term {
......@@ -92,13 +92,14 @@ fn mock_project() -> (Project, Config) {
term: program,
};
let cconv = CallingConvention {
name: "default".to_string(),
integer_parameter_register: vec!["RDX".to_string()],
float_parameter_register: vec!["XMM0".to_string()],
return_register: vec!["RDX".to_string()],
callee_saved_register: vec!["callee_saved_reg".to_string()],
name: "__cdecl".to_string(),
integer_parameter_register: vec![Variable::mock("RDX", 8)],
// The float parameter register is `XMM0` (digit zero), matching the name used before the refactoring.
float_parameter_register: vec![Expression::Var(Variable::mock("XMM0", 16))],
integer_return_register: vec![Variable::mock("RDX", 8)],
float_return_register: vec![],
callee_saved_register: vec![Variable::mock("callee_saved_reg", 8)],
};
let register_list = vec!["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"]
let register_set = vec!["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"]
.into_iter()
.map(|name| Variable::mock(name, ByteSize::new(8)))
.collect();
......@@ -107,8 +108,8 @@ fn mock_project() -> (Project, Config) {
program: program_term,
cpu_architecture: "x86_64".to_string(),
stack_pointer_register: register("RSP"),
calling_conventions: vec![cconv],
register_list,
calling_conventions: BTreeMap::from_iter([(cconv.name.clone(), cconv)]),
register_set,
datatype_properties: DatatypeProperties::mock(),
},
Config {
......
......@@ -337,7 +337,7 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
);
}
// Clear non-callee-saved registers from the state.
let cconv = extern_symbol.get_calling_convention(self.project);
let cconv = self.project.get_calling_convention(extern_symbol);
new_state.clear_non_callee_saved_register(&cconv.callee_saved_register[..]);
// Adjust stack register value (for x86 architecture).
self.adjust_stack_register_on_extern_call(state, &mut new_state);
......
......@@ -107,15 +107,8 @@ impl<'a> PointerInference<'a> {
);
let mut entry_sub_to_entry_blocks_map = HashMap::new();
let subs: HashMap<Tid, &Term<Sub>> = project
.program
.term
.subs
.iter()
.map(|sub| (sub.tid.clone(), sub))
.collect();
for sub_tid in project.program.term.entry_points.iter() {
if let Some(sub) = subs.get(sub_tid) {
if let Some(sub) = project.program.term.subs.get(sub_tid) {
if let Some(entry_block) = sub.term.blocks.get(0) {
entry_sub_to_entry_blocks_map.insert(sub_tid, entry_block.tid.clone());
}
......@@ -266,7 +259,7 @@ impl<'a> PointerInference<'a> {
) {
// TODO: Refactor the fixpoint computation structs, so that the project reference can be extracted from them.
let mut start_block_to_sub_map: HashMap<&Tid, &Term<Sub>> = HashMap::new();
for sub in project.program.term.subs.iter() {
for sub in project.program.term.subs.values() {
if project.program.term.extern_symbols.contains_key(&sub.tid) {
continue; // We ignore functions marked as extern symbols.
}
......
......@@ -14,19 +14,6 @@ impl State {
}
}
/// Get the value of a register by its name.
///
/// Returns None if no value is set for the register.
pub fn get_register_by_name(&self, reg_name: &str) -> Option<Data> {
self.register.iter().find_map(|(key, value)| {
if key.name == reg_name {
Some(value.clone())
} else {
None
}
})
}
/// Set the value of a register.
pub fn set_register(&mut self, variable: &Variable, value: Data) {
if !value.is_top() {
......@@ -237,16 +224,11 @@ impl State {
pub fn eval_parameter_arg(
&self,
parameter: &Arg,
stack_pointer: &Variable,
global_memory: &RuntimeMemoryImage,
) -> Result<Data, Error> {
match parameter {
Arg::Register { var, .. } => Ok(self.eval(&Expression::Var(var.clone()))),
Arg::Stack { offset, size, .. } => self.load_value(
&Expression::Var(stack_pointer.clone()).plus_const(*offset),
*size,
global_memory,
),
Arg::Register { expr, .. } => Ok(self.eval(expr)),
Arg::Stack { address, size, .. } => self.load_value(address, *size, global_memory),
}
}
......
......@@ -74,18 +74,13 @@ impl State {
pub fn new_with_generic_parameter_objects(
stack_register: &Variable,
function_tid: Tid,
params: &[String],
params: &[Variable],
) -> State {
let mut state = State::new(stack_register, function_tid.clone());
for param_name in params {
let param = Variable {
name: param_name.clone(),
size: stack_register.size,
is_temp: false,
};
for param in params {
let param_id = AbstractIdentifier::new(
function_tid.clone(),
AbstractLocation::from_var(&param).unwrap(),
AbstractLocation::from_var(param).unwrap(),
);
state.memory.add_abstract_object(
param_id.clone(),
......@@ -94,11 +89,8 @@ impl State {
stack_register.size,
);
state.set_register(
&param,
DataDomain::from_target(
param_id,
Bitvector::zero(stack_register.size.into()).into(),
),
param,
DataDomain::from_target(param_id, Bitvector::zero(param.size.into()).into()),
)
}
state
......@@ -135,18 +127,15 @@ impl State {
/// Clear all non-callee-saved registers from the state.
/// This automatically also removes all virtual registers.
/// The parameter is a list of callee-saved registers.
pub fn clear_non_callee_saved_register(&mut self, callee_saved_register_names: &[String]) {
let register = self
.register
pub fn clear_non_callee_saved_register(&mut self, callee_saved_register: &[Variable]) {
let register = callee_saved_register
.iter()
.filter_map(|(register, value)| {
if callee_saved_register_names
.iter()
.any(|reg_name| **reg_name == register.name)
{
Some((register.clone(), value.clone()))
} else {
.filter_map(|var| {
let value = self.get_register(var);
if value.is_top() {
None
} else {
Some((var.clone(), value))
}
})
.collect();
......@@ -158,20 +147,15 @@ impl State {
pub fn clear_stack_parameter(
&mut self,
extern_call: &ExternSymbol,
stack_pointer_register: &Variable,
global_memory: &RuntimeMemoryImage,
) -> Result<(), Error> {
let mut result_log = Ok(());
for arg in &extern_call.parameters {
match arg {
Arg::Register { .. } => (),
Arg::Stack { offset, size, .. } => {
Arg::Stack { address, size, .. } => {
let data_top = Data::new_top(*size);
let location_expression =
Expression::Var(stack_pointer_register.clone()).plus_const(*offset);
if let Err(err) =
self.write_to_address(&location_expression, &data_top, global_memory)
{
if let Err(err) = self.write_to_address(address, &data_top, global_memory) {
result_log = Err(err);
}
}
......@@ -268,32 +252,19 @@ impl State {
cconv: &CallingConvention,
stack_register: &Variable,
) {
for (register, value) in caller_state.register.iter() {
if register != stack_register
&& cconv
.callee_saved_register
.iter()
.any(|reg_name| *reg_name == register.name)
{
self.set_register(register, value.clone());
}
for register in cconv
.callee_saved_register
.iter()
.filter(|reg| *reg != stack_register)
{
self.set_register(register, caller_state.get_register(register));
}
}
/// Remove all knowledge about the contents of callee-saved registers from the state.
pub fn remove_callee_saved_register(&mut self, cconv: &CallingConvention) {
let mut register_to_remove = Vec::new();
for register in self.register.keys() {
if cconv
.callee_saved_register
.iter()
.any(|reg_name| *reg_name == register.name)
{
register_to_remove.push(register.clone());
}
}
for register in register_to_remove {
self.register.remove(&register);
for register in &cconv.callee_saved_register {
self.register.remove(register);
}
}
}
......
......@@ -273,7 +273,7 @@ fn clear_parameters_on_the_stack_on_extern_calls() {
.unwrap();
// create an extern symbol which uses the value on the stack as a parameter
let stack_param = Arg::Stack {
offset: 8,
address: reg_add("RSP", 8),
size: ByteSize::new(8),
data_type: None,
};
......@@ -295,7 +295,7 @@ fn clear_parameters_on_the_stack_on_extern_calls() {
);
// clear stack parameter
state
.clear_stack_parameter(&extern_symbol, &register("RSP"), &global_memory)
.clear_stack_parameter(&extern_symbol, &global_memory)
.unwrap();
// check the value after
assert_eq!(
......@@ -1120,13 +1120,10 @@ fn test_check_def_for_null_dereferences() {
#[test]
fn test_new_with_generic_parameter_objects() {
let param_names = vec!["param1".to_string(), "param2".to_string()];
let state = State::new_with_generic_parameter_objects(
&register("RSP"),
Tid::new("func_tid"),
&param_names,
);
let params = vec![Variable::mock("param1", 8), Variable::mock("param2", 8)];
let state =
State::new_with_generic_parameter_objects(&register("RSP"), Tid::new("func_tid"), &params);
assert_eq!(state.memory.get_num_objects(), 3);
assert!(state.get_register_by_name("param1").is_some());
assert!(state.get_register_by_name("param2").is_some());
// Every parameter register must hold a non-top value after construction,
// so check both `param1` and `param2`, not `param1` twice.
assert!(!state.get_register(&Variable::mock("param1", 8)).is_top());
assert!(!state.get_register(&Variable::mock("param2", 8)).is_top());
}
......@@ -39,10 +39,7 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
if let Some(standard_cconv) = self.project.get_standard_calling_convention() {
let mut filtered_map = state.get_variable_to_pointer_map().clone();
for (register, _) in state.get_variable_to_pointer_map().clone().iter() {
if !standard_cconv
.callee_saved_register
.contains(&register.name)
{
if !standard_cconv.callee_saved_register.contains(register) {
filtered_map.remove(register);
}
}
......@@ -220,11 +217,9 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
if let Some(dest_arg) = extern_symbol.parameters.first() {
if let Some(pi_state) = state.get_pointer_inference_state() {
if let Ok(pointer) = pi_state.eval_parameter_arg(
dest_arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(pointer) =
pi_state.eval_parameter_arg(dest_arg, self.runtime_memory_image)
{
let heap_to_string_map = state.get_heap_to_string_map();
for (target, _) in pointer.get_relative_values().iter() {
if heap_to_string_map.contains_key(target) {
......
......@@ -43,11 +43,9 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
pi_state: &PointerInferenceState,
) -> Result<DataDomain<IntervalDomain>, Error> {
if let Some(return_arg) = extern_symbol.parameters.first() {
if let Ok(return_data) = pi_state.eval_parameter_arg(
return_arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(return_data) =
pi_state.eval_parameter_arg(return_arg, self.runtime_memory_image)
{
if !return_data.get_relative_values().is_empty() {
return Ok(return_data);
}
......@@ -64,11 +62,7 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
pi_state: &PointerInferenceState,
) -> Result<DataDomain<IntervalDomain>, Error> {
if let Some(input_arg) = extern_symbol.parameters.get(1) {
return pi_state.eval_parameter_arg(
input_arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
);
return pi_state.eval_parameter_arg(input_arg, self.runtime_memory_image);
}
Err(anyhow!("No input values"))
......@@ -384,14 +378,12 @@ mod tests {
let return_targets = setup
.pi_state_before_symbol_call
.get_register_by_name("r0")
.unwrap();
.get_register(&Variable::mock("r0", 4));
let input_target: DataDomain<IntervalDomain> = DataDomain::from(
setup
.pi_state_before_symbol_call
.get_register_by_name("r1")
.unwrap()
.get_register(&Variable::mock("r1", 4))
.get_absolute_value()
.unwrap()
.clone(),
......@@ -430,16 +422,14 @@ mod tests {
let return_targets = setup
.pi_state_before_symbol_call
.get_register_by_name("r0")
.unwrap()
.get_register(&Variable::mock("r0", 4))
.get_relative_values()
.clone();
let input_target: DataDomain<IntervalDomain> = DataDomain::from(
setup
.pi_state_before_symbol_call
.get_register_by_name("r1")
.unwrap()
.get_register(&Variable::mock("r1", 4))
.get_absolute_value()
.unwrap()
.clone(),
......
......@@ -47,11 +47,8 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
) {
for (argument, value) in arg_to_value_map.into_iter() {
if argument.get_data_type().unwrap() == Datatype::Pointer {
if let Ok(data) = pi_state.eval_parameter_arg(
&argument,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(data) = pi_state.eval_parameter_arg(&argument, self.runtime_memory_image)
{
if !data.get_relative_values().is_empty() {
Context::add_constant_or_top_value_to_return_locations(
state, pi_state, data, value,
......@@ -94,11 +91,9 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
let mut new_state = state.clone();
if let Some(pi_state) = state.get_pointer_inference_state() {
if let Some(source_string_arg) = extern_symbol.parameters.first() {
if let Ok(source_string) = pi_state.eval_parameter_arg(
source_string_arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(source_string) =
pi_state.eval_parameter_arg(source_string_arg, self.runtime_memory_image)
{
if self.source_string_mapped_to_return_locations(
pi_state,
&mut new_state,
......@@ -202,7 +197,7 @@ mod tests {
use crate::abstract_domain::{AbstractIdentifier, AbstractLocation, CharacterInclusionDomain};
use crate::analysis::pointer_inference::PointerInference as PointerInferenceComputation;
use crate::analysis::string_abstraction::tests::mock_project_with_intraprocedural_control_flow;
use crate::intermediate_representation::Variable;
use crate::intermediate_representation::{Expression, Variable};
use crate::utils::binary::RuntimeMemoryImage;
use super::super::tests::*;
......@@ -316,11 +311,11 @@ mod tests {
let mut arg_to_value_map: HashMap<Arg, Option<String>> = HashMap::new();
let register_arg = Arg::Register {
var: r2_reg.clone(),
expr: Expression::Var(r2_reg.clone()),
data_type: Some(Datatype::Pointer),
};
let stack_arg = Arg::Stack {
offset: 0,
address: Expression::Var(Variable::mock("sp", 4)),
size: ByteSize::new(4),
data_type: Some(Datatype::Pointer),
};
......@@ -393,11 +388,11 @@ mod tests {
let mut arg_to_value_map: HashMap<Arg, Option<String>> = HashMap::new();
let register_arg = Arg::Register {
var: r1_reg.clone(),
expr: Expression::Var(r1_reg.clone()),
data_type: Some(Datatype::Pointer),
};
let stack_arg = Arg::Stack {
offset: 0,
address: Expression::Var(Variable::mock("sp", 4)),
size: ByteSize::new(4),
data_type: Some(Datatype::Pointer),
};
......@@ -761,7 +756,7 @@ mod tests {
),
(
Arg::Stack {
offset: 0,
address: Expression::Var(Variable::mock("sp", 4)),
size: ByteSize::new(4),
data_type: Some(Datatype::Pointer),
},
......@@ -769,7 +764,7 @@ mod tests {
),
(
Arg::Stack {
offset: 4,
address: Expression::Var(Variable::mock("sp", 4)).plus_const(4),
size: ByteSize::new(4),
data_type: Some(Datatype::Pointer),
},
......
......@@ -24,11 +24,9 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
let mut new_state = state.clone();
if let Some(return_arg) = extern_symbol.parameters.first() {
if let Some(pi_state) = state.get_pointer_inference_state() {
if let Ok(return_pointer) = pi_state.eval_parameter_arg(
return_arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(return_pointer) =
pi_state.eval_parameter_arg(return_arg, self.runtime_memory_image)
{
if !return_pointer.get_relative_values().is_empty() {
let format_string_index = self
.format_string_index_map
......@@ -65,7 +63,6 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
pi_state,
extern_symbol,
format_string_index,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
let returned_abstract_domain = self.create_string_domain_for_sprintf_snprintf(
......@@ -261,11 +258,7 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
pi_state: &PointerInferenceState,
state: &State<T>,
) -> T {
if let Ok(data) = pi_state.eval_parameter_arg(
arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(data) = pi_state.eval_parameter_arg(arg, self.runtime_memory_image) {
let constant_domain: Option<T> = self.fetch_constant_domain_if_available(&data, arg);
if let Some(generated_domain) = Context::<T>::fetch_subdomains_if_available(
&data,
......
......@@ -3,7 +3,7 @@ use std::collections::{BTreeSet, HashSet};
use super::*;
use crate::abstract_domain::{AbstractIdentifier, AbstractLocation};
use crate::analysis::pointer_inference::PointerInference as PointerInferenceComputation;
use crate::intermediate_representation::{Bitvector, Tid, Variable};
use crate::intermediate_representation::{Bitvector, Expression, Tid, Variable};
use crate::{
abstract_domain::{CharacterInclusionDomain, CharacterSet},
analysis::string_abstraction::{
......@@ -183,15 +183,15 @@ fn test_create_string_domain_using_data_type_approximations() {
fn test_create_string_domain_using_constants_and_sub_domains() {
let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
let string_arg = Arg::Register {
var: Variable::mock("r6", 4),
expr: Expression::Var(Variable::mock("r6", 4)),
data_type: Some(Datatype::Pointer),
};
let integer_arg = Arg::Register {
var: Variable::mock("r7", 4),
expr: Expression::Var(Variable::mock("r7", 4)),
data_type: Some(Datatype::Integer),
};
let char_arg = Arg::Register {
var: Variable::mock("r8", 4),
expr: Expression::Var(Variable::mock("r8", 4)),
data_type: Some(Datatype::Char),
};
......@@ -352,15 +352,15 @@ fn test_no_specifiers() {
fn test_fetch_constant_and_domain_for_format_specifier() {
let sprintf_symbol = ExternSymbol::mock_sprintf_symbol_arm();
let string_arg = Arg::Register {
var: Variable::mock("r6", 4),
expr: Expression::Var(Variable::mock("r6", 4)),
data_type: Some(Datatype::Pointer),
};
let integer_arg = Arg::Register {
var: Variable::mock("r7", 4),
expr: Expression::Var(Variable::mock("r7", 4)),
data_type: Some(Datatype::Integer),
};
let char_arg = Arg::Register {
var: Variable::mock("r8", 4),
expr: Expression::Var(Variable::mock("r8", 4)),
data_type: Some(Datatype::Char),
};
......
......@@ -16,11 +16,9 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
let mut new_state = state.clone();
if let Some(pi_state) = state.get_pointer_inference_state() {
if let Some(return_arg) = extern_symbol.parameters.first() {
if let Ok(return_pointer) = pi_state.eval_parameter_arg(
return_arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(return_pointer) =
pi_state.eval_parameter_arg(return_arg, self.runtime_memory_image)
{
if !return_pointer.get_relative_values().is_empty() {
let target_domain =
Context::<T>::merge_domains_from_multiple_pointer_targets(
......@@ -65,11 +63,9 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> Conte
) -> T {
let mut input_domain = T::create_top_value_domain();
if let Some(input_arg) = extern_symbol.parameters.get(1) {
if let Ok(input_value) = pi_state.eval_parameter_arg(
input_arg,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(input_value) =
pi_state.eval_parameter_arg(input_arg, self.runtime_memory_image)
{
// Check whether the second input string is in read only memory or on stack/heap.
if !input_value.get_relative_values().is_empty() {
input_domain = Context::<T>::merge_domains_from_multiple_pointer_targets(
......
......@@ -42,7 +42,8 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String> + Debu
.program
.term
.subs
.get(0)
.values()
.next()
.unwrap()
.term
.blocks
......@@ -64,7 +65,15 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String> + Debu
);
let state_before_call: State<T> = State::mock_with_given_pi_state(
pi_context.project.program.term.subs.get(0).unwrap().clone(),
pi_context
.project
.program
.term
.subs
.values()
.next()
.unwrap()
.clone(),
pi_state.clone(),
);
......
......@@ -62,7 +62,7 @@ impl<'a, T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>>
);
let mut sub_to_entry_blocks_map = HashMap::new();
for sub in project.program.term.subs.iter() {
for sub in project.program.term.subs.values() {
if let Some(entry_block) = sub.term.blocks.get(0) {
sub_to_entry_blocks_map.insert(sub.tid.clone(), entry_block.tid.clone());
}
......
......@@ -611,10 +611,10 @@ impl<T: AbstractDomain + DomainInsertion + HasTop + Eq + From<String>> State<T>
project: &Project,
extern_symbol: &ExternSymbol,
) {
let cconv = extern_symbol.get_calling_convention(project);
let cconv = project.get_calling_convention(extern_symbol);
let mut filtered_map = self.variable_to_pointer_map.clone();
for (register, _) in self.variable_to_pointer_map.clone().iter() {
if !cconv.callee_saved_register.contains(&register.name) {
if !cconv.callee_saved_register.contains(register) {
if let Some(pointer) = filtered_map.remove(register) {
self.unassigned_return_pointer.insert(pointer);
}
......
......@@ -527,7 +527,7 @@ fn test_remove_non_callee_saved_pointer_entries_for_external_symbol() {
);
let mut mock_state = State::<CharacterInclusionDomain>::mock_with_default_pi_state(
project.program.term.subs.get(0).unwrap().clone(),
project.program.term.subs.values().next().unwrap().clone(),
);
let top_domain = DataDomain::new_empty(ByteSize::new(4));
......
use crate::intermediate_representation::*;
use std::{collections::BTreeMap, iter::FromIterator};
pub struct Setup;
......@@ -494,14 +495,15 @@ impl CallingConvention {
name: "__stdcall".to_string(), // so that the mock is useable as standard calling convention in tests
integer_parameter_register: ["r0", "r1", "r2", "r3"]
.iter()
.map(|s| s.to_string())
.map(|s| Variable::mock(s, 4))
.collect(),
float_parameter_register: ["s0", "s1", "s2", "s3"]
.iter()
.map(|s| s.to_string())
.map(|s| Expression::Var(Variable::mock(s, 4)))
.collect(),
return_register: vec!["r0".to_string()],
callee_saved_register: vec!["r11".to_string()],
integer_return_register: vec![Variable::mock("r0", 4)],
float_return_register: vec![],
callee_saved_register: vec![Variable::mock("r11", 4)],
}
}
}
......@@ -590,11 +592,9 @@ pub fn mock_project_with_intraprocedural_control_flow(
sub_name: &str,
) -> Project {
let mut program = Program::mock_empty();
let mocked_sub = mock_sub_with_name_and_symbol_calls(sub_name, symbol_call_config);
program.subs.push(mock_sub_with_name_and_symbol_calls(
sub_name,
symbol_call_config,
));
program.subs.insert(mocked_sub.tid.clone(), mocked_sub);
let memcpy = ExternSymbol::mock_memcpy_symbol_arm();
program.extern_symbols.insert(memcpy.tid.clone(), memcpy);
let sprintf = ExternSymbol::mock_sprintf_symbol_arm();
......@@ -609,12 +609,13 @@ pub fn mock_project_with_intraprocedural_control_flow(
program.extern_symbols.insert(free.tid.clone(), free);
let malloc = ExternSymbol::mock_malloc_symbol_arm();
program.extern_symbols.insert(malloc.tid.clone(), malloc);
program.entry_points.push(Tid::new(sub_name));
program.entry_points.insert(Tid::new(sub_name));
let register_list = ["r0", "r1", "r2", "r3", "r11", "sp"]
let register_set = ["r0", "r1", "r2", "r3", "r11", "sp"]
.iter()
.map(|name| Variable::mock(name, ByteSize::new(4)))
.collect();
let cconv = CallingConvention::mock_standard_arm_32();
Project {
program: Term {
......@@ -623,8 +624,8 @@ pub fn mock_project_with_intraprocedural_control_flow(
},
cpu_architecture: "arm_32".to_string(),
stack_pointer_register: Variable::mock("sp", 4u64),
calling_conventions: vec![CallingConvention::mock_standard_arm_32()],
register_list,
calling_conventions: BTreeMap::from_iter([(cconv.name.clone(), cconv)]),
register_set,
datatype_properties: DatatypeProperties::mock_standard_arm_32(),
}
}
......@@ -31,7 +31,6 @@ use crate::analysis::interprocedural_fixpoint_generic::NodeValue;
use crate::analysis::pointer_inference::PointerInference;
use crate::intermediate_representation::ExternSymbol;
use crate::intermediate_representation::Jmp;
use crate::intermediate_representation::Variable;
use crate::prelude::*;
use crate::utils::binary::RuntimeMemoryImage;
use crate::utils::log::CweWarning;
......@@ -93,7 +92,6 @@ pub fn check_cwe(
&format_string_index,
pointer_inference_results,
analysis_results.runtime_memory_image,
&project.stack_pointer_register,
);
if matches!(
......@@ -121,18 +119,15 @@ fn locate_format_string(
format_string_index: &HashMap<String, usize>,
pointer_inference_results: &PointerInference,
runtime_memory_image: &RuntimeMemoryImage,
stack_pointer: &Variable,
) -> StringLocation {
if let Some(NodeValue::Value(pi_state)) = pointer_inference_results.get_node_value(*node) {
let format_string_parameter = symbol
.parameters
.get(*format_string_index.get(&symbol.name).unwrap())
.unwrap();
if let Ok(address) = pi_state.eval_parameter_arg(
format_string_parameter,
stack_pointer,
runtime_memory_image,
) {
if let Ok(address) =
pi_state.eval_parameter_arg(format_string_parameter, runtime_memory_image)
{
if let Ok(address_vector) = address.try_to_bitvec() {
if runtime_memory_image.is_global_memory_address(&address_vector) {
if runtime_memory_image
......@@ -184,7 +179,7 @@ pub mod tests {
use std::collections::HashSet;
use crate::analysis::pointer_inference::PointerInference as PointerInferenceComputation;
use crate::intermediate_representation::{Blk, Def, Expression, Jmp, Project, Sub};
use crate::intermediate_representation::*;
use super::*;
......@@ -212,8 +207,8 @@ pub mod tests {
block1.term.jmps.push(jump);
sub.term.blocks.push(block1);
sub.term.blocks.push(block2);
project.program.term.subs.push(sub);
project.program.term.entry_points.push(Tid::new("func"));
project.program.term.subs.insert(sub.tid.clone(), sub);
project.program.term.entry_points.insert(Tid::new("func"));
project
}
......@@ -221,7 +216,6 @@ pub mod tests {
#[test]
fn test_locate_format_string() {
let sprintf_symbol = ExternSymbol::mock_string();
let stack_pointer = Variable::mock("RSP", ByteSize::new(8));
let runtime_memory_image = RuntimeMemoryImage::mock();
let project = mock_project();
let graph = crate::analysis::graph::get_program_cfg(&project.program, HashSet::new());
......@@ -246,7 +240,6 @@ pub mod tests {
&format_string_index,
&pi_results,
&runtime_memory_image,
&stack_pointer
),
StringLocation::GlobalReadable
);
......
......@@ -108,7 +108,7 @@ pub fn check_cwe(
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let mut cwe_warnings = Vec::new();
let symbol_map = get_symbol_map(project, &config.symbols);
for sub in project.program.term.subs.iter() {
for sub in project.program.term.subs.values() {
for (block, jump, symbol) in get_callsites(sub, &symbol_map) {
if block_contains_multiplication(block) {
cwe_warnings.push(generate_cwe_warning(&jump.tid, symbol));
......
......@@ -88,7 +88,7 @@ pub fn check_cwe(
system_symbol.insert(tid, name);
}
if !system_symbol.is_empty() && !privilege_changing_symbols.is_empty() {
for sub in project.program.term.subs.iter() {
for sub in project.program.term.subs.values() {
if !get_calls_to_symbols(sub, &system_symbol).is_empty()
&& !get_calls_to_symbols(sub, &privilege_changing_symbols).is_empty()
{
......
......@@ -79,9 +79,7 @@ fn check_for_pointer_sized_arg(
let pointer_size = project.stack_pointer_register.size;
let state = compute_block_end_state(project, global_memory, block);
for parameter in symbol.parameters.iter() {
if let Ok(param) =
state.eval_parameter_arg(parameter, &project.stack_pointer_register, global_memory)
{
if let Ok(param) = state.eval_parameter_arg(parameter, global_memory) {
if let Ok(param_value) = param.try_to_bitvec() {
if Ok(u64::from(pointer_size)) == param_value.try_to_u64() {
return true;
......@@ -120,7 +118,7 @@ pub fn check_cwe(
let mut cwe_warnings = Vec::new();
let symbol_map = get_symbol_map(project, &config.symbols);
for sub in project.program.term.subs.iter() {
for sub in project.program.term.subs.values() {
for (block, jmp, symbol) in get_callsites(sub, &symbol_map) {
if check_for_pointer_sized_arg(
project,
......
......@@ -111,11 +111,7 @@ pub fn check_cwe(
let mut computation = create_computation(context, None);
computation.set_node_value(
node,
NodeValue::Value(State::new(
symbol,
&project.stack_pointer_register,
pi_state_at_taint_source.as_ref(),
)),
NodeValue::Value(State::new(symbol, pi_state_at_taint_source.as_ref())),
);
computation.compute_with_max_steps(100);
}
......
......@@ -178,8 +178,8 @@ impl<'a> Context<'a> {
) -> bool {
// First check for taint directly in parameter registers (we don't need a pointer inference state for that)
for parameter in extern_symbol.parameters.iter() {
if let Arg::Register { var, .. } = parameter {
if state.eval(&Expression::Var(var.clone())).is_tainted() {
if let Arg::Register { expr, .. } = parameter {
if state.eval(expr).is_tainted() {
return true;
}
}
......@@ -190,28 +190,22 @@ impl<'a> Context<'a> {
// Check stack parameters and collect referenced memory objects that need to be checked for taint.
for parameter in extern_symbol.parameters.iter() {
match parameter {
Arg::Register { var, .. } => {
let data = pi_state.eval(&Expression::Var(var.clone()));
Arg::Register { expr, .. } => {
let data = pi_state.eval(expr);
if state.check_if_address_points_to_taint(data, pi_state) {
return true;
}
}
Arg::Stack { offset, size, .. } => {
let stack_address = pi_state.eval(
&Expression::Var(self.project.stack_pointer_register.clone())
.plus_const(*offset),
);
Arg::Stack { address, size, .. } => {
if state
.load_taint_from_memory(&stack_address, *size)
.load_taint_from_memory(&pi_state.eval(address), *size)
.is_tainted()
{
return true;
}
if let Ok(stack_param) = pi_state.eval_parameter_arg(
parameter,
&self.project.stack_pointer_register,
self.runtime_memory_image,
) {
if let Ok(stack_param) =
pi_state.eval_parameter_arg(parameter, self.runtime_memory_image)
{
if state.check_if_address_points_to_taint(stack_param, pi_state) {
return true;
}
......@@ -293,7 +287,7 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
}
let mut new_state = state.clone();
new_state.remove_non_callee_saved_taint(
extern_symbol.get_calling_convention(self.project),
self.project.get_calling_convention(extern_symbol),
);
Some(new_state)
} else {
......
......@@ -80,11 +80,7 @@ impl AbstractDomain for State {
impl State {
/// Get a new state in which only the return values of the given extern symbol are tainted.
pub fn new(
taint_source: &ExternSymbol,
stack_pointer_register: &Variable,
pi_state: Option<&PointerInferenceState>,
) -> State {
pub fn new(taint_source: &ExternSymbol, pi_state: Option<&PointerInferenceState>) -> State {
let mut state = State {
register_taint: HashMap::new(),
memory_taint: HashMap::new(),
......@@ -92,16 +88,16 @@ impl State {
};
for return_arg in taint_source.return_values.iter() {
match return_arg {
Arg::Register { var, .. } => {
state
.register_taint
.insert(var.clone(), Taint::Tainted(var.size));
Arg::Register { expr, .. } => {
for var in expr.input_vars() {
state
.register_taint
.insert(var.clone(), Taint::Tainted(var.size));
}
}
Arg::Stack { offset, size, .. } => {
Arg::Stack { address, size, .. } => {
if let Some(pi_state) = pi_state {
let address_exp =
Expression::Var(stack_pointer_register.clone()).plus_const(*offset);
let address = pi_state.eval(&address_exp);
let address = pi_state.eval(address);
state.save_taint_to_memory(&address, Taint::Tainted(*size));
}
}
......@@ -255,26 +251,23 @@ impl State {
/// Return `true` if taint was found and `false` if no taint was found.
fn check_register_list_for_taint(
&self,
register_list: &[String],
register_list: &[Variable],
pi_state_option: Option<&PointerInferenceState>,
) -> bool {
// Check whether a register contains taint
for (register, taint) in &self.register_taint {
if register_list
.iter()
.any(|reg_name| *reg_name == register.name)
&& !taint.is_top()
{
return true;
for register in register_list {
if let Some(taint) = self.register_taint.get(register) {
if !taint.is_top() {
return true;
}
}
}
// Check whether some memory object referenced by a register may contain taint
if let Some(pi_state) = pi_state_option {
for register_name in register_list {
if let Some(register_value) = pi_state.get_register_by_name(register_name) {
if self.check_if_address_points_to_taint(register_value, pi_state) {
return true;
}
for register in register_list {
let register_value = pi_state.get_register(register);
if self.check_if_address_points_to_taint(register_value, pi_state) {
return true;
}
}
}
......@@ -291,7 +284,11 @@ impl State {
) -> bool {
if let Some(calling_conv) = project.get_standard_calling_convention() {
let mut all_parameters = calling_conv.integer_parameter_register.clone();
all_parameters.append(&mut calling_conv.float_parameter_register.clone());
for float_param in calling_conv.float_parameter_register.iter() {
for var in float_param.input_vars() {
all_parameters.push(var.clone());
}
}
self.check_register_list_for_taint(&all_parameters, pi_state_option)
} else {
// No standard calling convention found. Assume everything may be parameters or referenced by parameters.
......@@ -308,7 +305,10 @@ impl State {
pi_state_option: Option<&PointerInferenceState>,
) -> bool {
if let Some(calling_conv) = project.get_standard_calling_convention() {
self.check_register_list_for_taint(&calling_conv.return_register[..], pi_state_option)
self.check_register_list_for_taint(
&calling_conv.integer_return_register[..],
pi_state_option,
)
} else {
// No standard calling convention found. Assume everything may be return values or referenced by return values.
!self.is_empty()
......@@ -324,7 +324,7 @@ impl State {
if calling_conv
.callee_saved_register
.iter()
.any(|callee_saved_reg| register.name == *callee_saved_reg)
.any(|callee_saved_reg| register == callee_saved_reg)
{
Some((register.clone(), *taint))
} else {
......@@ -389,11 +389,11 @@ mod tests {
pub fn mock_with_pi_state() -> (State, PointerInferenceState) {
let arg1 = Arg::Register {
var: register("RAX"),
expr: Expression::Var(register("RAX")),
data_type: None,
};
let arg2 = Arg::Stack {
offset: 0,
address: Expression::Var(register("RSP")),
size: ByteSize::new(8),
data_type: None,
};
......@@ -408,7 +408,7 @@ mod tests {
no_return: false,
has_var_args: false,
};
let state = State::new(&symbol, &register("RSP"), Some(&pi_state));
let state = State::new(&symbol, Some(&pi_state));
(state, pi_state)
}
}
......
......@@ -70,8 +70,7 @@ fn get_umask_permission_arg(
}
let parameter = umask_symbol.get_unique_parameter()?;
let param_value =
state.eval_parameter_arg(parameter, &project.stack_pointer_register, global_memory)?;
let param_value = state.eval_parameter_arg(parameter, global_memory)?;
if let Ok(umask_arg) = param_value.try_to_bitvec() {
Ok(umask_arg.try_to_u64()?)
} else {
......@@ -114,7 +113,7 @@ pub fn check_cwe(
let mut log_messages = Vec::new();
let umask_symbol_map = get_symbol_map(project, &["umask".to_string()]);
if !umask_symbol_map.is_empty() {
for sub in project.program.term.subs.iter() {
for sub in project.program.term.subs.values() {
for (block, jmp, umask_symbol) in get_callsites(sub, &umask_symbol_map) {
match get_umask_permission_arg(
block,
......
......@@ -47,11 +47,11 @@ pub struct Config {
/// For each subroutine and each found dangerous symbol, check for calls to the corresponding symbol
pub fn get_calls<'a>(
subfunctions: &'a [Term<Sub>],
subfunctions: &'a BTreeMap<Tid, Term<Sub>>,
dangerous_symbols: &'a HashMap<&'a Tid, &'a str>,
) -> Vec<(&'a str, &'a Tid, &'a str)> {
let mut calls: Vec<(&str, &Tid, &str)> = Vec::new();
for sub in subfunctions.iter() {
for sub in subfunctions.values() {
calls.append(&mut get_calls_to_symbols(sub, dangerous_symbols));
}
......@@ -114,7 +114,7 @@ pub fn check_cwe(
let project = analysis_results.project;
let config: Config = serde_json::from_value(cwe_params.clone()).unwrap();
let prog: &Term<Program> = &project.program;
let subfunctions: &Vec<Term<Sub>> = &prog.term.subs;
let subfunctions = &prog.term.subs;
let external_symbols: &BTreeMap<Tid, ExternSymbol> = &prog.term.extern_symbols;
let dangerous_symbols = resolve_symbols(external_symbols, &config.symbols);
let dangerous_calls = get_calls(subfunctions, &dangerous_symbols);
......
......@@ -42,6 +42,7 @@ use crate::analysis::pointer_inference::State as PointerInferenceState;
use crate::analysis::string_abstraction::context::Context;
use crate::analysis::string_abstraction::state::State;
use crate::intermediate_representation::Arg;
use crate::intermediate_representation::Expression;
use crate::intermediate_representation::ExternSymbol;
use crate::intermediate_representation::Jmp;
use crate::intermediate_representation::Sub;
......@@ -151,7 +152,11 @@ pub fn check_system_call_parameter(
runtime_memory_image: &RuntimeMemoryImage,
) {
let sub = source_state.get_current_sub().unwrap();
if let Some(Arg::Register { var, .. }) = system_symbol.parameters.get(0) {
if let Some(Arg::Register {
expr: Expression::Var(var),
..
}) = system_symbol.parameters.get(0)
{
if let Some(value) = source_state.get_variable_to_pointer_map().get(var) {
let contains_string_constant = value.get_absolute_value().is_some();
let contains_relative_string_pointer = !value.get_relative_values().is_empty();
......
......@@ -79,7 +79,7 @@ pub fn check_cwe(
let symbol: &HashMap<&Tid, &str> = &[(tid, name)].iter().cloned().collect();
prog.term
.subs
.iter()
.values()
.for_each(|sub| warnings.append(&mut handle_sub(sub, symbol)));
}
warnings.sort();
......
......@@ -272,7 +272,10 @@ impl Expression {
/// This function recursively iterates into the expression and checks whether a sub register was used.
/// If so, the sub register is turned into a SUBPIECE of the corresponding base register.
fn replace_input_sub_register(&mut self, register_map: &HashMap<&String, &RegisterProperties>) {
pub fn replace_input_sub_register(
&mut self,
register_map: &HashMap<&String, &RegisterProperties>,
) {
match self {
Expression::BinOp { lhs, rhs, .. } => {
lhs.replace_input_sub_register(register_map);
......
......@@ -88,6 +88,9 @@ impl Expression {
///
/// The bytesize of the value is automatically adjusted to the bytesize of the given expression.
pub fn plus_const(self, value: i64) -> Expression {
if value == 0 {
return self;
}
let bytesize = self.bytesize();
let mut value = Bitvector::from_i64(value);
match u64::from(bytesize) {
......
use super::{Blk, ExternSymbol, Sub};
use crate::prelude::*;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
/// The `Program` structure represents a disassembled binary.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct Program {
/// The known functions contained in the binary
pub subs: Vec<Term<Sub>>,
pub subs: BTreeMap<Tid, Term<Sub>>,
/// Extern symbols linked to the binary by the linker.
pub extern_symbols: BTreeMap<Tid, ExternSymbol>,
/// Entry points into the binary,
/// i.e. the term identifiers of functions that may be called from outside of the binary.
pub entry_points: Vec<Tid>,
pub entry_points: BTreeSet<Tid>,
/// An offset that has been added to all addresses in the program compared to the addresses
/// as specified in the binary file.
///
......@@ -29,7 +29,7 @@ impl Program {
pub fn find_block(&self, tid: &Tid) -> Option<&Term<Blk>> {
self.subs
.iter()
.map(|sub| sub.term.blocks.iter())
.map(|(_, sub)| sub.term.blocks.iter())
.flatten()
.find(|block| block.tid == *tid)
}
......@@ -42,9 +42,9 @@ mod tests {
impl Program {
pub fn mock_empty() -> Program {
Program {
subs: Vec::new(),
subs: BTreeMap::new(),
extern_symbols: BTreeMap::new(),
entry_points: Vec::new(),
entry_points: BTreeSet::new(),
address_base_offset: 0,
}
}
......
use super::{Blk, CallingConvention, DatatypeProperties, Def, Jmp, Program, Sub, Variable};
use crate::prelude::*;
use super::*;
use crate::utils::log::LogMessage;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
mod block_duplication_normalization;
use block_duplication_normalization::*;
......@@ -19,10 +18,10 @@ pub struct Project {
/// The stack pointer register for the given CPU architecture.
pub stack_pointer_register: Variable,
/// The known calling conventions that may be used for calls to extern functions.
pub calling_conventions: Vec<CallingConvention>,
/// A list of all known physical registers for the CPU architecture.
pub calling_conventions: BTreeMap<String, CallingConvention>,
/// The set of all known physical registers for the CPU architecture.
/// Contains only base registers, i.e. sub-registers of other registers are not included.
pub register_list: Vec<Variable>,
pub register_set: BTreeSet<Variable>,
/// Contains the properties of C data types. (e.g. size)
pub datatype_properties: DatatypeProperties,
}
......@@ -36,8 +35,21 @@ impl Project {
/// Try to guess a standard calling convention from the list of calling conventions in the project.
pub fn get_standard_calling_convention(&self) -> Option<&CallingConvention> {
self.calling_conventions
.iter()
.find(|cconv| cconv.name == "__stdcall" || cconv.name == "__cdecl")
.get("__stdcall")
.or_else(|| self.calling_conventions.get("__cdecl"))
}
/// Return the calling convention associated with the given extern symbol.
/// If the extern symbol has no annotated calling convention
/// then return the standard calling convention of the project instead.
///
/// # Panics
///
/// Panics if the annotated calling convention is not contained in the project,
/// or if the symbol carries no annotation and the project has no standard calling convention.
/// The panic message names the extern symbol (and the missing convention, if annotated)
/// so that the offending input can be identified.
pub fn get_calling_convention(&self, extern_symbol: &ExternSymbol) -> &CallingConvention {
    match &extern_symbol.calling_convention {
        // The symbol is annotated: the convention must be known to the project.
        Some(cconv_name) => self
            .calling_conventions
            .get(cconv_name)
            .unwrap_or_else(|| {
                panic!(
                    "Unknown calling convention '{}' annotated for extern symbol '{}'",
                    cconv_name, extern_symbol.name
                )
            }),
        // No annotation: fall back to the standard calling convention of the project.
        None => self.get_standard_calling_convention().unwrap_or_else(|| {
            panic!(
                "No standard calling convention available for extern symbol '{}'",
                extern_symbol.name
            )
        }),
    }
}
}
......@@ -45,7 +57,7 @@ impl Project {
/// For all expressions contained in the project,
/// replace trivially computable subexpressions like `a XOR a` with their result.
fn substitute_trivial_expressions(&mut self) {
for sub in self.program.term.subs.iter_mut() {
for sub in self.program.term.subs.values_mut() {
for block in sub.term.blocks.iter_mut() {
for def in block.term.defs.iter_mut() {
match &mut def.term {
......@@ -83,7 +95,7 @@ impl Project {
fn remove_references_to_nonexisting_tids(&mut self) -> Vec<LogMessage> {
// Gather all existing jump targets
let mut jump_target_tids = HashSet::new();
for sub in self.program.term.subs.iter() {
for sub in self.program.term.subs.values() {
jump_target_tids.insert(sub.tid.clone());
for block in sub.term.blocks.iter() {
jump_target_tids.insert(block.tid.clone());
......@@ -96,7 +108,7 @@ impl Project {
let dummy_sub_tid = Tid::new("Artificial Sink Sub");
let dummy_blk_tid = Tid::new("Artificial Sink Block");
let mut log_messages = Vec::new();
for sub in self.program.term.subs.iter_mut() {
for sub in self.program.term.subs.values_mut() {
for block in sub.term.blocks.iter_mut() {
if let Err(mut logs) =
block.remove_nonexisting_indirect_jump_targets(&jump_target_tids)
......@@ -130,7 +142,10 @@ impl Project {
}],
},
};
self.program.term.subs.push(dummy_sub);
self.program
.term
.subs
.insert(dummy_sub.tid.clone(), dummy_sub);
}
log_messages
}
......@@ -140,7 +155,7 @@ impl Project {
/// The propagation only occurs inside basic blocks
/// but not across basic block boundaries.
fn propagate_input_expressions(&mut self) {
for sub in self.program.term.subs.iter_mut() {
for sub in self.program.term.subs.values_mut() {
for block in sub.term.blocks.iter_mut() {
block.merge_def_assignments_to_same_var();
block.propagate_input_expressions();
......@@ -221,7 +236,7 @@ mod tests {
impl Project {
pub fn mock_empty() -> Project {
let register_list = vec!["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"]
let register_set = vec!["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"]
.into_iter()
.map(|name| Variable::mock(name, ByteSize::new(8)))
.collect();
......@@ -232,8 +247,8 @@ mod tests {
},
cpu_architecture: "x86_64".to_string(),
stack_pointer_register: Variable::mock("RSP", 8u64),
calling_conventions: Vec::new(),
register_list,
calling_conventions: BTreeMap::new(),
register_set,
datatype_properties: DatatypeProperties::mock(),
}
}
......
......@@ -38,7 +38,7 @@ impl Project {
/// to the `Sub` TID in which the term is contained.
fn generate_tid_to_sub_tid_map(&self) -> HashMap<Tid, Tid> {
let mut tid_to_sub_map = HashMap::new();
for sub in self.program.term.subs.iter() {
for sub in self.program.term.subs.values() {
tid_to_sub_map.insert(sub.tid.clone(), sub.tid.clone());
for block in sub.term.blocks.iter() {
tid_to_sub_map.insert(block.tid.clone(), sub.tid.clone());
......@@ -56,7 +56,7 @@ impl Project {
/// Generate a map mapping all block TIDs to the corresponding block.
fn generate_block_tid_to_block_term_map(&self) -> HashMap<Tid, &Term<Blk>> {
let mut tid_to_block_map = HashMap::new();
for sub in self.program.term.subs.iter() {
for sub in self.program.term.subs.values() {
for block in sub.term.blocks.iter() {
tid_to_block_map.insert(block.tid.clone(), block);
}
......@@ -72,7 +72,7 @@ impl Project {
block_tid_to_block_map: &HashMap<Tid, &Term<Blk>>,
) -> HashMap<Tid, HashSet<Tid>> {
let mut sub_to_blocks_map = HashMap::new();
for sub in self.program.term.subs.iter() {
for sub in self.program.term.subs.values() {
let mut worklist: Vec<Tid> =
sub.term.blocks.iter().map(|blk| blk.tid.clone()).collect();
let mut block_set = HashSet::new();
......@@ -120,7 +120,7 @@ impl Project {
) -> HashMap<Tid, Vec<Term<Blk>>> {
// Generate new blocks without adjusting jump TIDs
let mut sub_to_additional_blocks_map = HashMap::new();
for sub in self.program.term.subs.iter() {
for sub in self.program.term.subs.values() {
let tid_suffix = format!("_{}", sub.tid);
let mut additional_blocks = Vec::new();
for block_tid in sub_to_blocks_map.get(&sub.tid).unwrap() {
......@@ -147,7 +147,7 @@ impl Project {
&mut self,
tid_to_original_sub_map: &HashMap<Tid, Tid>,
) {
for sub in self.program.term.subs.iter_mut() {
for sub in self.program.term.subs.values_mut() {
let tid_suffix = format!("_{}", sub.tid);
for block in sub.term.blocks.iter_mut() {
for jump in block.term.jmps.iter_mut() {
......@@ -198,7 +198,7 @@ pub fn make_block_to_sub_mapping_unique(project: &mut Project) {
&block_tid_to_block_map,
);
// Add the new blocks to the subs
for sub in project.program.term.subs.iter_mut() {
for sub in project.program.term.subs.values_mut() {
sub.term
.blocks
.append(&mut sub_to_additional_blocks_map.remove(&sub.tid).unwrap());
......@@ -210,6 +210,7 @@ pub fn make_block_to_sub_mapping_unique(project: &mut Project) {
#[cfg(test)]
mod tests {
use super::*;
use std::iter::FromIterator;
fn create_block_with_jump_target(block_name: &str, target_name: &str) -> Term<Blk> {
Term {
......@@ -252,12 +253,19 @@ mod tests {
"sub_3",
vec![create_block_with_jump_target("blk_4", "blk_3")],
);
let sub_1_tid = &sub_1.tid;
let sub_2_tid = &sub_2.tid;
let sub_3_tid = &sub_3.tid;
let mut project = Project::mock_empty();
project.program.term.subs = vec![sub_1.clone(), sub_2, sub_3];
project.program.term.subs = BTreeMap::from_iter([
(sub_1_tid.clone(), sub_1.clone()),
(sub_2_tid.clone(), sub_2.clone()),
(sub_3.tid.clone(), sub_3.clone()),
]);
make_block_to_sub_mapping_unique(&mut project);
assert_eq!(&project.program.term.subs[0], &sub_1);
assert_eq!(&project.program.term.subs[sub_1_tid], &sub_1);
let sub_2_modified = create_sub_with_blocks(
"sub_2",
vec![
......@@ -266,16 +274,16 @@ mod tests {
create_block_with_jump_target("blk_1_sub_2", "blk_2_sub_2"),
],
);
assert_eq!(project.program.term.subs[1].term.blocks.len(), 3);
assert_eq!(project.program.term.subs[sub_2_tid].term.blocks.len(), 3);
assert_eq!(
&project.program.term.subs[1].term.blocks[0],
&project.program.term.subs[sub_2_tid].term.blocks[0],
&sub_2_modified.term.blocks[0]
);
assert!(project.program.term.subs[1]
assert!(project.program.term.subs[sub_2_tid]
.term
.blocks
.contains(&sub_2_modified.term.blocks[1]));
assert!(project.program.term.subs[1]
assert!(project.program.term.subs[sub_2_tid]
.term
.blocks
.contains(&sub_2_modified.term.blocks[2]));
......@@ -288,24 +296,24 @@ mod tests {
create_block_with_jump_target("blk_1_sub_3", "blk_2_sub_3"),
],
);
assert_eq!(project.program.term.subs[2].term.blocks.len(), 4);
assert_eq!(project.program.term.subs[sub_3_tid].term.blocks.len(), 4);
assert_eq!(
&project.program.term.subs[2].term.blocks[0],
&project.program.term.subs[sub_3_tid].term.blocks[0],
&sub_3_modified.term.blocks[0]
);
assert!(project.program.term.subs[2]
assert!(project.program.term.subs[sub_3_tid]
.term
.blocks
.contains(&sub_3_modified.term.blocks[0]));
assert!(project.program.term.subs[2]
assert!(project.program.term.subs[sub_3_tid]
.term
.blocks
.contains(&sub_3_modified.term.blocks[1]));
assert!(project.program.term.subs[2]
assert!(project.program.term.subs[sub_3_tid]
.term
.blocks
.contains(&sub_3_modified.term.blocks[2]));
assert!(project.program.term.subs[2]
assert!(project.program.term.subs[sub_3_tid]
.term
.blocks
.contains(&sub_3_modified.term.blocks[3]));
......
use super::{Blk, Datatype, Project, Variable};
use super::{Blk, Datatype, Expression, Project, Variable};
use crate::prelude::*;
/// A `Sub` or subroutine represents a function with a given name and a list of basic blocks belonging to it.
......@@ -18,20 +18,17 @@ pub struct Sub {
/// A parameter or return argument of a function.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Arg {
/// The argument is passed in the given register
/// The argument is passed in a register
Register {
/// The variable object representing the register.
var: Variable,
/// The expression evaluating to the argument.
expr: Expression,
/// An optional data type indicator.
data_type: Option<Datatype>,
},
/// The argument is passed on the stack.
/// It is positioned at the given offset (in bytes) relative to the stack pointer on function entry
/// and has the given size.
Stack {
/// The position of the argument on the stack
/// given as offset relative to the stack pointer on function entry.
offset: i64,
/// The expression that computes the address of the argument on the stack.
address: Expression,
/// The size in bytes of the argument.
size: ByteSize,
/// An optional data type indicator.
......@@ -40,6 +37,14 @@ pub enum Arg {
}
impl Arg {
/// Create an [`Arg::Register`] whose expression is the given variable itself.
///
/// `data_type_hint` is stored unchanged as the optional data type of the argument.
pub fn from_var(var: Variable, data_type_hint: Option<Datatype>) -> Arg {
    let expr = Expression::Var(var);
    Arg::Register {
        expr,
        data_type: data_type_hint,
    }
}
/// Returns the data type field of an Arg object.
pub fn get_data_type(&self) -> Option<Datatype> {
match self {
......@@ -78,7 +83,11 @@ impl ExternSymbol {
pub fn get_unique_return_register(&self) -> Result<&Variable, Error> {
if self.return_values.len() == 1 {
match self.return_values[0] {
Arg::Register { ref var, .. } => Ok(var),
Arg::Register {
expr: Expression::Var(ref var),
..
} => Ok(var),
Arg::Register { .. } => Err(anyhow!("Return value is a sub-register")),
Arg::Stack { .. } => Err(anyhow!("Return value is passed on the stack")),
}
} else {
......@@ -97,12 +106,7 @@ impl ExternSymbol {
/// Get the calling convention corresponding to the extern symbol.
pub fn get_calling_convention<'a>(&self, project: &'a Project) -> &'a CallingConvention {
let cconv_name: &str = self.calling_convention.as_deref().unwrap_or("default");
project
.calling_conventions
.iter()
.find(|cconv| cconv.name == cconv_name)
.unwrap()
project.get_calling_convention(self)
}
}
......@@ -113,14 +117,18 @@ pub struct CallingConvention {
#[serde(rename = "calling_convention")]
pub name: String,
/// Possible integer parameter registers.
pub integer_parameter_register: Vec<String>,
pub integer_parameter_register: Vec<Variable>,
/// Possible float parameter registers.
pub float_parameter_register: Vec<String>,
/// A list of possible return register
pub return_register: Vec<String>,
/// Given as expressions, since they are usually sub-register of larger floating point registers.
pub float_parameter_register: Vec<Expression>,
/// A list of possible return registers for non-float values.
pub integer_return_register: Vec<Variable>,
/// A list of possible return registers for float values.
/// Given as expressions, since they are usually sub-register of larger floating point registers.
pub float_return_register: Vec<Expression>,
/// A list of callee-saved registers,
/// i.e. the values of these registers should be the same after the call as they were before the call.
pub callee_saved_register: Vec<String>,
pub callee_saved_register: Vec<Variable>,
}
#[cfg(test)]
......@@ -143,23 +151,29 @@ mod tests {
pub fn mock() -> CallingConvention {
CallingConvention {
name: "__stdcall".to_string(), // so that the mock is useable as standard calling convention in tests
integer_parameter_register: vec!["RDI".to_string()],
float_parameter_register: vec!["XMMO".to_string()],
return_register: vec!["RAX".to_string()],
callee_saved_register: vec!["RBP".to_string()],
integer_parameter_register: vec![Variable::mock("RDI", 8)],
float_parameter_register: vec![Expression::Var(Variable::mock("XMMO", 16))],
integer_return_register: vec![Variable::mock("RAX", 8)],
float_return_register: vec![],
callee_saved_register: vec![Variable::mock("RBP", 8)],
}
}
pub fn mock_with_parameter_registers(
integer_parameter_register: Vec<String>,
float_parameter_register: Vec<String>,
integer_parameter_register: Vec<Variable>,
float_parameter_register: Vec<Variable>,
) -> CallingConvention {
let float_parameter_register = float_parameter_register
.into_iter()
.map(Expression::Var)
.collect();
CallingConvention {
name: "__stdcall".to_string(), // so that the mock is useable as standard calling convention in tests
integer_parameter_register,
float_parameter_register,
return_register: vec!["RAX".to_string()],
callee_saved_register: vec!["RBP".to_string()],
integer_return_register: vec![Variable::mock("RAX", 8)],
float_return_register: vec![],
callee_saved_register: vec![Variable::mock("RBP", 8)],
}
}
}
......@@ -167,7 +181,7 @@ mod tests {
impl Arg {
pub fn mock_register(name: impl ToString, size_in_bytes: impl Into<ByteSize>) -> Arg {
Arg::Register {
var: Variable::mock(name.to_string(), size_in_bytes),
expr: Expression::Var(Variable::mock(name.to_string(), size_in_bytes)),
data_type: None,
}
}
......@@ -178,7 +192,7 @@ mod tests {
data_type: Option<Datatype>,
) -> Arg {
Arg::Register {
var: Variable::mock(name.to_string(), size_in_bytes),
expr: Expression::Var(Variable::mock(name.to_string(), size_in_bytes)),
data_type,
}
}
......@@ -188,7 +202,7 @@ mod tests {
size_in_bytes: impl Into<ByteSize>,
) -> Arg {
Arg::Register {
var: Variable::mock(name.to_string(), size_in_bytes),
expr: Expression::Var(Variable::mock(name.to_string(), size_in_bytes)),
data_type: Some(Datatype::Pointer),
}
}
......
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::usize;
use super::{Expression, ExpressionType, RegisterProperties, Variable};
......@@ -14,6 +14,7 @@ use crate::intermediate_representation::Jmp as IrJmp;
use crate::intermediate_representation::Program as IrProgram;
use crate::intermediate_representation::Project as IrProject;
use crate::intermediate_representation::Sub as IrSub;
use crate::intermediate_representation::Variable as IrVariable;
use crate::prelude::*;
use crate::utils::log::LogMessage;
......@@ -531,22 +532,23 @@ impl ExternSymbol {
for arg in symbol.arguments.iter() {
let ir_arg = if let Some(var) = arg.var.clone() {
IrArg::Register {
var: var.into(),
expr: IrExpression::Var(var.into()),
data_type: None,
}
} else if let Some(expr) = arg.location.clone() {
if expr.mnemonic == ExpressionType::LOAD {
let offset = i64::from_str_radix(
expr.input0
.clone()
.unwrap()
.address
.unwrap()
.trim_start_matches("0x"),
16,
)
.unwrap();
IrArg::Stack {
offset: i64::from_str_radix(
expr.input0
.clone()
.unwrap()
.address
.unwrap()
.trim_start_matches("0x"),
16,
)
.unwrap(),
address: IrExpression::Var(stack_pointer.clone().into()).plus_const(offset),
size: expr.input0.unwrap().size,
data_type: None,
}
......@@ -608,7 +610,7 @@ impl Program {
let subs = self
.subs
.into_iter()
.map(|sub| sub.into_ir_sub_term(stack_pointer.size))
.map(|sub| (sub.tid.clone(), sub.into_ir_sub_term(stack_pointer.size)))
.collect();
let extern_symbols = self
.extern_symbols
......@@ -625,7 +627,7 @@ impl Program {
IrProgram {
subs,
extern_symbols,
entry_points: self.entry_points,
entry_points: self.entry_points.into_iter().collect(),
address_base_offset,
}
}
......@@ -641,22 +643,62 @@ pub struct CallingConvention {
integer_parameter_register: Vec<String>,
/// Possible float parameter registers.
float_parameter_register: Vec<String>,
/// Possible return registers.
/// Possible integer return registers.
return_register: Vec<String>,
/// Possible float return registers.
float_return_register: Vec<String>,
/// Callee-saved registers.
unaffected_register: Vec<String>,
/// Registers that may be overwritten by the call, i.e. caller-saved registers.
killed_by_call_register: Vec<String>,
}
impl From<CallingConvention> for IrCallingConvention {
fn from(cconv: CallingConvention) -> IrCallingConvention {
impl CallingConvention {
/// Convert a calling convention parsed from Ghidra to the internally used IR.
fn into_ir_cconv(
self,
register_map: &HashMap<&String, &RegisterProperties>,
) -> IrCallingConvention {
let to_ir_var_list = |list: Vec<String>| {
list.into_iter()
.map(|register_name| {
let reg = register_map.get(&register_name).cloned().unwrap();
assert_eq!(reg.register, reg.base_register);
reg.into()
})
.collect()
};
let to_ir_expression_list = |list: Vec<String>| {
list.into_iter()
.map(|register_name| {
let reg = register_map.get(&register_name).cloned().unwrap();
let mut expression = IrExpression::Var(reg.into());
expression.replace_input_sub_register(register_map);
expression
})
.collect()
};
let to_ir_base_var_list = |list: Vec<String>| {
let register_set: BTreeSet<IrVariable> = list
.into_iter()
.map(|reg_name| {
let reg = register_map.get(&reg_name).unwrap();
let base_reg = *register_map.get(&reg.base_register).unwrap();
base_reg.into()
})
.collect();
register_set.into_iter().collect()
};
IrCallingConvention {
name: cconv.name,
integer_parameter_register: cconv.integer_parameter_register,
float_parameter_register: cconv.float_parameter_register,
return_register: cconv.return_register,
callee_saved_register: cconv.unaffected_register,
name: self.name,
integer_parameter_register: to_ir_var_list(self.integer_parameter_register),
float_parameter_register: to_ir_expression_list(self.float_parameter_register),
integer_return_register: to_ir_var_list(self.return_register),
float_return_register: to_ir_expression_list(self.float_return_register),
// TODO / FIXME: Using `to_ir_base_var_list` is technically incorrect.
// For example, on AArch64 only the bottom 64bit of some floating point registers are callee-saved.
// To fix this one may have to change callee_saved_register to a Vec<Expression>.
callee_saved_register: to_ir_base_var_list(self.unaffected_register),
}
}
}
......@@ -684,6 +726,11 @@ impl Project {
/// The `binary_base_address` denotes the base address of the memory image of the binary
/// according to the program headers of the binary.
pub fn into_ir_project(self, binary_base_address: u64) -> IrProject {
let register_map: HashMap<&String, &RegisterProperties> = self
.register_properties
.iter()
.map(|p| (&p.register, p))
.collect();
let mut program: Term<IrProgram> = Term {
tid: self.program.tid,
term: self.program.term.into_ir_program(
......@@ -693,15 +740,10 @@ impl Project {
&self.cpu_architecture,
),
};
let register_map: HashMap<&String, &RegisterProperties> = self
.register_properties
.iter()
.map(|p| (&p.register, p))
.collect();
let mut zero_extend_tids: HashSet<Tid> = HashSet::new();
// iterates over definitions and checks whether sub registers are used
// if so, they are swapped with subpieces of base registers
for sub in program.term.subs.iter_mut() {
for sub in program.term.subs.values_mut() {
for blk in sub.term.blocks.iter_mut() {
let mut def_iter = blk.term.defs.iter_mut().peekable();
while let Some(def) = def_iter.next() {
......@@ -786,7 +828,21 @@ impl Project {
});
}
}
let register_list = self
// Iterate over symbol arguments and replace used sub-registers
for symbol in program.term.extern_symbols.values_mut() {
for arg in symbol.parameters.iter_mut() {
if let IrArg::Register { expr, .. } = arg {
expr.replace_input_sub_register(&register_map);
}
}
for arg in symbol.return_values.iter_mut() {
if let IrArg::Register { expr, .. } = arg {
expr.replace_input_sub_register(&register_map);
}
}
}
let register_set = self
.register_properties
.iter()
.filter_map(|reg| {
......@@ -797,17 +853,18 @@ impl Project {
}
})
.collect();
let calling_conventions = self
.register_calling_convention
.clone()
.into_iter()
.map(|cconv| (cconv.name.clone(), cconv.into_ir_cconv(&register_map)))
.collect();
IrProject {
program,
cpu_architecture: self.cpu_architecture,
stack_pointer_register: self.stack_pointer_register.into(),
calling_conventions: self
.register_calling_convention
.clone()
.into_iter()
.map(|cconv| cconv.into())
.collect(),
register_list,
calling_conventions,
register_set,
datatype_properties: self.datatype_properties.clone(),
}
}
......
......@@ -88,6 +88,7 @@ impl Setup {
"integer_parameter_register": [],
"float_parameter_register": [],
"return_register": [],
"float_return_register": [],
"unaffected_register": [],
"killed_by_call_register": []
}
......@@ -746,6 +747,7 @@ fn from_project_to_ir_project() {
blk.term.jmps.push(setup.jmp_t);
let mut sub = setup.sub_t;
let sub_tid = sub.tid.clone();
sub.term.blocks.push(blk);
mock_project.program.term.subs.push(sub.clone());
......@@ -888,31 +890,14 @@ fn from_project_to_ir_project() {
};
// Checks whether the zero extension was correctly removed; leaving only 5 definitions behind.
assert_eq!(ir_program.subs[0].term.blocks[0].term.defs.len(), 5);
let ir_block = &ir_program.subs.get(&sub_tid).unwrap().term.blocks[0].term;
assert_eq!(ir_block.defs.len(), 5);
// Checks if the other definitions and the jump were correctly casted.
assert_eq!(
ir_program.subs[0].term.blocks[0].term.defs[0].term,
expected_def_0
);
assert_eq!(
ir_program.subs[0].term.blocks[0].term.defs[1].term,
expected_def_1
);
assert_eq!(
ir_program.subs[0].term.blocks[0].term.defs[2].term,
expected_def_3
);
assert_eq!(
ir_program.subs[0].term.blocks[0].term.defs[3].term,
expected_def_4
);
assert_eq!(
ir_program.subs[0].term.blocks[0].term.defs[4].term,
expected_def_5
);
assert_eq!(
ir_program.subs[0].term.blocks[0].term.jmps[0].term,
expected_jmp
);
assert_eq!(ir_block.defs[0].term, expected_def_0);
assert_eq!(ir_block.defs[1].term, expected_def_1);
assert_eq!(ir_block.defs[2].term, expected_def_3);
assert_eq!(ir_block.defs[3].term, expected_def_4);
assert_eq!(ir_block.defs[4].term, expected_def_5);
assert_eq!(ir_block.jmps[0].term, expected_jmp);
}
//! Handles argument detection by parsing format string arguments during a function call. (e.g. sprintf)
use std::collections::HashMap;
use crate::{intermediate_representation::Datatype, prelude::*};
use regex::Regex;
use super::binary::RuntimeMemoryImage;
use crate::prelude::*;
use crate::{
abstract_domain::{IntervalDomain, TryToBitvec},
analysis::pointer_inference::State as PointerInferenceState,
intermediate_representation::{
Arg, ByteSize, CallingConvention, DatatypeProperties, ExternSymbol, Project, Variable,
},
intermediate_representation::*,
};
use super::binary::RuntimeMemoryImage;
/// Returns all return registers of a symbol as a vector of strings.
pub fn get_return_registers_from_symbol(symbol: &ExternSymbol) -> Vec<String> {
symbol
.return_values
.iter()
.filter_map(|ret| match ret {
Arg::Register { var, .. } => Some(var.name.clone()),
_ => None,
})
.collect::<Vec<String>>()
}
use regex::Regex;
use std::collections::HashMap;
/// Parses the input format string for the corresponding string function.
pub fn get_input_format_string(
pi_state: &PointerInferenceState,
extern_symbol: &ExternSymbol,
format_string_index: usize,
stack_pointer_register: &Variable,
runtime_memory_image: &RuntimeMemoryImage,
) -> Result<String, Error> {
if let Some(format_string) = extern_symbol.parameters.get(format_string_index) {
if let Ok(Some(address)) = pi_state
.eval_parameter_arg(format_string, stack_pointer_register, runtime_memory_image)
.eval_parameter_arg(format_string, runtime_memory_image)
.as_ref()
.map(|param| param.get_if_absolute_value())
{
......@@ -135,7 +116,6 @@ pub fn get_variable_parameters(
pi_state,
extern_symbol,
format_string_index,
&project.stack_pointer_register,
runtime_memory_image,
);
......@@ -146,8 +126,10 @@ pub fn get_variable_parameters(
Ok(parameters) => {
return Ok(calculate_parameter_locations(
parameters,
extern_symbol.get_calling_convention(project),
project.get_calling_convention(extern_symbol),
format_string_index,
&project.stack_pointer_register,
&project.cpu_architecture,
));
}
Err(e) => {
......@@ -168,6 +150,8 @@ pub fn calculate_parameter_locations(
parameters: Vec<(Datatype, ByteSize)>,
calling_convention: &CallingConvention,
format_string_index: usize,
stack_register: &Variable,
cpu_arch: &str,
) -> Vec<Arg> {
let mut var_args: Vec<Arg> = Vec::new();
// The number of the remaining integer argument registers are calculated
......@@ -175,37 +159,55 @@ pub fn calculate_parameter_locations(
let mut integer_arg_register_count =
calling_convention.integer_parameter_register.len() - (format_string_index + 1);
let mut float_arg_register_count = calling_convention.float_parameter_register.len();
let mut stack_offset: i64 = 0;
let mut stack_offset: i64 = match cpu_arch {
"x86" | "x86_32" | "x86_64" => u64::from(stack_register.size) as i64,
_ => 0,
};
for (data_type, size) in parameters.iter() {
match data_type {
Datatype::Integer | Datatype::Pointer | Datatype::Char => {
if integer_arg_register_count > 0 {
let register_name = calling_convention.integer_parameter_register
[calling_convention.integer_parameter_register.len()
- integer_arg_register_count]
let register = calling_convention.integer_parameter_register[calling_convention
.integer_parameter_register
.len()
- integer_arg_register_count]
.clone();
var_args.push(create_register_arg(*size, register_name, data_type.clone()));
var_args.push(create_register_arg(
Expression::Var(register),
data_type.clone(),
));
integer_arg_register_count -= 1;
} else {
var_args.push(create_stack_arg(*size, stack_offset, data_type.clone()));
var_args.push(create_stack_arg(
*size,
stack_offset,
data_type.clone(),
stack_register,
));
stack_offset += u64::from(*size) as i64
}
}
Datatype::Double => {
if float_arg_register_count > 0 {
let register_name = calling_convention.float_parameter_register
[calling_convention.float_parameter_register.len()
- float_arg_register_count]
let expr = calling_convention.float_parameter_register[calling_convention
.float_parameter_register
.len()
- float_arg_register_count]
.clone();
var_args.push(create_register_arg(*size, register_name, data_type.clone()));
var_args.push(create_register_arg(expr, data_type.clone()));
float_arg_register_count -= 1;
} else {
var_args.push(create_stack_arg(*size, stack_offset, data_type.clone()));
var_args.push(create_stack_arg(
*size,
stack_offset,
data_type.clone(),
stack_register,
));
stack_offset += u64::from(*size) as i64
}
}
......@@ -217,22 +219,23 @@ pub fn calculate_parameter_locations(
}
/// Creates a stack parameter given a size, stack offset and data type.
pub fn create_stack_arg(size: ByteSize, stack_offset: i64, data_type: Datatype) -> Arg {
pub fn create_stack_arg(
size: ByteSize,
stack_offset: i64,
data_type: Datatype,
stack_register: &Variable,
) -> Arg {
Arg::Stack {
offset: stack_offset,
address: Expression::Var(stack_register.clone()).plus_const(stack_offset),
size,
data_type: Some(data_type),
}
}
/// Creates a register parameter given a size, register name and data type.
pub fn create_register_arg(size: ByteSize, register_name: String, data_type: Datatype) -> Arg {
pub fn create_register_arg(expr: Expression, data_type: Datatype) -> Arg {
Arg::Register {
var: Variable {
name: register_name,
size,
is_temp: false,
},
expr,
data_type: Some(data_type),
}
}
......
use crate::intermediate_representation::{Bitvector, Tid};
use std::collections::BTreeMap;
use std::iter::FromIterator;
use super::*;
......@@ -7,14 +9,6 @@ fn mock_pi_state() -> PointerInferenceState {
}
#[test]
fn test_get_return_registers_from_symbol() {
assert_eq!(
vec!["RAX"],
get_return_registers_from_symbol(&ExternSymbol::mock_string())
);
}
#[test]
fn test_get_variable_parameters() {
let mem_image = RuntimeMemoryImage::mock();
let mut pi_state = mock_pi_state();
......@@ -28,20 +22,20 @@ fn test_get_variable_parameters() {
);
let mut project = Project::mock_empty();
let cconv = CallingConvention::mock_with_parameter_registers(
vec!["RDI".to_string()],
vec!["XMM0".to_string()],
vec![Variable::mock("RDI", 8)],
vec![Variable::mock("XMM0", 16)],
);
project.calling_conventions = vec![cconv];
project.calling_conventions = BTreeMap::from_iter([(cconv.name.clone(), cconv)]);
let mut output: Vec<Arg> = Vec::new();
output.push(Arg::Stack {
offset: 0,
address: Expression::Var(Variable::mock("RSP", 8)).plus_const(8),
size: ByteSize::new(4),
data_type: Some(Datatype::Char),
});
output.push(Arg::Stack {
offset: 4,
address: Expression::Var(Variable::mock("RSP", 8)).plus_const(12),
size: ByteSize::new(4),
data_type: Some(Datatype::Integer),
});
......@@ -58,7 +52,7 @@ fn test_get_variable_parameters() {
);
output = vec![Arg::Stack {
offset: 0,
address: Expression::Var(Variable::mock("RSP", 8)).plus_const(8),
size: ByteSize::new(8),
data_type: Some(Datatype::Pointer),
}];
......@@ -96,14 +90,7 @@ fn test_get_input_format_string() {
assert_eq!(
"Hello World",
get_input_format_string(
&pi_state,
&sprintf_symbol,
1,
&Variable::mock("RSP", 8 as u64),
&mem_image
)
.unwrap()
get_input_format_string(&pi_state, &sprintf_symbol, 1, &mem_image).unwrap()
);
}
......@@ -180,12 +167,12 @@ fn test_parse_format_string_parameters() {
fn test_calculate_parameter_locations() {
let cconv = CallingConvention::mock_with_parameter_registers(
vec![
"RDI".to_string(),
"RSI".to_string(),
"R8".to_string(),
"R9".to_string(),
Variable::mock("RDI", 8),
Variable::mock("RSI", 8),
Variable::mock("R8", 8),
Variable::mock("R9", 8),
],
vec!["XMM0".to_string()],
vec![Variable::mock("XMM0", 16)],
);
let format_string_index: usize = 1;
let mut parameters: Vec<(Datatype, ByteSize)> = Vec::new();
......@@ -195,15 +182,15 @@ fn test_calculate_parameter_locations() {
let mut expected_args = vec![
Arg::Register {
var: Variable::mock("R8", ByteSize::new(8)),
expr: Expression::Var(Variable::mock("R8", ByteSize::new(8))),
data_type: Some(Datatype::Integer),
},
Arg::Register {
var: Variable::mock("XMM0", ByteSize::new(16)),
expr: Expression::Var(Variable::mock("XMM0", ByteSize::new(16))),
data_type: Some(Datatype::Double),
},
Arg::Register {
var: Variable::mock("R9", ByteSize::new(8)),
expr: Expression::Var(Variable::mock("R9", ByteSize::new(8))),
data_type: Some(Datatype::Pointer),
},
];
......@@ -211,12 +198,18 @@ fn test_calculate_parameter_locations() {
// Test Case 1: The string parameter is still written in the R9 register since 'f' is contained in the float register.
assert_eq!(
expected_args,
calculate_parameter_locations(parameters.clone(), &cconv, format_string_index)
calculate_parameter_locations(
parameters.clone(),
&cconv,
format_string_index,
&Variable::mock("RSP", 8),
"x86_64"
)
);
parameters.push(("s".to_string().into(), ByteSize::new(8)));
expected_args.push(Arg::Stack {
offset: 0,
address: Expression::Var(Variable::mock("RSP", 8)).plus_const(8),
size: ByteSize::new(8),
data_type: Some(Datatype::Pointer),
});
......@@ -224,7 +217,13 @@ fn test_calculate_parameter_locations() {
// Test Case 2: A second string parameter does not fit into the registers anymore and is written into the stack.
assert_eq!(
expected_args,
calculate_parameter_locations(parameters, &cconv, format_string_index)
calculate_parameter_locations(
parameters,
&cconv,
format_string_index,
&Variable::mock("RSP", 8),
"x86_64"
)
);
}
......@@ -232,21 +231,15 @@ fn test_calculate_parameter_locations() {
fn test_create_stack_arg() {
assert_eq!(
Arg::Stack {
address: Expression::Var(Variable::mock("RSP", 8)).plus_const(8),
size: ByteSize::new(8),
offset: 8,
data_type: Some(Datatype::Pointer),
},
create_stack_arg(ByteSize::new(8), 8, Datatype::Pointer),
create_stack_arg(
ByteSize::new(8),
8,
Datatype::Pointer,
&Variable::mock("RSP", 8)
),
)
}
#[test]
fn test_create_register_arg() {
assert_eq!(
Arg::Register {
var: Variable::mock("R9", ByteSize::new(8)),
data_type: Some(Datatype::Pointer),
},
create_register_arg(ByteSize::new(8), "R9".to_string(), Datatype::Pointer),
);
}
......@@ -12,6 +12,9 @@ package internal;
*
* If the correct .cspec file was found, it iterates over the XML DOM to extract the above mentioned registers.
*
* TODO: Since Ghidra 10.0 it should be possible to extract the registers using the Ghidra API without parsing the .cspec file.
* See issue 2357 in the Ghidra repository.
* We should update the code below accordingly as soon as we bump the minimal Ghidra version to 10.0!
*/
import ghidra.xml.*;
......@@ -299,7 +302,7 @@ public class ParseCspecContent {
} else if (entries.getName().equals("killedbycall")) {
convention.setKilledByCall(getRegisters(parser));
} else if (entries.getName().equals("output")) {
convention.setReturn(parseOutput(parser));
parseOutput(parser, convention);
} else {
discardSubTree(parser);
}
......@@ -364,15 +367,21 @@ public class ParseCspecContent {
*
* Parses the output and pentry wrapper to access the return register fields
*/
public static ArrayList<String> parseOutput(XmlPullParser parser) {
ArrayList<String> registers = new ArrayList<String>();
public static void parseOutput(XmlPullParser parser, RegisterConvention convention) {
ArrayList<String> integerRegisters = new ArrayList<String>();
ArrayList<String> floatRegisters = new ArrayList<String>();
parser.start("output");
while(parser.peek().isStart()) {
XmlElement pentry = parser.peek();
parser.start("pentry");
XmlElement entry = parser.peek();
if(entry.getName().equals("register")) {
parser.start("register");
registers.add(entry.getAttribute("name"));
if(isFloatRegister(pentry)) {
floatRegisters.add(entry.getAttribute("name"));
} else {
integerRegisters.add(entry.getAttribute("name"));
}
parser.end();
} else {
discardSubTree(parser);
......@@ -381,7 +390,8 @@ public class ParseCspecContent {
}
parser.end();
return registers;
convention.setReturn(integerRegisters);
convention.setFloatReturn(floatRegisters);
}
......
......@@ -14,6 +14,8 @@ public class RegisterConvention {
private ArrayList<String> floatParameter;
@SerializedName("return_register")
private ArrayList<String> return_;
@SerializedName("float_return_register")
private ArrayList<String> floatReturn;
@SerializedName("unaffected_register")
private ArrayList<String> unaffected;
@SerializedName("killed_by_call_register")
......@@ -31,7 +33,8 @@ public class RegisterConvention {
String cconv,
ArrayList<String> integerParameter,
ArrayList<String> floatParameter,
ArrayList<String> return_,
ArrayList<String> return_,
ArrayList<String> floatReturn,
ArrayList<String> unaffected,
ArrayList<String> killedByCall
) {
......@@ -39,6 +42,7 @@ public class RegisterConvention {
this.setIntegerParameter(integerParameter);
this.setFloatParameter(floatParameter);
this.setReturn(return_);
this.setFloatReturn(floatReturn);
this.setUnaffected(unaffected);
this.setKilledByCall(killedByCall);
}
......@@ -75,6 +79,14 @@ public class RegisterConvention {
this.return_ = return_;
}
/**
 * Returns the list of float return registers of this calling convention.
 * The list is serialized under the key "float_return_register".
 */
public ArrayList<String> getFloatReturn() {
    return this.floatReturn;
}
/**
 * Replaces the stored list of float return registers with the given list.
 * The list reference is stored as-is, without copying.
 */
public void setFloatReturn(ArrayList<String> floatReturn) {
this.floatReturn = floatReturn;
}
public ArrayList<String> getUnaffected() {
return unaffected;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment