Unverified Commit 6810c1f8 by Melvin Klimke Committed by GitHub

Refactor parameter detection (#186)

parent c6a2741b
//! Handles argument detection by parsing the format string arguments of a function call (e.g. `sprintf`).
use std::collections::HashMap;
use regex::Regex;
use crate::{
abstract_domain::{DataDomain, IntervalDomain, TryToBitvec},
analysis::pointer_inference::State as PointerInferenceState,
intermediate_representation::{
Arg, ByteSize, CallingConvention, DatatypeProperties, ExternSymbol, Project, Variable,
},
};
use super::binary::RuntimeMemoryImage;
/// Collects the names of all registers in which the given extern symbol returns values.
///
/// Return values that are not of the `Arg::Register` variant are skipped.
pub fn get_return_registers_from_symbol(symbol: &ExternSymbol) -> Vec<String> {
    let mut register_names = Vec::new();
    for return_value in symbol.return_values.iter() {
        if let Arg::Register(register) = return_value {
            register_names.push(register.name.clone());
        }
    }
    register_names
}
/// Reads the format string that is passed to the given extern function call.
///
/// The parameter at position `format_string_index` of `extern_symbol` is evaluated
/// in `pi_state`, and the resulting address is used to read the string from
/// `runtime_memory_image`.
///
/// # Panics
///
/// Panics if the symbol has no parameter at `format_string_index`,
/// if the parameter cannot be evaluated to an address,
/// or if reading the string from that address fails.
pub fn get_input_format_string(
    pi_state: &PointerInferenceState,
    extern_symbol: &ExternSymbol,
    format_string_index: usize,
    stack_pointer_register: &Variable,
    runtime_memory_image: &RuntimeMemoryImage,
) -> String {
    let format_string = match extern_symbol.parameters.get(format_string_index) {
        Some(parameter) => parameter,
        None => panic!(
            "No format string parameter at specified index {} for function {}",
            format_string_index, extern_symbol.name
        ),
    };
    let evaluated_parameter = pi_state.eval_parameter_arg(
        format_string,
        &stack_pointer_register,
        runtime_memory_image,
    );
    match evaluated_parameter {
        Ok(address) => {
            parse_format_string_destination_and_return_content(address, runtime_memory_image)
        }
        Err(_) => panic!("Could not parse target address of format string pointer."),
    }
}
/// Reads the format string located at the given address.
///
/// The abstract `address` is first converted to a concrete bitvector.
/// The null-terminated string found at that address in `runtime_memory_image`
/// is then returned as an owned `String`.
///
/// # Panics
///
/// Panics if the address cannot be converted to a bitvector
/// or if the string cannot be read from the memory image.
pub fn parse_format_string_destination_and_return_content(
    address: DataDomain<IntervalDomain>,
    runtime_memory_image: &RuntimeMemoryImage,
) -> String {
    let address_vector = match address.try_to_bitvec() {
        Ok(bitvector) => bitvector,
        Err(_) => panic!("Could not translate format string address to bitvector."),
    };
    let read_result = runtime_memory_image.read_string_until_null_terminator(&address_vector);
    match read_result {
        Err(e) => panic!("{}", e),
        Ok(format_string) => format_string.to_string(),
    }
}
/// Extracts the conversion specifiers of a format string together with the
/// byte sizes of their corresponding data types.
///
/// A conversion is recognized as a `%`, followed by an optional field width of
/// at most two digits and one of the supported conversion characters
/// (`c C d i o u x X e E f F g G a A n p s S`).
/// An escaped percent sign (`%%`) is literal text and never starts a conversion,
/// so `"100%%d"` yields no parameter.
///
/// Returns one `(conversion character, byte size)` pair per detected conversion,
/// in order of appearance in `format_string`.
///
/// # Panics
///
/// Panics if the regular expression cannot be compiled.
pub fn parse_format_string_parameters(
    format_string: &str,
    datatype_properties: &DatatypeProperties,
) -> Vec<(String, ByteSize)> {
    // The `%%` alternative comes first so that an escaped percent sign consumes
    // both characters and cannot be mistaken for the start of a conversion.
    // The character class deliberately contains no commas: inside `[...]` a comma
    // is a literal character, so `"%,"` would otherwise be reported as a specifier
    // that `map_format_specifier_to_bytesize` cannot map.
    let re = Regex::new(r#"%%|%\d{0,2}([cCdiouxXeEfFgGaAnpsS])"#).expect("No valid regex!");
    re.captures_iter(format_string)
        .filter_map(|cap| {
            // Group 1 is absent for the `%%` alternative, which is skipped here.
            let specifier = cap.get(1)?.as_str();
            Some((
                specifier.to_string(),
                map_format_specifier_to_bytesize(datatype_properties, specifier.to_string()),
            ))
        })
        .collect()
}
/// Looks up the byte size of the data type that a format specifier stands for.
///
/// Integer specifiers map to `integer_size`, float specifiers to `double_size`
/// and pointer specifiers to `pointer_size` of the given `datatype_properties`.
///
/// # Panics
///
/// Panics if `specifier` is neither an integer, a float nor a pointer specifier.
pub fn map_format_specifier_to_bytesize(
    datatype_properties: &DatatypeProperties,
    specifier: String,
) -> ByteSize {
    let conversion = specifier.as_str();
    if is_integer(conversion) {
        datatype_properties.integer_size
    } else if is_float(conversion) {
        datatype_properties.double_size
    } else if is_pointer(conversion) {
        datatype_properties.pointer_size
    } else {
        panic!("Unknown format specifier.")
    }
}
/// Determines the locations of the string parameters passed through the
/// variable argument list of a format string function call.
///
/// The position of the format string parameter is looked up by the symbol's name
/// in `format_string_index_map`. The format string is then read from memory and parsed.
/// If it contains no string specifier, an empty vector is returned.
/// Otherwise the result of `calculate_parameter_locations` is returned.
///
/// # Panics
///
/// Panics if `format_string_index_map` has no entry for the symbol's name.
/// Panics from reading or parsing the format string are passed through as well.
pub fn get_variable_number_parameters(
    project: &Project,
    pi_state: &PointerInferenceState,
    extern_symbol: &ExternSymbol,
    format_string_index_map: &HashMap<String, usize>,
    runtime_memory_image: &RuntimeMemoryImage,
) -> Vec<Arg> {
    let format_string_index = format_string_index_map
        .get(&extern_symbol.name)
        .copied()
        .expect("External Symbol does not contain a format string parameter.");

    let format_string = get_input_format_string(
        pi_state,
        extern_symbol,
        format_string_index,
        &project.stack_pointer_register,
        runtime_memory_image,
    );
    let parameters =
        parse_format_string_parameters(format_string.as_str(), &project.datatype_properties);

    let contains_string_parameter = parameters
        .iter()
        .any(|(specifier, _)| is_string(specifier));
    if !contains_string_parameter {
        return Vec::new();
    }

    calculate_parameter_locations(
        project,
        parameters,
        extern_symbol.get_calling_convention(project),
        format_string_index,
    )
}
/// Calculates the register and stack positions of format string parameters.
///
/// Integer and pointer parameters are assigned to the integer parameter registers
/// that follow the format string parameter, and float parameters to the float
/// parameter registers. Once the registers of a kind are used up, further
/// parameters of that kind are placed on the stack, starting at offset 0 and
/// advancing by the size of each stack parameter.
///
/// Only string parameters (`s`/`S`) are added to the returned argument vector,
/// which is used for later tainting. All other parameters merely consume
/// a register or a stack slot.
///
/// If the calling convention has no integer parameter register left after the
/// format string parameter, all integer and pointer parameters are placed on the stack.
pub fn calculate_parameter_locations(
    project: &Project,
    parameters: Vec<(String, ByteSize)>,
    calling_convention: &CallingConvention,
    format_string_index: usize,
) -> Vec<Arg> {
    let integer_registers = &calling_convention.integer_parameter_register;
    let mut var_args: Vec<Arg> = Vec::new();
    // The number of the remaining integer argument registers is calculated
    // from the format string position since it is the last fixed argument.
    // The subtraction saturates at zero: a calling convention with fewer integer
    // registers than fixed arguments would otherwise underflow the `usize`.
    let mut integer_arg_register_count = integer_registers
        .len()
        .saturating_sub(format_string_index + 1);
    let mut float_arg_register_count = calling_convention.float_parameter_register.len();
    let mut stack_offset: i64 = 0;

    for (type_name, size) in parameters.iter() {
        if is_integer(type_name) || is_pointer(type_name) {
            if integer_arg_register_count > 0 {
                if is_string(type_name) {
                    // Registers are consumed front to back, so the next free one
                    // sits `integer_arg_register_count` entries before the end.
                    let register_name = integer_registers
                        [integer_registers.len() - integer_arg_register_count]
                        .clone();
                    var_args.push(create_string_register_arg(
                        project.get_pointer_bytesize(),
                        register_name,
                    ));
                }
                integer_arg_register_count -= 1;
            } else {
                if is_string(type_name) {
                    var_args.push(create_string_stack_arg(*size, stack_offset));
                }
                stack_offset += u64::from(*size) as i64;
            }
        } else if float_arg_register_count > 0 {
            float_arg_register_count -= 1;
        } else {
            stack_offset += u64::from(*size) as i64;
        }
    }

    var_args
}
/// Builds an `Arg::Stack` for a string parameter with the given size and stack offset.
pub fn create_string_stack_arg(size: ByteSize, stack_offset: i64) -> Arg {
    let offset = stack_offset;
    Arg::Stack { offset, size }
}
/// Builds an `Arg::Register` for a string parameter.
///
/// The wrapped variable is a non-temporary register with the given name and size.
pub fn create_string_register_arg(size: ByteSize, register_name: String) -> Arg {
    let register = Variable {
        name: register_name,
        size,
        is_temp: false,
    };
    Arg::Register(register)
}
/// Returns `true` if the format specifier denotes an integer type
/// (`d`, `i`, `o`, `x`, `X`, `u`, `c` or `C`).
pub fn is_integer(specifier: &str) -> bool {
    const INTEGER_SPECIFIERS: [&str; 8] = ["d", "i", "o", "x", "X", "u", "c", "C"];
    INTEGER_SPECIFIERS.contains(&specifier)
}
/// Returns `true` if the format specifier denotes a pointer type
/// (`s`, `S`, `n` or `p`).
pub fn is_pointer(specifier: &str) -> bool {
    const POINTER_SPECIFIERS: [&str; 4] = ["s", "S", "n", "p"];
    POINTER_SPECIFIERS.contains(&specifier)
}
/// Returns `true` if the format specifier denotes a floating point type
/// (`f`, `F`, `e`, `E`, `a`, `A`, `g` or `G`).
pub fn is_float(specifier: &str) -> bool {
    const FLOAT_SPECIFIERS: [&str; 8] = ["f", "F", "e", "E", "a", "A", "g", "G"];
    FLOAT_SPECIFIERS.contains(&specifier)
}
/// Returns `true` if the format specifier denotes a string (`s` or `S`).
pub fn is_string(specifier: &str) -> bool {
    const STRING_SPECIFIERS: [&str; 2] = ["s", "S"];
    STRING_SPECIFIERS.contains(&specifier)
}
#[cfg(test)]
mod tests;
use crate::intermediate_representation::{Bitvector, Tid};
use super::*;
/// Creates a `PointerInferenceState` from a mocked `RSP` variable and the TID `func`.
fn mock_pi_state() -> PointerInferenceState {
    let stack_register = Variable::mock("RSP", 8_u64);
    let function_tid = Tid::new("func");
    PointerInferenceState::new(&stack_register, function_tid)
}
/// The mocked string symbol is expected to return its value in `RAX` only.
#[test]
fn test_get_return_registers_from_symbol() {
    let symbol = ExternSymbol::mock_string();
    let return_registers = get_return_registers_from_symbol(&symbol);
    assert_eq!(vec!["RAX"], return_registers);
}
/// Checks the detected variable parameters for two format strings of the mocked memory image.
#[test]
fn test_get_variable_number_parameters() {
    let mem_image = RuntimeMemoryImage::mock();
    let mut pi_state = mock_pi_state();
    let sprintf_symbol = ExternSymbol::mock_string();

    let mut format_string_index_map: HashMap<String, usize> = HashMap::new();
    format_string_index_map.insert("sprintf".to_string(), 0);

    let mut project = Project::mock_empty();
    project.calling_conventions = vec![CallingConvention::mock_with_parameter_registers(
        vec!["RDI".to_string()],
        vec!["XMM0".to_string()],
    )];

    // Each case pairs the address of a format string with the expected arguments.
    // NOTE(review): presumably the string at 0x5000 has no string specifier
    // while the one at 0x500c has exactly one; confirm against the mocked memory image.
    let test_cases: Vec<(&str, Vec<Arg>)> = vec![
        ("5000", Vec::new()),
        (
            "500c",
            vec![Arg::Stack {
                offset: 0,
                size: ByteSize::new(8),
            }],
        ),
    ];

    for (address, expected_args) in test_cases {
        let global_address = Bitvector::from_str_radix(16, address).unwrap();
        pi_state.set_register(
            &Variable::mock("RDI", 8_u64),
            DataDomain::Value(IntervalDomain::new(global_address.clone(), global_address)),
        );
        let detected_args = get_variable_number_parameters(
            &project,
            &pi_state,
            &sprintf_symbol,
            &format_string_index_map,
            &mem_image,
        );
        assert_eq!(expected_args, detected_args);
    }
}
/// Reads the format string whose address is stored in `RSI` (parameter index 1).
#[test]
fn test_get_input_format_string() {
    let mem_image = RuntimeMemoryImage::mock();
    let sprintf_symbol = ExternSymbol::mock_string();
    let stack_register = Variable::mock("RSP", 8_u64);

    let mut pi_state = mock_pi_state();
    let global_address = Bitvector::from_str_radix(16, "3002").unwrap();
    let format_string_pointer =
        DataDomain::Value(IntervalDomain::new(global_address.clone(), global_address));
    pi_state.set_register(&Variable::mock("RSI", 8_u64), format_string_pointer);

    let format_string =
        get_input_format_string(&pi_state, &sprintf_symbol, 1, &stack_register, &mem_image);
    assert_eq!("Hello World", format_string);
}
/// Reads the string at address 0x3002 of the mocked memory image.
#[test]
fn test_parse_format_string_destination_and_return_content() {
    let mem_image = RuntimeMemoryImage::mock();
    let string_address_vector = Bitvector::from_str_radix(16, "3002").unwrap();
    let interval = IntervalDomain::new(string_address_vector.clone(), string_address_vector);
    let string_address = DataDomain::Value(interval);

    let content = parse_format_string_destination_and_return_content(string_address, &mem_image);
    assert_eq!("Hello World", content);
}
/// Checks the detected specifiers and sizes for several format strings.
#[test]
fn test_parse_format_string_parameters() {
    let properties = DatatypeProperties::mock();
    // Small builders for the expected `(specifier, size)` pairs.
    let pointer = |specifier: &str| (specifier.to_string(), properties.pointer_size);
    let integer = |specifier: &str| (specifier.to_string(), properties.integer_size);

    let test_cases: Vec<(&str, Vec<(String, ByteSize)>)> = vec![
        (
            "%s \"%s\" %s",
            vec![pointer("s"), pointer("s"), pointer("s")],
        ),
        (
            "ifconfig eth0 add 3ffe:501:ffff:101:2%02x:%02xff:fe%02x:%02x%02x/64",
            vec![
                integer("x"),
                integer("x"),
                integer("x"),
                integer("x"),
                integer("x"),
            ],
        ),
        ("/dev/sd%c%d", vec![integer("c"), integer("d")]),
        (
            "%s: Unable to open \'%s\', errno=%d\n",
            vec![pointer("s"), pointer("s"), integer("d")],
        ),
    ];

    for (case, output) in test_cases {
        assert_eq!(output, parse_format_string_parameters(case, &properties));
    }
}
/// Checks the mapped sizes of one pointer, one float and one integer specifier.
#[test]
fn test_map_format_specifier_to_bytesize() {
    let properties = DatatypeProperties::mock();
    let test_cases = vec![
        (ByteSize::new(8), "s"),
        (ByteSize::new(8), "f"),
        (ByteSize::new(4), "d"),
    ];
    for (expected_size, specifier) in test_cases {
        assert_eq!(
            expected_size,
            map_format_specifier_to_bytesize(&properties, specifier.to_string())
        );
    }
}
/// An unsupported specifier has to make the mapping panic.
#[test]
#[should_panic]
fn test_map_invalid_format_specifier_to_bytesize() {
    let properties = DatatypeProperties::mock();
    let unknown_specifier = "w".to_string();
    map_format_specifier_to_bytesize(&properties, unknown_specifier);
}
/// Checks the register and stack placement of string parameters.
#[test]
fn test_calculate_parameter_locations() {
    let project = Project::mock_empty();
    let integer_registers: Vec<String> = ["RDI", "RSI", "R8", "R9"]
        .iter()
        .map(|name| name.to_string())
        .collect();
    let cconv = CallingConvention::mock_with_parameter_registers(
        integer_registers,
        vec!["XMM0".to_string()],
    );
    let format_string_index: usize = 1;

    let mut parameters: Vec<(String, ByteSize)> = vec![
        ("d".to_string(), ByteSize::new(4)),
        ("f".to_string(), ByteSize::new(8)),
        ("s".to_string(), ByteSize::new(4)),
    ];
    let mut expected_args = vec![Arg::Register(Variable::mock("R9", ByteSize::new(8)))];

    // Test Case 1: The string parameter is still written in the R9 register since 'f' is contained in the float register.
    let register_only =
        calculate_parameter_locations(&project, parameters.clone(), &cconv, format_string_index);
    assert_eq!(expected_args, register_only);

    // Test Case 2: A second string parameter does not fit into the registers anymore and is written into the stack.
    parameters.push(("s".to_string(), ByteSize::new(4)));
    expected_args.push(Arg::Stack {
        offset: 0,
        size: ByteSize::new(4),
    });
    let register_and_stack =
        calculate_parameter_locations(&project, parameters, &cconv, format_string_index);
    assert_eq!(expected_args, register_and_stack);
}
/// The created stack argument has to carry the given size and offset.
#[test]
fn test_create_string_stack_arg() {
    let expected = Arg::Stack {
        size: ByteSize::new(8),
        offset: 8,
    };
    let created = create_string_stack_arg(ByteSize::new(8), 8);
    assert_eq!(expected, created)
}
/// The created register argument has to equal a mocked `R9` variable of size 8.
#[test]
fn test_create_string_register_arg() {
    let expected = Arg::Register(Variable::mock("R9", ByteSize::new(8)));
    let created = create_string_register_arg(ByteSize::new(8), "R9".to_string());
    assert_eq!(expected, created);
}
/// `d` and `i` are integer specifiers, `f` is not.
#[test]
fn test_is_integer() {
    for specifier in ["d", "i"].iter() {
        assert!(is_integer(specifier));
    }
    assert!(!is_integer("f"));
}
/// `s`, `S`, `n` and `p` are pointer specifiers, `g` is not.
#[test]
fn test_is_pointer() {
    for specifier in ["s", "S", "n", "p"].iter() {
        assert!(is_pointer(specifier));
    }
    assert!(!is_pointer("g"));
}
/// `s` and `S` are string specifiers, `g` is not.
#[test]
fn test_is_string() {
    for specifier in ["s", "S"].iter() {
        assert!(is_string(specifier));
    }
    assert!(!is_string("g"));
}
/// `f` and `A` are float specifiers, `s` is not.
#[test]
fn test_is_float() {
    for specifier in ["f", "A"].iter() {
        assert!(is_float(specifier));
    }
    assert!(!is_float("s"));
}
//! This module contains various utility modules and helper functions.
pub mod arguments;
pub mod binary;
pub mod graph_utils;
pub mod log;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment