Unverified Commit 4746e4d3 by Melvin Klimke Committed by GitHub

Parse all data types from format string (#200)

parent 29b2e1a0
......@@ -3,7 +3,7 @@ use petgraph::graph::NodeIndex;
use crate::abstract_domain::{DataDomain, IntervalDomain, PointerDomain};
use crate::analysis::pointer_inference::{Data, PointerInference as PointerInferenceComputation};
use crate::intermediate_representation::{
Arg, BinOpType, Bitvector, ByteSize, Expression, ExternSymbol, Tid, Variable,
Arg, BinOpType, Bitvector, ByteSize, CallingConvention, Expression, ExternSymbol, Tid, Variable,
};
use crate::utils::binary::RuntimeMemoryImage;
use crate::{checkers::cwe_476::Taint, utils::log::CweWarning};
......@@ -22,6 +22,10 @@ impl<'a> Context<'a> {
#[test]
fn tainting_generic_extern_symbol_parameters() {
let mut setup = Setup::new();
setup.project.calling_conventions = vec![CallingConvention::mock_with_parameter_registers(
vec!["RDI".to_string(), "RSI".to_string()],
vec!["XMM0".to_string()],
)];
let r9_reg = Variable::mock("R9", 8 as u64);
let rbp_reg = Variable::mock("RBP", 8 as u64);
let rdi_reg = Variable::mock("RDI", 8 as u64);
......@@ -112,6 +116,10 @@ fn tainting_generic_extern_symbol_parameters() {
#[test]
fn tainting_extern_string_symbol_parameters() {
let mut setup = Setup::new();
setup.project.calling_conventions = vec![CallingConvention::mock_with_parameter_registers(
vec!["RDI".to_string(), "RSI".to_string()],
vec!["XMM0".to_string()],
)];
let rbp_reg = Variable::mock("RBP", 8 as u64); // callee saved -> will point to RSP
let rdi_reg = Variable::mock("RDI", 8 as u64); // parameter 1 -> will point to RBP - 8
let rsi_reg = Variable::mock("RSI", 8 as u64); // parameter 2
......
......@@ -947,6 +947,10 @@ fn splitting_return_stub() {
#[test]
fn updating_call_stub() {
let mut setup = Setup::new();
setup.project.calling_conventions = vec![CallingConvention::mock_with_parameter_registers(
vec!["RDI".to_string(), "RSI".to_string()],
vec!["XMM0".to_string()],
)];
let r9_reg = Variable::mock("R9", 8 as u64); // non callee saved
let rbp_reg = Variable::mock("RBP", 8 as u64);
let rdi_reg = Variable::mock("RDI", 8 as u64);
......
......@@ -110,6 +110,23 @@ pub struct DatatypeProperties {
pub short_size: ByteSize,
}
impl DatatypeProperties {
/// Matches a given data type with its size from the properties struct.
pub fn get_size_from_data_type(&self, data_type: Datatype) -> ByteSize {
match data_type {
Datatype::Char => self.char_size,
Datatype::Double => self.double_size,
Datatype::Float => self.float_size,
Datatype::Integer => self.integer_size,
Datatype::LongDouble => self.long_double_size,
Datatype::LongLong => self.long_long_size,
Datatype::Long => self.long_size,
Datatype::Pointer => self.pointer_size,
Datatype::Short => self.short_size,
}
}
}
/// C/C++ data types.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Datatype {
......@@ -133,6 +150,25 @@ pub enum Datatype {
Short,
}
impl From<String> for Datatype {
    /// Maps a format string conversion specifier (optional length modifier plus
    /// conversion character, without the leading `%`) to the data type of the
    /// corresponding argument.
    ///
    /// The purpose of this conversion is to locate parameters to variadic functions.
    /// Therefore the returned type is the type of the argument *after* the default
    /// argument promotions: char and short values are passed as integers
    /// (e.g. 1 byte char -> 4 byte integer) and floats are passed as doubles
    /// (typically 4 byte float -> 8 byte double).
    ///
    /// # Panics
    ///
    /// Panics if `specifier` is not one of the specifiers listed in the match below.
    fn from(specifier: String) -> Self {
        match specifier.as_str() {
            // Plain, `h` and `hh` integer conversions all arrive as promoted integers.
            "c" | "C" | "d" | "i" | "u" | "o" | "x" | "X" | "hi" | "hd" | "hu" | "ho" | "hx"
            | "hX" | "hhi" | "hhd" | "hhu" | "hho" | "hhx" | "hhX" => Datatype::Integer,
            "s" | "S" | "n" | "p" => Datatype::Pointer,
            // The `l` modifier has no effect on floating point conversions.
            "lf" | "lg" | "le" | "la" | "lF" | "lG" | "lE" | "lA" | "f" | "F" | "e" | "E" | "a"
            | "A" | "g" | "G" => Datatype::Double,
            "li" | "ld" | "lu" | "lo" | "lx" | "lX" => Datatype::Long,
            "lli" | "lld" | "llu" | "llo" | "llx" | "llX" => Datatype::LongLong,
            "Lf" | "Lg" | "Le" | "La" | "LF" | "LG" | "LE" | "LA" => Datatype::LongDouble,
            _ => panic!("Invalid data type specifier from format string."),
        }
    }
}
#[cfg(test)]
mod tests {
use apint::BitWidth;
......
......@@ -82,38 +82,35 @@ pub fn parse_format_string_destination_and_return_content(
// NOTE(review): this span appears to be a rendered diff with the +/- markers removed.
// Two return types (`Vec<(String, ByteSize)>` and `Result<Vec<(Datatype, ByteSize)>, Error>`),
// two regex definitions and the body of `map_format_specifier_to_bytesize` are interleaved
// line by line, so the text below is not compilable as it stands.
pub fn parse_format_string_parameters(
format_string: &str,
datatype_properties: &DatatypeProperties,
) -> Vec<(String, ByteSize)> {
// NOTE(review): commas inside `[...]` are literal characters, so both patterns below also
// accept `,` as a conversion character (e.g. the text `%,`). The captured `,` then reaches
// a specifier mapping that panics on unknown specifiers.
// NOTE(review): only up to two width digits are allowed between `%` and the specifier.
// Directives with flags or a precision (e.g. `%-5d`, `%.2f`) do not match and are not
// reported; `%%` is not treated as an escape, so `%%d` yields a `d` parameter.
let re = Regex::new(r#"%\d{0,2}([c,C,d,i,o,u,x,X,e,E,f,F,g,G,a,A,n,p,s,S])"#)
) -> Result<Vec<(Datatype, ByteSize)>, Error> {
let re = Regex::new(r#"%\d{0,2}(([c,C,d,i,o,u,x,X,e,E,f,F,g,G,a,A,n,p,s,S])|(hi|hd|hu|li|ld|lu|lli|lld|llu|lf|lg|le|la|lF|lG|lE|lA|Lf|Lg|Le|La|LF|LG|LE|LA))"#)
.expect("No valid regex!");
re.captures_iter(format_string)
let datatype_map: Vec<(Datatype, ByteSize)> = re
.captures_iter(format_string)
.map(|cap| {
// Capture group 1 holds the specifier without `%` and without the width digits.
let data_type = Datatype::from(cap[1].to_string());
(
cap[1].to_string(),
map_format_specifier_to_bytesize(datatype_properties, cap[1].to_string()),
data_type.clone(),
datatype_properties.get_size_from_data_type(data_type),
)
})
.collect()
}
/// Maps a given format specifier to the bytesize of its corresponding data type.
pub fn map_format_specifier_to_bytesize(
datatype_properties: &DatatypeProperties,
specifier: String,
) -> ByteSize {
if is_integer(&specifier) {
return datatype_properties.integer_size;
}
if is_float(&specifier) {
return datatype_properties.double_size;
}
if is_pointer(&specifier) {
return datatype_properties.pointer_size;
.collect();
// A format string that contains at least one long, long long or long double parameter
// is rejected as a whole with an error instead of returning a partial parameter list.
let data_type_not_yet_parsable = datatype_map.iter().any(|(data_type, _)| {
matches!(
data_type,
Datatype::Long | Datatype::LongLong | Datatype::LongDouble
)
});
if data_type_not_yet_parsable {
return Err(anyhow!(
"Data types: long, long long and long double, cannot be parsed yet."
));
}
panic!("Unknown format specifier.")
Ok(datatype_map)
}
/// Returns an argument vector of detected variable parameters if they are of type string.
......@@ -137,19 +134,16 @@ pub fn get_variable_parameters(
runtime_memory_image,
);
if let Ok(format_string) = format_string_results {
let parameters =
parse_format_string_parameters(format_string.as_str(), &project.datatype_properties);
if parameters.iter().any(|(specifier, _)| is_string(specifier)) {
if let Ok(format_string) = format_string_results.as_ref() {
if let Ok(parameters) =
parse_format_string_parameters(format_string, &project.datatype_properties)
{
return Ok(calculate_parameter_locations(
project,
parameters,
extern_symbol.get_calling_convention(project),
format_string_index,
));
}
return Ok(vec![]);
}
Err(anyhow!(
......@@ -161,8 +155,7 @@ pub fn get_variable_parameters(
/// Calculates the register and stack positions of format string parameters.
/// The parameters are then returned as an argument vector for later tainting.
pub fn calculate_parameter_locations(
project: &Project,
parameters: Vec<(String, ByteSize)>,
parameters: Vec<(Datatype, ByteSize)>,
calling_convention: &CallingConvention,
format_string_index: usize,
) -> Vec<Arg> {
......@@ -174,77 +167,65 @@ pub fn calculate_parameter_locations(
let mut float_arg_register_count = calling_convention.float_parameter_register.len();
let mut stack_offset: i64 = 0;
for (type_name, size) in parameters.iter() {
if is_integer(type_name) || is_pointer(type_name) {
if integer_arg_register_count > 0 {
if is_string(type_name) {
for (data_type, size) in parameters.iter() {
match data_type {
Datatype::Integer | Datatype::Pointer => {
if integer_arg_register_count > 0 {
let register_name = calling_convention.integer_parameter_register
[calling_convention.integer_parameter_register.len()
- integer_arg_register_count]
.clone();
var_args.push(create_string_register_arg(
project.get_pointer_bytesize(),
register_name,
));
var_args.push(create_register_arg(*size, register_name, data_type.clone()));
integer_arg_register_count -= 1;
} else {
var_args.push(create_stack_arg(*size, stack_offset, data_type.clone()));
stack_offset += u64::from(*size) as i64
}
integer_arg_register_count -= 1;
} else {
if is_string(type_name) {
var_args.push(create_string_stack_arg(*size, stack_offset));
}
Datatype::Double => {
if float_arg_register_count > 0 {
let register_name = calling_convention.float_parameter_register
[calling_convention.float_parameter_register.len()
- float_arg_register_count]
.clone();
var_args.push(create_register_arg(*size, register_name, data_type.clone()));
float_arg_register_count -= 1;
} else {
var_args.push(create_stack_arg(*size, stack_offset, data_type.clone()));
stack_offset += u64::from(*size) as i64
}
stack_offset += u64::from(*size) as i64
}
} else if float_arg_register_count > 0 {
float_arg_register_count -= 1;
} else {
stack_offset += u64::from(*size) as i64;
_ => panic!("Invalid data type specifier from format string."),
}
}
var_args
}
/// Creates a string stack parameter given a size and stack offset.
pub fn create_string_stack_arg(size: ByteSize, stack_offset: i64) -> Arg {
/// Creates a stack parameter given a size, stack offset and data type.
pub fn create_stack_arg(size: ByteSize, stack_offset: i64, data_type: Datatype) -> Arg {
Arg::Stack {
offset: stack_offset,
size,
data_type: Some(Datatype::Pointer),
data_type: Some(data_type),
}
}
/// Creates a string register parameter given a register name.
pub fn create_string_register_arg(size: ByteSize, register_name: String) -> Arg {
/// Creates a register parameter given a size, register name and data type.
pub fn create_register_arg(size: ByteSize, register_name: String, data_type: Datatype) -> Arg {
Arg::Register {
var: Variable {
name: register_name,
size,
is_temp: false,
},
data_type: Some(Datatype::Pointer),
data_type: Some(data_type),
}
}
/// Returns `true` if `specifier` is one of the integer (or character)
/// conversion characters `d`, `i`, `o`, `x`, `X`, `u`, `c` or `C`.
pub fn is_integer(specifier: &str) -> bool {
    const INTEGER_SPECIFIERS: [&str; 8] = ["d", "i", "o", "x", "X", "u", "c", "C"];
    INTEGER_SPECIFIERS.contains(&specifier)
}
/// Returns `true` if `specifier` denotes an argument that is passed as a
/// pointer, i.e. one of `s`, `S`, `n` or `p`.
pub fn is_pointer(specifier: &str) -> bool {
    match specifier {
        "s" | "S" | "n" | "p" => true,
        _ => false,
    }
}
/// Returns `true` if `specifier` is one of the floating point conversion
/// characters `f`, `F`, `e`, `E`, `a`, `A`, `g` or `G`.
pub fn is_float(specifier: &str) -> bool {
    const FLOAT_SPECIFIERS: [&str; 8] = ["f", "F", "e", "E", "a", "A", "g", "G"];
    FLOAT_SPECIFIERS
        .iter()
        .any(|&candidate| candidate == specifier)
}
/// Returns `true` if `specifier` is a string conversion, i.e. `s` or `S`.
pub fn is_string(specifier: &str) -> bool {
    specifier == "s" || specifier == "S"
}
#[cfg(test)]
mod tests;
......@@ -15,7 +15,7 @@ fn test_get_return_registers_from_symbol() {
}
#[test]
fn test_get_variable_number_parameters() {
fn test_get_variable_parameters() {
let mem_image = RuntimeMemoryImage::mock();
let mut pi_state = mock_pi_state();
let sprintf_symbol = ExternSymbol::mock_string();
......@@ -34,6 +34,17 @@ fn test_get_variable_number_parameters() {
project.calling_conventions = vec![cconv];
let mut output: Vec<Arg> = Vec::new();
output.push(Arg::Stack {
offset: 0,
size: ByteSize::new(4),
data_type: Some(Datatype::Integer),
});
output.push(Arg::Stack {
offset: 4,
size: ByteSize::new(4),
data_type: Some(Datatype::Integer),
});
assert_eq!(
output,
get_variable_parameters(
......@@ -46,11 +57,11 @@ fn test_get_variable_number_parameters() {
.unwrap()
);
output.push(Arg::Stack {
output = vec![Arg::Stack {
offset: 0,
size: ByteSize::new(8),
data_type: Some(Datatype::Pointer),
});
}];
let global_address = Bitvector::from_str_radix(16, "500c").unwrap();
pi_state.set_register(
......@@ -115,64 +126,58 @@ fn test_parse_format_string_parameters() {
"ifconfig eth0 add 3ffe:501:ffff:101:2%02x:%02xff:fe%02x:%02x%02x/64",
"/dev/sd%c%d",
"%s: Unable to open \'%s\', errno=%d\n",
"%s %lli",
];
let properties = DatatypeProperties::mock();
let expected_outputs: Vec<Vec<(String, ByteSize)>> = vec![
let expected_outputs: Vec<Vec<(Datatype, ByteSize)>> = vec![
vec![
(Datatype::from("s".to_string()), properties.pointer_size),
(Datatype::from("s".to_string()), properties.pointer_size),
(Datatype::from("s".to_string()), properties.pointer_size),
],
vec![
("s".to_string(), properties.pointer_size),
("s".to_string(), properties.pointer_size),
("s".to_string(), properties.pointer_size),
(Datatype::from("x".to_string()), properties.integer_size),
(Datatype::from("x".to_string()), properties.integer_size),
(Datatype::from("x".to_string()), properties.integer_size),
(Datatype::from("x".to_string()), properties.integer_size),
(Datatype::from("x".to_string()), properties.integer_size),
],
vec![
("x".to_string(), properties.integer_size),
("x".to_string(), properties.integer_size),
("x".to_string(), properties.integer_size),
("x".to_string(), properties.integer_size),
("x".to_string(), properties.integer_size),
(Datatype::from("c".to_string()), properties.integer_size),
(Datatype::from("d".to_string()), properties.integer_size),
],
vec![
("c".to_string(), properties.integer_size),
("d".to_string(), properties.integer_size),
(Datatype::from("s".to_string()), properties.pointer_size),
(Datatype::from("s".to_string()), properties.pointer_size),
(Datatype::from("d".to_string()), properties.integer_size),
],
vec![
("s".to_string(), properties.pointer_size),
("s".to_string(), properties.pointer_size),
("d".to_string(), properties.integer_size),
(Datatype::from("s".to_string()), properties.pointer_size),
(Datatype::from("lli".to_string()), properties.pointer_size),
],
];
for (case, output) in test_cases.into_iter().zip(expected_outputs.into_iter()) {
assert_eq!(output, parse_format_string_parameters(case, &properties));
for (index, (case, output)) in test_cases
.into_iter()
.zip(expected_outputs.into_iter())
.enumerate()
{
if index == 4 {
assert_ne!(
output,
parse_format_string_parameters(case, &properties).unwrap_or(vec![])
);
} else {
assert_eq!(
output,
parse_format_string_parameters(case, &properties).unwrap()
);
}
}
}
#[test]
fn test_map_format_specifier_to_bytesize() {
    let properties = DatatypeProperties::mock();
    // One representative specifier per category: pointer, float and integer.
    let expectations = vec![
        ("s", ByteSize::new(8)),
        ("f", ByteSize::new(8)),
        ("d", ByteSize::new(4)),
    ];
    for (specifier, expected_size) in expectations {
        assert_eq!(
            expected_size,
            map_format_specifier_to_bytesize(&properties, specifier.to_string())
        );
    }
}
#[test]
// Pin the panic message so that an unrelated panic (e.g. while building the mock)
// cannot make this test pass by accident.
#[should_panic(expected = "Unknown format specifier.")]
fn test_map_invalid_format_specifier_to_bytesize() {
    let properties = DatatypeProperties::mock();
    // "w" is accepted by none of is_integer, is_float and is_pointer.
    map_format_specifier_to_bytesize(&properties, "w".to_string());
}
#[test]
fn test_calculate_parameter_locations() {
let project = Project::mock_empty();
let cconv = CallingConvention::mock_with_parameter_registers(
vec![
"RDI".to_string(),
......@@ -183,85 +188,65 @@ fn test_calculate_parameter_locations() {
vec!["XMM0".to_string()],
);
let format_string_index: usize = 1;
let mut parameters: Vec<(String, ByteSize)> = Vec::new();
parameters.push(("d".to_string(), ByteSize::new(4)));
parameters.push(("f".to_string(), ByteSize::new(8)));
parameters.push(("s".to_string(), ByteSize::new(4)));
let mut parameters: Vec<(Datatype, ByteSize)> = Vec::new();
parameters.push(("d".to_string().into(), ByteSize::new(8)));
parameters.push(("f".to_string().into(), ByteSize::new(16)));
parameters.push(("s".to_string().into(), ByteSize::new(8)));
let mut expected_args = vec![Arg::Register {
var: Variable::mock("R9", ByteSize::new(8)),
data_type: Some(Datatype::Pointer),
}];
let mut expected_args = vec![
Arg::Register {
var: Variable::mock("R8", ByteSize::new(8)),
data_type: Some(Datatype::Integer),
},
Arg::Register {
var: Variable::mock("XMM0", ByteSize::new(16)),
data_type: Some(Datatype::Double),
},
Arg::Register {
var: Variable::mock("R9", ByteSize::new(8)),
data_type: Some(Datatype::Pointer),
},
];
// Test Case 1: The string parameter is still written in the R9 register since 'f' is contained in the float register.
assert_eq!(
expected_args,
calculate_parameter_locations(&project, parameters.clone(), &cconv, format_string_index)
calculate_parameter_locations(parameters.clone(), &cconv, format_string_index)
);
parameters.push(("s".to_string(), ByteSize::new(4)));
parameters.push(("s".to_string().into(), ByteSize::new(8)));
expected_args.push(Arg::Stack {
offset: 0,
size: ByteSize::new(4),
size: ByteSize::new(8),
data_type: Some(Datatype::Pointer),
});
// Test Case 2: A second string parameter does not fit into the registers anymore and is written into the stack.
assert_eq!(
expected_args,
calculate_parameter_locations(&project, parameters, &cconv, format_string_index)
calculate_parameter_locations(parameters, &cconv, format_string_index)
);
}
#[test]
fn test_create_string_stack_arg() {
fn test_create_stack_arg() {
assert_eq!(
Arg::Stack {
size: ByteSize::new(8),
offset: 8,
data_type: Some(Datatype::Pointer),
},
create_string_stack_arg(ByteSize::new(8), 8),
create_stack_arg(ByteSize::new(8), 8, Datatype::Pointer),
)
}
#[test]
fn test_create_string_register_arg() {
fn test_create_register_arg() {
assert_eq!(
Arg::Register {
var: Variable::mock("R9", ByteSize::new(8)),
data_type: Some(Datatype::Pointer),
},
create_string_register_arg(ByteSize::new(8), "R9".to_string()),
create_register_arg(ByteSize::new(8), "R9".to_string(), Datatype::Pointer),
);
}
#[test]
fn test_is_integer() {
    // Integer conversions are accepted ...
    for accepted in ["d", "i"].iter() {
        assert!(is_integer(accepted));
    }
    // ... while a float conversion is rejected.
    assert!(!is_integer("f"));
}
#[test]
fn test_is_pointer() {
    // All four pointer-like conversions are accepted ...
    for accepted in ["s", "S", "n", "p"].iter() {
        assert!(is_pointer(accepted));
    }
    // ... while a float conversion is rejected.
    assert!(!is_pointer("g"));
}
#[test]
fn test_is_string() {
    // Both string conversions are accepted ...
    for accepted in ["s", "S"].iter() {
        assert!(is_string(accepted));
    }
    // ... while a float conversion is rejected.
    assert!(!is_string("g"));
}
#[test]
fn test_is_float() {
    // A lower case and an upper case float conversion are accepted ...
    for accepted in ["f", "A"].iter() {
        assert!(is_float(accepted));
    }
    // ... while the string conversion is rejected.
    assert!(!is_float("s"));
}
......@@ -307,6 +307,7 @@ pub mod tests {
write_flag: true,
execute_flag: false,
},
// Contains the Hello World string at byte 3002.
MemorySegment {
bytes: [
0x01, 0x02, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment