Unverified Commit 00f223df by Enkelmann Committed by GitHub

Fix sub-register substitution in Load instructions (#353)

parent dc561415
......@@ -124,10 +124,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
} else {
// TODO: CWE78 is disabled on a standard run for now,
// because it uses up huge amounts of RAM and computation time on some binaries.
modules = modules
.into_iter()
.filter(|module| module.name != "CWE78")
.collect();
modules.retain(|module| module.name != "CWE78");
}
let binary_file_path = PathBuf::from(args.binary.clone().unwrap());
......
use std::collections::HashMap;
use std::fmt::{self, Debug};
use super::ByteSize;
use super::Variable;
use super::{ByteSize, Def};
use crate::{pcode::RegisterProperties, prelude::*};
use crate::prelude::*;
mod builder;
mod trivial_operation_substitution;
......@@ -209,227 +208,6 @@ impl Expression {
}
}
}
/// Replace sub registers used by a pcode instruction with expressions over their base registers.
///
/// The function performs three steps:
/// 1. If `output` names a sub register (its `register` differs from its `base_register` in `register_map`),
///    the output variable is rewritten in place to the base register (name and size).
/// 2. All sub registers used as inputs of `self` are replaced by SUBPIECE expressions of the base register.
/// 3. If the output was a sub register, the expression is widened to the size of the base register:
///    - If `peeked.check_for_zero_extension(..)` returned a TID for the following instruction,
///      `self` is wrapped into an `IntZExt` cast and that TID is returned so that the caller can remove
///      the now redundant instruction.
///    - Otherwise the expression is combined with the untouched bytes of the base register using PIECE.
///
/// If the output is not a known sub register (e.g. a base register or a virtual register),
/// only step 2 is performed and `None` is returned.
///
/// # Panics
///
/// Panics if the base register of a sub register output is not contained in `register_map`.
///
/// A few examples:
/// 1. From: EAX = COPY EDX;
/// To: RAX = COPY PIECE(SUBPIECE(RAX, 4, 4), SUBPIECE(RDX, 0, 4));
///
/// 2. From: AH = AH INT_XOR AH;
/// To: RAX = PIECE(PIECE(SUBPIECE(RAX, 2, 6), (SUBPIECE(RAX, 1, 1) INT_XOR SUBPIECE(RAX, 1, 1)), SUBPIECE(RAX, 0, 1));
///
/// 3. FROM EAX = COPY EDX && RAX = INT_ZEXT EAX;
/// To: RAX = INT_ZEXT SUBPIECE(RDX, 0, 4);
pub fn cast_sub_registers_to_base_register_subpieces(
&mut self,
output: Option<&mut Variable>,
register_map: &HashMap<&String, &RegisterProperties>,
peeked: Option<&&mut Term<Def>>,
) -> Option<Tid> {
// These values stay `None` unless the output variable turns out to be a sub register.
let mut output_base_size: Option<ByteSize> = None;
let mut output_base_register: Option<&&RegisterProperties> = None;
let mut output_sub_register: Option<&RegisterProperties> = None;
let mut zero_extend_tid: Option<Tid> = None;
if let Some(output_value) = output {
if let Some(register) = register_map.get(&output_value.name) {
// Only registers that differ from their base register need to be rewritten.
if *register.register != *register.base_register {
output_sub_register = Some(register);
output_base_register = register_map.get(&register.base_register);
// Rewrite the output variable in place so that it denotes the base register.
output_value.name = register.base_register.clone();
output_value.size = output_base_register.unwrap().size;
output_base_size = Some(output_value.size);
if let Some(peek) = peeked {
// Called with the base register name and the sub register name.
// NOTE(review): presumably this yields the TID of the peeked Def if it zero-extends
// the sub register into the base register - confirm against `check_for_zero_extension`.
zero_extend_tid = peek.check_for_zero_extension(
output_value.name.clone(),
output_sub_register.unwrap().register.clone(),
);
}
}
}
}
self.replace_input_sub_register(register_map);
// Depending on the detected zero extension and the base register of the output either
// zero extend the expression, piece it together with the remaining bytes of the base register
// or do nothing (e.g. if the output is a virtual register no further action is taken).
self.piece_zero_extend_or_none(
zero_extend_tid.clone(),
output_base_register,
output_base_size,
output_sub_register,
);
zero_extend_tid
}
/// Recursively replace every sub register used inside this expression
/// by an equivalent expression over the corresponding base register.
///
/// - A plain sub register variable (or a base register that is accessed with a size smaller
///   than the register size) is turned into a SUBPIECE of the base register.
/// - If the argument of an existing SUBPIECE is a sub register, the argument is replaced by
///   the base register and the `low_byte` of the SUBPIECE is shifted by the position (`lsb`)
///   of the sub register inside the base register.
///   Without that shift `SUBPIECE(AH, 0, 1)` would become `SUBPIECE(RAX, 0, 1)`, i.e. `AL` instead of `AH`.
/// - Variables not contained in `register_map` (e.g. virtual registers) are left untouched.
///
/// # Panics
///
/// Panics if the base register of a replaced sub register is not contained in `register_map`.
pub fn replace_input_sub_register(
    &mut self,
    register_map: &HashMap<&String, &RegisterProperties>,
) {
    match self {
        Expression::BinOp { lhs, rhs, .. } => {
            lhs.replace_input_sub_register(register_map);
            rhs.replace_input_sub_register(register_map);
        }
        Expression::UnOp { arg, .. } | Expression::Cast { arg, .. } => {
            arg.replace_input_sub_register(register_map)
        }
        Expression::Subpiece { low_byte, arg, .. } => match &mut **arg {
            // The truncated data source is a register:
            // Rebase the SUBPIECE onto the base register instead of nesting two SUBPIECEs.
            Expression::Var(variable) => {
                if let Some(register) = register_map.get(&variable.name) {
                    if variable.name != *register.base_register {
                        variable.name = register.base_register.clone();
                        variable.size = register_map.get(&register.base_register).unwrap().size;
                        // The byte offset was relative to the sub register,
                        // so it has to be moved to the position of the sub register in the base register.
                        *low_byte = *low_byte + register.lsb;
                    }
                }
            }
            nested => nested.replace_input_sub_register(register_map),
        },
        Expression::Var(variable) => {
            if let Some(register) = register_map.get(&variable.name) {
                // Replace the register with a SUBPIECE if the register itself is not a base register
                // or if the expression is an implicit SUBPIECE (identifiable with `variable.size < register.size`).
                if variable.name != *register.base_register || variable.size < register.size {
                    let target_size = variable.size;
                    self.create_subpiece_from_sub_register(
                        register.base_register.clone(),
                        target_size,
                        register.lsb,
                        register_map,
                    );
                }
            }
        }
        _ => (),
    }
}
/// Overwrite `self` with a SUBPIECE expression that extracts `size` bytes,
/// starting at byte `lsb`, from the base register named `base`.
///
/// The size of the base register variable is looked up in `register_map`.
///
/// # Panics
///
/// Panics if `base` is not contained in `register_map`.
fn create_subpiece_from_sub_register(
    &mut self,
    base: String,
    size: ByteSize,
    lsb: ByteSize,
    register_map: &HashMap<&String, &RegisterProperties>,
) {
    let base_register_size = register_map.get(&base).unwrap().size;
    let base_register_var = Variable {
        name: base,
        size: base_register_size,
        is_temp: false,
    };
    *self = Expression::Subpiece {
        low_byte: lsb,
        size,
        arg: Box::new(Expression::Var(base_register_var)),
    };
}
/// Widen the current expression to the size of the overwritten base register (if there is one).
///
/// - If `zero_extend` is set, the next instruction is a zero extension of the overwritten
///   sub register, so the expression is wrapped into an `IntZExt` cast of size `output_size`.
/// - Else, if `output_base_register` is set, a sub register is overwritten and the expression
///   is pieced together with the remaining bytes of the base register.
/// - Otherwise no sub register is overwritten and the expression is left unchanged.
///
/// # Panics
///
/// Panics if `zero_extend` is set without `output_size`
/// or if `output_base_register` is set without `sub_register`.
fn piece_zero_extend_or_none(
    &mut self,
    zero_extend: Option<Tid>,
    output_base_register: Option<&&RegisterProperties>,
    output_size: Option<ByteSize>,
    sub_register: Option<&RegisterProperties>,
) {
    match (zero_extend, output_base_register) {
        (Some(_), _) => {
            let extended_size = output_size.unwrap();
            let inner_expression = Box::new(self.clone());
            *self = Expression::Cast {
                op: CastOpType::IntZExt,
                size: extended_size,
                arg: inner_expression,
            };
        }
        (None, Some(base_register)) => {
            self.piece_two_expressions_together(*base_register, sub_register.unwrap());
        }
        (None, None) => (),
    }
}
/// Combine the current expression (the new value of a sub register) with the unchanged bytes
/// of the base register, so that the result has the size of the base register.
///
/// Depending on the position of the sub register inside the base register the result is
/// - `PIECE(self, low bytes)` if the sub register covers the highest bytes,
/// - `PIECE(PIECE(high bytes, self), low bytes)` if the sub register lies in the middle,
/// - `PIECE(high bytes, self)` if the sub register starts at byte zero.
fn piece_two_expressions_together(
    &mut self,
    output_base_register: &RegisterProperties,
    sub_register: &RegisterProperties,
) {
    let base_size: ByteSize = output_base_register.size;
    let sub_size: ByteSize = sub_register.size;
    let sub_lsb: ByteSize = sub_register.lsb;
    let zero = ByteSize::new(0);
    let base_var = Box::new(Expression::Var(Variable {
        name: output_base_register.register.clone(),
        size: base_size,
        is_temp: false,
    }));
    // Build a SUBPIECE of the base register for the given byte range.
    let base_bytes = |low_byte: ByteSize, size: ByteSize| {
        Box::new(Expression::Subpiece {
            low_byte,
            size,
            arg: base_var.clone(),
        })
    };
    let new_value = Box::new(self.clone());

    let pieced = if sub_lsb > zero {
        let lower_bytes = base_bytes(zero, sub_lsb);
        if sub_lsb + sub_size == base_size {
            // PIECE(lhs: sub register, rhs: lower subpiece)
            Expression::BinOp {
                op: BinOpType::Piece,
                lhs: new_value,
                rhs: lower_bytes,
            }
        } else {
            // PIECE(lhs: PIECE(lhs: higher subpiece, rhs: sub register), rhs: lower subpiece)
            let higher_start = sub_lsb + sub_size;
            let higher_bytes = base_bytes(higher_start, base_size - higher_start);
            Expression::BinOp {
                op: BinOpType::Piece,
                lhs: Box::new(Expression::BinOp {
                    op: BinOpType::Piece,
                    lhs: higher_bytes,
                    rhs: new_value,
                }),
                rhs: lower_bytes,
            }
        }
    } else {
        // PIECE(lhs: higher subpiece, rhs: sub register)
        Expression::BinOp {
            op: BinOpType::Piece,
            lhs: base_bytes(sub_size, base_size - sub_size),
            rhs: new_value,
        }
    };
    *self = pieced;
}
}
impl fmt::Display for Expression {
......@@ -458,11 +236,7 @@ impl fmt::Display for Expression {
size,
arg,
} => {
if let (Ok(start), Ok(end)) = (u32::try_from(low_byte.0), u32::try_from(size.0)) {
write!(f, "({})[{}-{}]", arg, start, end)
} else {
write!(f, "{}[]", arg)
}
write!(f, "({})[{}-{}]", arg, low_byte.0, low_byte.0 + size.0 - 1)
}
}
}
......
use super::*;
/// Shared fixture for the sub register substitution tests.
///
/// It bundles register names, register properties and a few expressions over x86-64 registers,
/// so that the individual tests do not have to construct them by hand.
struct Setup<'a> {
// Initially empty register map; the tests clone it and insert references to the fields below.
register_map: HashMap<&'a String, &'a RegisterProperties>,
// Register names that are used as keys of the register map.
eax_name: String,
rax_name: String,
ecx_name: String,
rcx_name: String,
// Register properties that are used as values of the register map.
eax_register: RegisterProperties,
rax_register: RegisterProperties,
ecx_register: RegisterProperties,
rcx_register: RegisterProperties,
// Properties of a register named `AH` (1 byte at byte offset 1 of `RAX`).
higher_byte_register: RegisterProperties,
// The expression `EAX - ECX` using the sub registers directly.
int_sub_expr: Expression,
// The same subtraction, but with each sub register written as a 4-byte SUBPIECE of its base register.
int_sub_subpiece_expr: Expression,
// Plain register variables.
eax_variable: Expression,
rax_variable: Expression,
rcx_variable: Expression,
}
impl<'a> Setup<'a> {
fn new() -> Self {
Self {
register_map: HashMap::new(),
eax_name: String::from("EAX"),
rax_name: String::from("RAX"),
ecx_name: String::from("ECX"),
rcx_name: String::from("RCX"),
eax_register: RegisterProperties {
register: String::from("EAX"),
base_register: String::from("RAX"),
lsb: ByteSize::new(0),
size: ByteSize::new(4),
},
rax_register: RegisterProperties {
register: String::from("RAX"),
base_register: String::from("RAX"),
lsb: ByteSize::new(0),
size: ByteSize::new(8),
},
ecx_register: RegisterProperties {
register: String::from("ECX"),
base_register: String::from("RCX"),
lsb: ByteSize::new(0),
size: ByteSize::new(4),
},
rcx_register: RegisterProperties {
register: String::from("RCX"),
base_register: String::from("RCX"),
lsb: ByteSize::new(0),
size: ByteSize::new(8),
},
higher_byte_register: RegisterProperties {
register: String::from("AH"),
base_register: String::from("RAX"),
lsb: ByteSize::new(1),
size: ByteSize::new(1),
},
int_sub_expr: Expression::BinOp {
op: BinOpType::IntSub,
lhs: Box::new(Expression::Var(Variable {
name: String::from("EAX"),
size: ByteSize::new(4),
is_temp: false,
})),
rhs: Box::new(Expression::Var(Variable {
name: String::from("ECX"),
size: ByteSize::new(4),
is_temp: false,
})),
},
int_sub_subpiece_expr: Expression::BinOp {
op: BinOpType::IntSub,
lhs: Box::new(Expression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(Expression::Var(Variable {
name: String::from("RAX"),
size: ByteSize::new(8),
is_temp: false,
})),
}),
rhs: Box::new(Expression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(Expression::Var(Variable {
name: String::from("RCX"),
size: ByteSize::new(8),
is_temp: false,
})),
}),
},
eax_variable: Expression::Var(Variable {
name: String::from("EAX"),
size: ByteSize::new(4),
is_temp: false,
}),
rax_variable: Expression::Var(Variable {
name: String::from("RAX"),
size: ByteSize::new(8),
is_temp: false,
}),
rcx_variable: Expression::Var(Variable {
name: String::from("RCX"),
size: ByteSize::new(8),
is_temp: false,
}),
}
}
}
#[test]
fn trivial_expression_substitution() {
let setup = Setup::new();
let rax_variable = Expression::Var(Variable::mock("RAX", 8));
let rcx_variable = Expression::Var(Variable::mock("RCX", 8));
let mut expr = Expression::BinOp {
op: BinOpType::IntXOr,
lhs: Box::new(setup.rax_variable.clone()),
rhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
rhs: Box::new(rax_variable.clone()),
};
expr.substitute_trivial_operations();
assert_eq!(
......@@ -124,16 +16,16 @@ fn trivial_expression_substitution() {
);
let mut expr = Expression::BinOp {
op: BinOpType::IntOr,
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
rhs: Box::new(Expression::Const(Bitvector::zero(ByteSize::new(8).into()))),
};
expr.substitute_trivial_operations();
assert_eq!(expr, setup.rax_variable);
assert_eq!(expr, rax_variable);
let sub_expr = Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntSub,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
};
let mut expr = Expression::BinOp {
op: BinOpType::IntEqual,
......@@ -144,9 +36,9 @@ fn trivial_expression_substitution() {
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntEqual,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
}
);
let mut expr = Expression::BinOp {
......@@ -158,55 +50,55 @@ fn trivial_expression_substitution() {
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntNotEqual,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
}
);
// Test `x < y || x == y` substitutes to `x <= y`
let mut expr = Expression::BinOp {
lhs: Box::new(Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntLess,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
}),
op: BinOpType::BoolOr,
rhs: Box::new(Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntEqual,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
}),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntLessEqual,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
}
);
// Test `x <= y && x != y` transforms to `x < y`
let mut expr = Expression::BinOp {
lhs: Box::new(Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntSLessEqual,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
}),
op: BinOpType::BoolAnd,
rhs: Box::new(Expression::BinOp {
lhs: Box::new(setup.rcx_variable.clone()),
lhs: Box::new(rcx_variable.clone()),
op: BinOpType::IntNotEqual,
rhs: Box::new(setup.rax_variable.clone()),
rhs: Box::new(rax_variable.clone()),
}),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntSLess,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
}
);
......@@ -254,24 +146,24 @@ fn trivial_expression_substitution() {
let mut expr = Expression::UnOp {
op: UnOpType::BoolNegate,
arg: Box::new(Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntLess,
rhs: Box::new(setup.rcx_variable.clone()),
rhs: Box::new(rcx_variable.clone()),
}),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rcx_variable.clone()),
lhs: Box::new(rcx_variable.clone()),
op: BinOpType::IntLessEqual,
rhs: Box::new(setup.rax_variable.clone()),
rhs: Box::new(rax_variable.clone()),
}
);
// Test (x - const_1) - const_2 = x - (const_1 + const_2)
let mut expr = Expression::BinOp {
lhs: Box::new(Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntSub,
rhs: Box::new(Expression::Const(Bitvector::from_i64(3))),
}),
......@@ -282,7 +174,7 @@ fn trivial_expression_substitution() {
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
lhs: Box::new(rax_variable.clone()),
op: BinOpType::IntSub,
rhs: Box::new(Expression::Const(Bitvector::from_i64(7)))
}
......@@ -290,276 +182,6 @@ fn trivial_expression_substitution() {
}
/// A sub register variable gets replaced by a SUBPIECE of its base register.
#[test]
fn subpiece_creation() {
    let setup = Setup::new();
    let mut register_map = setup.register_map.clone();
    register_map.insert(&setup.eax_name, &setup.eax_register);
    register_map.insert(&setup.rax_name, &setup.rax_register);

    // Expected result: the lower 4 bytes of RAX.
    let lower_rax_half = Expression::Subpiece {
        low_byte: ByteSize::new(0),
        size: ByteSize::new(4),
        arg: Box::new(setup.rax_variable.clone()),
    };

    let mut eax = setup.eax_variable.clone();
    eax.create_subpiece_from_sub_register(
        setup.rax_name.clone(),
        ByteSize::new(4),
        ByteSize::new(0),
        &register_map,
    );
    assert_eq!(eax, lower_rax_half);
}
/// Check all three PIECE layouts that can arise when a sub register value
/// is combined with the remaining bytes of its base register.
#[test]
fn piecing_expressions_together() {
    let setup = Setup::new();
    // SUBPIECE of RAX for the given byte range.
    let rax_bytes = |low_byte: u64, size: u64| Expression::Subpiece {
        low_byte: ByteSize::new(low_byte),
        size: ByteSize::new(size),
        arg: Box::new(setup.rax_variable.clone()),
    };

    // Simple test (sub register starts at byte zero):
    // Input: EAX = INT_SUB SUBPIECE(RAX, 0, 4), SUBPIECE(RCX, 0, 4)
    // Expected Output: RAX = PIECE(SUBPIECE(RAX, 4, 4), INT_SUB SUBPIECE(RAX, 0, 4), SUBPIECE(RCX, 0, 4))
    let mut low_half_expr = setup.int_sub_subpiece_expr.clone();
    low_half_expr.piece_two_expressions_together(&setup.rax_register, &setup.eax_register);
    assert_eq!(
        low_half_expr,
        Expression::BinOp {
            op: BinOpType::Piece,
            lhs: Box::new(rax_bytes(4, 4)),
            rhs: Box::new(setup.int_sub_subpiece_expr.clone()),
        }
    );

    // More complex test (sub register lies in the middle of the base register):
    // Input: EAX = INT_SUB SUBPIECE(RAX, 1, 1), 0:1;
    // Expected Output: RAX = PIECE[ PIECE(SUBPIECE(RAX, 2, 6), INT_SUB SUBPIECE(RAX, 1, 1)), SUBPIECE(RAX, 0, 1) ]
    let byte_subtraction = Expression::BinOp {
        op: BinOpType::IntSub,
        lhs: Box::new(rax_bytes(1, 1)),
        rhs: Box::new(Expression::Const(Bitvector::zero(ByteSize::new(1).into()))),
    };
    let mut middle_byte_expr = byte_subtraction.clone();
    middle_byte_expr
        .piece_two_expressions_together(&setup.rax_register, &setup.higher_byte_register);
    assert_eq!(
        middle_byte_expr,
        Expression::BinOp {
            op: BinOpType::Piece,
            lhs: Box::new(Expression::BinOp {
                op: BinOpType::Piece,
                lhs: Box::new(rax_bytes(2, 6)),
                rhs: Box::new(byte_subtraction),
            }),
            rhs: Box::new(rax_bytes(0, 1)),
        }
    );

    // Sub register covers the highest bytes of the base register.
    let upper_rax_half = RegisterProperties {
        register: "upper_RAX_half".to_string(),
        base_register: "RAX".to_string(),
        lsb: ByteSize::new(4),
        size: ByteSize::new(4),
    };
    let constant = Expression::Const(Bitvector::from_u32(42));
    let mut high_half_expr = constant.clone();
    high_half_expr.piece_two_expressions_together(&setup.rax_register, &upper_rax_half);
    assert_eq!(
        high_half_expr,
        Expression::BinOp {
            op: BinOpType::Piece,
            lhs: Box::new(constant),
            rhs: Box::new(rax_bytes(0, 4)),
        }
    );
}
/// Check the three possible outcomes of `piece_zero_extend_or_none`:
/// zero extension, no change and piecing together.
#[test]
fn piecing_extending_or_none() {
    let setup = Setup::new();
    let base_size: Option<ByteSize> = Some(ByteSize::new(8));

    // Case 1: The next instruction is a zero extension of the current output.
    let mut zero_extended = setup.int_sub_expr.clone();
    zero_extended.piece_zero_extend_or_none(
        Some(Tid::new("zero_tid")),
        Some(&&setup.rax_register),
        base_size,
        Some(&setup.eax_register),
    );
    assert_eq!(
        zero_extended,
        Expression::Cast {
            op: CastOpType::IntZExt,
            size: ByteSize::new(8),
            arg: Box::new(setup.int_sub_expr.clone()),
        }
    );

    // Case 2: There is no output (i.e. virtual register output), so nothing changes.
    let mut untouched = setup.int_sub_expr.clone();
    untouched.piece_zero_extend_or_none(None, None, None, None);
    assert_eq!(untouched, setup.int_sub_expr);

    // Case 3: The output is a base register and the input needs to be pieced together.
    let mut pieced = setup.int_sub_subpiece_expr.clone();
    pieced.piece_zero_extend_or_none(
        None,
        Some(&&setup.rax_register),
        base_size,
        Some(&setup.eax_register),
    );
    let upper_rax_half = Expression::Subpiece {
        low_byte: ByteSize::new(4),
        size: ByteSize::new(4),
        arg: Box::new(setup.rax_variable.clone()),
    };
    assert_eq!(
        pieced,
        Expression::BinOp {
            op: BinOpType::Piece,
            lhs: Box::new(upper_rax_half),
            rhs: Box::new(setup.int_sub_subpiece_expr.clone()),
        }
    );
}
/// Input sub registers of an expression get replaced by SUBPIECEs of their base registers.
#[test]
fn sub_register_check() {
    let setup = Setup::new();
    let mut register_map = setup.register_map.clone();
    let known_registers = vec![
        (&setup.eax_name, &setup.eax_register),
        (&setup.rax_name, &setup.rax_register),
        (&setup.ecx_name, &setup.ecx_register),
        (&setup.rcx_name, &setup.rcx_register),
    ];
    for (name, properties) in known_registers {
        register_map.insert(name, properties);
    }

    let mut subtraction = setup.int_sub_expr.clone();
    subtraction.replace_input_sub_register(&register_map);
    assert_eq!(subtraction, setup.int_sub_subpiece_expr);
}
/// End-to-end test of `cast_sub_registers_to_base_register_subpieces`
/// for the combinations of output register kind (sub register, base register, virtual register)
/// and following instruction (zero extension or not).
#[test]
fn processing_sub_registers() {
let setup = Setup::new();
// Register map containing the RAX and RCX register families.
let mut register_map = setup.register_map.clone();
register_map.insert(&setup.eax_name, &setup.eax_register);
register_map.insert(&setup.rax_name, &setup.rax_register);
register_map.insert(&setup.ecx_name, &setup.ecx_register);
register_map.insert(&setup.rcx_name, &setup.rcx_register);
// Test Case: Sub register output
let out_sub = Variable {
name: setup.eax_name.clone(),
size: ByteSize::new(4),
is_temp: false,
};
// Test Case: Base register output
let mut out_base = Variable {
name: setup.rax_name.clone(),
size: ByteSize::new(8),
is_temp: false,
};
// Test Case: Virtual register output
let mut out_virtual = Variable {
name: String::from("$u560"),
size: ByteSize::new(8),
is_temp: true,
};
// Test Case: Following instruction is a zero extension (RAX = INT_ZEXT EAX)
let mut def_term_ext = Term {
tid: Tid::new("int_zext"),
term: Def::Assign {
var: out_base.clone(),
value: Expression::Cast {
op: CastOpType::IntZExt,
size: ByteSize::new(8),
arg: Box::new(setup.eax_variable.clone()),
},
},
};
// Test Case: Following instruction is not a zero extension (RAX = INT_SEXT EAX)
let mut def_term = Term {
tid: Tid::new("int_sext"),
term: Def::Assign {
var: out_base.clone(),
value: Expression::Cast {
op: CastOpType::IntSExt,
size: ByteSize::new(8),
arg: Box::new(setup.eax_variable.clone()),
},
},
};
// 1. Test: peeked is a zero extension and output is a sub register
// Expects: The inputs are replaced by SUBPIECEs and the expression is wrapped into a zero extension
let def_term_ext_pointer = &mut def_term_ext;
let mut peeked = Some(&def_term_ext_pointer);
let mut sub_reg_output = out_sub.clone();
let mut output = Some(&mut sub_reg_output);
let mut expr = setup.int_sub_expr.clone();
let mut expected_expr = Expression::Cast {
op: CastOpType::IntZExt,
size: ByteSize::new(8),
arg: Box::new(setup.int_sub_subpiece_expr.clone()),
};
expr.cast_sub_registers_to_base_register_subpieces(output, &register_map, peeked);
assert_eq!(expr, expected_expr);
// 2. Test: peeked is not a zero extension and output is a sub register
// Expects: Piece input together to get the base register size
let def_term_pointer = &mut def_term;
peeked = Some(&def_term_pointer);
expr = setup.int_sub_expr.clone();
expected_expr = Expression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(Expression::Subpiece {
low_byte: ByteSize::new(4),
size: ByteSize::new(4),
arg: Box::new(setup.rax_variable.clone()),
}),
rhs: Box::new(setup.int_sub_subpiece_expr.clone()),
};
let mut sub_reg_output = out_sub.clone();
output = Some(&mut sub_reg_output);
expr.cast_sub_registers_to_base_register_subpieces(output, &register_map, peeked);
assert_eq!(expr, expected_expr);
// 3. Test: peeked is irrelevant and output is a base register
// Expects: Only the input sub registers are replaced by SUBPIECEs
let def_term_pointer = &mut def_term;
peeked = Some(&def_term_pointer);
expr = setup.int_sub_expr.clone();
output = Some(&mut out_base);
expr.cast_sub_registers_to_base_register_subpieces(output, &register_map, peeked);
assert_eq!(expr, setup.int_sub_subpiece_expr);
// 4. Test: peeked is irrelevant and output is a virtual register
// Expects: Only the input sub registers are replaced by SUBPIECEs
let def_term_pointer = &mut def_term;
peeked = Some(&def_term_pointer);
expr = setup.int_sub_expr.clone();
output = Some(&mut out_virtual);
expr.cast_sub_registers_to_base_register_subpieces(output, &register_map, peeked);
assert_eq!(expr, setup.int_sub_subpiece_expr);
}
#[test]
fn display() {
let expr = Expression::const_from_i32(2);
let mul = Expression::BinOp {
......@@ -578,7 +200,7 @@ fn display() {
.subpiece(ByteSize(0), ByteSize(20));
assert_eq!(
"(FloatCeil(IntSExt(IntNegate((0x2:i32 + RAX:64 * RBP:64)))))[0-20]",
"(FloatCeil(IntSExt(IntNegate((0x2:i32 + RAX:64 * RBP:64)))))[0-19]",
format!("{}", expr)
);
}
......@@ -11,3 +11,4 @@ mod expressions;
pub use expressions::*;
mod term;
pub use term::*;
mod subregister_substitution;
use super::RegisterProperties;
use crate::intermediate_representation::{BinOpType, Blk, Def, Expression, Jmp, Variable};
use crate::prelude::*;
use std::collections::HashMap;
use std::iter::Peekable;
use std::ops::Deref;
/// Rewrite the given block so that no sub-register is mentioned anymore.
///
/// Every occurrence of a sub-register is replaced by an equivalent expression of the corresponding base register.
/// This removes all hidden dependencies between registers, which simplifies later analyses.
/// For example, on x86-64 mentions of the register EAX are replaced by a SUBPIECE expression of RAX.
///
/// Note that Ghidra (and thus also the cwe_checker) handles flag registers as independent 1-bit registers
/// instead of combining them to one register containing all flags.
pub fn replace_subregister_in_block(
    block: &mut Term<Blk>,
    register_map: &HashMap<&String, &RegisterProperties>,
) {
    // Jump instructions only use registers as inputs of their expressions.
    block
        .term
        .jmps
        .iter_mut()
        .for_each(|jump| replace_subregister_in_jump(jump, register_map));
    // Def instructions may also write to sub-registers, so the whole Def list gets recomputed.
    let substituted_defs =
        SubregisterSubstitutionBuilder::compute_replacement_defs_for_block(block, register_map);
    block.term.defs = substituted_defs;
}
/// A builder struct for the substitution of subregisters in a single basic block.
///
/// The builder reads the Def-terms of the block through `input_iter`
/// and collects the substituted Def-terms in `output_defs`.
///
/// For the basic workflow of the subregister substitution process see the
/// [compute_replacement_defs_for_block](SubregisterSubstitutionBuilder::compute_replacement_defs_for_block) method.
struct SubregisterSubstitutionBuilder<'a> {
/// The register map containing the necessary information which registers are subregisters
/// and what their base registers are.
register_map: &'a HashMap<&'a String, &'a RegisterProperties>,
/// The input iterator keeps track of which Def-terms are still to be substituted.
/// It is peekable, because the Def-term following the current one may get merged with the current one.
input_iter: Peekable<std::slice::Iter<'a, Term<Def>>>,
/// The output vector contains the already substituted Def-terms (in the correct order).
output_defs: Vec<Term<Def>>,
}
impl<'a> SubregisterSubstitutionBuilder<'a> {
/// Initialize a new substitution builder.
fn new(
block: &'a Term<Blk>,
register_map: &'a HashMap<&'a String, &'a RegisterProperties>,
) -> Self {
SubregisterSubstitutionBuilder {
register_map,
input_iter: block.term.defs.iter().peekable(),
output_defs: Vec::new(),
}
}
/// Iterate through all Def-terms of the given block and replace any occurence of a subregister
/// by its base register (wrapped in SUBPIECE- or PIECE- instructions to not change program semantics).
///
/// The basic workflow of the subregister substitution process for each Def-term is as follows:
/// - First replace all occurences of subregisters as inputs into expressions of the Def-term
/// by replacing them with the SUBPIECE-wrapped base register.
/// - Replace subregisters as outputs of the Def-term.
/// If the Def-term is an assignment and the next Def-term is a cast to the base register (e.g. a zero-extension)
/// then combine the two Def-terms to one.
/// If the Def-term is a load instruction then replace the output with a temporary register
/// and add a second Def-term that assigns the temporary register value to the corresponding bytes of the base register.
/// In all other cases one can assign directly to the corresponding bytes of the base register.
pub fn compute_replacement_defs_for_block(
block: &'a Term<Blk>,
register_map: &'a HashMap<&'a String, &'a RegisterProperties>,
) -> Vec<Term<Def>> {
let mut substitution_builder = Self::new(block, register_map);
while let Some(def) = substitution_builder.input_iter.next() {
substitution_builder.replace_subregister(def);
}
substitution_builder.output_defs
}
/// First replace subregisters as input into expressions of the given Def-term.
/// The replace subregisters as outputs of the Def-term.
/// The results get added to the `output_defs` array of `self`.
fn replace_subregister(&mut self, def: &Term<Def>) {
let mut def = def.clone();
match &mut def.term {
Def::Assign {
var: _,
value: expr,
}
| Def::Load {
var: _,
address: expr,
} => {
*expr = replace_input_subregister(expr.clone(), self.register_map);
}
Def::Store { address, value } => {
*address = replace_input_subregister(address.clone(), self.register_map);
*value = replace_input_subregister(value.clone(), self.register_map);
}
}
self.replace_output_subregister(def);
}
/// Replace subregisters as output variables of assign- or load-instructions.
///
/// If the next Def-term in the `input_iter` of `self` is a cast to the base register,
/// then it might get combined with the given Def-term to a single Def-term.
/// In this case the `input_iter` is advanced by one step.
///
/// For load instructions two Def-terms might get added to the `output_defs` array of `self`.
fn replace_output_subregister(&mut self, def: Term<Def>) {
match &def.term {
Def::Assign { var, value } => {
if let Some(register) = self.register_map.get(&var.name) {
if var.name != register.base_register || var.size < register.size {
// The register is not a base register and should be replaced.
if self.is_next_def_cast_to_base_register(var) {
let mut output = self.input_iter.next().unwrap().clone();
match &mut output.term {
Def::Assign {
var: _var_cast,
value: output_expr,
} => {
output_expr.substitute_input_var(var, value);
}
_ => panic!(),
}
self.output_defs.push(output);
return;
} else {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
let output_var: Variable = base_register.into();
let output_expression =
piece_base_register_assignment_expression_together(
value,
base_register,
register,
);
self.output_defs.push(Term {
tid: def.tid.clone(),
term: Def::Assign {
var: output_var,
value: output_expression,
},
});
return;
}
}
}
}
Def::Load { var, address } => {
if let Some(register) = self.register_map.get(&var.name) {
if var.name != register.base_register || var.size < register.size {
// The register is not a base register and should be replaced.
// We need two replacement defs: One is a load into a temporary register
// and the second is a cast to the base register.
let temp_reg = Variable {
name: "loaded_value".to_string(),
size: var.size,
is_temp: true,
};
self.output_defs.push(Term {
tid: def.tid.clone(),
term: Def::Load {
var: temp_reg.clone(),
address: address.clone(),
},
});
if self.is_next_def_cast_to_base_register(var) {
let mut cast_to_base_def = self.input_iter.next().unwrap().clone();
if let Def::Assign { value, .. } = &mut cast_to_base_def.term {
value.substitute_input_var(var, &Expression::Var(temp_reg));
} else {
panic!()
}
self.output_defs.push(cast_to_base_def);
} else {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
self.output_defs.push(Term {
tid: def.tid.clone().with_id_suffix("_cast_to_base"),
term: Def::Assign {
var: base_register.into(),
value: piece_base_register_assignment_expression_together(
&Expression::Var(temp_reg),
base_register,
register,
),
},
});
}
return;
}
}
}
Def::Store { .. } => (), // No output variable to replace.
}
// If we reach this point we did not need to modify the Def
self.output_defs.push(def);
}
/// Peek at the next Def-term of `self.input_iter` (without consuming it) and report whether
/// it is an assignment of the form `base_register = CAST(input_var)`,
/// where `base_register` is the base register that the sub-register `input_var` belongs to.
///
/// Returns `false` if there is no next Def, if it is not an assignment of a cast expression,
/// if the cast argument is not exactly `input_var`, if one of the involved registers
/// is unknown to the register map, or if `input_var` is itself a base register.
fn is_next_def_cast_to_base_register(&mut self, input_var: &Variable) -> bool {
    // Only an assignment whose right-hand side is a cast expression can qualify.
    let (target_var, cast_arg) = match self.input_iter.peek().map(|next_def| &next_def.term) {
        Some(Def::Assign {
            var,
            value: Expression::Cast { arg, .. },
        }) => (var, arg),
        _ => return false,
    };
    // The cast has to be applied directly to the given variable.
    if !matches!(&**cast_arg, Expression::Var(cast_var) if cast_var == input_var) {
        return false;
    }
    match (
        self.register_map.get(&target_var.name),
        self.register_map.get(&input_var.name),
    ) {
        // The input must be a proper sub-register and the assignment target its base register.
        (Some(target_reg), Some(input_reg)) => {
            input_reg.register != input_reg.base_register
                && input_reg.base_register == target_reg.register
        }
        _ => false,
    }
}
}
/// Rewrite the expression evaluated by the given jump term (if it has one)
/// so that every sub-register input is expressed as a SUBPIECE of its base register.
///
/// Jumps without an input expression (direct branches, direct calls and `CallOther`)
/// are left untouched.
fn replace_subregister_in_jump(
    jump: &mut Term<Jmp>,
    register_map: &HashMap<&String, &RegisterProperties>,
) {
    // Select the single expression that the jump evaluates, bailing out if there is none.
    let jump_expression = match &mut jump.term {
        Jmp::Branch(_) | Jmp::Call { .. } | Jmp::CallOther { .. } => return,
        Jmp::BranchInd(target) | Jmp::Return(target) => target,
        Jmp::CBranch { condition, .. } => condition,
        Jmp::CallInd { target, .. } => target,
    };
    let substituted = replace_input_subregister(jump_expression.clone(), register_map);
    *jump_expression = substituted;
}
/// Replace input subregisters of the given expression by SUBPIECEs of the corresponding base register.
/// Return the resulting expression.
pub fn replace_input_subregister(
mut expression: Expression,
register_map: &HashMap<&String, &RegisterProperties>,
) -> Expression {
let mut replacement_pairs = Vec::new();
for var in expression.input_vars() {
if let Some(register) = register_map.get(&var.name) {
if var.name != register.base_register || var.size < register.size {
// The register is not a base register and should be replaced.
let target_size = var.size;
let replacement_expr = create_subpiece_from_sub_register(
register.base_register.clone(),
target_size,
register.lsb,
register_map,
);
replacement_pairs.push((var.clone(), replacement_expr));
}
}
}
for (var, replacement_expr) in replacement_pairs {
expression.substitute_input_var(&var, &replacement_expr);
}
expression
}
/// Create the expression `SUBPIECE(base, lsb, size)`, i.e. the `size` bytes of the
/// base register named `base` starting at byte offset `lsb`.
///
/// The size of the base register variable is taken from `register_map`.
///
/// # Panics
///
/// Panics if `base` is not contained in `register_map`.
fn create_subpiece_from_sub_register(
    base: String,
    size: ByteSize,
    lsb: ByteSize,
    register_map: &HashMap<&String, &RegisterProperties>,
) -> Expression {
    // Look up the size first, so that the owned name can be moved into the variable
    // instead of being cloned.
    let base_size = register_map
        .get(&base)
        .unwrap_or_else(|| panic!("Base register {} not found in the register map.", base))
        .size;
    Expression::Subpiece {
        low_byte: lsb,
        size,
        arg: Box::new(Expression::Var(Variable {
            name: base,
            size: base_size,
            is_temp: false,
        })),
    }
}
/// Translate an assignment `sub-register = input_expression` into the value
/// that has to be assigned to the whole base register instead.
///
/// The returned expression concatenates (via PIECE) the input expression with those bytes
/// of the base register that lie above and/or below the sub-register,
/// i.e. the bytes that the sub-register assignment leaves unchanged.
fn piece_base_register_assignment_expression_together(
    input_expression: &Expression,
    output_base_register: &RegisterProperties,
    sub_register: &RegisterProperties,
) -> Expression {
    let base_size: ByteSize = output_base_register.size;
    let (sub_lsb, sub_size) = (sub_register.lsb, sub_register.size);
    let base_var = Expression::Var(Variable {
        name: output_base_register.register.clone(),
        size: base_size,
        is_temp: false,
    });
    // Builds `SUBPIECE(base register, low_byte, size)`.
    let base_slice = |low_byte: ByteSize, size: ByteSize| Expression::Subpiece {
        low_byte,
        size,
        arg: Box::new(base_var.clone()),
    };
    // Builds `PIECE(lhs, rhs)`, where `lhs` supplies the more significant bytes.
    let piece = |lhs: Expression, rhs: Expression| Expression::BinOp {
        op: BinOpType::Piece,
        lhs: Box::new(lhs),
        rhs: Box::new(rhs),
    };
    let new_value = input_expression.clone();

    let has_lower_part = sub_lsb > ByteSize::new(0);
    if !has_lower_part {
        // The sub-register starts at byte zero: PIECE(high subpiece, sub-register)
        return piece(base_slice(sub_size, base_size - sub_size), new_value);
    }
    let lower_part = base_slice(ByteSize::new(0), sub_lsb);
    let sub_end = sub_lsb + sub_size;
    if sub_end == base_size {
        // The sub-register reaches the top of the base register: PIECE(sub-register, low subpiece)
        piece(new_value, lower_part)
    } else {
        // The sub-register lies in the middle:
        // PIECE(PIECE(high subpiece, sub-register), low subpiece)
        let upper_part = base_slice(sub_end, base_size - sub_end);
        piece(piece(upper_part, new_value), lower_part)
    }
}
// The `tests` submodule is only compiled when the `test` configuration is active.
#[cfg(test)]
mod tests;
use crate::intermediate_representation::CastOpType;
use super::*;
/// Shared fixture for the tests in this file.
/// It bundles register names, register properties and expressions
/// for the registers RAX, EAX, AH, RCX and ECX as initialized by `Setup::new`.
struct Setup<'a> {
/// Register map borrowing names and properties; `Setup::new` creates it empty
/// and each test inserts the registers it needs.
register_map: HashMap<&'a String, &'a RegisterProperties>,
// Register names usable as keys of the register map.
eax_name: String,
rax_name: String,
ecx_name: String,
rcx_name: String,
ah_name: String,
// Register properties: EAX and ECX are the lowest 4 bytes of RAX resp. RCX,
// AH is the single byte at offset 1 of RAX, RAX and RCX are 8-byte base registers.
eax_register: RegisterProperties,
rax_register: RegisterProperties,
ecx_register: RegisterProperties,
rcx_register: RegisterProperties,
ah_register: RegisterProperties,
/// The expression `EAX IntSub ECX` on the 4-byte sub-registers.
int_sub_expr: Expression,
/// The same subtraction with both operands written as
/// 4-byte SUBPIECEs (starting at byte 0) of RAX and RCX.
int_sub_subpiece_expr: Expression,
/// The 4-byte variable EAX as an expression.
eax_variable: Expression,
/// The 8-byte variable RAX as an expression.
rax_variable: Expression,
}
impl<'a> Setup<'a> {
fn new() -> Self {
Self {
register_map: HashMap::new(),
eax_name: String::from("EAX"),
rax_name: String::from("RAX"),
ecx_name: String::from("ECX"),
rcx_name: String::from("RCX"),
ah_name: String::from("AH"),
eax_register: RegisterProperties {
register: String::from("EAX"),
base_register: String::from("RAX"),
lsb: ByteSize::new(0),
size: ByteSize::new(4),
},
rax_register: RegisterProperties {
register: String::from("RAX"),
base_register: String::from("RAX"),
lsb: ByteSize::new(0),
size: ByteSize::new(8),
},
ecx_register: RegisterProperties {
register: String::from("ECX"),
base_register: String::from("RCX"),
lsb: ByteSize::new(0),
size: ByteSize::new(4),
},
rcx_register: RegisterProperties {
register: String::from("RCX"),
base_register: String::from("RCX"),
lsb: ByteSize::new(0),
size: ByteSize::new(8),
},
ah_register: RegisterProperties {
register: String::from("AH"),
base_register: String::from("RAX"),
lsb: ByteSize::new(1),
size: ByteSize::new(1),
},
int_sub_expr: Expression::BinOp {
op: BinOpType::IntSub,
lhs: Box::new(Expression::Var(Variable::mock("EAX", 4))),
rhs: Box::new(Expression::Var(Variable::mock("ECX", 4))),
},
int_sub_subpiece_expr: Expression::BinOp {
op: BinOpType::IntSub,
lhs: Box::new(Expression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(Expression::Var(Variable::mock("RAX", 8))),
}),
rhs: Box::new(Expression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(Expression::Var(Variable::mock("RCX", 8))),
}),
},
eax_variable: Expression::Var(Variable::mock("EAX", 4)),
rax_variable: Expression::Var(Variable::mock("RAX", 8)),
}
}
}
/// `create_subpiece_from_sub_register` must wrap the full-sized base register variable
/// into a SUBPIECE with the requested offset and size.
#[test]
fn subpiece_creation() {
    let setup = Setup::new();
    let mut register_map = setup.register_map.clone();
    for (name, properties) in [
        (&setup.eax_name, &setup.eax_register),
        (&setup.rax_name, &setup.rax_register),
    ] {
        register_map.insert(name, properties);
    }

    // Request the lowest four bytes of RAX.
    let subpiece = create_subpiece_from_sub_register(
        setup.rax_name.clone(),
        ByteSize::new(4),
        ByteSize::new(0),
        &register_map,
    );

    assert_eq!(
        subpiece,
        Expression::Subpiece {
            low_byte: ByteSize::new(0),
            size: ByteSize::new(4),
            arg: Box::new(setup.rax_variable.clone()),
        }
    );
}
/// Sub-register inputs of an expression must be replaced by SUBPIECEs of their base registers:
/// `EAX IntSub ECX` becomes `SUBPIECE(RAX, 0, 4) IntSub SUBPIECE(RCX, 0, 4)`.
#[test]
fn sub_register_check() {
    let setup = Setup::new();
    let mut register_map = setup.register_map.clone();
    for (name, properties) in [
        (&setup.eax_name, &setup.eax_register),
        (&setup.rax_name, &setup.rax_register),
        (&setup.ecx_name, &setup.ecx_register),
        (&setup.rcx_name, &setup.rcx_register),
    ] {
        register_map.insert(name, properties);
    }

    let substituted = replace_input_subregister(setup.int_sub_expr.clone(), &register_map);

    assert_eq!(substituted, setup.int_sub_subpiece_expr);
}
/// Check `piece_base_register_assignment_expression_together` for sub-registers that lie
/// at the bottom (EAX), in the middle (AH) and at the top (upper half) of the base register RAX.
#[test]
fn piecing_expressions_together() {
    let setup = Setup::new();
    // Builds `SUBPIECE(RAX, low_byte, size)`.
    let rax_slice = |low_byte: ByteSize, size: ByteSize| Expression::Subpiece {
        low_byte,
        size,
        arg: Box::new(setup.rax_variable.clone()),
    };
    // Builds `PIECE(lhs, rhs)`.
    let piece = |lhs: Expression, rhs: Expression| Expression::BinOp {
        op: BinOpType::Piece,
        lhs: Box::new(lhs),
        rhs: Box::new(rhs),
    };

    // Sub-register at the bottom of the base register:
    // Input: EAX = INT_SUB SUBPIECE(RAX, 0, 4), SUBPIECE(RCX, 0, 4)
    // Expected: RAX = PIECE(SUBPIECE(RAX, 4, 4), INT_SUB SUBPIECE(RAX, 0, 4), SUBPIECE(RCX, 0, 4))
    let eax_value = setup.int_sub_subpiece_expr.clone();
    let expected_for_eax = piece(
        rax_slice(ByteSize::new(4), ByteSize::new(4)),
        setup.int_sub_subpiece_expr.clone(),
    );

    // Sub-register in the middle of the base register:
    // Input: AH = INT_SUB SUBPIECE(RAX, 1, 1), 0:1
    // Expected: RAX = PIECE[ PIECE(SUBPIECE(RAX, 2, 6), INT_SUB SUBPIECE(RAX, 1, 1), 0:1), SUBPIECE(RAX, 0, 1) ]
    let ah_value = Expression::BinOp {
        op: BinOpType::IntSub,
        lhs: Box::new(rax_slice(ByteSize::new(1), ByteSize::new(1))),
        rhs: Box::new(Expression::Const(Bitvector::zero(ByteSize::new(1).into()))),
    };
    let expected_for_ah = piece(
        piece(
            rax_slice(ByteSize::new(2), ByteSize::new(6)),
            ah_value.clone(),
        ),
        rax_slice(ByteSize::new(0), ByteSize::new(1)),
    );

    let pieced_eax = piece_base_register_assignment_expression_together(
        &eax_value,
        &setup.rax_register,
        &setup.eax_register,
    );
    let pieced_ah = piece_base_register_assignment_expression_together(
        &ah_value,
        &setup.rax_register,
        &setup.ah_register,
    );
    assert_eq!(pieced_eax, expected_for_eax);
    assert_eq!(pieced_ah, expected_for_ah);

    // Sub-register at the top of the base register:
    // only the lower half of RAX has to be preserved.
    let upper_rax_half = RegisterProperties {
        register: "upper_RAX_half".to_string(),
        base_register: "RAX".to_string(),
        lsb: ByteSize::new(4),
        size: ByteSize::new(4),
    };
    let upper_value = Expression::Const(Bitvector::from_u32(42));
    let expected_for_upper_half = piece(
        upper_value.clone(),
        rax_slice(ByteSize::new(0), ByteSize::new(4)),
    );
    let pieced_upper_half = piece_base_register_assignment_expression_together(
        &upper_value,
        &setup.rax_register,
        &upper_rax_half,
    );
    assert_eq!(pieced_upper_half, expected_for_upper_half);
}
/// Compare the defs of the given block with a list of expected format strings.
///
/// Each def is rendered as `"<tid>: <def>"` and compared to the expected string at the same index.
/// Returns `true` only if the number of defs matches and all rendered strings are equal.
/// On failure a short diagnostic for the first difference is printed to stdout.
fn check_defs_of_block(block: &Term<Blk>, expected: Vec<&str>) -> bool {
    let defs = &block.term.defs;
    if defs.len() != expected.len() {
        println!(
            "lengths do not match: {} != {}",
            defs.len(),
            expected.len()
        );
        return false;
    }
    // Lazily render the defs and stop at the first one that differs from its expectation.
    let first_mismatch = defs
        .iter()
        .map(|def| format!("{}: {}", def.tid, def.term))
        .zip(expected.iter())
        .find(|(given, wanted)| given.as_str() != **wanted);
    match first_mismatch {
        Some((given, wanted)) => {
            println!("Def does not match:");
            println!(" given: {}", given);
            println!("expected: {}", wanted);
            false
        }
        None => true,
    }
}
/// Check the sub-register substitution on whole blocks:
/// assignments and loads to sub-registers, each followed by a zero extension
/// either to the corresponding base register, to another sub-register or to an unrelated register.
#[test]
fn piecing_or_zero_extending() {
    let setup = Setup::new();
    let mut register_map = setup.register_map.clone();
    for (name, properties) in [
        (&setup.eax_name, &setup.eax_register),
        (&setup.rax_name, &setup.rax_register),
        (&setup.ecx_name, &setup.ecx_register),
        (&setup.rcx_name, &setup.rcx_register),
        (&setup.ah_name, &setup.ah_register),
    ] {
        register_map.insert(name, properties);
    }

    // Small builders for the Def-terms used in the scenarios below.
    let assign = |tid: Tid, var: Variable, value: Expression| Term {
        tid,
        term: Def::Assign { var, value },
    };
    let zero_extend = |source: Expression| Expression::cast(source, CastOpType::IntZExt);
    let ah_variable = || Expression::Var(Variable::mock("AH", 1));

    let eax_assign = assign(
        Tid::new("eax_assign"),
        Variable::mock("EAX", 4),
        Expression::const_from_i32(0),
    );
    let load_to_eax = Term {
        tid: Tid::new("load_to_eax"),
        term: Def::Load {
            var: Variable::mock("EAX", 4),
            address: Expression::const_from_i64(0),
        },
    };
    let ah_assign = assign(
        Tid::new("ah_assign"),
        Variable::mock("AH", 1),
        Expression::Const(Bitvector::from_i8(0)),
    );
    let zext_eax_to_rax = assign(
        Tid::new("zext_eax_to_rax"),
        Variable::mock("RAX", 8),
        zero_extend(setup.eax_variable.clone()),
    );
    let zext_ah_to_eax = assign(
        Tid::new("zext_ah_to_eax"),
        Variable::mock("EAX", 4),
        zero_extend(ah_variable()),
    );
    let zext_ah_to_rax = assign(
        Tid::new("zext_ah_to_rax"),
        Variable::mock("RAX", 8),
        zero_extend(ah_variable()),
    );
    let zext_eax_to_rcx = assign(
        Tid::new("zext_eax_to_rcx"),
        Variable::mock("RCX", 8),
        zero_extend(setup.eax_variable.clone()),
    );

    // Runs the substitution on a block made of `defs` and checks the resulting defs.
    let assert_substitution = |defs: Vec<Term<Def>>, expected: Vec<&str>| {
        let mut block = Term {
            tid: Tid::new("block"),
            term: Blk {
                defs,
                jmps: Vec::new(),
                indirect_jmp_targets: Vec::new(),
            },
        };
        replace_subregister_in_block(&mut block, &register_map);
        assert!(check_defs_of_block(&block, expected));
    };

    // The next instruction is a zero extension to the base register.
    assert_substitution(
        vec![eax_assign.clone(), zext_eax_to_rax.clone()],
        vec!["zext_eax_to_rax: RAX:64 = IntZExt(0x0:i32)"],
    );
    // The zero extension to the base register must still be recognized
    // if the sub-register does not start at byte zero of the base register.
    assert_substitution(
        vec![ah_assign.clone(), zext_ah_to_rax],
        vec!["zext_ah_to_rax: RAX:64 = IntZExt(0x0:i8)"],
    );
    // The next instruction is a zero extension to a different register.
    assert_substitution(
        vec![eax_assign, zext_eax_to_rcx.clone()],
        vec![
            "eax_assign: RAX:64 = ((RAX:64)[4-7] Piece 0x0:i32)",
            "zext_eax_to_rcx: RCX:64 = IntZExt((RAX:64)[0-3])",
        ],
    );
    // The target of the zero extension is a sub-register itself.
    assert_substitution(
        vec![ah_assign, zext_ah_to_eax],
        vec![
            "ah_assign: RAX:64 = (((RAX:64)[2-7] Piece 0x0:i8) Piece (RAX:64)[0-0])",
            "zext_ah_to_eax: RAX:64 = ((RAX:64)[4-7] Piece IntZExt((RAX:64)[1-1]))",
        ],
    );
    // Load to a sub-register followed by a zero extension to the base register.
    assert_substitution(
        vec![load_to_eax.clone(), zext_eax_to_rax],
        vec![
            "load_to_eax: loaded_value:32(temp) := Load from 0x0:i64",
            "zext_eax_to_rax: RAX:64 = IntZExt(loaded_value:32(temp))",
        ],
    );
    // Load to a sub-register that is not followed by a zero extension to the base register.
    assert_substitution(
        vec![load_to_eax, zext_eax_to_rcx],
        vec![
            "load_to_eax: loaded_value:32(temp) := Load from 0x0:i64",
            "load_to_eax_cast_to_base: RAX:64 = ((RAX:64)[4-7] Piece loaded_value:32(temp))",
            "zext_eax_to_rcx: RCX:64 = IntZExt((RAX:64)[0-3])",
        ],
    );
}
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{BTreeSet, HashMap};
use std::usize;
use super::subregister_substitution::replace_input_subregister;
use super::{Expression, ExpressionType, RegisterProperties, Variable};
use crate::intermediate_representation::Arg as IrArg;
use crate::intermediate_representation::Blk as IrBlk;
......@@ -678,7 +679,7 @@ impl CallingConvention {
.map(|register_name| {
let reg = register_map.get(&register_name).cloned().unwrap();
let mut expression = IrExpression::Var(reg.into());
expression.replace_input_sub_register(register_map);
expression = replace_input_subregister(expression, register_map);
expression
})
.collect()
......@@ -745,104 +746,23 @@ impl Project {
&self.cpu_architecture,
),
};
let mut zero_extend_tids: HashSet<Tid> = HashSet::new();
// iterates over definitions and checks whether sub registers are used
// if so, they are swapped with subpieces of base registers
for sub in program.term.subs.values_mut() {
for blk in sub.term.blocks.iter_mut() {
let mut def_iter = blk.term.defs.iter_mut().peekable();
while let Some(def) = def_iter.next() {
let peeked_def = def_iter.peek();
match &mut def.term {
IrDef::Assign { var, value } => {
if let Some(zero_tid) = value
.cast_sub_registers_to_base_register_subpieces(
Some(var),
&register_map,
peeked_def,
)
{
zero_extend_tids.insert(zero_tid);
}
}
IrDef::Load { var, address } => {
if let Some(zero_tid) = address
.cast_sub_registers_to_base_register_subpieces(
Some(var),
&register_map,
peeked_def,
)
{
zero_extend_tids.insert(zero_tid);
}
}
IrDef::Store { address, value } => {
address.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
peeked_def,
);
value.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
peeked_def,
);
}
}
}
for jmp in blk.term.jmps.iter_mut() {
match &mut jmp.term {
IrJmp::BranchInd(dest) => {
dest.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
None,
);
}
IrJmp::CBranch { condition, .. } => {
condition.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
None,
);
}
IrJmp::CallInd { target, .. } => {
target.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
None,
);
}
IrJmp::Return(dest) => {
dest.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
None,
);
}
_ => (),
}
}
// Remove all tagged zero extension instruction that came after a sub register instruction
// since it has been wrapped around the former instruction.
blk.term.defs.retain(|def| {
if zero_extend_tids.contains(&def.tid) {
return false;
}
true
});
super::subregister_substitution::replace_subregister_in_block(blk, &register_map);
}
}
// Iterate over symbol arguments and replace used sub-registers
for symbol in program.term.extern_symbols.values_mut() {
for arg in symbol.parameters.iter_mut() {
if let IrArg::Register { expr, .. } = arg {
expr.replace_input_sub_register(&register_map);
*expr = replace_input_subregister(expr.clone(), &register_map);
}
}
for arg in symbol.return_values.iter_mut() {
if let IrArg::Register { expr, .. } = arg {
expr.replace_input_sub_register(&register_map);
*expr = replace_input_subregister(expr.clone(), &register_map);
}
}
}
......
use super::*;
use crate::intermediate_representation::{BinOpType, CastOpType, Variable as IrVariable};
use crate::intermediate_representation::Variable as IrVariable;
struct Setup {
project: Project,
......@@ -752,11 +752,6 @@ fn from_project_to_ir_project() {
mock_project.program.term.subs.push(sub.clone());
let ir_program = mock_project.into_ir_project(10000).program.term;
let ir_rdi_var = IrVariable {
name: String::from("RDI"),
size: ByteSize::new(8),
is_temp: false,
};
let ir_rax_var = IrVariable {
name: String::from("RAX"),
size: ByteSize::new(8),
......@@ -764,116 +759,18 @@ fn from_project_to_ir_project() {
};
// From: EDI = LOAD EDI
// To: RDI = PIECE(SUBPIECE(RDI, 4, 4), (LOAD SUBPIECE(RDI, 0, 4)))
let expected_def_0 = IrDef::Load {
var: ir_rdi_var.clone(),
address: IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(4),
size: ByteSize::new(4),
arg: Box::new(IrExpression::Var(ir_rdi_var.clone())),
}),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(IrExpression::Var(ir_rdi_var.clone())),
}),
},
};
// From: AH = AH INT_XOR AH
// To: RAX = PIECE(PIECE(SUBPIECE(RAX, 2, 6), (SUBPIECE(RAX, 1, 1) INT_XOR SUBPIECE(RAX, 1, 1))), SUBPIECE(RAX, 0, 1))
let expected_def_1 = IrDef::Assign {
var: ir_rax_var.clone(),
value: IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(2),
size: ByteSize::new(6),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
rhs: Box::new(IrExpression::BinOp {
op: BinOpType::IntXOr,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(1),
size: ByteSize::new(1),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(1),
size: ByteSize::new(1),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
}),
}),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(1),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
},
};
// From: EAX = COPY EDI
// RAX = INT_ZEXT EAX
// From: EAX = PIECE(0:2, AX)
// From: AX = SUBPIECE(EDI, 1, 2)
// From: EAX = COPY EDI && RAX = INT_ZEXT EAX
// To: Temp = PIECE(SUBPIECE(RDI, 4, 4), (LOAD SUBPIECE(RDI, 0, 4)))
// RDI = PIECE(SUBPIECE(RAX, 4, 4), Temp)
// To: RAX = PIECE(PIECE(SUBPIECE(RAX, 2, 6), (SUBPIECE(RAX, 1, 1) INT_XOR SUBPIECE(RAX, 1, 1))), SUBPIECE(RAX, 0, 1))
// To: RAX = INT_ZEXT SUBPIECE(RDI, 0, 4)
let expected_def_3 = IrDef::Assign {
var: ir_rax_var.clone(),
value: IrExpression::Cast {
op: CastOpType::IntZExt,
size: ByteSize::new(8),
arg: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(IrExpression::Var(ir_rdi_var.clone())),
}),
},
};
// From: EAX = PIECE(0:2, AX)
// To: RAX = PIECE(SUBPIECE(RAX, 4, 4), PIECE(0:2, SUBPIECE(RAX, 0, 2)))
let expected_def_4 = IrDef::Assign {
var: ir_rax_var.clone(),
value: IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(4),
size: ByteSize::new(4),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
rhs: Box::new(IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Const(Bitvector::zero(
ByteSize::new(2).into(),
))),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(2),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
}),
},
};
// From: AX = SUBPIECE(EDI, 1, 2)
// To: RAX = PIECE(SUBPIECE(RAX, 2, 6), SUBPIECE(RDI, 1, 2))
let expected_def_5 = IrDef::Assign {
var: ir_rax_var.clone(),
value: IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(2),
size: ByteSize::new(6),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(1),
size: ByteSize::new(2),
arg: Box::new(IrExpression::Var(ir_rdi_var.clone())),
}),
},
};
let mut target_tid = Tid::new("blk_00102016");
target_tid.address = String::from("00102016");
......@@ -889,15 +786,34 @@ fn from_project_to_ir_project() {
return_: Some(target_tid.clone()),
};
// Checks whether the zero extension was correctly removed; leaving only 5 definitions behind.
let ir_block = &ir_program.subs.get(&sub_tid).unwrap().term.blocks[0].term;
assert_eq!(ir_block.defs.len(), 5);
assert_eq!(ir_block.defs.len(), 6);
// Checks if the other definitions and the jump were correctly casted.
assert_eq!(ir_block.defs[0].term, expected_def_0);
assert_eq!(ir_block.defs[1].term, expected_def_1);
assert_eq!(ir_block.defs[2].term, expected_def_3);
assert_eq!(ir_block.defs[3].term, expected_def_4);
assert_eq!(ir_block.defs[4].term, expected_def_5);
assert_eq!(
format!("{}", ir_block.defs[0].term),
"loaded_value:32(temp) := Load from (RDI:64)[0-3]".to_string()
);
assert_eq!(
format!("{}", ir_block.defs[1].term),
"RDI:64 = ((RDI:64)[4-7] Piece loaded_value:32(temp))".to_string()
);
assert_eq!(
format!("{}", ir_block.defs[2].term),
"RAX:64 = (((RAX:64)[2-7] Piece ((RAX:64)[1-1] ^ (RAX:64)[1-1])) Piece (RAX:64)[0-0])"
.to_string()
);
assert_eq!(
format!("{}", ir_block.defs[3].term),
"RAX:64 = IntZExt((RDI:64)[0-3])".to_string()
);
assert_eq!(
format!("{}", ir_block.defs[4].term),
"RAX:64 = ((RAX:64)[4-7] Piece (0x0:i16 Piece (RAX:64)[0-1]))".to_string()
);
assert_eq!(
format!("{}", ir_block.defs[5].term),
"RAX:64 = ((RAX:64)[2-7] Piece ((RDI:64)[0-3])[1-2])".to_string()
);
assert_eq!(ir_block.jmps[0].term, expected_jmp);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment