Unverified Commit 00f223df by Enkelmann Committed by GitHub

Fix sub-register substitution in Load instructions (#353)

parent dc561415
......@@ -124,10 +124,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
} else {
// TODO: CWE78 is disabled on a standard run for now,
// because it uses up huge amounts of RAM and computation time on some binaries.
modules = modules
.into_iter()
.filter(|module| module.name != "CWE78")
.collect();
modules.retain(|module| module.name != "CWE78");
}
let binary_file_path = PathBuf::from(args.binary.clone().unwrap());
......
......@@ -11,3 +11,4 @@ mod expressions;
pub use expressions::*;
mod term;
pub use term::*;
mod subregister_substitution;
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{BTreeSet, HashMap};
use std::usize;
use super::subregister_substitution::replace_input_subregister;
use super::{Expression, ExpressionType, RegisterProperties, Variable};
use crate::intermediate_representation::Arg as IrArg;
use crate::intermediate_representation::Blk as IrBlk;
......@@ -678,7 +679,7 @@ impl CallingConvention {
.map(|register_name| {
let reg = register_map.get(&register_name).cloned().unwrap();
let mut expression = IrExpression::Var(reg.into());
expression.replace_input_sub_register(register_map);
expression = replace_input_subregister(expression, register_map);
expression
})
.collect()
......@@ -745,104 +746,23 @@ impl Project {
&self.cpu_architecture,
),
};
let mut zero_extend_tids: HashSet<Tid> = HashSet::new();
// iterates over definitions and checks whether sub registers are used
// if so, they are swapped with subpieces of base registers
for sub in program.term.subs.values_mut() {
for blk in sub.term.blocks.iter_mut() {
let mut def_iter = blk.term.defs.iter_mut().peekable();
while let Some(def) = def_iter.next() {
let peeked_def = def_iter.peek();
match &mut def.term {
IrDef::Assign { var, value } => {
if let Some(zero_tid) = value
.cast_sub_registers_to_base_register_subpieces(
Some(var),
&register_map,
peeked_def,
)
{
zero_extend_tids.insert(zero_tid);
}
}
IrDef::Load { var, address } => {
if let Some(zero_tid) = address
.cast_sub_registers_to_base_register_subpieces(
Some(var),
&register_map,
peeked_def,
)
{
zero_extend_tids.insert(zero_tid);
}
}
IrDef::Store { address, value } => {
address.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
peeked_def,
);
value.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
peeked_def,
);
}
}
}
for jmp in blk.term.jmps.iter_mut() {
match &mut jmp.term {
IrJmp::BranchInd(dest) => {
dest.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
None,
);
}
IrJmp::CBranch { condition, .. } => {
condition.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
None,
);
}
IrJmp::CallInd { target, .. } => {
target.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
None,
);
}
IrJmp::Return(dest) => {
dest.cast_sub_registers_to_base_register_subpieces(
None,
&register_map,
None,
);
}
_ => (),
}
}
// Remove all tagged zero extension instruction that came after a sub register instruction
// since it has been wrapped around the former instruction.
blk.term.defs.retain(|def| {
if zero_extend_tids.contains(&def.tid) {
return false;
}
true
});
super::subregister_substitution::replace_subregister_in_block(blk, &register_map);
}
}
// Iterate over symbol arguments and replace used sub-registers
for symbol in program.term.extern_symbols.values_mut() {
for arg in symbol.parameters.iter_mut() {
if let IrArg::Register { expr, .. } = arg {
expr.replace_input_sub_register(&register_map);
*expr = replace_input_subregister(expr.clone(), &register_map);
}
}
for arg in symbol.return_values.iter_mut() {
if let IrArg::Register { expr, .. } = arg {
expr.replace_input_sub_register(&register_map);
*expr = replace_input_subregister(expr.clone(), &register_map);
}
}
}
......
use super::*;
use crate::intermediate_representation::{BinOpType, CastOpType, Variable as IrVariable};
use crate::intermediate_representation::Variable as IrVariable;
struct Setup {
project: Project,
......@@ -752,11 +752,6 @@ fn from_project_to_ir_project() {
mock_project.program.term.subs.push(sub.clone());
let ir_program = mock_project.into_ir_project(10000).program.term;
let ir_rdi_var = IrVariable {
name: String::from("RDI"),
size: ByteSize::new(8),
is_temp: false,
};
let ir_rax_var = IrVariable {
name: String::from("RAX"),
size: ByteSize::new(8),
......@@ -764,116 +759,18 @@ fn from_project_to_ir_project() {
};
// From: EDI = LOAD EDI
// To: RDI = PIECE(SUBPIECE(RDI, 4, 4), (LOAD SUBPIECE(RDI, 0, 4)))
let expected_def_0 = IrDef::Load {
var: ir_rdi_var.clone(),
address: IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(4),
size: ByteSize::new(4),
arg: Box::new(IrExpression::Var(ir_rdi_var.clone())),
}),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(IrExpression::Var(ir_rdi_var.clone())),
}),
},
};
// From: AH = AH INT_XOR AH
// To: RAX = PIECE(PIECE(SUBPIECE(RAX, 2, 6), (SUBPIECE(RAX, 1, 1) INT_XOR SUBPIECE(RAX, 1, 1))), SUBPIECE(RAX, 0, 1))
let expected_def_1 = IrDef::Assign {
var: ir_rax_var.clone(),
value: IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(2),
size: ByteSize::new(6),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
rhs: Box::new(IrExpression::BinOp {
op: BinOpType::IntXOr,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(1),
size: ByteSize::new(1),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(1),
size: ByteSize::new(1),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
}),
}),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(1),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
},
};
// From: EAX = COPY EDI
// RAX = INT_ZEXT EAX
// From: EAX = PIECE(0:2, AX)
// From: AX = SUBPIECE(EDI, 1, 2)
// From: EAX = COPY EDI && RAX = INT_ZEXT EAX
// To: Temp = PIECE(SUBPIECE(RDI, 4, 4), (LOAD SUBPIECE(RDI, 0, 4)))
// RDI = PIECE(SUBPIECE(RAX, 4, 4), Temp)
// To: RAX = PIECE(PIECE(SUBPIECE(RAX, 2, 6), (SUBPIECE(RAX, 1, 1) INT_XOR SUBPIECE(RAX, 1, 1))), SUBPIECE(RAX, 0, 1))
// To: RAX = INT_ZEXT SUBPIECE(RDI, 0, 4)
let expected_def_3 = IrDef::Assign {
var: ir_rax_var.clone(),
value: IrExpression::Cast {
op: CastOpType::IntZExt,
size: ByteSize::new(8),
arg: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(IrExpression::Var(ir_rdi_var.clone())),
}),
},
};
// From: EAX = PIECE(0:2, AX)
// To: RAX = PIECE(SUBPIECE(RAX, 4, 4), PIECE(0:2, SUBPIECE(RAX, 0, 2)))
let expected_def_4 = IrDef::Assign {
var: ir_rax_var.clone(),
value: IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(4),
size: ByteSize::new(4),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
rhs: Box::new(IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Const(Bitvector::zero(
ByteSize::new(2).into(),
))),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(2),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
}),
},
};
// From: AX = SUBPIECE(EDI, 1, 2)
// To: RAX = PIECE(SUBPIECE(RAX, 2, 6), SUBPIECE(RDI, 1, 2))
let expected_def_5 = IrDef::Assign {
var: ir_rax_var.clone(),
value: IrExpression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(2),
size: ByteSize::new(6),
arg: Box::new(IrExpression::Var(ir_rax_var.clone())),
}),
rhs: Box::new(IrExpression::Subpiece {
low_byte: ByteSize::new(1),
size: ByteSize::new(2),
arg: Box::new(IrExpression::Var(ir_rdi_var.clone())),
}),
},
};
let mut target_tid = Tid::new("blk_00102016");
target_tid.address = String::from("00102016");
......@@ -889,15 +786,34 @@ fn from_project_to_ir_project() {
return_: Some(target_tid.clone()),
};
// Checks that the zero extension was merged into the preceding assignment and that the load was split into two definitions, leaving 6 definitions behind.
let ir_block = &ir_program.subs.get(&sub_tid).unwrap().term.blocks[0].term;
assert_eq!(ir_block.defs.len(), 5);
assert_eq!(ir_block.defs.len(), 6);
// Checks if the other definitions and the jump were correctly cast.
assert_eq!(ir_block.defs[0].term, expected_def_0);
assert_eq!(ir_block.defs[1].term, expected_def_1);
assert_eq!(ir_block.defs[2].term, expected_def_3);
assert_eq!(ir_block.defs[3].term, expected_def_4);
assert_eq!(ir_block.defs[4].term, expected_def_5);
assert_eq!(
format!("{}", ir_block.defs[0].term),
"loaded_value:32(temp) := Load from (RDI:64)[0-3]".to_string()
);
assert_eq!(
format!("{}", ir_block.defs[1].term),
"RDI:64 = ((RDI:64)[4-7] Piece loaded_value:32(temp))".to_string()
);
assert_eq!(
format!("{}", ir_block.defs[2].term),
"RAX:64 = (((RAX:64)[2-7] Piece ((RAX:64)[1-1] ^ (RAX:64)[1-1])) Piece (RAX:64)[0-0])"
.to_string()
);
assert_eq!(
format!("{}", ir_block.defs[3].term),
"RAX:64 = IntZExt((RDI:64)[0-3])".to_string()
);
assert_eq!(
format!("{}", ir_block.defs[4].term),
"RAX:64 = ((RAX:64)[4-7] Piece (0x0:i16 Piece (RAX:64)[0-1]))".to_string()
);
assert_eq!(
format!("{}", ir_block.defs[5].term),
"RAX:64 = ((RAX:64)[2-7] Piece ((RDI:64)[0-3])[1-2])".to_string()
);
assert_eq!(ir_block.jmps[0].term, expected_jmp);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment