Unverified Commit 9eafce76 by Enkelmann Committed by GitHub

Expression propagation (#185)

parent 6810c1f8
...@@ -395,97 +395,28 @@ fn specialize_conditional() { ...@@ -395,97 +395,28 @@ fn specialize_conditional() {
let mut state = State::new(&register("RSP"), Tid::new("func")); let mut state = State::new(&register("RSP"), Tid::new("func"));
state.set_register(&register("RAX"), IntervalDomain::mock(-10, 20).into()); state.set_register(&register("RAX"), IntervalDomain::mock(-10, 20).into());
let condition = Expression::Var(Variable::mock("FLAG", 1)); let condition = Expression::BinOp {
lhs: Box::new(Expression::Var(register("RAX"))),
// A complicated way of computing the result of `RAX <= 0` op: BinOpType::IntSLessEqual,
// and assigning the result to the `FLAG` register. rhs: Box::new(Expression::Const(Bitvector::zero(ByteSize::new(8).into()))),
let defs = vec![
Def::assign("def1", register("RAX"), Expression::Var(register("RAX"))),
Def::assign(
"def_that_should_be_ignored",
Variable::mock("FLAG", 1),
Expression::Const(Bitvector::from_u8(42)),
),
Def::assign(
"def2",
Variable::mock("FLAG_SLESS", 1),
Expression::BinOp {
lhs: Box::new(Expression::Var(register("RAX"))),
op: BinOpType::IntSLess,
rhs: Box::new(Expression::Const(Bitvector::from_u64(0))),
},
),
Def::assign(
"def3",
Variable::mock("FLAG_EQUAL", 1),
Expression::BinOp {
lhs: Box::new(Expression::Var(register("RAX"))),
op: BinOpType::IntEqual,
rhs: Box::new(Expression::Const(Bitvector::from_u64(0))),
},
),
Def::assign(
"def4",
Variable::mock("FLAG_NOTEQUAL", 1),
Expression::BinOp {
lhs: Box::new(Expression::Var(Variable::mock("FLAG_SLESS", 1))),
op: BinOpType::IntNotEqual,
rhs: Box::new(Expression::Const(Bitvector::from_u8(0))),
},
),
Def::assign(
"def5",
Variable::mock("FLAG", 1),
Expression::BinOp {
lhs: Box::new(Expression::Var(Variable::mock("FLAG_EQUAL", 1))),
op: BinOpType::BoolOr,
rhs: Box::new(Expression::Var(Variable::mock("FLAG_NOTEQUAL", 1))),
},
),
];
let block = Term {
tid: Tid::new("block"),
term: Blk {
defs,
jmps: Vec::new(),
indirect_jmp_targets: Vec::new(),
},
}; };
let block = Blk::mock();
let result = context let result = context
.specialize_conditional(&state, &condition, &block, false) .specialize_conditional(&state, &condition, &block, false)
.unwrap(); .unwrap();
assert_eq!( assert_eq!(
result.get_register(&Variable::mock("FLAG", 1)),
Bitvector::from_u8(0).into()
);
assert_eq!(
result.get_register(&Variable::mock("FLAG_NOTEQUAL", 1)),
Bitvector::from_u8(0).into()
);
assert_eq!(
result.get_register(&Variable::mock("FLAG_EQUAL", 1)),
Bitvector::from_u8(0).into()
);
assert_eq!(
result.get_register(&Variable::mock("FLAG_SLESS", 1)),
Bitvector::from_u8(0).into()
);
// The result is technically false, since RAX == 0 should be excluded.
// This impreciseness is due to the way that the result is calculated.
assert_eq!(
result.get_register(&register("RAX")), result.get_register(&register("RAX")),
IntervalDomain::mock(0, 20).into() IntervalDomain::mock(1, 20).into()
); );
state.set_register(&register("RAX"), IntervalDomain::mock(0, 20).into()); state.set_register(&register("RAX"), IntervalDomain::mock(0, 20).into());
let result = context let result = context
.specialize_conditional(&state, &condition, &block, false) .specialize_conditional(&state, &condition, &block, true)
.unwrap(); .unwrap();
assert_eq!( assert_eq!(
result.get_register(&register("RAX")), result.get_register(&register("RAX")),
IntervalDomain::mock_with_bounds(Some(0), 1, 20, None).into() IntervalDomain::mock_with_bounds(None, 0, 0, None).into()
); );
state.set_register(&register("RAX"), IntervalDomain::mock(-20, 0).into()); state.set_register(&register("RAX"), IntervalDomain::mock(-20, 0).into());
......
use super::*; use super::*;
use std::collections::HashSet;
impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Context<'a> { impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Context<'a> {
type Value = State; type Value = State;
...@@ -346,12 +345,12 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont ...@@ -346,12 +345,12 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
/// Update the state with the knowledge that some conditional evaluated to true or false. /// Update the state with the knowledge that some conditional evaluated to true or false.
fn specialize_conditional( fn specialize_conditional(
&self, &self,
value: &State, state: &State,
condition: &Expression, condition: &Expression,
block_before_condition: &Term<Blk>, _block_before_condition: &Term<Blk>,
is_true: bool, is_true: bool,
) -> Option<State> { ) -> Option<State> {
let mut specialized_state = value.clone(); let mut specialized_state = state.clone();
if specialized_state if specialized_state
.specialize_by_expression_result(condition, Bitvector::from_u8(is_true as u8).into()) .specialize_by_expression_result(condition, Bitvector::from_u8(is_true as u8).into())
.is_err() .is_err()
...@@ -359,40 +358,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont ...@@ -359,40 +358,6 @@ impl<'a> crate::analysis::forward_interprocedural_fixpoint::Context<'a> for Cont
// State is unsatisfiable // State is unsatisfiable
return None; return None;
} }
let mut modified_vars: HashSet<Variable> = HashSet::new();
for def in block_before_condition.term.defs.iter().rev() {
match &def.term {
Def::Store { .. } => (),
Def::Load { var, .. } => {
modified_vars.insert(var.clone());
}
Def::Assign {
var,
value: input_expr,
} => {
if !modified_vars.contains(var) {
// Register is not modified again between the `Def` and the end of the block.
modified_vars.insert(var.clone());
if input_expr
.input_vars()
.into_iter()
.find(|input_var| modified_vars.contains(input_var))
.is_none()
{
// Values of input registers did not change between the `Def` and the end of the block.
let expr_result = specialized_state.get_register(var);
if specialized_state
.specialize_by_expression_result(input_expr, expr_result.clone())
.is_err()
{
// State is unsatisfiable
return None;
}
}
}
}
}
}
Some(specialized_state) Some(specialized_state)
} }
} }
...@@ -144,21 +144,122 @@ impl Expression { ...@@ -144,21 +144,122 @@ impl Expression {
_ => (), _ => (),
} }
} else { } else {
match (&**lhs, &**rhs) { match (&**lhs, op, &**rhs) {
(Const(bitvec), other) | (other, Const(bitvec)) if bitvec.is_zero() => { (Const(bitvec), op, other) | (other, op, Const(bitvec))
if matches!(op, IntOr | IntXOr | BoolOr | BoolXOr) { if bitvec.is_zero() && matches!(op, IntOr | IntXOr | BoolOr | BoolXOr) =>
// `a or 0 = a` and `a xor 0 = a` {
*self = other.clone(); // `a or 0 = a` and `a xor 0 = a`
} *self = other.clone();
} }
(Const(bitvec), other) | (other, Const(bitvec)) (Const(bitvec), op, other) | (other, op, Const(bitvec))
if bitvec.clone().into_bitnot().is_zero() => if bitvec.clone().into_bitnot().is_zero()
&& matches!(op, IntAnd | BoolAnd) =>
{ {
if matches!(op, IntAnd | BoolAnd) { // `a and -1 = a` since all bits of -1 are 1.
// `a and -1 = a` since all bits of -1 are 1. *self = other.clone()
*self = other.clone() }
(
Const(bitvec),
op,
Expression::BinOp {
lhs: inner_lhs,
op: IntSub,
rhs: inner_rhs,
},
)
| (
Expression::BinOp {
lhs: inner_lhs,
op: IntSub,
rhs: inner_rhs,
},
op,
Const(bitvec),
) if (bitvec.is_zero() || bitvec.is_one())
&& matches!(op, IntEqual | IntNotEqual) =>
{
// `0 == x - y` is equivalent to `x == y`
let new_op = match (op, bitvec.is_zero()) {
(IntEqual, true) | (IntNotEqual, false) => IntEqual,
(IntEqual, false) | (IntNotEqual, true) => IntNotEqual,
_ => unreachable!(),
};
*self = Expression::BinOp {
lhs: inner_lhs.clone(),
op: new_op,
rhs: inner_rhs.clone(),
} }
} }
(
Expression::BinOp {
lhs: less_left,
op: IntSLess,
rhs: less_right,
},
BoolOr,
Expression::BinOp {
lhs: equal_left,
op: IntEqual,
rhs: equal_right,
},
)
| (
Expression::BinOp {
lhs: equal_left,
op: IntEqual,
rhs: equal_right,
},
BoolOr,
Expression::BinOp {
lhs: less_left,
op: IntSLess,
rhs: less_right,
},
) if (less_left == equal_left && less_right == equal_right)
|| (less_left == equal_right && less_right == equal_left) =>
{
// `x < y or x == y` is equivalent to `x <= y `
*self = Expression::BinOp {
lhs: less_left.clone(),
op: IntSLessEqual,
rhs: less_right.clone(),
};
}
(
Expression::BinOp {
lhs: less_left,
op: IntLess,
rhs: less_right,
},
BoolOr,
Expression::BinOp {
lhs: equal_left,
op: IntEqual,
rhs: equal_right,
},
)
| (
Expression::BinOp {
lhs: equal_left,
op: IntEqual,
rhs: equal_right,
},
BoolOr,
Expression::BinOp {
lhs: less_left,
op: IntLess,
rhs: less_right,
},
) if (less_left == equal_left && less_right == equal_right)
|| (less_left == equal_right && less_right == equal_left) =>
{
// `x < y or x == y` is equivalent to `x <= y `
*self = Expression::BinOp {
lhs: less_left.clone(),
op: IntLessEqual,
rhs: less_right.clone(),
};
}
_ => (), _ => (),
} }
} }
...@@ -179,6 +280,48 @@ impl Expression { ...@@ -179,6 +280,48 @@ impl Expression {
arg.substitute_trivial_operations(); arg.substitute_trivial_operations();
if *low_byte == ByteSize::new(0) && *size == arg.bytesize() { if *low_byte == ByteSize::new(0) && *size == arg.bytesize() {
*self = (**arg).clone(); *self = (**arg).clone();
} else {
match &**arg {
Expression::Cast {
arg: inner_arg,
op: CastOpType::IntZExt,
..
}
| Expression::Cast {
arg: inner_arg,
op: CastOpType::IntSExt,
..
} if *low_byte == ByteSize::new(0) && *size == inner_arg.bytesize() => {
// The zero or sign extended part is thrown away by the subpiece operation.
*self = (**inner_arg).clone();
}
Expression::BinOp {
op: BinOpType::Piece,
lhs,
rhs,
} => {
// If the subpiece extracts exactly the `lhs` or the `rhs` of the piece operation,
// we can simplify to just `lhs` or `rhs`.
if *low_byte == rhs.bytesize() && *size == lhs.bytesize() {
*self = (**lhs).clone();
} else if *low_byte == ByteSize::new(0) && *size == rhs.bytesize() {
*self = (**rhs).clone();
}
}
Expression::Subpiece {
low_byte: inner_low_byte,
size: _,
arg: inner_arg,
} => {
// Subpiece of subpiece can be simplified to a single subpiece operation.
*self = Expression::Subpiece {
low_byte: *low_byte + *inner_low_byte,
size: *size,
arg: (*inner_arg).clone(),
}
}
_ => (),
}
} }
} }
Cast { op, size, arg } => { Cast { op, size, arg } => {
...@@ -187,9 +330,73 @@ impl Expression { ...@@ -187,9 +330,73 @@ impl Expression {
&& *size == arg.bytesize() && *size == arg.bytesize()
{ {
*self = (**arg).clone(); *self = (**arg).clone();
} else if *op == CastOpType::IntSExt || *op == CastOpType::IntZExt {
match &**arg {
Expression::Cast {
op: inner_op,
size: _,
arg: inner_arg,
} if *op == *inner_op => {
// Merge two zero/sign-extension to one.
*self = Expression::Cast {
op: *op,
size: *size,
arg: inner_arg.clone(),
};
}
_ => (),
}
}
}
UnOp { op, arg } => {
arg.substitute_trivial_operations();
match &**arg {
Expression::UnOp {
op: inner_op,
arg: inner_arg,
} if op == inner_op
&& matches!(
op,
UnOpType::IntNegate | UnOpType::BoolNegate | UnOpType::Int2Comp
) =>
{
*self = (**inner_arg).clone();
}
Expression::BinOp {
lhs: inner_lhs,
op: inner_op,
rhs: inner_rhs,
} if *op == UnOpType::BoolNegate
&& matches!(
inner_op,
BinOpType::IntEqual
| BinOpType::IntNotEqual
| BinOpType::IntLess
| BinOpType::IntSLess
| BinOpType::IntLessEqual
| BinOpType::IntSLessEqual
) =>
{
// `!(x < y)` is equivalent to `y <= x`
let new_op = match inner_op {
BinOpType::IntEqual => BinOpType::IntNotEqual,
BinOpType::IntNotEqual => BinOpType::IntEqual,
BinOpType::IntLess => BinOpType::IntLessEqual,
BinOpType::IntSLess => BinOpType::IntSLessEqual,
BinOpType::IntLessEqual => BinOpType::IntLess,
BinOpType::IntSLessEqual => BinOpType::IntSLess,
_ => unreachable!(),
};
// Note that we have to swap the left hand side with the right hand side of the binary expression.
*self = Expression::BinOp {
lhs: inner_rhs.clone(),
op: new_op,
rhs: inner_lhs.clone(),
};
}
_ => (),
} }
} }
UnOp { op: _, arg } => arg.substitute_trivial_operations(),
BinOp { op: _, lhs, rhs } => { BinOp { op: _, lhs, rhs } => {
lhs.substitute_trivial_operations(); lhs.substitute_trivial_operations();
rhs.substitute_trivial_operations(); rhs.substitute_trivial_operations();
...@@ -198,6 +405,27 @@ impl Expression { ...@@ -198,6 +405,27 @@ impl Expression {
} }
} }
/// Substitute every occurence of `input_var` in `self` with the given `replace_with_expression`.
pub fn substitute_input_var(
&mut self,
input_var: &Variable,
replace_with_expression: &Expression,
) {
use Expression::*;
match self {
Const(_) | Unknown { .. } => (),
Var(var) if var == input_var => *self = replace_with_expression.clone(),
Var(_) => (),
Subpiece { arg, .. } | Cast { arg, .. } | UnOp { arg, .. } => {
arg.substitute_input_var(input_var, replace_with_expression);
}
BinOp { lhs, rhs, .. } => {
lhs.substitute_input_var(input_var, replace_with_expression);
rhs.substitute_input_var(input_var, replace_with_expression);
}
}
}
/// This function checks for sub registers in pcode instruction and casts them into /// This function checks for sub registers in pcode instruction and casts them into
/// SUBPIECE expressions with the base register as argument. It also checks whether /// SUBPIECE expressions with the base register as argument. It also checks whether
/// the given Term<Def> has a output sub register and if so, casts it into its /// the given Term<Def> has a output sub register and if so, casts it into its
......
...@@ -15,6 +15,7 @@ struct Setup<'a> { ...@@ -15,6 +15,7 @@ struct Setup<'a> {
int_sub_subpiece_expr: Expression, int_sub_subpiece_expr: Expression,
eax_variable: Expression, eax_variable: Expression,
rax_variable: Expression, rax_variable: Expression,
rcx_variable: Expression,
} }
impl<'a> Setup<'a> { impl<'a> Setup<'a> {
...@@ -99,6 +100,11 @@ impl<'a> Setup<'a> { ...@@ -99,6 +100,11 @@ impl<'a> Setup<'a> {
size: ByteSize::new(8), size: ByteSize::new(8),
is_temp: false, is_temp: false,
}), }),
rcx_variable: Expression::Var(Variable {
name: String::from("RCX"),
size: ByteSize::new(8),
is_temp: false,
}),
} }
} }
} }
...@@ -123,6 +129,122 @@ fn trivial_expression_substitution() { ...@@ -123,6 +129,122 @@ fn trivial_expression_substitution() {
}; };
expr.substitute_trivial_operations(); expr.substitute_trivial_operations();
assert_eq!(expr, setup.rax_variable); assert_eq!(expr, setup.rax_variable);
let sub_expr = Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
op: BinOpType::IntSub,
rhs: Box::new(setup.rcx_variable.clone()),
};
let mut expr = Expression::BinOp {
op: BinOpType::IntEqual,
lhs: Box::new(Expression::Const(Bitvector::zero(ByteSize::new(1).into()))),
rhs: Box::new(sub_expr.clone()),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
op: BinOpType::IntEqual,
rhs: Box::new(setup.rcx_variable.clone()),
}
);
let mut expr = Expression::BinOp {
op: BinOpType::IntNotEqual,
lhs: Box::new(sub_expr.clone()),
rhs: Box::new(Expression::Const(Bitvector::zero(ByteSize::new(1).into()))),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
op: BinOpType::IntNotEqual,
rhs: Box::new(setup.rcx_variable.clone()),
}
);
let mut expr = Expression::BinOp {
lhs: Box::new(Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
op: BinOpType::IntLess,
rhs: Box::new(setup.rcx_variable.clone()),
}),
op: BinOpType::BoolOr,
rhs: Box::new(Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
op: BinOpType::IntEqual,
rhs: Box::new(setup.rcx_variable.clone()),
}),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
op: BinOpType::IntLessEqual,
rhs: Box::new(setup.rcx_variable.clone()),
}
);
let mut expr = Expression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(Expression::Cast {
op: CastOpType::IntSExt,
size: ByteSize::new(8),
arg: Box::new(Expression::Var(Variable::mock("EAX", 4))),
}),
};
expr.substitute_trivial_operations();
assert_eq!(expr, Expression::Var(Variable::mock("EAX", 4)));
let mut expr = Expression::Subpiece {
low_byte: ByteSize::new(4),
size: ByteSize::new(4),
arg: Box::new(Expression::BinOp {
op: BinOpType::Piece,
lhs: Box::new(Expression::Var(Variable::mock("EAX", 4))),
rhs: Box::new(Expression::Var(Variable::mock("EBX", 4))),
}),
};
expr.substitute_trivial_operations();
assert_eq!(expr, Expression::Var(Variable::mock("EAX", 4)));
let mut expr = Expression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(4),
arg: Box::new(Expression::Subpiece {
low_byte: ByteSize::new(2),
size: ByteSize::new(6),
arg: Box::new(Expression::Var(Variable::mock("RAX", 8))),
}),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::Subpiece {
low_byte: ByteSize::new(2),
size: ByteSize::new(4),
arg: Box::new(Expression::Var(Variable::mock("RAX", 8))),
}
);
let mut expr = Expression::UnOp {
op: UnOpType::BoolNegate,
arg: Box::new(Expression::BinOp {
lhs: Box::new(setup.rax_variable.clone()),
op: BinOpType::IntLess,
rhs: Box::new(setup.rcx_variable.clone()),
}),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::BinOp {
lhs: Box::new(setup.rcx_variable.clone()),
op: BinOpType::IntLessEqual,
rhs: Box::new(setup.rax_variable.clone()),
}
);
} }
#[test] #[test]
......
...@@ -122,6 +122,28 @@ impl Term<Def> { ...@@ -122,6 +122,28 @@ impl Term<Def> {
_ => None, _ => None,
} }
} }
/// Replace each occurrence of `input_var` in the expressions read by this `Def`
/// with `replace_with_expression`.
///
/// The affected expressions are the value of an assignment, the address of a load
/// and both the address and the value of a store.
/// The target variable of an assignment or load is never modified.
pub fn substitute_input_var(
    &mut self,
    input_var: &Variable,
    replace_with_expression: &Expression,
) {
    // Every `Def` reads one expression; only a store reads a second one.
    let (first, second): (&mut Expression, Option<&mut Expression>) = match &mut self.term {
        Def::Assign { value, .. } => (value, None),
        Def::Load { address, .. } => (address, None),
        Def::Store { address, value } => (address, Some(value)),
    };
    first.substitute_input_var(input_var, replace_with_expression);
    if let Some(expression) = second {
        expression.substitute_input_var(input_var, replace_with_expression);
    }
}
} }
/// A `Jmp` instruction affects the control flow of a program, i.e. it may change the instruction pointer. /// A `Jmp` instruction affects the control flow of a program, i.e. it may change the instruction pointer.
...@@ -294,6 +316,125 @@ impl Term<Blk> { ...@@ -294,6 +316,125 @@ impl Term<Blk> {
Err(logs) Err(logs)
} }
} }
/// Wherever possible, substitute input variables of expressions
/// with the input expression that defines the input variable.
///
/// Note that substitution is only possible
/// if the input variables of the input expression itself did not change since the definition of said variable.
///
/// The expression propagation allows the [`Project::substitute_trivial_expressions`] normalization pass
/// to further simplify the generated expressions
/// and allows more dead stores to be removed during [dead variable elimination](`crate::analysis::dead_variable_elimination`).
pub fn propagate_input_expressions(&mut self) {
let mut insertable_expressions = Vec::new();
for def in self.term.defs.iter_mut() {
match &mut def.term {
Def::Assign {
var,
value: expression,
} => {
// insert known input expressions
for (input_var, input_expr) in insertable_expressions.iter() {
expression.substitute_input_var(input_var, input_expr);
}
// expressions dependent on the assigned variable are no longer insertable
insertable_expressions.retain(|(input_var, input_expr)| {
input_var != var && !input_expr.input_vars().into_iter().any(|x| x == var)
});
// If the value of the assigned variable does not depend on the former value of the variable,
// then it is insertable for future expressions.
if !expression.input_vars().into_iter().any(|x| x == var) {
insertable_expressions.push((var.clone(), expression.clone()));
}
}
Def::Load {
var,
address: expression,
} => {
// insert known input expressions
for (input_var, input_expr) in insertable_expressions.iter() {
expression.substitute_input_var(input_var, input_expr);
}
// expressions dependent on the assigned variable are no longer insertable
insertable_expressions.retain(|(input_var, input_expr)| {
input_var != var && !input_expr.input_vars().into_iter().any(|x| x == var)
});
}
Def::Store { address, value } => {
// insert known input expressions
for (input_var, input_expr) in insertable_expressions.iter() {
address.substitute_input_var(input_var, input_expr);
value.substitute_input_var(input_var, input_expr);
}
}
}
}
for jump in self.term.jmps.iter_mut() {
match &mut jump.term {
Jmp::Branch(_) | Jmp::Call { .. } | Jmp::CallOther { .. } => (),
Jmp::BranchInd(expr)
| Jmp::CBranch {
condition: expr, ..
}
| Jmp::CallInd { target: expr, .. }
| Jmp::Return(expr) => {
// insert known input expressions
for (input_var, input_expr) in insertable_expressions.iter() {
expr.substitute_input_var(input_var, input_expr);
}
}
}
}
}
/// Merge subsequent assignments to the same variable to a single assignment to that variable.
///
/// The value expressions of merged assignments can often be simplified later on
/// in the [`Project::substitute_trivial_expressions`] normalization pass.
pub fn merge_def_assignments_to_same_var(&mut self) {
let mut new_defs = Vec::new();
let mut last_def_opt = None;
for def in self.term.defs.iter() {
if let Def::Assign {
var: current_var, ..
} = &def.term
{
if let Some(Term {
term:
Def::Assign {
var: last_var,
value: last_value,
},
..
}) = &last_def_opt
{
if current_var == last_var {
let mut substituted_def = def.clone();
substituted_def.substitute_input_var(last_var, last_value);
last_def_opt = Some(substituted_def);
} else {
new_defs.push(last_def_opt.unwrap());
last_def_opt = Some(def.clone());
}
} else if last_def_opt.is_some() {
panic!(); // Only assign-defs should be saved in last_def.
} else {
last_def_opt = Some(def.clone());
}
} else {
if let Some(last_def) = last_def_opt {
new_defs.push(last_def);
}
new_defs.push(def.clone());
last_def_opt = None;
}
}
if let Some(last_def) = last_def_opt {
new_defs.push(last_def);
}
self.term.defs = new_defs;
}
} }
/// A `Sub` or subroutine represents a function with a given name and a list of basic blocks belonging to it. /// A `Sub` or subroutine represents a function with a given name and a list of basic blocks belonging to it.
...@@ -563,14 +704,29 @@ impl Project { ...@@ -563,14 +704,29 @@ impl Project {
log_messages log_messages
} }
/// Run expression propagation on every basic block of every subroutine of the program.
///
/// For each block, consecutive assignments to the same variable are merged first,
/// then input expressions are propagated along the remaining definitions.
/// Each block is processed on its own,
/// so nothing is propagated across basic block boundaries.
fn propagate_input_expressions(&mut self) {
    let all_blocks = self
        .program
        .term
        .subs
        .iter_mut()
        .flat_map(|sub| sub.term.blocks.iter_mut());
    for block in all_blocks {
        block.merge_def_assignments_to_same_var();
        block.propagate_input_expressions();
    }
}
/// Run some normalization passes over the project. /// Run some normalization passes over the project.
/// ///
/// Passes: /// Passes:
/// - Propagate input expressions along variable assignments.
/// - Replace trivial expressions like `a XOR a` with their result. /// - Replace trivial expressions like `a XOR a` with their result.
/// - Replace jumps to nonexisting TIDs with jumps to an artificial sink target in the CFG. /// - Replace jumps to nonexisting TIDs with jumps to an artificial sink target in the CFG.
/// - Remove dead register assignments /// - Remove dead register assignments
#[must_use] #[must_use]
pub fn normalize(&mut self) -> Vec<LogMessage> { pub fn normalize(&mut self) -> Vec<LogMessage> {
self.propagate_input_expressions();
self.substitute_trivial_expressions(); self.substitute_trivial_expressions();
let logs = self.remove_references_to_nonexisting_tids(); let logs = self.remove_references_to_nonexisting_tids();
crate::analysis::dead_variable_elimination::remove_dead_var_assignments(self); crate::analysis::dead_variable_elimination::remove_dead_var_assignments(self);
...@@ -802,4 +958,71 @@ mod tests { ...@@ -802,4 +958,71 @@ mod tests {
None None
); );
} }
/// Check the combined effect of merging consecutive assignments
/// and propagating input expressions inside a single block.
#[test]
fn expression_propagation() {
    use crate::intermediate_representation::UnOpType;

    let x = Variable::mock("X", 8);
    let y = Variable::mock("Y", 8);
    let negated_x = || Expression::var("X").un_op(UnOpType::IntNegate);
    let negated_y = || Expression::var("Y").un_op(UnOpType::IntNegate);
    let x_plus_y = || Expression::var("X").plus(Expression::var("Y"));

    let input_defs = vec![
        Def::assign("tid_1", x.clone(), negated_y()),
        Def::assign("tid_2", y.clone(), x_plus_y()),
        Def::assign("tid_3", x.clone(), negated_x()),
        Def::assign("tid_4", y.clone(), negated_y()),
        Def::assign("tid_5", y.clone(), x_plus_y()),
    ];
    let mut block = Term {
        tid: Tid::new("block"),
        term: Blk {
            defs: input_defs,
            jmps: Vec::new(),
            indirect_jmp_targets: Vec::new(),
        },
    };

    block.merge_def_assignments_to_same_var();
    block.propagate_input_expressions();

    let expected_defs = vec![
        Def::assign("tid_1", x.clone(), negated_y()),
        // The definition of `X` from `tid_1` is inserted into `tid_2`.
        Def::assign("tid_2", y.clone(), negated_y().plus(Expression::var("Y"))),
        // `tid_3` reads its own target, so nothing is inserted here.
        Def::assign("tid_3", x, negated_x()),
        // `tid_4` and `tid_5` both assign to `Y` and are merged into `tid_5`.
        Def::assign("tid_5", y, Expression::var("X").plus(negated_y())),
    ];
    assert_eq!(block.term.defs, expected_defs);
}
} }
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
int constant_system() { void constant_system() {
system("ls"); system("ls");
} }
int main(int argc, char **argv) { int main(int argc, char **argv) {
char *dest = "usr/bin/cat "; char dest[30] = "usr/bin/cat ";
strcat(dest, argv[1]); strcat(dest, argv[1]);
system(dest); system(dest);
constant_system(); constant_system();
......
...@@ -185,6 +185,8 @@ mod tests { ...@@ -185,6 +185,8 @@ mod tests {
mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason. mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_skipped(&mut tests, "x86", "clang"); // Return value detection insufficient for x86 mark_skipped(&mut tests, "x86", "clang"); // Return value detection insufficient for x86
mark_skipped(&mut tests, "arm", "clang"); // Loss of stack pointer position
mark_skipped(&mut tests, "aarch64", "clang"); // Loss of stack pointer position
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // Pointer Inference returns insufficient results for PE mark_compiler_skipped(&mut tests, "mingw32-gcc"); // Pointer Inference returns insufficient results for PE
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment