Unverified Commit 060884e9 by Enkelmann Committed by GitHub

Fix control flow for sequences of conditional assignments (#337)

parent 5621a04c
0.7-dev
====
0.6 (2022-06)
====
......
......@@ -128,7 +128,7 @@ dependencies = [
[[package]]
name = "cwe_checker"
version = "0.6.0"
version = "0.7.0-dev"
dependencies = [
"cwe_checker_lib",
"directories",
......@@ -151,7 +151,7 @@ dependencies = [
[[package]]
name = "cwe_checker_lib"
version = "0.6.0"
version = "0.7.0-dev"
dependencies = [
"anyhow",
"apint",
......
[package]
name = "cwe_checker"
version = "0.6.0"
version = "0.7.0-dev"
authors = ["Nils-Edvin Enkelmann <nils-edvin.enkelmann@fkie.fraunhofer.de>"]
edition = "2021"
......
[package]
name = "cwe_checker_lib"
version = "0.6.0"
version = "0.7.0-dev"
authors = ["Nils-Edvin Enkelmann <nils-edvin.enkelmann@fkie.fraunhofer.de>"]
edition = "2021"
......
use super::*;
use crate::utils::log::LogMessage;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
/// Contains implementation of the block duplication normalization pass.
mod block_duplication_normalization;
use crate::utils::log::LogMessage;
use block_duplication_normalization::*;
/// Contains implementation of the propagate control flow normalization pass.
mod propagate_control_flow;
use propagate_control_flow::*;
/// The `Project` struct is the main data structure representing a binary.
///
......@@ -225,7 +230,10 @@ impl Project {
/// - Duplicate blocks so that if a block is contained in several functions, each function gets its own unique copy.
/// - Propagate input expressions along variable assignments.
/// - Replace trivial expressions like `a XOR a` with their result.
/// - Remove dead register assignments
/// - Remove dead register assignments.
/// - Propagate the control flow along chains of conditionals with the same condition.
/// - Substitute bitwise `AND` and `OR` operations with the stack pointer
/// in cases where the result is known due to known stack pointer alignment.
#[must_use]
pub fn normalize(&mut self) -> Vec<LogMessage> {
let mut logs =
......@@ -234,6 +242,7 @@ impl Project {
self.propagate_input_expressions();
self.substitute_trivial_expressions();
crate::analysis::dead_variable_elimination::remove_dead_var_assignments(self);
propagate_control_flow(self);
logs.append(
crate::analysis::stack_alignment_substitution::substitute_and_on_stackpointer(self)
.unwrap_or_default()
......
use crate::analysis::graph::{Edge, Graph, Node};
use crate::intermediate_representation::*;
use petgraph::graph::NodeIndex;
use std::collections::{BTreeSet, HashMap};
/// Normalization pass that shortcuts jumps through chains of conditional blocks
/// which all test the same condition.
///
/// Sequences of conditional assignment assembly instructions are often lifted to
/// sequences of if-else blocks that share one condition,
/// so either all of the guarded blocks are executed or none of them.
/// To make this knowledge visible in the control flow graph,
/// the pass inspects the jumps at the end of every block:
///
/// - For a block ending in a single unconditional jump, the condition known to hold
///   at the end of the block (if there is one) is used to resolve the jump sequence
///   starting at the jump target.
/// - For a block ending in a conditional jump followed by an unconditional jump,
///   the condition itself is assumed for the conditional jump
///   and its negation is assumed for the fall-through jump.
///
/// Whenever the jump sequence starting at the target of a jump can be followed
/// to a different final destination under the assumed condition,
/// the jump is retargeted to that final destination.
pub fn propagate_control_flow(project: &mut Project) {
    let cfg = crate::analysis::graph::get_program_cfg(
        &project.program,
        project
            .program
            .term
            .extern_symbols
            .keys()
            .cloned()
            .collect(),
    );
    // Maps the TID of a jump to the new target that the jump should get.
    let mut jmps_to_retarget = HashMap::new();
    for node_index in cfg.node_indices() {
        let (blk, function) = match cfg[node_index] {
            Node::BlkStart(blk, function) => (blk, function),
            _ => continue,
        };
        // The condition (if any) that is known to evaluate to true at the end of the block.
        let condition_at_block_end = get_known_conditional_at_end_of_block(&cfg, node_index);
        // Collect every outgoing jump together with its current target
        // and the condition that holds whenever the jump is taken.
        let mut candidates = Vec::with_capacity(2);
        match &blk.term.jmps[..] {
            [Term {
                term: Jmp::Branch(old_target),
                tid: jump_tid,
            }] => {
                if let Some(assumed_condition) = condition_at_block_end {
                    candidates.push((jump_tid, old_target, assumed_condition));
                }
            }
            [Term {
                term:
                    Jmp::CBranch {
                        condition,
                        target: if_target,
                    },
                tid: jump_tid_if,
            }, Term {
                term: Jmp::Branch(else_target),
                tid: jump_tid_else,
            }] => {
                candidates.push((jump_tid_if, if_target, condition.clone()));
                candidates.push((
                    jump_tid_else,
                    else_target,
                    negate_condition(condition.clone()),
                ));
            }
            _ => (),
        }
        for (jump_tid, old_target, assumed_condition) in candidates {
            if let Some(final_target) =
                find_target_for_retargetable_jump(old_target, &function.term, &assumed_condition)
            {
                jmps_to_retarget.insert(jump_tid.clone(), final_target);
            }
        }
    }
    retarget_jumps(project, jmps_to_retarget);
}
/// Write the computed new targets into the corresponding jump instructions of the project.
///
/// `jmps_to_retarget` maps the TID of a jump to the TID of its new target.
/// An entry is removed from the map as soon as it has been applied to a jump.
///
/// # Panics
///
/// Panics if a jump listed in the map is neither a `Jmp::Branch` nor a `Jmp::CBranch`.
fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap<Tid, Tid>) {
    let all_jumps = project
        .program
        .term
        .subs
        .values_mut()
        .flat_map(|sub| sub.term.blocks.iter_mut())
        .flat_map(|blk| blk.term.jmps.iter_mut());
    for jump in all_jumps {
        let new_target = match jmps_to_retarget.remove(&jump.tid) {
            Some(tid) => tid,
            None => continue,
        };
        match &mut jump.term {
            Jmp::Branch(target) | Jmp::CBranch { target, .. } => *target = new_target,
            _ => panic!("Unexpected type of jump encountered."),
        }
    }
}
/// Follow the jump sequence starting at `target` to its final destination,
/// assuming that `true_condition` evaluates to `true` along the whole sequence.
///
/// Starting with the block identified by `target`, the search repeatedly looks up the current block in `sub`
/// and asks [`check_for_retargetable_block`] whether the block can be skipped,
/// i.e. whether it contains no [`Def`] instructions and its outgoing jump is predictable.
/// The search stops when the current block is not part of `sub`, when it cannot be skipped,
/// or when the next target was already visited (which would otherwise lead to an infinite loop).
///
/// Returns the TID of the last block reached if it differs from `target` and `None` otherwise.
fn find_target_for_retargetable_jump(
    target: &Tid,
    sub: &Sub,
    true_condition: &Expression,
) -> Option<Tid> {
    let mut seen = BTreeSet::from([target.clone()]);
    let mut current = target;
    loop {
        let next = sub
            .blocks
            .iter()
            .find(|blk| blk.tid == *current)
            .and_then(|blk| check_for_retargetable_block(blk, true_condition));
        match next {
            // `insert` returns `false` for already visited targets, which ends the search.
            Some(next_tid) if seen.insert(next_tid.clone()) => current = next_tid,
            _ => break,
        }
    }
    if current == target {
        None
    } else {
        Some(current.clone())
    }
}
/// Determine whether a jump to `block` can be redirected past the block
/// under the assumption that `true_condition` evaluates to `true`.
///
/// This is only possible if the block contains no [`Def`] instructions and its jumps are predictable:
/// - A single unconditional jump is always predictable and its target is returned.
/// - For a conditional jump followed by an unconditional jump
///   the target of the conditional jump is returned if its condition equals `true_condition`,
///   and the target of the unconditional jump is returned
///   if its condition equals the negation of `true_condition`.
///
/// In all other cases `None` is returned.
fn check_for_retargetable_block<'a>(
    block: &'a Term<Blk>,
    true_condition: &Expression,
) -> Option<&'a Tid> {
    let blk = &block.term;
    match blk.jmps.as_slice() {
        _ if !blk.defs.is_empty() => None,
        [only_jump] => match &only_jump.term {
            Jmp::Branch(target) => Some(target),
            _ => None,
        },
        [first_jump, second_jump] => match (&first_jump.term, &second_jump.term) {
            (Jmp::CBranch { target, condition }, Jmp::Branch(_))
                if condition == true_condition =>
            {
                Some(target)
            }
            (Jmp::CBranch { condition, .. }, Jmp::Branch(else_target))
                if *condition == negate_condition(true_condition.clone()) =>
            {
                Some(else_target)
            }
            _ => None,
        },
        _ => None,
    }
}
/// Return the condition that is known to hold when entering `node`,
/// provided that the node has exactly one incoming edge and that this edge stems from a conditional jump.
///
/// - If the edge is the conditional jump itself, its condition is returned.
/// - If the edge is an unconditional jump that is only taken when a preceding conditional jump is not taken,
///   the negated condition of the conditional jump is returned.
///
/// Returns `None` if the node has no or more than one incoming edge
/// or if the single incoming edge is of any other kind.
fn check_if_single_conditional_incoming(graph: &Graph, node: NodeIndex) -> Option<Expression> {
    let mut incoming = graph.edges_directed(node, petgraph::Direction::Incoming);
    let only_edge = incoming.next()?;
    if incoming.next().is_some() {
        // More than one incoming edge
        return None;
    }
    match only_edge.weight() {
        Edge::Jump(
            Term {
                term: Jmp::CBranch { condition, .. },
                ..
            },
            None,
        ) => Some(condition.clone()),
        Edge::Jump(
            Term {
                term: Jmp::Branch(_),
                ..
            },
            Some(Term {
                term: Jmp::CBranch { condition, .. },
                ..
            }),
        ) => Some(negate_condition(condition.clone())),
        _ => None,
    }
}
/// Return the condition that is known to evaluate to `true` at the end of the block
/// belonging to the given `BlkStart` node (if there is such a condition).
///
/// A condition is only known if the block is entered through a single edge stemming from a conditional jump
/// (see [`check_if_single_conditional_incoming`])
/// and if no `Def::Assign` or `Def::Load` in the block writes to an input variable of the condition.
/// `Def::Store` instructions are not considered to change the condition.
///
/// Returns `None` for nodes that are not `BlkStart` nodes and for the first block of a function.
fn get_known_conditional_at_end_of_block(cfg: &Graph, node: NodeIndex) -> Option<Expression> {
    let (block, sub) = match cfg[node] {
        Node::BlkStart(block, sub) => (block, sub),
        _ => return None,
    };
    if block.tid == sub.term.blocks[0].tid {
        // Function start blocks always have incoming caller edges
        // even if these edges are missing in the CFG because we do not know the callers.
        return None;
    }
    // The condition that holds at the start of the block
    let conditional = check_if_single_conditional_incoming(cfg, node)?;
    // The condition only still holds at the end of the block
    // if none of its input variables gets overwritten inside the block.
    let is_overwritten = {
        let input_vars = conditional.input_vars();
        block.term.defs.iter().any(|def| match &def.term {
            Def::Assign { var, .. } | Def::Load { var, .. } => input_vars.contains(&var),
            Def::Store { .. } => false,
        })
    };
    if is_overwritten {
        None
    } else {
        Some(conditional)
    }
}
/// Return the boolean negation of the given condition expression.
///
/// If the expression already is a `BoolNegate` operation, its inner argument is returned,
/// so that no double negations are generated.
/// Every other expression gets wrapped into a new `BoolNegate` operation.
fn negate_condition(expr: Expression) -> Expression {
    match expr {
        Expression::UnOp {
            op: UnOpType::BoolNegate,
            arg,
        } => *arg,
        other => Expression::UnOp {
            op: UnOpType::BoolNegate,
            arg: Box::new(other),
        },
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;
    use std::collections::BTreeMap;

    /// Mock a block without `Def` instructions that ends with a conditional jump to `if_target`
    /// (conditioned on the `zero_flag` variable) followed by an unconditional jump to `else_target`.
    fn mock_condition_block(name: &str, if_target: &str, else_target: &str) -> Term<Blk> {
        let conditional_jump = Term {
            tid: Tid::new(name.to_string() + "_jmp_if"),
            term: Jmp::CBranch {
                target: Tid::new(if_target),
                condition: Expression::Var(Variable::mock("zero_flag", ByteSize::new(1))),
            },
        };
        let fallthrough_jump = Term {
            tid: Tid::new(name.to_string() + "_jmp_else"),
            term: Jmp::Branch(Tid::new(else_target)),
        };
        Term {
            tid: Tid::new(name),
            term: Blk {
                defs: Vec::new(),
                jmps: vec![conditional_jump, fallthrough_jump],
                indirect_jmp_targets: Vec::new(),
            },
        }
    }

    /// Mock a block that contains the single assignment `r0 = r1`
    /// and ends with an unconditional jump to `return_target`.
    fn mock_block_with_defs(name: &str, return_target: &str) -> Term<Blk> {
        let assignment = Term {
            tid: Tid::new(name.to_string() + "_def"),
            term: Def::Assign {
                var: Variable::mock("r0", ByteSize::new(4)),
                value: Expression::Var(Variable::mock("r1", ByteSize::new(4))),
            },
        };
        let jump = Term {
            tid: Tid::new(name.to_string() + "_jmp"),
            term: Jmp::Branch(Tid::new(return_target)),
        };
        Term {
            tid: Tid::new(name),
            term: Blk {
                defs: vec![assignment],
                jmps: vec![jump],
                indirect_jmp_targets: Vec::new(),
            },
        }
    }

    /// Three conditional blocks with the same condition in a row:
    /// After the pass the else-branches must skip directly to the end block
    /// and the guarded blocks must be chained directly to each other.
    #[test]
    fn test_propagate_control_flow() {
        let sub_tid = Tid::new("sub");
        let blocks_before = vec![
            mock_condition_block("cond_blk_1", "def_blk_1", "cond_blk_2"),
            mock_block_with_defs("def_blk_1", "cond_blk_2"),
            mock_condition_block("cond_blk_2", "def_blk_2", "cond_blk_3"),
            mock_block_with_defs("def_blk_2", "cond_blk_3"),
            mock_condition_block("cond_blk_3", "def_blk_3", "end_blk"),
            mock_block_with_defs("def_blk_3", "end_blk"),
            mock_block_with_defs("end_blk", "end_blk"),
        ];
        let sub = Term {
            tid: Tid::new("sub"),
            term: Sub {
                name: "sub".to_string(),
                calling_convention: None,
                blocks: blocks_before,
            },
        };
        let mut project = Project::mock_arm32();
        project.program.term.subs = BTreeMap::from([(sub_tid.clone(), sub)]);

        propagate_control_flow(&mut project);

        let expected_blocks = vec![
            mock_condition_block("cond_blk_1", "def_blk_1", "end_blk"),
            mock_block_with_defs("def_blk_1", "def_blk_2"),
            mock_condition_block("cond_blk_2", "def_blk_2", "end_blk"),
            mock_block_with_defs("def_blk_2", "def_blk_3"),
            mock_condition_block("cond_blk_3", "def_blk_3", "end_blk"),
            mock_block_with_defs("def_blk_3", "end_blk"),
            mock_block_with_defs("end_blk", "end_blk"),
        ];
        assert_eq!(
            &project.program.term.subs[&sub_tid].term.blocks[..],
            &expected_blocks[..]
        );
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment