Fix control flow for sequences of conditional assignments (#337)

060884e9 · Enkelmann · GitHub · 5621a04c · 060884e9 · 060884e9
Unverified Commit 060884e9 authored Jun 15, 2022 by Enkelmann Committed by GitHub Jun 15, 2022
6 changed files
--- a/CHANGES.md
+++ b/CHANGES.md
+0.7-dev
+====
 0.6 (2022-06)
 ====

--- a/Cargo.lock
+++ b/Cargo.lock
@@ -128,7 +128,7 @@ dependencies = [
 [[package]]
 name = "cwe_checker"
-version = "0.6.0"
+version = "0.7.0-dev"
 dependencies = [
 "cwe_checker_lib",
 "directories",
@@ -151,7 +151,7 @@ dependencies = [
 [[package]]
 name = "cwe_checker_lib"
-version = "0.6.0"
+version = "0.7.0-dev"
 dependencies = [
 "anyhow",
 "apint",

--- a/src/caller/Cargo.toml
+++ b/src/caller/Cargo.toml
 [package]
 name = "cwe_checker"
-version = "0.6.0"
+version = "0.7.0-dev"
 authors = ["Nils-Edvin Enkelmann <nils-edvin.enkelmann@fkie.fraunhofer.de>"]
 edition = "2021"

--- a/src/cwe_checker_lib/Cargo.toml
+++ b/src/cwe_checker_lib/Cargo.toml
 [package]
 name = "cwe_checker_lib"
-version = "0.6.0"
+version = "0.7.0-dev"
 authors = ["Nils-Edvin Enkelmann <nils-edvin.enkelmann@fkie.fraunhofer.de>"]
 edition = "2021"

--- a/src/cwe_checker_lib/src/intermediate_representation/project.rs
+++ b/src/cwe_checker_lib/src/intermediate_representation/project.rs
 use super::*;
+use crate::utils::log::LogMessage;
 use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+/// Contains implementation of the block duplication normalization pass.
 mod block_duplication_normalization;
-use crate::utils::log::LogMessage;
 use block_duplication_normalization::*;
+/// Contains implementation of the propagate control flow normalization pass.
+mod propagate_control_flow;
+use propagate_control_flow::*;
 /// The `Project` struct is the main data structure representing a binary.
 ///
@@ -225,7 +230,10 @@ impl Project {
    /// - Duplicate blocks so that if a block is contained in several functions, each function gets its own unique copy.
    /// - Propagate input expressions along variable assignments.
    /// - Replace trivial expressions like `a XOR a` with their result.
-    /// - Remove dead register assignments
+    /// - Remove dead register assignments.
+    /// - Propagate the control flow along chains of conditionals with the same condition.
+    /// - Substitute bitwise `AND` and `OR` operations with the stack pointer
+    /// in cases where the result is known due to known stack pointer alignment.
    #[must_use]
    pub fn normalize(&mut self) -> Vec<LogMessage> {
        let mut logs =
@@ -234,6 +242,7 @@ impl Project {
        self.propagate_input_expressions();
        self.substitute_trivial_expressions();
        crate::analysis::dead_variable_elimination::remove_dead_var_assignments(self);
+        propagate_control_flow(self);
        logs.append(
            crate::analysis::stack_alignment_substitution::substitute_and_on_stackpointer(self)
                .unwrap_or_default()

--- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs
+++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs
+use crate::analysis::graph::{Edge, Graph, Node};
+use crate::intermediate_representation::*;
+use petgraph::graph::NodeIndex;
+use std::collections::{BTreeSet, HashMap};
+/// The `propagate_control_flow` normalization pass simplifies the representation of
+/// sequences of if-else blocks that all branch on the same condition,
+/// so that either all of the guarded blocks are executed or none of them are.
+/// Such sequences are often generated by sequences of conditional assignment assembly instructions.
+///
+/// To simplify the generated control flow graph
+/// (and thus propagate the knowledge that either all or none of these blocks are executed to the control flow graph)
+/// we look for sequences of (conditional) jumps where the final jump target is determined by the source of the first jump
+/// (because we know that the conditionals for all jumps evaluate to the same value along the sequence).
+/// For such a sequence the first jump is retargeted to the final jump destination of the sequence; all new targets are collected first and written back to `project` only after the whole graph was inspected.
+pub fn propagate_control_flow(project: &mut Project) {
+    let extern_subs = project
+        .program
+        .term
+        .extern_symbols
+        .keys()
+        .cloned()
+        .collect();
+    let cfg = crate::analysis::graph::get_program_cfg(&project.program, extern_subs);
+    let mut jmps_to_retarget = HashMap::new();
+    for node in cfg.node_indices() {
+        if let Node::BlkStart(block, sub) = cfg[node] {
+            // Condition that is known to hold at the end of this block, or `None` if no such condition is known.
+            let known_conditional_result = get_known_conditional_at_end_of_block(&cfg, node);
+            // Follow each outgoing jump and record a new target if the jump sequence behind it is predictable.
+            match &block.term.jmps[..] {
+                [Term {
+                    term: Jmp::Branch(target),
+                    tid: jump_tid,
+                }] => {
+                    if let Some(true_condition) = &known_conditional_result {
+                        if let Some(new_target) =
+                            find_target_for_retargetable_jump(target, &sub.term, true_condition)
+                        {
+                            jmps_to_retarget.insert(jump_tid.clone(), new_target);
+                        }
+                    }
+                }
+                [Term {
+                    term:
+                        Jmp::CBranch {
+                            condition,
+                            target: if_target,
+                        },
+                    tid: jump_tid_if,
+                }, Term {
+                    term: Jmp::Branch(else_target),
+                    tid: jump_tid_else,
+                }] => {
+                    if let Some(new_target) =
+                        find_target_for_retargetable_jump(if_target, &sub.term, condition)
+                    {
+                        jmps_to_retarget.insert(jump_tid_if.clone(), new_target);
+                    }
+                    if let Some(new_target) = find_target_for_retargetable_jump(
+                        else_target,
+                        &sub.term,
+                        &negate_condition(condition.clone()),
+                    ) {
+                        jmps_to_retarget.insert(jump_tid_else.clone(), new_target);
+                    }
+                }
+                _ => (),
+            }
+        }
+    }
+    retarget_jumps(project, jmps_to_retarget);
+}
+/// Replace the target of every `Branch`/`CBranch` jump whose TID is a key of `jmps_to_retarget` with the mapped TID; panics if a listed jump is of any other kind.
+fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap<Tid, Tid>) {
+    for sub in project.program.term.subs.values_mut() {
+        for blk in sub.term.blocks.iter_mut() {
+            for jmp in blk.term.jmps.iter_mut() {
+                if let Some(new_target) = jmps_to_retarget.remove(&jmp.tid) {
+                    match &mut jmp.term {
+                        Jmp::Branch(target) | Jmp::CBranch { target, .. } => *target = new_target,
+                        _ => panic!("Unexpected type of jump encountered."),
+                    }
+                }
+            }
+        }
+    }
+}
+/// Under the assumption that the given `true_condition` expression evaluates to `true`,
+/// check whether jumps to the given `target` can be retargeted to another, final jump target inside `sub`.
+/// I.e. we follow sequences of jumps that are not interrupted by [`Def`] instructions to their final jump target
+/// using the `true_condition` to resolve the targets of conditional jumps if possible. Returns `None` if the search ends at `target` itself.
+fn find_target_for_retargetable_jump(
+    target: &Tid,
+    sub: &Sub,
+    true_condition: &Expression,
+) -> Option<Tid> {
+    let mut visited_tids = BTreeSet::from([target.clone()]);
+    let mut new_target = target;
+    while let Some(block) = sub.blocks.iter().find(|blk| blk.tid == *new_target) {
+        if let Some(retarget) = check_for_retargetable_block(block, true_condition) {
+            if !visited_tids.insert(retarget.clone()) {
+                // The next target was already visited, so we stop at the current one to avoid infinite loops.
+                break;
+            }
+            new_target = retarget;
+        } else {
+            break;
+        }
+    }
+    if new_target != target {
+        Some(new_target.clone())
+    } else {
+        None
+    }
+}
+/// Check whether the given block is free of [`Def`] instructions and can thus be skipped.
+/// If yes, check whether the target of the jump at the end of the block is predictable
+/// under the assumption that the given `true_condition` expression evaluates to true.
+/// If it can be predicted, return the target of the jump. Only a single `Branch` or a `CBranch` followed by a `Branch` is handled; everything else yields `None`.
+fn check_for_retargetable_block<'a>(
+    block: &'a Term<Blk>,
+    true_condition: &Expression,
+) -> Option<&'a Tid> {
+    if !block.term.defs.is_empty() {
+        return None;
+    }
+    match &block.term.jmps[..] {
+        [Term {
+            term: Jmp::Branch(target),
+            ..
+        }] => Some(target),
+        [Term {
+            term:
+                Jmp::CBranch {
+                    target: if_target,
+                    condition,
+                },
+            ..
+        }, Term {
+            term: Jmp::Branch(else_target),
+            ..
+        }] => {
+            if condition == true_condition {
+                Some(if_target)
+            } else if *condition == negate_condition(true_condition.clone()) {
+                Some(else_target)
+            } else {
+                None
+            }
+        }
+        _ => None,
+    }
+}
+/// Check whether the given node in the control flow graph has exactly one incoming edge
+/// and whether that edge stems from a conditional jump (either the `CBranch` itself or the `Branch` that follows an untaken `CBranch`).
+/// If both are true, return the condition expression that needs to evaluate to true whenever this edge is taken (the negated condition for the fall-through `Branch`).
+fn check_if_single_conditional_incoming(graph: &Graph, node: NodeIndex) -> Option<Expression> {
+    let incoming_edges: Vec<_> = graph
+        .edges_directed(node, petgraph::Direction::Incoming)
+        .collect();
+    if incoming_edges.len() == 1 {
+        match incoming_edges[0].weight() {
+            Edge::Jump(
+                Term {
+                    term: Jmp::CBranch { condition, .. },
+                    ..
+                },
+                None,
+            ) => Some(condition.clone()),
+            Edge::Jump(
+                Term {
+                    term: Jmp::Branch(_),
+                    ..
+                },
+                Some(Term {
+                    term: Jmp::CBranch { condition, .. },
+                    ..
+                }),
+            ) => Some(negate_condition(condition.clone())),
+            _ => None,
+        }
+    } else {
+        None
+    }
+}
+/// Check if the block at the given `BlkStart` node only has one input edge stemming from a conditional jump.
+/// If yes, check whether the conditional expression for that jump will still evaluate to true at the end of the block, i.e. whether no `Assign` or `Load` in the block writes to one of its input variables.
+/// If yes, return the conditional expression. Returns `None` for nodes that are not `BlkStart` nodes.
+fn get_known_conditional_at_end_of_block(cfg: &Graph, node: NodeIndex) -> Option<Expression> {
+    if let Node::BlkStart(block, sub) = cfg[node] {
+        // Check whether we know the result of a conditional at the start of the block.
+        let mut known_conditional_result: Option<Expression> =
+            if block.tid != sub.term.blocks[0].tid {
+                check_if_single_conditional_incoming(cfg, node)
+            } else {
+                // The first block of a function is its start block. Start blocks always have incoming caller edges
+                // even if these edges are missing in the CFG because we do not know the callers.
+                None
+            };
+        // If we have a known conditional result at the start of the block,
+        // check whether it will still hold true at the end of the block.
+        if let Some(conditional) = &known_conditional_result {
+            let input_vars = conditional.input_vars();
+            for def in block.term.defs.iter() {
+                match &def.term {
+                    Def::Assign { var, .. } | Def::Load { var, .. } => {
+                        if input_vars.contains(&var) {
+                            known_conditional_result = None;
+                            break;
+                        }
+                    }
+                    Def::Store { .. } => (), // Stores are not treated as invalidating the condition.
+                }
+            }
+        }
+        known_conditional_result
+    } else {
+        None
+    }
+}
+/// Negate the given boolean condition expression: an outermost `BoolNegate` is stripped instead of adding a second negation, any other expression gets wrapped in a `BoolNegate`.
+fn negate_condition(expr: Expression) -> Expression {
+    if let Expression::UnOp {
+        op: UnOpType::BoolNegate,
+        arg,
+    } = expr
+    {
+        *arg
+    } else {
+        Expression::UnOp {
+            op: UnOpType::BoolNegate,
+            arg: Box::new(expr),
+        }
+    }
+}
+#[cfg(test)]
+pub mod tests {
+    use super::*;
+    use std::collections::BTreeMap;
+    fn mock_condition_block(name: &str, if_target: &str, else_target: &str) -> Term<Blk> {
+        let if_jmp = Jmp::CBranch {
+            target: Tid::new(if_target),
+            condition: Expression::Var(Variable::mock("zero_flag", ByteSize::new(1))), // every mocked conditional jump tests the same variable
+        };
+        let if_jmp = Term {
+            tid: Tid::new(name.to_string() + "_jmp_if"),
+            term: if_jmp,
+        };
+        let else_jmp = Jmp::Branch(Tid::new(else_target));
+        let else_jmp = Term {
+            tid: Tid::new(name.to_string() + "_jmp_else"),
+            term: else_jmp,
+        };
+        let blk = Blk {
+            defs: Vec::new(), // no defs, so the pass may skip over this block
+            jmps: Vec::from([if_jmp, else_jmp]),
+            indirect_jmp_targets: Vec::new(),
+        };
+        Term {
+            tid: Tid::new(name),
+            term: blk,
+        }
+    }
+    fn mock_block_with_defs(name: &str, return_target: &str) -> Term<Blk> {
+        let def = Def::Assign {
+            var: Variable::mock("r0", ByteSize::new(4)), // the assigned variable is not the one used in the mocked conditions
+            value: Expression::Var(Variable::mock("r1", ByteSize::new(4))),
+        };
+        let def = Term {
+            tid: Tid::new(name.to_string() + "_def"),
+            term: def,
+        };
+        let jmp = Jmp::Branch(Tid::new(return_target));
+        let jmp = Term {
+            tid: Tid::new(name.to_string() + "_jmp"),
+            term: jmp,
+        };
+        let blk = Blk {
+            defs: vec![def],
+            jmps: vec![jmp],
+            indirect_jmp_targets: Vec::new(),
+        };
+        Term {
+            tid: Tid::new(name),
+            term: blk,
+        }
+    }
+    #[test]
+    fn test_propagate_control_flow() {
+        let sub = Sub {
+            name: "sub".to_string(),
+            calling_convention: None,
+            blocks: vec![
+                mock_condition_block("cond_blk_1", "def_blk_1", "cond_blk_2"),
+                mock_block_with_defs("def_blk_1", "cond_blk_2"),
+                mock_condition_block("cond_blk_2", "def_blk_2", "cond_blk_3"),
+                mock_block_with_defs("def_blk_2", "cond_blk_3"),
+                mock_condition_block("cond_blk_3", "def_blk_3", "end_blk"),
+                mock_block_with_defs("def_blk_3", "end_blk"),
+                mock_block_with_defs("end_blk", "end_blk"), // jumps to itself
+            ],
+        };
+        let sub = Term {
+            tid: Tid::new("sub"),
+            term: sub,
+        };
+        let mut project = Project::mock_arm32();
+        project.program.term.subs = BTreeMap::from([(Tid::new("sub"), sub)]);
+        propagate_control_flow(&mut project);
+        let expected_blocks = vec![
+            mock_condition_block("cond_blk_1", "def_blk_1", "end_blk"), // else-jump now skips cond_blk_2 and cond_blk_3
+            mock_block_with_defs("def_blk_1", "def_blk_2"), // condition known to hold here, so cond_blk_2 is skipped
+            mock_condition_block("cond_blk_2", "def_blk_2", "end_blk"), // else-jump now skips cond_blk_3
+            mock_block_with_defs("def_blk_2", "def_blk_3"), // condition known to hold here, so cond_blk_3 is skipped
+            mock_condition_block("cond_blk_3", "def_blk_3", "end_blk"), // unchanged
+            mock_block_with_defs("def_blk_3", "end_blk"), // unchanged
+            mock_block_with_defs("end_blk", "end_blk"), // unchanged
+        ];
+        assert_eq!(
+            &project.program.term.subs[&Tid::new("sub")].term.blocks[..],
+            &expected_blocks[..]
+        );
+    }
+}