Substitute trivial expressions (#94)

16408e2c · Enkelmann · 439dfc75 · 16408e2c · 16408e2c · 16408e2c
Commit 16408e2c authored Nov 02, 2020 by Enkelmann
7 changed files
--- a/caller/src/main.rs
+++ b/caller/src/main.rs
@@ -41,6 +41,11 @@ struct CmdlineArgs {
    #[structopt(long)]
    module_versions: bool,
+    /// Output for debugging purposes.
+    /// The current behavior of this flag is unstable and subject to change.
+    #[structopt(long, hidden = true)]
+    debug: bool,
    /// Use BAP as backend (instead of Ghidra). Requires BAP and the cwe_checker-BAP-plugin to be installed.
    #[structopt(long, hidden = true)]
    bap: bool,
@@ -132,6 +137,18 @@ fn run_with_ghidra(args: CmdlineArgs) {
    let project = get_project_from_ghidra(&Path::new(&args.binary.unwrap()));
+    // Print debug output and then return without executing the modules.
+    // Right now there is only one debug printing function.
+    // When more debug printing modes exist, this behavior will change!
+    if args.debug {
+        cwe_checker_rs::analysis::pointer_inference::run(
+            &project,
+            serde_json::from_value(config["Memory"].clone()).unwrap(),
+            true,
+        );
+        return;
+    }
    // Execute the modules and collect their logs and CWE-warnings.
    let mut all_logs = Vec::new();
    let mut all_cwes = Vec::new();

--- a/cwe_checker_rs/src/intermediate_representation/expression.rs
+++ b/cwe_checker_rs/src/intermediate_representation/expression.rs
@@ -76,6 +76,62 @@ impl Expression {
            Cast { size, .. } | Unknown { size, .. } | Subpiece { size, .. } => *size,
        }
    }
+    /// Substitute some trivial expressions with their result, rewriting `self` in place.
+    /// E.g. substitute `a XOR a` with zero or substitute `a OR a` with `a`. Operands are simplified before the enclosing node is checked.
+    pub fn substitute_trivial_operations(&mut self) {
+        use Expression::*;
+        match self {
+            Var(_) | Const(_) | Unknown { .. } => (), // these variants are left unchanged
+            Subpiece {
+                low_byte,
+                size,
+                arg,
+            } => {
+                arg.substitute_trivial_operations();
+                if *low_byte == ByteSize::new(0) && *size == arg.bytesize() { // piece starts at byte 0 and has the argument's full size
+                    *self = (**arg).clone();
+                }
+            }
+            Cast { op, size, arg } => {
+                arg.substitute_trivial_operations();
+                if (*op == CastOpType::IntSExt || *op == CastOpType::IntZExt) // only these two cast types are simplified
+                    && *size == arg.bytesize() // target size equals the argument size, so the cast is dropped
+                {
+                    *self = (**arg).clone();
+                }
+            }
+            UnOp { op: _, arg } => arg.substitute_trivial_operations(), // only the operand is simplified, the operation itself is kept
+            BinOp { op, lhs, rhs } => {
+                lhs.substitute_trivial_operations();
+                rhs.substitute_trivial_operations();
+                if lhs == rhs { // compared after simplification; NOTE(review): this also matches operands containing `Unknown` - confirm that is intended
+                    match op {
+                        BinOpType::BoolAnd
+                        | BinOpType::BoolOr
+                        | BinOpType::IntAnd
+                        | BinOpType::IntOr => {
+                            // `a op a` equals `a` for these operations, so the operand replaces the whole expression.
+                            *self = (**lhs).clone();
+                        }
+                        BinOpType::BoolXOr | BinOpType::IntXOr => {
+                            // `a xor a` always equals zero. The constant gets the size of the operands.
+                            *self = Expression::Const(Bitvector::zero(lhs.bytesize().into()));
+                        }
+                        BinOpType::IntEqual
+                        | BinOpType::IntLessEqual
+                        | BinOpType::IntSLessEqual => {
+                            *self = Expression::Const(Bitvector::one(ByteSize::new(1).into())); // operand compared with itself: one-byte constant one
+                        }
+                        BinOpType::IntNotEqual | BinOpType::IntLess | BinOpType::IntSLess => {
+                            *self = Expression::Const(Bitvector::zero(ByteSize::new(1).into())); // operand compared with itself: one-byte constant zero
+                        }
+                        _ => (), // every other binary operation is left as it is
+                    }
+                }
+            }
+        }
+    }
 }
 /// The type/mnemonic of a binary operation
@@ -141,3 +197,30 @@ pub enum UnOpType {
    FloatRound,
    FloatNaN,
 }
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[test]
+    fn trivial_expression_substitution() { // checks that `RAX XOR RAX` is replaced by a zero constant
+        let mut expr = Expression::BinOp { // both operands below are built from identical field values
+            op: BinOpType::IntXOr,
+            lhs: Box::new(Expression::Var(Variable {
+                name: "RAX".into(),
+                size: ByteSize::new(8),
+                is_temp: false,
+            })),
+            rhs: Box::new(Expression::Var(Variable {
+                name: "RAX".into(),
+                size: ByteSize::new(8),
+                is_temp: false,
+            })),
+        };
+        expr.substitute_trivial_operations(); // rewrites `expr` in place
+        assert_eq!(
+            expr,
+            Expression::Const(Bitvector::zero(ByteSize::new(8).into())) // zero constant built from the operands' size, `ByteSize::new(8)`
+        );
+    }
+}
--- a/cwe_checker_rs/src/intermediate_representation/term.rs
+++ b/cwe_checker_rs/src/intermediate_representation/term.rs
@@ -280,3 +280,43 @@ impl Project {
        self.stack_pointer_register.size
    }
 }
+impl Project {
+    /// For all expressions contained in the definitions and jumps of the project,
+    /// replace trivially computable subexpressions like `a XOR a` with their result (in place).
+    fn substitute_trivial_expressions(&mut self) {
+        for sub in self.program.term.subs.iter_mut() {
+            for block in sub.term.blocks.iter_mut() {
+                for def in block.term.defs.iter_mut() {
+                    match &mut def.term {
+                        Def::Assign { value: expr, .. } | Def::Load { address: expr, .. } => { // one expression each: the assigned value or the load address
+                            expr.substitute_trivial_operations()
+                        }
+                        Def::Store { address, value } => { // a store carries two expressions, both are simplified
+                            address.substitute_trivial_operations();
+                            value.substitute_trivial_operations();
+                        }
+                    }
+                }
+                for jmp in block.term.jmps.iter_mut() {
+                    match &mut jmp.term {
+                        Jmp::Branch(_) | Jmp::Call { .. } | Jmp::CallOther { .. } => (), // nothing is simplified for these jump kinds
+                        Jmp::BranchInd(expr)
+                        | Jmp::CBranch {
+                            condition: expr, ..
+                        }
+                        | Jmp::CallInd { target: expr, .. }
+                        | Jmp::Return(expr) => expr.substitute_trivial_operations(), // the bound expression of each of these jump kinds is simplified
+                    }
+                }
+            }
+        }
+    }
+    /// Run some normalization passes over the project, modifying it in place.
+    ///
+    /// Right now this only replaces trivial expressions like `a XOR a` with their result.
+    pub fn normalize(&mut self) {
+        self.substitute_trivial_expressions(); // currently the only normalization pass
+    }
+}
--- a/cwe_checker_rs/src/pcode/expressions.rs
+++ b/cwe_checker_rs/src/pcode/expressions.rs
@@ -124,8 +124,13 @@ pub struct Expression {
 impl From<Expression> for IrExpression {
    /// Translates a P-Code expression into an expression of the internally used IR if possible.
-    /// Panics if translation is not possible,
+    /// Panics if translation is not possible.
-    /// e.g. for `LOAD`, `STORE` and and expressions that need the size of the output variable to be defined.
+    ///
+    /// Cases where translation is not possible:
+    /// - `LOAD` and `STORE`, since these are not expressions (they have side effects).
+    /// - Expressions which store the size of their output in the output variable (to which we do not have access here).
+    /// These include `SUBPIECE`, `INT_ZEXT`, `INT_SEXT`, `INT2FLOAT`, `FLOAT2FLOAT` and `TRUNC`.
+    /// Translation of these expressions is handled explicitly during translation of `Def`.
    fn from(expr: Expression) -> IrExpression {
        use ExpressionType::*;
        match expr.mnemonic {

--- a/cwe_checker_rs/src/pcode/term.rs
+++ b/cwe_checker_rs/src/pcode/term.rs
@@ -412,7 +412,7 @@ impl From<Project> for IrProject {
            tid: project.program.tid,
            term: project.program.term.into(),
        };
-        IrProject {
+        let mut ir_project = IrProject {
            program,
            cpu_architecture: project.cpu_architecture,
            stack_pointer_register: project.stack_pointer_register.into(),
@@ -421,7 +421,9 @@ impl From<Project> for IrProject {
                .into_iter()
                .map(|cconv| cconv.into())
                .collect(),
-        }
+        };
+        ir_project.normalize();
+        ir_project
    }
 }

--- a/cwe_checker_rs/src/term/mod.rs
+++ b/cwe_checker_rs/src/term/mod.rs
@@ -362,12 +362,14 @@ impl From<Project> for IrProject {
            return_register: project.return_registers,
            callee_saved_register: project.callee_saved_registers,
        };
-        IrProject {
+        let mut ir_project = IrProject {
            program,
            cpu_architecture: project.cpu_architecture,
            stack_pointer_register: project.stack_pointer_register.into(),
            calling_conventions: vec![default_cconv],
-        }
+        };
+        ir_project.normalize();
+        ir_project
    }
 }

--- a/test/src/lib.rs
+++ b/test/src/lib.rs
@@ -178,9 +178,8 @@ mod tests {
        mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure!
        mark_architecture_skipped(&mut tests, "mips"); // TODO: Check reason for failure!
        mark_architecture_skipped(&mut tests, "mipsel"); // TODO: Check reason for failure!
-        mark_architecture_skipped(&mut tests, "ppc64"); // TODO: Check reason for failure!
+        mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
-        mark_architecture_skipped(&mut tests, "ppc64le"); // TODO: Check reason for failure!
+        mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
-        mark_architecture_skipped(&mut tests, "ppc"); // TODO: Check reason for failure!
        mark_skipped(&mut tests, "x86", "gcc"); // TODO: Check reason for failure!
        mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
@@ -206,9 +205,8 @@ mod tests {
        mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure!
        mark_architecture_skipped(&mut tests, "mips"); // TODO: Check reason for failure!
        mark_architecture_skipped(&mut tests, "mipsel"); // TODO: Check reason for failure!
-        mark_architecture_skipped(&mut tests, "ppc64"); // TODO: Check reason for failure!
+        mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
-        mark_architecture_skipped(&mut tests, "ppc64le"); // TODO: Check reason for failure!
+        mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
-        mark_architecture_skipped(&mut tests, "ppc"); // TODO: Check reason for failure!
        mark_architecture_skipped(&mut tests, "x86"); // TODO: Check reason for failure!
        mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
@@ -234,8 +232,8 @@ mod tests {
        mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure!
        mark_skipped(&mut tests, "mips", "gcc"); // TODO: Check reason for failure!
        mark_skipped(&mut tests, "mipsel", "gcc"); // TODO: Check reason for failure!
-        mark_architecture_skipped(&mut tests, "ppc64"); // TODO: Check reason for failure!
+        mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
-        mark_architecture_skipped(&mut tests, "ppc64le"); // TODO: Check reason for failure!
+        mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
        mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
        for test_case in tests {