Commit 16408e2c by Enkelmann

Substitute trivial expressions (#94)

parent 439dfc75
...@@ -41,6 +41,11 @@ struct CmdlineArgs { ...@@ -41,6 +41,11 @@ struct CmdlineArgs {
#[structopt(long)] #[structopt(long)]
module_versions: bool, module_versions: bool,
/// Output for debugging purposes.
/// The current behavior of this flag is unstable and subject to change.
#[structopt(long, hidden = true)]
debug: bool,
/// Use BAP as backend (instead of Ghidra). Requires BAP and the cwe_checker-BAP-plugin to be installed. /// Use BAP as backend (instead of Ghidra). Requires BAP and the cwe_checker-BAP-plugin to be installed.
#[structopt(long, hidden = true)] #[structopt(long, hidden = true)]
bap: bool, bap: bool,
...@@ -132,6 +137,18 @@ fn run_with_ghidra(args: CmdlineArgs) { ...@@ -132,6 +137,18 @@ fn run_with_ghidra(args: CmdlineArgs) {
let project = get_project_from_ghidra(&Path::new(&args.binary.unwrap())); let project = get_project_from_ghidra(&Path::new(&args.binary.unwrap()));
// Print debug and then return.
// Right now there is only one debug printing function.
// When more debug printing modes exist, this behaviour will change!
if args.debug {
cwe_checker_rs::analysis::pointer_inference::run(
&project,
serde_json::from_value(config["Memory"].clone()).unwrap(),
true,
);
return;
}
// Execute the modules and collect their logs and CWE-warnings. // Execute the modules and collect their logs and CWE-warnings.
let mut all_logs = Vec::new(); let mut all_logs = Vec::new();
let mut all_cwes = Vec::new(); let mut all_cwes = Vec::new();
......
...@@ -76,6 +76,62 @@ impl Expression { ...@@ -76,6 +76,62 @@ impl Expression {
Cast { size, .. } | Unknown { size, .. } | Subpiece { size, .. } => *size, Cast { size, .. } | Unknown { size, .. } | Subpiece { size, .. } => *size,
} }
} }
/// Substitute some trivial expressions with their result.
/// E.g. substitute `a XOR a` with zero or substitute `a OR a` with `a`.
pub fn substitute_trivial_operations(&mut self) {
use Expression::*;
match self {
Var(_) | Const(_) | Unknown { .. } => (),
Subpiece {
low_byte,
size,
arg,
} => {
arg.substitute_trivial_operations();
if *low_byte == ByteSize::new(0) && *size == arg.bytesize() {
*self = (**arg).clone();
}
}
Cast { op, size, arg } => {
arg.substitute_trivial_operations();
if (*op == CastOpType::IntSExt || *op == CastOpType::IntZExt)
&& *size == arg.bytesize()
{
*self = (**arg).clone();
}
}
UnOp { op: _, arg } => arg.substitute_trivial_operations(),
BinOp { op, lhs, rhs } => {
lhs.substitute_trivial_operations();
rhs.substitute_trivial_operations();
if lhs == rhs {
match op {
BinOpType::BoolAnd
| BinOpType::BoolOr
| BinOpType::IntAnd
| BinOpType::IntOr => {
// This is an identity operation
*self = (**lhs).clone();
}
BinOpType::BoolXOr | BinOpType::IntXOr => {
// `a xor a` always equals zero.
*self = Expression::Const(Bitvector::zero(lhs.bytesize().into()));
}
BinOpType::IntEqual
| BinOpType::IntLessEqual
| BinOpType::IntSLessEqual => {
*self = Expression::Const(Bitvector::one(ByteSize::new(1).into()));
}
BinOpType::IntNotEqual | BinOpType::IntLess | BinOpType::IntSLess => {
*self = Expression::Const(Bitvector::zero(ByteSize::new(1).into()));
}
_ => (),
}
}
}
}
}
} }
/// The type/mnemonic of a binary operation /// The type/mnemonic of a binary operation
...@@ -141,3 +197,30 @@ pub enum UnOpType { ...@@ -141,3 +197,30 @@ pub enum UnOpType {
FloatRound, FloatRound,
FloatNaN, FloatNaN,
} }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn trivial_expression_substitution() {
let mut expr = Expression::BinOp {
op: BinOpType::IntXOr,
lhs: Box::new(Expression::Var(Variable {
name: "RAX".into(),
size: ByteSize::new(8),
is_temp: false,
})),
rhs: Box::new(Expression::Var(Variable {
name: "RAX".into(),
size: ByteSize::new(8),
is_temp: false,
})),
};
expr.substitute_trivial_operations();
assert_eq!(
expr,
Expression::Const(Bitvector::zero(ByteSize::new(8).into()))
);
}
}
...@@ -280,3 +280,43 @@ impl Project { ...@@ -280,3 +280,43 @@ impl Project {
self.stack_pointer_register.size self.stack_pointer_register.size
} }
} }
impl Project {
/// For all expressions contained in the project,
/// replace trivially computable subexpressions like `a XOR a` with their result.
fn substitute_trivial_expressions(&mut self) {
for sub in self.program.term.subs.iter_mut() {
for block in sub.term.blocks.iter_mut() {
for def in block.term.defs.iter_mut() {
match &mut def.term {
Def::Assign { value: expr, .. } | Def::Load { address: expr, .. } => {
expr.substitute_trivial_operations()
}
Def::Store { address, value } => {
address.substitute_trivial_operations();
value.substitute_trivial_operations();
}
}
}
for jmp in block.term.jmps.iter_mut() {
match &mut jmp.term {
Jmp::Branch(_) | Jmp::Call { .. } | Jmp::CallOther { .. } => (),
Jmp::BranchInd(expr)
| Jmp::CBranch {
condition: expr, ..
}
| Jmp::CallInd { target: expr, .. }
| Jmp::Return(expr) => expr.substitute_trivial_operations(),
}
}
}
}
}
/// Run some normalization passes over the project.
///
/// Right now this only replaces trivial expressions like `a XOR a` with their result.
pub fn normalize(&mut self) {
self.substitute_trivial_expressions();
}
}
...@@ -124,8 +124,13 @@ pub struct Expression { ...@@ -124,8 +124,13 @@ pub struct Expression {
impl From<Expression> for IrExpression { impl From<Expression> for IrExpression {
/// Translates a P-Code expression into an expression of the internally used IR if possible. /// Translates a P-Code expression into an expression of the internally used IR if possible.
/// Panics if translation is not possible, /// Panics if translation is not possible.
/// e.g. for `LOAD`, `STORE` and and expressions that need the size of the output variable to be defined. ///
/// Cases where translation is not possible:
/// - `LOAD` and `STORE`, since these are not expressions (they have side effects).
/// - Expressions which store the size of their output in the output variable (to which we do not have access here).
/// These include `SUBPIECE`, `INT_ZEXT`, `INT_SEXT`, `INT2FLOAT`, `FLOAT2FLOAT` and `TRUNC`.
/// Translation of these expressions is handled explicitly during translation of `Def`.
fn from(expr: Expression) -> IrExpression { fn from(expr: Expression) -> IrExpression {
use ExpressionType::*; use ExpressionType::*;
match expr.mnemonic { match expr.mnemonic {
......
...@@ -412,7 +412,7 @@ impl From<Project> for IrProject { ...@@ -412,7 +412,7 @@ impl From<Project> for IrProject {
tid: project.program.tid, tid: project.program.tid,
term: project.program.term.into(), term: project.program.term.into(),
}; };
IrProject { let mut ir_project = IrProject {
program, program,
cpu_architecture: project.cpu_architecture, cpu_architecture: project.cpu_architecture,
stack_pointer_register: project.stack_pointer_register.into(), stack_pointer_register: project.stack_pointer_register.into(),
...@@ -421,7 +421,9 @@ impl From<Project> for IrProject { ...@@ -421,7 +421,9 @@ impl From<Project> for IrProject {
.into_iter() .into_iter()
.map(|cconv| cconv.into()) .map(|cconv| cconv.into())
.collect(), .collect(),
} };
ir_project.normalize();
ir_project
} }
} }
......
...@@ -362,12 +362,14 @@ impl From<Project> for IrProject { ...@@ -362,12 +362,14 @@ impl From<Project> for IrProject {
return_register: project.return_registers, return_register: project.return_registers,
callee_saved_register: project.callee_saved_registers, callee_saved_register: project.callee_saved_registers,
}; };
IrProject { let mut ir_project = IrProject {
program, program,
cpu_architecture: project.cpu_architecture, cpu_architecture: project.cpu_architecture,
stack_pointer_register: project.stack_pointer_register.into(), stack_pointer_register: project.stack_pointer_register.into(),
calling_conventions: vec![default_cconv], calling_conventions: vec![default_cconv],
} };
ir_project.normalize();
ir_project
} }
} }
......
...@@ -178,9 +178,8 @@ mod tests { ...@@ -178,9 +178,8 @@ mod tests {
mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure!
mark_architecture_skipped(&mut tests, "mips"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "mips"); // TODO: Check reason for failure!
mark_architecture_skipped(&mut tests, "mipsel"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "mipsel"); // TODO: Check reason for failure!
mark_architecture_skipped(&mut tests, "ppc64"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc"); // TODO: Check reason for failure!
mark_skipped(&mut tests, "x86", "gcc"); // TODO: Check reason for failure! mark_skipped(&mut tests, "x86", "gcc"); // TODO: Check reason for failure!
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure! mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
...@@ -206,9 +205,8 @@ mod tests { ...@@ -206,9 +205,8 @@ mod tests {
mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure!
mark_architecture_skipped(&mut tests, "mips"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "mips"); // TODO: Check reason for failure!
mark_architecture_skipped(&mut tests, "mipsel"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "mipsel"); // TODO: Check reason for failure!
mark_architecture_skipped(&mut tests, "ppc64"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc"); // TODO: Check reason for failure!
mark_architecture_skipped(&mut tests, "x86"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "x86"); // TODO: Check reason for failure!
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure! mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
...@@ -234,8 +232,8 @@ mod tests { ...@@ -234,8 +232,8 @@ mod tests {
mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "mips64el"); // TODO: Check reason for failure!
mark_skipped(&mut tests, "mips", "gcc"); // TODO: Check reason for failure! mark_skipped(&mut tests, "mips", "gcc"); // TODO: Check reason for failure!
mark_skipped(&mut tests, "mipsel", "gcc"); // TODO: Check reason for failure! mark_skipped(&mut tests, "mipsel", "gcc"); // TODO: Check reason for failure!
mark_architecture_skipped(&mut tests, "ppc64"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "ppc64"); // Ghidra generates mangled function names here for some reason.
mark_architecture_skipped(&mut tests, "ppc64le"); // TODO: Check reason for failure! mark_architecture_skipped(&mut tests, "ppc64le"); // Ghidra generates mangled function names here for some reason.
mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure! mark_compiler_skipped(&mut tests, "mingw32-gcc"); // TODO: Check reason for failure!
for test_case in tests { for test_case in tests {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment