Unverified Commit 11782c04 by Enkelmann Committed by GitHub

Remove dead register assignments (#177)

parent 9fa65999
...@@ -140,13 +140,14 @@ fn mock_program() -> Term<Program> { ...@@ -140,13 +140,14 @@ fn mock_program() -> Term<Program> {
fn backward_fixpoint() { fn backward_fixpoint() {
let project = Project { let project = Project {
program: mock_program(), program: mock_program(),
cpu_architecture: String::from("x86"), cpu_architecture: String::from("x86_64"),
stack_pointer_register: Variable { stack_pointer_register: Variable {
name: String::from("RSP"), name: String::from("RSP"),
size: ByteSize::new(8), size: ByteSize::new(8),
is_temp: false, is_temp: false,
}, },
calling_conventions: Vec::new(), calling_conventions: Vec::new(),
register_list: Vec::new(),
datatype_properties: DatatypeProperties::mock(), datatype_properties: DatatypeProperties::mock(),
}; };
......
use std::collections::HashSet;
use crate::analysis::graph::Graph;
use crate::intermediate_representation::*;
/// Step the liveness set backwards over a single `Def` term.
///
/// On entry `alive_variables` holds the variables alive *after* `def` is executed;
/// on exit it holds the variables alive *before* `def` is executed.
pub fn update_alive_vars_by_def(alive_variables: &mut HashSet<Variable>, def: &Term<Def>) {
    match &def.term {
        Def::Assign { var, value } => {
            // `remove` reports whether the target was alive.
            // Only a live assignment makes its inputs alive; a dead store leaves the set untouched.
            if alive_variables.remove(var) {
                alive_variables.extend(value.input_vars().into_iter().cloned());
            }
        }
        Def::Load { var, address } => {
            // The loaded register is overwritten, the address inputs are always read.
            alive_variables.remove(var);
            alive_variables.extend(address.input_vars().into_iter().cloned());
        }
        Def::Store { address, value } => {
            // A store reads both the address and the stored value.
            let read_vars = address.input_vars().into_iter().chain(value.input_vars());
            alive_variables.extend(read_vars.cloned());
        }
    }
}
/// The context struct for the alive variables fixpoint computation.
///
/// The computation is an intraprocedural backwards fixpoint calculation
/// that stores at each node the set of all registers that are assumed to be alive.
/// A register is alive if its content is (assumed to be) read before it is overwritten by another value assignment.
pub struct Context<'a> {
/// The reversed control flow graph of the program on which the fixpoint computation operates.
graph: &'a Graph<'a>,
/// The set of all physical base registers (i.e. no sub registers),
/// collected from the register list of the project when the context is created.
/// This is the set of registers that are assumed to be alive at call/return instructions
/// and all other places in the control flow graph,
/// where the next instruction to be executed may not be known.
pub all_physical_registers: HashSet<Variable>,
}
impl<'a> Context<'a> {
    /// Build a context from the given project and the (already reversed) control flow graph.
    ///
    /// The set of physical registers is copied from the register list of the project.
    pub fn new(project: &'a Project, graph: &'a Graph) -> Context<'a> {
        Context {
            graph,
            all_physical_registers: project.register_list.iter().cloned().collect(),
        }
    }
}
impl<'a> crate::analysis::backward_interprocedural_fixpoint::Context<'a> for Context<'a> {
    /// Each node carries the set of variables that are known to be alive at that node.
    type Value = HashSet<Variable>;

    /// Return the reversed control flow graph that the fixpoint computation runs on.
    fn get_graph(&self) -> &Graph<'a> {
        self.graph
    }

    /// Two values are merged by forming the union of both sets of alive registers.
    fn merge(&self, var_set_1: &Self::Value, var_set_2: &Self::Value) -> Self::Value {
        let mut merged = var_set_1.clone();
        merged.extend(var_set_2.iter().cloned());
        merged
    }

    /// Compute the set of alive registers before the given `Def` term
    /// from the set of registers alive after it.
    fn update_def(&self, alive_variables: &Self::Value, def: &Term<Def>) -> Option<Self::Value> {
        let mut alive_before_def = alive_variables.clone();
        update_alive_vars_by_def(&mut alive_before_def, def);
        Some(alive_before_def)
    }

    /// Compute the set of alive registers before the given jump term.
    /// The inputs of jump conditions and of indirect jump target computations become alive.
    /// This includes the condition of an untaken conditional jump, if there is one.
    fn update_jumpsite(
        &self,
        alive_vars_after_jump: &Self::Value,
        jump: &Term<Jmp>,
        untaken_conditional: Option<&Term<Jmp>>,
        _jumpsite: &Term<Blk>,
    ) -> Option<Self::Value> {
        let mut alive_before_jump = alive_vars_after_jump.clone();
        // Expression evaluated by the jump itself (if any).
        let evaluated_by_jump = match &jump.term {
            Jmp::CBranch { condition, .. } => Some(condition),
            Jmp::BranchInd(target) => Some(target),
            _ => None,
        };
        if let Some(expression) = evaluated_by_jump {
            alive_before_jump.extend(expression.input_vars().into_iter().cloned());
        }
        // The condition of a preceding untaken conditional jump is evaluated, too.
        if let Some(Jmp::CBranch { condition, .. }) = untaken_conditional.map(|jmp| &jmp.term) {
            alive_before_jump.extend(condition.input_vars().into_iter().cloned());
        }
        Some(alive_before_jump)
    }

    /// All physical registers are assumed to be alive at a call instruction.
    /// For indirect calls the inputs of the target computation are alive as well.
    fn update_callsite(
        &self,
        _target_value: Option<&Self::Value>,
        _return_value: Option<&Self::Value>,
        _caller_sub: &Term<Sub>,
        call: &Term<Jmp>,
        _return_: &Term<Jmp>,
    ) -> Option<Self::Value> {
        let mut alive_at_call = self.all_physical_registers.clone();
        if let Jmp::CallInd { target, .. } = &call.term {
            alive_at_call.extend(target.input_vars().into_iter().cloned());
        }
        Some(alive_at_call)
    }

    /// This interprocedural edge is ignored by the fixpoint computation, so no value is propagated.
    fn split_call_stub(&self, _combined_value: &Self::Value) -> Option<Self::Value> {
        None
    }

    /// All physical registers are assumed to be alive at a return instruction.
    fn split_return_stub(
        &self,
        _combined_value: &Self::Value,
        _returned_from_sub: &Term<Sub>,
    ) -> Option<Self::Value> {
        Some(self.all_physical_registers.clone())
    }

    /// All physical registers are assumed to be alive at a call instruction.
    /// For indirect calls the inputs of the target computation are alive as well.
    fn update_call_stub(
        &self,
        _value_after_call: &Self::Value,
        call: &Term<Jmp>,
    ) -> Option<Self::Value> {
        let mut alive_at_call = self.all_physical_registers.clone();
        if let Jmp::CallInd { target, .. } = &call.term {
            alive_at_call.extend(target.input_vars().into_iter().cloned());
        }
        Some(alive_at_call)
    }

    /// Not used by this fixpoint computation: the input value is handed back unchanged (as a clone).
    fn specialize_conditional(
        &self,
        alive_vars_after_jump: &Self::Value,
        _condition: &Expression,
        _is_true: bool,
    ) -> Option<Self::Value> {
        Some(alive_vars_after_jump.clone())
    }
}
//! This module contains a fixpoint computation to compute alive (resp. dead) variables
//! and a function to remove dead assignments from a project.
use crate::analysis::backward_interprocedural_fixpoint::create_computation;
use crate::analysis::graph::Node;
use crate::analysis::interprocedural_fixpoint_generic::NodeValue;
use crate::intermediate_representation::*;
use std::collections::{HashMap, HashSet};
mod alive_vars_computation;
use alive_vars_computation::*;
/// Run the alive-variables fixpoint computation for the given project.
///
/// The result maps the `Tid` of each basic block
/// to the set of variables that are alive at the end of that block.
///
/// # Panics
///
/// Panics if the fixpoint computation does not stabilize within the step limit
/// or if a block end node carries no plain value after the computation.
pub fn compute_alive_vars(project: &Project) -> HashMap<Tid, HashSet<Variable>> {
    let extern_subs = project
        .program
        .term
        .extern_symbols
        .iter()
        .map(|symbol| symbol.tid.clone())
        .collect();
    // The analysis runs backwards, so it operates on the reversed control flow graph.
    let mut graph = crate::analysis::graph::get_program_cfg(&project.program, extern_subs);
    graph.reverse();
    let context = Context::new(project, &graph);
    let all_physical_registers = context.all_physical_registers.clone();
    let mut computation = create_computation(context, None);

    // Seed the computation with an initial value for every node except block starts.
    for node in graph.node_indices() {
        let initial_value = match graph[node] {
            Node::BlkStart(_, _) => continue,
            Node::BlkEnd(blk, _sub) => {
                // A dead end in the CFG has no incoming edges in the reversed CFG.
                let is_dead_end = graph
                    .neighbors_directed(node, petgraph::Incoming)
                    .next()
                    .is_none();
                if is_dead_end {
                    // Since dead ends are mostly due to cases where the control flow graph is incomplete,
                    // we assume that all registers are alive at the end of the block.
                    let mut alive_vars = all_physical_registers.clone();
                    for jmp in blk.term.jmps.iter() {
                        let expression = match &jmp.term {
                            Jmp::CallInd { target, .. } => target,
                            Jmp::BranchInd(target) => target,
                            Jmp::CBranch { condition, .. } => condition,
                            _ => continue,
                        };
                        // The expressions may contain virtual registers
                        alive_vars.extend(expression.input_vars().into_iter().cloned());
                    }
                    NodeValue::Value(alive_vars)
                } else {
                    NodeValue::Value(HashSet::new())
                }
            }
            Node::CallReturn { .. } => NodeValue::Value(HashSet::new()),
            Node::CallSource { .. } => NodeValue::CallFlowCombinator {
                call_stub: Some(HashSet::new()),
                interprocedural_flow: Some(HashSet::new()),
            },
        };
        computation.set_node_value(node, initial_value);
    }

    computation.compute_with_max_steps(100);
    if !computation.has_stabilized() {
        panic!("Fixpoint for dead register assignment removal did not stabilize.");
    }

    // Collect the value at the end of each basic block.
    let mut alive_at_block_end = HashMap::new();
    for node in graph.node_indices() {
        let blk = match graph[node] {
            Node::BlkEnd(blk, _sub) => blk,
            _ => continue,
        };
        match computation.get_node_value(node) {
            Some(NodeValue::Value(alive_vars)) => {
                alive_at_block_end.insert(blk.tid.clone(), alive_vars.clone());
            }
            _ => panic!("Error during dead variable elimination computation."),
        }
    }
    alive_at_block_end
}
/// Drop all dead register assignments from the given `block`.
///
/// The variables alive at the end of the block are looked up in `alive_vars_map`.
/// The `Def` terms are then walked from last to first while the liveness set is updated,
/// and every `Def::Assign` whose target is not alive at that point is removed.
/// An assignment is considered dead if the register is not read before its value is overwritten by another assignment.
///
/// Panics if `alive_vars_map` has no entry for the `Tid` of the block.
fn remove_dead_var_assignments_of_block(
    block: &mut Term<Blk>,
    alive_vars_map: &HashMap<Tid, HashSet<Variable>>,
) {
    let mut live_vars = alive_vars_map.get(&block.tid).unwrap().clone();
    // Surviving defs are gathered in reverse order and flipped back afterwards.
    let mut kept_defs = Vec::new();
    for def in block.term.defs.iter().rev() {
        let is_dead_assignment =
            matches!(&def.term, Def::Assign { var, .. } if !live_vars.contains(var));
        if !is_dead_assignment {
            kept_defs.push(def.clone());
        }
        // Step the liveness set backwards over this def, whether it is kept or not.
        alive_vars_computation::update_alive_vars_by_def(&mut live_vars, def);
    }
    kept_defs.reverse();
    block.term.defs = kept_defs;
}
/// Compute the alive variables of the given `project`
/// and then strip the dead assignments from every basic block of every subroutine.
pub fn remove_dead_var_assignments(project: &mut Project) {
    let alive_vars_map = compute_alive_vars(project);
    project
        .program
        .term
        .subs
        .iter_mut()
        .flat_map(|sub| sub.term.blocks.iter_mut())
        .for_each(|block| remove_dead_var_assignments_of_block(block, &alive_vars_map));
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Create a mock `Def` term named `def_<term_index>`
    /// that assigns the variable `input` to the variable `output` (both of size 8).
    fn def_assign_term(term_index: u64, input: &str, output: &str) -> Term<Def> {
        let tid_name = format!("def_{}", term_index);
        let target = Variable::mock(output, 8);
        let source = Expression::Var(Variable::mock(input, 8));
        Def::assign(&tid_name, target, source)
    }

    /// A single block without jumps: only the assignments that feed
    /// the final values of the physical registers `RAX` and `RBX` may survive.
    #[test]
    fn dead_assignment_removal() {
        let block = Term {
            tid: Tid::new("block"),
            term: Blk {
                defs: vec![
                    def_assign_term(1, "A", "B"),
                    def_assign_term(2, "B", "C"),
                    def_assign_term(3, "C", "RAX"), // dead assignment
                    def_assign_term(4, "B", "RAX"),
                    def_assign_term(5, "C", "RBX"),
                    def_assign_term(6, "A", "B"), // dead assignment, since the next assignment is dead
                    def_assign_term(7, "B", "C"), // dead assignment, since C is not a physical register
                ],
                jmps: Vec::new(),
                indirect_jmp_targets: Vec::new(),
            },
        };
        let sub = Term {
            tid: Tid::new("sub"),
            term: Sub {
                name: "sub".to_string(),
                blocks: vec![block],
            },
        };
        let mut project = Project::mock_empty();
        project.program.term.subs.push(sub);

        remove_dead_var_assignments(&mut project);

        let expected_defs = vec![
            def_assign_term(1, "A", "B"),
            def_assign_term(2, "B", "C"),
            def_assign_term(4, "B", "RAX"),
            def_assign_term(5, "C", "RBX"),
        ];
        assert_eq!(
            project.program.term.subs[0].term.blocks[0].term.defs,
            expected_defs
        );
    }
}
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
//! as well as analyses depending on these modules. //! as well as analyses depending on these modules.
pub mod backward_interprocedural_fixpoint; pub mod backward_interprocedural_fixpoint;
pub mod dead_variable_elimination;
pub mod fixpoint; pub mod fixpoint;
pub mod forward_interprocedural_fixpoint; pub mod forward_interprocedural_fixpoint;
pub mod graph; pub mod graph;
......
...@@ -89,12 +89,17 @@ fn mock_project() -> (Project, Config) { ...@@ -89,12 +89,17 @@ fn mock_project() -> (Project, Config) {
return_register: vec!["RDX".to_string()], return_register: vec!["RDX".to_string()],
callee_saved_register: vec!["callee_saved_reg".to_string()], callee_saved_register: vec!["callee_saved_reg".to_string()],
}; };
let register_list = vec!["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"]
.into_iter()
.map(|name| Variable::mock(name, ByteSize::new(8)))
.collect();
( (
Project { Project {
program: program_term, program: program_term,
cpu_architecture: "x86_64".to_string(), cpu_architecture: "x86_64".to_string(),
stack_pointer_register: register("RSP"), stack_pointer_register: register("RSP"),
calling_conventions: vec![cconv], calling_conventions: vec![cconv],
register_list,
datatype_properties: DatatypeProperties::mock(), datatype_properties: DatatypeProperties::mock(),
}, },
Config { Config {
......
...@@ -448,6 +448,9 @@ pub struct Project { ...@@ -448,6 +448,9 @@ pub struct Project {
pub stack_pointer_register: Variable, pub stack_pointer_register: Variable,
/// The known calling conventions that may be used for calls to extern functions. /// The known calling conventions that may be used for calls to extern functions.
pub calling_conventions: Vec<CallingConvention>, pub calling_conventions: Vec<CallingConvention>,
/// A list of all known physical registers for the CPU architecture.
/// It contains only base registers, i.e. sub registers of other registers are not included.
pub register_list: Vec<Variable>,
/// Contains the properties of C data types. (e.g. size) /// Contains the properties of C data types. (e.g. size)
pub datatype_properties: DatatypeProperties, pub datatype_properties: DatatypeProperties,
} }
...@@ -565,10 +568,13 @@ impl Project { ...@@ -565,10 +568,13 @@ impl Project {
/// Passes: /// Passes:
/// - Replace trivial expressions like `a XOR a` with their result. /// - Replace trivial expressions like `a XOR a` with their result.
/// - Replace jumps to nonexisting TIDs with jumps to an artificial sink target in the CFG. /// - Replace jumps to nonexisting TIDs with jumps to an artificial sink target in the CFG.
/// - Remove dead register assignments
#[must_use] #[must_use]
pub fn normalize(&mut self) -> Vec<LogMessage> { pub fn normalize(&mut self) -> Vec<LogMessage> {
self.substitute_trivial_expressions(); self.substitute_trivial_expressions();
self.remove_references_to_nonexisting_tids() let logs = self.remove_references_to_nonexisting_tids();
crate::analysis::dead_variable_elimination::remove_dead_var_assignments(self);
logs
} }
} }
...@@ -665,6 +671,10 @@ mod tests { ...@@ -665,6 +671,10 @@ mod tests {
impl Project { impl Project {
pub fn mock_empty() -> Project { pub fn mock_empty() -> Project {
let register_list = vec!["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"]
.into_iter()
.map(|name| Variable::mock(name, ByteSize::new(8)))
.collect();
Project { Project {
program: Term { program: Term {
tid: Tid::new("program_tid"), tid: Tid::new("program_tid"),
...@@ -673,6 +683,7 @@ mod tests { ...@@ -673,6 +683,7 @@ mod tests {
cpu_architecture: "x86_64".to_string(), cpu_architecture: "x86_64".to_string(),
stack_pointer_register: Variable::mock("RSP", 8u64), stack_pointer_register: Variable::mock("RSP", 8u64),
calling_conventions: Vec::new(), calling_conventions: Vec::new(),
register_list,
datatype_properties: DatatypeProperties::mock(), datatype_properties: DatatypeProperties::mock(),
} }
} }
......
...@@ -371,6 +371,17 @@ pub struct RegisterProperties { ...@@ -371,6 +371,17 @@ pub struct RegisterProperties {
pub size: ByteSize, pub size: ByteSize,
} }
impl From<&RegisterProperties> for IrVariable {
    /// Convert the given `register_prop` into a non-temporary variable
    /// with the same name and size as the register.
    fn from(register_prop: &RegisterProperties) -> IrVariable {
        let name = register_prop.register.clone();
        let size = register_prop.size;
        IrVariable {
            name,
            size,
            is_temp: false,
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
......
...@@ -642,6 +642,17 @@ impl Project { ...@@ -642,6 +642,17 @@ impl Project {
}); });
} }
} }
let register_list = self
.register_properties
.iter()
.filter_map(|reg| {
if reg.register == reg.base_register {
Some(reg.into())
} else {
None
}
})
.collect();
IrProject { IrProject {
program, program,
cpu_architecture: self.cpu_architecture, cpu_architecture: self.cpu_architecture,
...@@ -651,6 +662,7 @@ impl Project { ...@@ -651,6 +662,7 @@ impl Project {
.into_iter() .into_iter()
.map(|cconv| cconv.into()) .map(|cconv| cconv.into())
.collect(), .collect(),
register_list,
datatype_properties: self.datatype_properties, datatype_properties: self.datatype_properties,
} }
} }
......
...@@ -229,7 +229,6 @@ mod tests { ...@@ -229,7 +229,6 @@ mod tests {
let mut error_log = Vec::new(); let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_119", "Memory"); let mut tests = all_test_cases("cwe_119", "Memory");
mark_skipped(&mut tests, "aarch64", "clang"); // TODO: Check reason for failure!
mark_skipped(&mut tests, "mips64", "gcc"); // TODO: Check reason for failure! mark_skipped(&mut tests, "mips64", "gcc"); // TODO: Check reason for failure!
mark_skipped(&mut tests, "mips64el", "gcc"); // TODO: Check reason for failure! mark_skipped(&mut tests, "mips64el", "gcc"); // TODO: Check reason for failure!
mark_skipped(&mut tests, "mips", "clang"); // TODO: Check reason for failure! mark_skipped(&mut tests, "mips", "clang"); // TODO: Check reason for failure!
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment