Unverified Commit 9a0ae7a3 by Enkelmann Committed by GitHub

IR refactoring (#205)

parent 3a25050e
use super::{CastOpType, Expression, Variable};
use crate::prelude::*;
/// A side-effectful operation.
///
/// A `Def` is either a register assignment or a memory load/store operation.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Def {
/// A memory load into the register given by `var`.
Load {
/// The target register of the memory load.
/// The size of `var` also determines the number of bytes read from memory.
var: Variable,
/// The expression computing the address to read from.
/// The size of `address` is required to match the pointer size of the corresponding CPU architecture.
address: Expression,
},
/// A memory store operation.
Store {
/// The expression computing the address that is written to.
/// The size of `address` is required to match the pointer size of the corresponding CPU architecture.
address: Expression,
/// The expression computing the value that is written to memory.
/// The size of `value` also determines the number of bytes written.
value: Expression,
},
/// A register assignment, assigning the result of the expression `value` to the register `var`.
Assign {
/// The register that is written to.
var: Variable,
/// The expression computing the value that is assigned to the register.
value: Expression,
},
}
impl Term<Def> {
    /// Checks whether this instruction zero-extends a given sub register into a given register.
    ///
    /// Returns the TID of this term if it is an assignment to the register named `output_name`
    /// whose value is an `IntZExt` cast applied directly to the variable named
    /// `output_sub_register`. Returns `None` in every other case, in particular if the cast
    /// argument is a more complex expression than a plain variable.
    pub fn check_for_zero_extension(
        &self,
        output_name: String,
        output_sub_register: String,
    ) -> Option<Tid> {
        if let Def::Assign {
            var: target,
            value:
                Expression::Cast {
                    op: CastOpType::IntZExt,
                    arg,
                    ..
                },
        } = &self.term
        {
            // Only a cast applied directly to a variable counts as a match.
            if let Expression::Var(source) = &**arg {
                if target.name == output_name && source.name == output_sub_register {
                    return Some(self.tid.clone());
                }
            }
        }
        None
    }

    /// Replaces every occurrence of `input_var` in the input expressions of this instruction
    /// with `replace_with_expression`.
    ///
    /// The input expressions are the value of an assignment, the address of a load
    /// and both the address and the value of a store.
    /// The target variable of assignment- and load-instructions is left untouched.
    pub fn substitute_input_var(
        &mut self,
        input_var: &Variable,
        replace_with_expression: &Expression,
    ) {
        // Every variant has at least one input expression; only stores have a second one.
        let (first, second) = match &mut self.term {
            Def::Assign { value, .. } => (value, None),
            Def::Load { address, .. } => (address, None),
            Def::Store { address, value } => (address, Some(value)),
        };
        first.substitute_input_var(input_var, replace_with_expression);
        if let Some(expression) = second {
            expression.substitute_input_var(input_var, replace_with_expression);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::intermediate_representation::BinOpType;

    /// Exercises `check_for_zero_extension` on a matching zero extension,
    /// on a zero extension of a non-variable expression and on a sign extension.
    #[test]
    fn zero_extension_check() {
        /// Builds a non-temporary register variable of the given size.
        fn register(name: &str, size: ByteSize) -> Variable {
            Variable {
                name: String::from(name),
                size,
                is_temp: false,
            }
        }

        /// Builds the term `RAX := cast<op, 8 bytes>(arg)` with the TID `zero_tid`.
        fn cast_into_rax(op: CastOpType, arg: Expression) -> Term<Def> {
            Term {
                tid: Tid::new("zero_tid"),
                term: Def::Assign {
                    var: register("RAX", ByteSize::new(8)),
                    value: Expression::Cast {
                        op,
                        size: ByteSize::new(8),
                        arg: Box::new(arg),
                    },
                },
            }
        }

        let eax = Expression::Var(register("EAX", ByteSize::new(4)));
        let eax_minus_ecx = Expression::BinOp {
            op: BinOpType::IntSub,
            lhs: Box::new(Expression::Var(register("EAX", ByteSize::new(4)))),
            rhs: Box::new(Expression::Var(register("ECX", ByteSize::new(4)))),
        };

        let zero_extend_def = cast_into_rax(CastOpType::IntZExt, eax.clone());
        // An expression that is a zero extension but does not directly contain a variable
        let zero_extend_but_no_var_def = cast_into_rax(CastOpType::IntZExt, eax_minus_ecx);
        let non_zero_extend_def = cast_into_rax(CastOpType::IntSExt, eax);

        let check = |def: &Term<Def>| {
            def.check_for_zero_extension(String::from("RAX"), String::from("EAX"))
        };
        assert_eq!(check(&zero_extend_def), Some(Tid::new("zero_tid")));
        assert_eq!(check(&zero_extend_but_no_var_def), None);
        assert_eq!(check(&non_zero_extend_def), None);
    }
}
use super::Expression;
use crate::prelude::*;
/// A `Jmp` instruction affects the control flow of a program, i.e. it may change the instruction pointer.
/// With the exception of `CallOther`, it has no other side effects.
///
/// `Jmp` instructions carry some semantic information with them, like whether a jump is intra- or interprocedural.
/// Note that this semantic information may not always be correct.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Jmp {
/// A direct intraprocedural jump to the targeted `Blk` term identifier.
Branch(Tid),
/// An indirect intraprocedural jump to the address that the given expression evaluates to.
BranchInd(Expression),
/// A direct intraprocedural jump that is only taken if the condition evaluates to true (i.e. not zero).
CBranch {
/// The term ID of the target block of the jump.
target: Tid,
/// The jump is only taken if this expression evaluates to `true` (i.e. not zero).
condition: Expression,
},
/// A direct interprocedural jump representing a subroutine call.
///
/// Note that this is syntactically equivalent to a `Jmp::Branch`.
Call {
/// The term ID of the target subroutine (`Sub`) or extern symbol of the call.
target: Tid,
/// The term ID of the block that the called function returns to.
/// May be `None` if it is assumed that the called function never returns.
return_: Option<Tid>,
},
/// An indirect interprocedural jump to the address the `target` expression evaluates to,
/// representing a subroutine call.
///
/// Note that this is syntactically equivalent to a `Jmp::BranchInd`.
CallInd {
/// An expression computing the target address of the call.
target: Expression,
/// The term ID of the block that the called function returns to.
/// May be `None` if it is assumed that the called function never returns.
return_: Option<Tid>,
},
/// An indirect interprocedural jump indicating a return from a subroutine.
///
/// Note that this is syntactically equivalent to a `Jmp::BranchInd`.
Return(Expression),
/// This instruction is used for all side effects that are not representable by other instructions
/// or not supported by the disassembler.
///
/// E.g. syscalls and other interrupts are mapped to `CallOther`.
/// Assembly instructions that the disassembler does not support are also mapped to `CallOther`.
/// One can use the `description` field to match for and handle known side effects (e.g. syscalls).
CallOther {
/// A description of the side effect.
description: String,
/// The term identifier of the block
/// where the disassembler assumes that execution will continue after handling of the side effect.
return_: Option<Tid>,
},
}
......@@ -18,6 +18,18 @@ mod expression;
pub use expression::*;
mod term;
pub use term::*;
mod def;
pub use def::*;
mod jmp;
pub use jmp::*;
mod blk;
pub use blk::*;
mod sub;
pub use sub::*;
mod program;
pub use program::*;
mod project;
pub use project::*;
/// An unsigned number of bytes.
///
......@@ -172,9 +184,24 @@ impl From<String> for Datatype {
#[cfg(test)]
mod tests {
use super::*;
use apint::BitWidth;
use super::*;
impl DatatypeProperties {
    /// Returns a fixed set of data type sizes (in bytes) for use in unit tests.
    pub fn mock() -> Self {
        Self {
            // Integer-like types.
            char_size: ByteSize::new(1),
            short_size: ByteSize::new(2),
            integer_size: ByteSize::new(4),
            long_size: ByteSize::new(4),
            long_long_size: ByteSize::new(8),
            // Floating point types.
            float_size: ByteSize::new(4),
            double_size: ByteSize::new(8),
            long_double_size: ByteSize::new(8),
            // Pointers.
            pointer_size: ByteSize::new(8),
        }
    }
}
#[test]
fn check_bit_to_byte_conversion() {
......
use super::{Blk, ExternSymbol, Sub};
use crate::prelude::*;
/// The `Program` structure represents a disassembled binary.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
/// The known functions contained in the binary.
pub subs: Vec<Term<Sub>>,
/// Extern symbols linked to the binary by the linker.
pub extern_symbols: Vec<ExternSymbol>,
/// Entry points into the binary,
/// i.e. the term identifiers of functions that may be called from outside of the binary.
pub entry_points: Vec<Tid>,
/// An offset that has been added to all addresses in the program compared to the addresses
/// as specified in the binary file.
///
/// In certain cases, e.g. if the binary specifies a segment to be loaded at address 0,
/// the Ghidra backend may shift the whole binary image by a constant value in memory.
/// Thus addresses as specified by the binary and addresses as reported by Ghidra may differ by a constant offset,
/// which is stored in this value.
pub address_base_offset: u64,
}
impl Program {
    /// Find a block term by its term identifier.
    ///
    /// Returns the first block of any subroutine in `subs` whose TID equals `tid`,
    /// or `None` if no such block exists.
    ///
    /// WARNING: The function simply iterates through all blocks of all subroutines,
    /// i.e. it is very inefficient for large projects!
    pub fn find_block(&self, tid: &Tid) -> Option<&Term<Blk>> {
        self.subs
            .iter()
            .flat_map(|sub| sub.term.blocks.iter())
            .find(|block| block.tid == *tid)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    impl Program {
        /// Returns a program without subroutines, extern symbols or entry points
        /// and with an address base offset of zero.
        pub fn mock_empty() -> Self {
            Self {
                subs: vec![],
                extern_symbols: vec![],
                entry_points: vec![],
                address_base_offset: 0,
            }
        }
    }
}
use super::{Blk, Datatype, Project, Variable};
use crate::prelude::*;
/// A `Sub` or subroutine represents a function with a given name and a list of basic blocks belonging to it.
///
/// Subroutines are *single-entry*,
/// i.e. calling a subroutine will execute the first block in the list of basic blocks.
/// A subroutine may have multiple exits, which are identified by `Jmp::Return` instructions.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Sub {
/// The name of the subroutine.
pub name: String,
/// The basic blocks belonging to the subroutine.
/// The first block is also the entry point of the subroutine.
pub blocks: Vec<Term<Blk>>,
}
/// A parameter or return argument of a function.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Arg {
/// The argument is passed in the given register.
Register {
/// The variable object representing the register.
var: Variable,
/// An optional data type indicator.
data_type: Option<Datatype>,
},
/// The argument is passed on the stack.
/// It is positioned at the given offset (in bytes) relative to the stack pointer on function entry
/// and has the given size.
Stack {
/// The position of the argument on the stack,
/// given as an offset relative to the stack pointer on function entry.
offset: i64,
/// The size in bytes of the argument.
size: ByteSize,
/// An optional data type indicator.
data_type: Option<Datatype>,
},
}
impl Arg {
    /// Returns a copy of the optional data type indicator of the argument,
    /// regardless of whether the argument is passed in a register or on the stack.
    pub fn get_data_type(&self) -> Option<Datatype> {
        match self {
            Arg::Register { data_type, .. } | Arg::Stack { data_type, .. } => data_type.clone(),
        }
    }
}
/// An extern symbol represents a function that is dynamically linked from another binary.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
/// The term ID of the extern symbol.
pub tid: Tid,
/// Addresses of possibly multiple locations of the same extern symbol.
pub addresses: Vec<String>,
/// The name of the extern symbol.
pub name: String,
/// The name of the calling convention used for the extern symbol, if known.
pub calling_convention: Option<String>,
/// Parameters of an extern symbol.
/// May be empty if there are no parameters or the parameters are unknown.
pub parameters: Vec<Arg>,
/// Return values of an extern symbol.
/// May be empty if there is no return value or the return values are unknown.
pub return_values: Vec<Arg>,
/// If set to `true`, the function is assumed to never return to its caller when called.
pub no_return: bool,
/// If the function has a variable number of parameters, this flag is set to `true`.
pub has_var_args: bool,
}
impl ExternSymbol {
    /// If the extern symbol has exactly one return value that is passed in a register,
    /// return the register.
    ///
    /// # Errors
    ///
    /// Returns an error if the number of return values is not exactly one
    /// or if the single return value is passed on the stack.
    pub fn get_unique_return_register(&self) -> Result<&Variable, Error> {
        match self.return_values.as_slice() {
            [Arg::Register { var, .. }] => Ok(var),
            [Arg::Stack { .. }] => Err(anyhow!("Return value is passed on the stack")),
            _ => Err(anyhow!("Wrong number of return values")),
        }
    }

    /// If the extern symbol has exactly one parameter, return the parameter.
    ///
    /// # Errors
    ///
    /// Returns an error if the number of parameters is not exactly one.
    pub fn get_unique_parameter(&self) -> Result<&Arg, Error> {
        match self.parameters.as_slice() {
            [parameter] => Ok(parameter),
            _ => Err(anyhow!("Wrong number of parameter values")),
        }
    }

    /// Get the calling convention corresponding to the extern symbol.
    ///
    /// The calling convention is looked up by name in `project.calling_conventions`.
    /// If the extern symbol has no calling convention set, the name `"default"` is used.
    ///
    /// # Panics
    ///
    /// Panics if the project contains no calling convention with the looked-up name.
    pub fn get_calling_convention<'a>(&self, project: &'a Project) -> &'a CallingConvention {
        let cconv_name: &str = self.calling_convention.as_deref().unwrap_or("default");
        project
            .calling_conventions
            .iter()
            .find(|cconv| cconv.name == cconv_name)
            .unwrap_or_else(|| {
                // State which lookup failed instead of a bare `unwrap` panic.
                panic!(
                    "Calling convention `{}` of extern symbol `{}` not found in project",
                    cconv_name, self.name
                )
            })
    }
}
/// Calling convention related data.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct CallingConvention {
/// The name of the calling convention.
/// In serialized form this field is named `calling_convention`.
#[serde(rename = "calling_convention")]
pub name: String,
/// Possible integer parameter registers.
pub integer_parameter_register: Vec<String>,
/// Possible float parameter registers.
pub float_parameter_register: Vec<String>,
/// A list of possible return registers.
pub return_register: Vec<String>,
/// A list of callee-saved registers,
/// i.e. the values of these registers should be the same after the call as they were before the call.
pub callee_saved_register: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    impl Sub {
        /// Returns a subroutine term without blocks.
        /// Both the TID and the name of the subroutine are derived from `name`.
        pub fn mock(name: impl ToString) -> Term<Sub> {
            let tid = Tid::new(name.to_string());
            let term = Sub {
                name: name.to_string(),
                blocks: Vec::new(),
            };
            Term { tid, term }
        }
    }

    impl CallingConvention {
        /// Returns a mock calling convention with one integer and one float parameter register.
        pub fn mock() -> CallingConvention {
            // NOTE(review): "XMMO" ends in the letter `O`, not the digit `0`.
            // Kept verbatim; confirm whether "XMM0" was intended.
            Self::mock_with_parameter_registers(
                vec!["RDI".to_string()],
                vec!["XMMO".to_string()],
            )
        }

        /// Returns a mock calling convention using the given parameter registers.
        /// The return register is `RAX` and the only callee-saved register is `RBP`.
        pub fn mock_with_parameter_registers(
            integer_parameter_register: Vec<String>,
            float_parameter_register: Vec<String>,
        ) -> CallingConvention {
            // The name is chosen so that the mock is useable as standard calling convention in tests.
            let name = "__stdcall".to_string();
            CallingConvention {
                name,
                integer_parameter_register,
                float_parameter_register,
                return_register: vec!["RAX".to_string()],
                callee_saved_register: vec!["RBP".to_string()],
            }
        }
    }

    impl Arg {
        /// Returns a register argument without data type information
        /// for a mock variable with the given name and size.
        pub fn mock_register(name: impl ToString, size_in_bytes: impl Into<ByteSize>) -> Arg {
            let var = Variable::mock(name.to_string(), size_in_bytes);
            Arg::Register {
                var,
                data_type: None,
            }
        }
    }

    impl ExternSymbol {
        /// Returns a mock extern symbol named `mock_symbol`
        /// with one parameter in `RDI` and one return value in `RAX`.
        pub fn mock() -> ExternSymbol {
            let parameters = vec![Arg::mock_register("RDI", 8)];
            let return_values = vec![Arg::mock_register("RAX", 8)];
            ExternSymbol {
                tid: Tid::new("mock_symbol"),
                addresses: vec!["UNKNOWN".to_string()],
                name: "mock_symbol".to_string(),
                calling_convention: Some("__stdcall".to_string()),
                parameters,
                return_values,
                no_return: false,
                has_var_args: false,
            }
        }
    }
}
......@@ -2,10 +2,10 @@
//! for different terms.
#[cfg(test)]
use crate::intermediate_representation::{Expression, Variable};
use crate::intermediate_representation::{Def, Expression, Jmp, Variable};
#[cfg(test)]
use super::{Def, Jmp, Term, Tid};
use super::{Term, Tid};
/// ## Helper functions for building defs
#[cfg(test)]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment