Commit c1e142eb by Enkelmann Committed by Enkelmann

Finish P-Code to internal IR deserialization.

parent b70f9910
...@@ -266,7 +266,7 @@ impl From<Expression> for IrExpression { ...@@ -266,7 +266,7 @@ impl From<Expression> for IrExpression {
}, },
HIGH => { HIGH => {
assert!(width % 8 == 0); assert!(width % 8 == 0);
let low_byte = (arg.bitsize() - BitSize::from(width)).into(); let low_byte = (arg.bitsize() - width).into();
IrExpression::Subpiece { IrExpression::Subpiece {
arg: Box::new(IrExpression::from(*arg)), arg: Box::new(IrExpression::from(*arg)),
low_byte, low_byte,
......
use crate::prelude::*; use crate::prelude::*;
use crate::term::{Term, Tid};
use derive_more::*; use derive_more::*;
use std::convert::TryFrom; use std::convert::TryFrom;
...@@ -60,3 +59,17 @@ impl From<ByteSize> for apint::BitWidth { ...@@ -60,3 +59,17 @@ impl From<ByteSize> for apint::BitWidth {
apint::BitWidth::from((u64::from(bytesize) * 8) as usize) apint::BitWidth::from((u64::from(bytesize) * 8) as usize)
} }
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Converting 8 bits into a byte size yields one byte,
    /// and converting that byte size back yields 8 bits again.
    #[test]
    fn check_bit_to_byte_conversion() {
        let bit_count: BitSize = 8;
        let byte_count: ByteSize = bit_count.into();
        assert_eq!(u64::from(byte_count), 1);

        let round_trip: BitSize = byte_count.into();
        assert_eq!(round_trip, 8);
    }
}
...@@ -52,3 +52,33 @@ pub struct Sub { ...@@ -52,3 +52,33 @@ pub struct Sub {
pub name: String, pub name: String,
pub blocks: Vec<Term<Blk>>, pub blocks: Vec<Term<Blk>>,
} }
/// An argument or return value of an extern symbol (see the `parameters` and `return_values` of `ExternSymbol`).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Arg {
/// The value is held in the given register variable.
Register(Variable),
/// The value is located in memory, described by an offset and a size in bytes.
/// NOTE(review): presumably the offset is relative to the stack pointer - confirm.
Stack { offset: i64, size: ByteSize },
}
/// An extern symbol together with the information how its arguments and return values are passed.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
/// The term identifier of the symbol.
pub tid: Tid,
/// The name of the symbol.
pub name: String,
/// The name of the calling convention of the symbol, if one is known.
pub calling_convention: Option<String>,
/// The input parameters of the symbol.
pub parameters: Vec<Arg>,
/// The return values of the symbol.
pub return_values: Vec<Arg>,
/// NOTE(review): presumably `true` if a call to the symbol never returns to the caller - confirm.
pub no_return: bool,
}
/// A program consisting of subroutines, extern symbols and entry points.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
/// The subroutines of the program.
pub subs: Vec<Term<Sub>>,
/// The extern symbols known to the program.
pub extern_symbols: Vec<ExternSymbol>,
/// The term identifiers of the entry points of the program.
/// NOTE(review): presumably these are the `Tid`s of subroutines - confirm.
pub entry_points: Vec<Tid>,
}
/// A project bundles a program with information about the CPU architecture it is written for.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
/// The program term of the project.
pub program: Term<Program>,
/// The name of the CPU architecture.
pub cpu_architecture: String,
/// The register that is used as the stack pointer.
pub stack_pointer_register: Variable,
}
use super::Def;
use crate::intermediate_representation::BinOpType as IrBinOpType; use crate::intermediate_representation::BinOpType as IrBinOpType;
use crate::intermediate_representation::ByteSize; use crate::intermediate_representation::ByteSize;
use crate::intermediate_representation::CastOpType as IrCastOpType; use crate::intermediate_representation::CastOpType as IrCastOpType;
...@@ -5,17 +6,19 @@ use crate::intermediate_representation::Expression as IrExpression; ...@@ -5,17 +6,19 @@ use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::UnOpType as IrUnOpType; use crate::intermediate_representation::UnOpType as IrUnOpType;
use crate::intermediate_representation::Variable as IrVariable; use crate::intermediate_representation::Variable as IrVariable;
use crate::prelude::*; use crate::prelude::*;
use crate::term::{Term, Tid};
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Variable { pub struct Variable {
pub name: Option<String>, pub name: Option<String>,
pub value: Option<String>, pub value: Option<String>,
pub address: Option<String>,
pub size: ByteSize, pub size: ByteSize,
pub is_virtual: bool, pub is_virtual: bool,
} }
impl From<Variable> for IrVariable { impl From<Variable> for IrVariable {
/// Translate a P-Code variable into a register variable of the internally used IR.
/// Panic if the variable does not represent a register.
fn from(pcode_var: Variable) -> IrVariable { fn from(pcode_var: Variable) -> IrVariable {
IrVariable { IrVariable {
name: pcode_var.name.unwrap(), name: pcode_var.name.unwrap(),
...@@ -26,12 +29,24 @@ impl From<Variable> for IrVariable { ...@@ -26,12 +29,24 @@ impl From<Variable> for IrVariable {
} }
impl From<Variable> for IrExpression { impl From<Variable> for IrExpression {
/// Translate a P-Code variable into a `Var` or `Const` expression of the internally used IR.
/// Panics if the translation fails.
fn from(pcode_var: Variable) -> IrExpression { fn from(pcode_var: Variable) -> IrExpression {
match (&pcode_var.name, &pcode_var.value) { match (&pcode_var.name, &pcode_var.value) {
(Some(_name), None) => IrExpression::Var(pcode_var.into()), (Some(_name), None) => IrExpression::Var(pcode_var.into()),
(None, Some(hex_value)) => { (None, Some(_hex_value)) => IrExpression::Const(pcode_var.parse_to_bitvector()),
_ => panic!("Conversion failed:\n{:?}", pcode_var),
}
}
}
impl Variable {
/// Parses a variable representing a concrete value or a concrete address to a bitvector containing the value or address.
pub fn parse_to_bitvector(&self) -> Bitvector {
match (&self.value, &self.address) {
(Some(hex_value), None) | (None, Some(hex_value)) => {
// TODO: Implement parsing for large hex values. // TODO: Implement parsing for large hex values.
if u64::from(pcode_var.size) > 8 { if u64::from(self.size) > 8 {
panic!( panic!(
"Parsing of immediates greater than 8 bytes not yet implemented: {}", "Parsing of immediates greater than 8 bytes not yet implemented: {}",
hex_value hex_value
...@@ -39,25 +54,58 @@ impl From<Variable> for IrExpression { ...@@ -39,25 +54,58 @@ impl From<Variable> for IrExpression {
} }
let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap(); let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap();
let mut bitvector: Bitvector = Bitvector::from_u64(val); let mut bitvector: Bitvector = Bitvector::from_u64(val);
bitvector.truncate(pcode_var.size).unwrap(); bitvector.truncate(self.size).unwrap();
IrExpression::Const(bitvector) bitvector
} }
_ => panic!(), _ => panic!(),
} }
} }
}
impl From<Variable> for ByteSize { /// Generate a virtual variable with the given name and size.
fn from(pcode_var: Variable) -> ByteSize { pub fn new_virtual(name: impl Into<String>, size: ByteSize) -> Variable {
match (&pcode_var.name, &pcode_var.value) { Variable {
(None, Some(hex_value)) => { name: Some(name.into()),
// TODO: Implement parsing for large hex values. value: None,
if u64::from(pcode_var.size) > 8 { address: None,
panic!( size,
"Parsing of immediates greater than 8 bytes not yet implemented: {}", is_virtual: true,
hex_value
);
} }
}
/// Generate a non-virtual variable that represents the constant given by `value_string`.
///
/// The returned variable has neither a name nor an address.
pub fn new_const(value_string: impl Into<String>, size: ByteSize) -> Variable {
    let value = Some(value_string.into());
    Variable {
        value,
        size,
        name: None,
        address: None,
        is_virtual: false,
    }
}
/// Build the `LOAD` instruction that loads from the constant address of this variable
/// into a virtual register.
///
/// The target of the load is a new virtual register named `target_register_name`
/// with the same size as `self`.
/// The address of the load is a constant created from `self.address`.
///
/// Note that the size of the address constant is set to zero,
/// because the correct size for pointers is not known inside this function.
///
/// Panics if `self.address` is `None`.
pub fn to_load_def(&self, target_register_name: impl Into<String>) -> Def {
    let target_register = Variable::new_virtual(target_register_name, self.size);
    // Zero is only a placeholder, since the correct pointer size is not known here.
    let address_constant =
        Variable::new_const(self.address.as_ref().unwrap(), ByteSize::from(0_u64));
    Def {
        lhs: Some(target_register),
        rhs: Expression {
            mnemonic: ExpressionType::LOAD,
            input0: None,
            input1: Some(address_constant),
            input2: None,
        },
    }
}
/// Translates a variable into the byte size that it represents. Panics on error.
pub fn parse_to_bytesize(self) -> ByteSize {
match (&self.name, &self.value) {
(None, Some(hex_value)) => {
assert!(u64::from(self.size) <= 8);
let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap(); let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap();
val.into() val.into()
} }
...@@ -144,6 +192,8 @@ pub enum ExpressionType { ...@@ -144,6 +192,8 @@ pub enum ExpressionType {
} }
impl From<ExpressionType> for IrBinOpType { impl From<ExpressionType> for IrBinOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrBinOpType { fn from(expr_type: ExpressionType) -> IrBinOpType {
use ExpressionType::*; use ExpressionType::*;
use IrBinOpType::*; use IrBinOpType::*;
...@@ -195,6 +245,8 @@ impl From<ExpressionType> for IrBinOpType { ...@@ -195,6 +245,8 @@ impl From<ExpressionType> for IrBinOpType {
} }
impl From<ExpressionType> for IrUnOpType { impl From<ExpressionType> for IrUnOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrUnOpType { fn from(expr_type: ExpressionType) -> IrUnOpType {
use ExpressionType::*; use ExpressionType::*;
match expr_type { match expr_type {
...@@ -214,6 +266,8 @@ impl From<ExpressionType> for IrUnOpType { ...@@ -214,6 +266,8 @@ impl From<ExpressionType> for IrUnOpType {
} }
impl From<ExpressionType> for IrCastOpType { impl From<ExpressionType> for IrCastOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrCastOpType { fn from(expr_type: ExpressionType) -> IrCastOpType {
use ExpressionType::*; use ExpressionType::*;
match expr_type { match expr_type {
...@@ -250,15 +304,15 @@ mod tests { ...@@ -250,15 +304,15 @@ mod tests {
let _: Expression = serde_json::from_str( let _: Expression = serde_json::from_str(
r#" r#"
{ {
"mnemonic": "INT_SUB", "mnemonic": "INT_SLESS",
"input0": { "input0": {
"name": "RSP", "name": "EAX",
"size": 8, "size": 4,
"is_virtual": false "is_virtual": false
}, },
"input1": { "input1": {
"name": "00000008", "value": "00000000",
"size": 8, "size": 4,
"is_virtual": false "is_virtual": false
} }
} }
......
use crate::prelude::*;
use crate::term::{Term, Tid};
use derive_more::*;
use std::convert::TryFrom;
mod expressions; mod expressions;
pub use expressions::*; pub use expressions::*;
mod term; mod term;
......
use super::{Expression, Variable}; use super::{Expression, ExpressionType, Variable};
use crate::intermediate_representation::Arg as IrArg;
use crate::intermediate_representation::Blk as IrBlk; use crate::intermediate_representation::Blk as IrBlk;
use crate::intermediate_representation::ByteSize;
use crate::intermediate_representation::Def as IrDef; use crate::intermediate_representation::Def as IrDef;
use crate::intermediate_representation::Expression as IrExpression; use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::ExternSymbol as IrExternSymbol;
use crate::intermediate_representation::Jmp as IrJmp; use crate::intermediate_representation::Jmp as IrJmp;
use crate::intermediate_representation::Program as IrProgram;
use crate::intermediate_representation::Project as IrProject;
use crate::intermediate_representation::Sub as IrSub; use crate::intermediate_representation::Sub as IrSub;
use crate::prelude::*; use crate::prelude::*;
use crate::term::{Term, Tid}; use crate::term::{Term, Tid};
...@@ -35,6 +40,7 @@ pub enum JmpType { ...@@ -35,6 +40,7 @@ pub enum JmpType {
} }
impl From<Jmp> for IrJmp { impl From<Jmp> for IrJmp {
/// Convert a P-Code jump to the internally used IR.
fn from(jmp: Jmp) -> IrJmp { fn from(jmp: Jmp) -> IrJmp {
use JmpType::*; use JmpType::*;
let unwrap_label_direct = |label| { let unwrap_label_direct = |label| {
...@@ -77,7 +83,6 @@ impl From<Jmp> for IrJmp { ...@@ -77,7 +83,6 @@ impl From<Jmp> for IrJmp {
} }
} }
// TODO: Remove since code duplication?
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Label { pub enum Label {
Direct(Tid), Direct(Tid),
...@@ -86,20 +91,21 @@ pub enum Label { ...@@ -86,20 +91,21 @@ pub enum Label {
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Def { pub struct Def {
pub lhs: Variable, pub lhs: Option<Variable>,
pub rhs: Expression, pub rhs: Expression,
} }
impl From<Def> for IrDef { impl From<Def> for IrDef {
/// Convert a P-Code instruction to the internally used IR.
fn from(def: Def) -> IrDef { fn from(def: Def) -> IrDef {
use super::ExpressionType::*; use super::ExpressionType::*;
match def.rhs.mnemonic { match def.rhs.mnemonic {
COPY => IrDef::Assign { COPY => IrDef::Assign {
var: def.lhs.into(), var: def.lhs.unwrap().into(),
value: def.rhs.input0.unwrap().into(), value: def.rhs.input0.unwrap().into(),
}, },
LOAD => IrDef::Load { LOAD => IrDef::Load {
var: def.lhs.into(), var: def.lhs.unwrap().into(),
address: def.rhs.input1.unwrap().into(), address: def.rhs.input1.unwrap().into(),
}, },
STORE => IrDef::Store { STORE => IrDef::Store {
...@@ -112,7 +118,7 @@ impl From<Def> for IrDef { ...@@ -112,7 +118,7 @@ impl From<Def> for IrDef {
| INT_DIV | INT_REM | INT_SDIV | INT_SREM | BOOL_XOR | BOOL_AND | BOOL_OR | INT_DIV | INT_REM | INT_SDIV | INT_SREM | BOOL_XOR | BOOL_AND | BOOL_OR
| FLOAT_EQUAL | FLOAT_NOTEQUAL | FLOAT_LESS | FLOAT_LESSEQUAL | FLOAT_ADD | FLOAT_EQUAL | FLOAT_NOTEQUAL | FLOAT_LESS | FLOAT_LESSEQUAL | FLOAT_ADD
| FLOAT_SUB | FLOAT_MULT | FLOAT_DIV => IrDef::Assign { | FLOAT_SUB | FLOAT_MULT | FLOAT_DIV => IrDef::Assign {
var: def.lhs.into(), var: def.lhs.unwrap().into(),
value: IrExpression::BinOp { value: IrExpression::BinOp {
op: def.rhs.mnemonic.into(), op: def.rhs.mnemonic.into(),
lhs: Box::new(def.rhs.input0.unwrap().into()), lhs: Box::new(def.rhs.input0.unwrap().into()),
...@@ -120,26 +126,26 @@ impl From<Def> for IrDef { ...@@ -120,26 +126,26 @@ impl From<Def> for IrDef {
}, },
}, },
SUBPIECE => IrDef::Assign { SUBPIECE => IrDef::Assign {
var: def.lhs.clone().into(), var: def.lhs.clone().unwrap().into(),
value: IrExpression::Subpiece { value: IrExpression::Subpiece {
low_byte: def.rhs.input1.unwrap().into(), low_byte: def.rhs.input1.unwrap().parse_to_bytesize(),
size: def.lhs.size, size: def.lhs.unwrap().size,
arg: Box::new(def.rhs.input0.unwrap().into()), arg: Box::new(def.rhs.input0.unwrap().into()),
}, },
}, },
INT_NEGATE | INT_2COMP | BOOL_NEGATE | FLOAT_NEGATE | FLOAT_ABS | FLOAT_SQRT INT_NEGATE | INT_2COMP | BOOL_NEGATE | FLOAT_NEGATE | FLOAT_ABS | FLOAT_SQRT
| FLOAT_CEIL | FLOAT_FLOOR | FLOAT_ROUND | FLOAT_NAN => IrDef::Assign { | FLOAT_CEIL | FLOAT_FLOOR | FLOAT_ROUND | FLOAT_NAN => IrDef::Assign {
var: def.lhs.into(), var: def.lhs.unwrap().into(),
value: IrExpression::UnOp { value: IrExpression::UnOp {
op: def.rhs.mnemonic.into(), op: def.rhs.mnemonic.into(),
arg: Box::new(def.rhs.input0.unwrap().into()), arg: Box::new(def.rhs.input0.unwrap().into()),
}, },
}, },
INT_ZEXT | INT_SEXT | INT2FLOAT | FLOAT2FLOAT | TRUNC => IrDef::Assign { INT_ZEXT | INT_SEXT | INT2FLOAT | FLOAT2FLOAT | TRUNC => IrDef::Assign {
var: def.lhs.clone().into(), var: def.lhs.clone().unwrap().into(),
value: IrExpression::Cast { value: IrExpression::Cast {
op: def.rhs.mnemonic.into(), op: def.rhs.mnemonic.into(),
size: def.lhs.size, size: def.lhs.unwrap().size,
arg: Box::new(def.rhs.input0.unwrap().into()), arg: Box::new(def.rhs.input0.unwrap().into()),
}, },
}, },
...@@ -147,6 +153,19 @@ impl From<Def> for IrDef { ...@@ -147,6 +153,19 @@ impl From<Def> for IrDef {
} }
} }
impl Def {
    /// Replace an address size of zero in a `LOAD` instruction with the given pointer size.
    ///
    /// Instructions other than `LOAD` and `LOAD`s with a nonzero address size are left unchanged.
    /// Panics if a `LOAD` instruction has no `input1`.
    pub fn correct_pointer_sizes(&mut self, pointer_size: ByteSize) {
        if self.rhs.mnemonic != ExpressionType::LOAD {
            return;
        }
        let address = self.rhs.input1.as_mut().unwrap();
        if address.size == ByteSize::from(0_u64) {
            address.size = pointer_size;
        }
    }
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Blk { pub struct Blk {
pub defs: Vec<Term<Def>>, pub defs: Vec<Term<Def>>,
...@@ -154,6 +173,7 @@ pub struct Blk { ...@@ -154,6 +173,7 @@ pub struct Blk {
} }
impl From<Blk> for IrBlk { impl From<Blk> for IrBlk {
/// Convert a P-Code block to the internally used IR.
fn from(blk: Blk) -> IrBlk { fn from(blk: Blk) -> IrBlk {
let defs: Vec<Term<IrDef>> = blk let defs: Vec<Term<IrDef>> = blk
.defs .defs
...@@ -175,7 +195,53 @@ impl From<Blk> for IrBlk { ...@@ -175,7 +195,53 @@ impl From<Blk> for IrBlk {
} }
} }
// TODO: We need a unit test for stack parameter (that use location instead of var)! impl Blk {
/// Add `LOAD` instructions for implicit memory accesses
/// to convert them to explicit memory accesses.
///
/// The generates `LOAD`s will have (incorrect) address sizes of zero,
/// which must be corrected afterwards.
fn add_load_defs_for_implicit_ram_access(&mut self) {
let mut refactored_defs = Vec::new();
for def in self.defs.iter() {
let mut cleaned_def = def.clone();
if let Some(input) = &def.term.rhs.input0 {
if input.address.is_some() {
let load_def = input.to_load_def("$load_temp0");
cleaned_def.term.rhs.input0 = load_def.lhs.clone();
refactored_defs.push(Term {
tid: def.tid.clone().with_id_suffix("_load0"),
term: load_def,
});
}
}
if let Some(input) = &def.term.rhs.input1 {
if input.address.is_some() {
let load_def = input.to_load_def("$load_temp1");
cleaned_def.term.rhs.input1 = load_def.lhs.clone();
refactored_defs.push(Term {
tid: def.tid.clone().with_id_suffix("_load1"),
term: load_def,
});
}
}
if let Some(input) = &def.term.rhs.input2 {
if input.address.is_some() {
let load_def = input.to_load_def("$load_temp2");
cleaned_def.term.rhs.input2 = load_def.lhs.clone();
refactored_defs.push(Term {
tid: def.tid.clone().with_id_suffix("_load2"),
term: load_def,
});
}
}
refactored_defs.push(cleaned_def);
}
self.defs = refactored_defs;
}
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Arg { pub struct Arg {
pub var: Option<Variable>, pub var: Option<Variable>,
...@@ -187,7 +253,6 @@ pub struct Arg { ...@@ -187,7 +253,6 @@ pub struct Arg {
pub enum ArgIntent { pub enum ArgIntent {
INPUT, INPUT,
OUTPUT, OUTPUT,
BOTH,
} }
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
...@@ -220,6 +285,52 @@ pub struct ExternSymbol { ...@@ -220,6 +285,52 @@ pub struct ExternSymbol {
pub name: String, pub name: String,
pub calling_convention: Option<String>, pub calling_convention: Option<String>,
pub arguments: Vec<Arg>, pub arguments: Vec<Arg>,
pub no_return: bool,
}
impl From<ExternSymbol> for IrExternSymbol {
/// Convert an extern symbol parsed from Ghidra to the internally used IR.
fn from(symbol: ExternSymbol) -> IrExternSymbol {
let mut parameters = Vec::new();
let mut return_values = Vec::new();
for arg in symbol.arguments {
let ir_arg = if let Some(var) = arg.var {
IrArg::Register(var.into())
} else if let Some(expr) = arg.location {
if expr.mnemonic == ExpressionType::LOAD {
IrArg::Stack {
offset: i64::from_str_radix(
expr.input0
.clone()
.unwrap()
.address
.unwrap()
.trim_start_matches("0x"),
16,
)
.unwrap(),
size: expr.input0.unwrap().size,
}
} else {
panic!()
}
} else {
panic!()
};
match arg.intent {
ArgIntent::INPUT => parameters.push(ir_arg),
ArgIntent::OUTPUT => return_values.push(ir_arg),
}
}
IrExternSymbol {
tid: symbol.tid,
name: symbol.name,
calling_convention: symbol.calling_convention,
parameters,
return_values,
no_return: symbol.no_return,
}
}
} }
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
...@@ -229,6 +340,77 @@ pub struct Program { ...@@ -229,6 +340,77 @@ pub struct Program {
pub entry_points: Vec<Tid>, pub entry_points: Vec<Tid>,
} }
impl From<Program> for IrProgram {
    /// Translate a program parsed from Ghidra into the internally used IR.
    ///
    /// All subroutines and extern symbols get translated,
    /// while the term identifiers and the entry points are taken over unchanged.
    fn from(program: Program) -> IrProgram {
        let subs = program
            .subs
            .into_iter()
            .map(|Term { tid, term }| Term {
                tid,
                term: term.into(),
            })
            .collect();
        let extern_symbols = program
            .extern_symbols
            .into_iter()
            .map(Into::into)
            .collect();
        IrProgram {
            subs,
            extern_symbols,
            entry_points: program.entry_points,
        }
    }
}
/// A project as parsed from Ghidra.
/// It can be converted to the internally used IR via `IrProject::from`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
/// The program term of the project.
pub program: Term<Program>,
/// The name of the CPU architecture.
pub cpu_architecture: String,
/// The register used as the stack pointer.
/// Its size is used as the generic pointer size in `Project::normalize`.
pub stack_pointer_register: Variable,
}
impl From<Project> for IrProject {
    /// Translate a project parsed from Ghidra into the internally used IR.
    ///
    /// The program and the stack pointer register get translated,
    /// the CPU architecture is taken over unchanged.
    fn from(project: Project) -> IrProject {
        let Term { tid, term } = project.program;
        IrProject {
            program: Term {
                tid,
                term: term.into(),
            },
            cpu_architecture: project.cpu_architecture,
            stack_pointer_register: project.stack_pointer_register.into(),
        }
    }
}
impl Project {
    /// Run the normalization passes that bring the project into a form
    /// which can be translated into the internally used intermediate representation.
    ///
    /// Currently implemented normalization passes:
    ///
    /// ### Insert explicit `LOAD` instructions for implicit memory loads in P-Code.
    ///
    /// For memory accesses whose address is a constant, Ghidra generates implicit loads.
    /// This pass converts them to explicit `LOAD` instructions.
    /// The size of the stack pointer register is used as the pointer size of these `LOAD`s.
    pub fn normalize(&mut self) {
        let pointer_size = self.stack_pointer_register.size;
        let blocks = self
            .program
            .term
            .subs
            .iter_mut()
            .flat_map(|sub| sub.term.blocks.iter_mut());
        for block in blocks {
            block.term.add_load_defs_for_implicit_ram_access();
            // The artificially created LOADs have pointers of size 0,
            // which have to be replaced by the real pointer size.
            block
                .term
                .defs
                .iter_mut()
                .for_each(|def| def.term.correct_pointer_sizes(pointer_size));
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
...@@ -292,7 +474,7 @@ mod tests { ...@@ -292,7 +474,7 @@ mod tests {
#[test] #[test]
fn jmp_deserialization() { fn jmp_deserialization() {
let _: Term<Jmp> = serde_json::from_str( let jmp_term: Term<Jmp> = serde_json::from_str(
r#" r#"
{ {
"tid": { "tid": {
...@@ -322,11 +504,12 @@ mod tests { ...@@ -322,11 +504,12 @@ mod tests {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrJmp = jmp_term.term.into();
} }
#[test] #[test]
fn blk_deserialization() { fn blk_deserialization() {
let _: Term<Blk> = serde_json::from_str( let block_term: Term<Blk> = serde_json::from_str(
r#" r#"
{ {
"tid": { "tid": {
...@@ -341,6 +524,7 @@ mod tests { ...@@ -341,6 +524,7 @@ mod tests {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrBlk = block_term.term.into();
} }
#[test] #[test]
...@@ -358,11 +542,27 @@ mod tests { ...@@ -358,11 +542,27 @@ mod tests {
"#, "#,
) )
.unwrap(); .unwrap();
let _: Arg = serde_json::from_str(
r#"
{
"location": {
"mnemonic": "LOAD",
"input0": {
"address": "0x4",
"size": 4,
"is_virtual": false
}
},
"intent": "INPUT"
}
"#,
)
.unwrap();
} }
#[test] #[test]
fn sub_deserialization() { fn sub_deserialization() {
let _: Term<Sub> = serde_json::from_str( let sub_term: Term<Sub> = serde_json::from_str(
r#" r#"
{ {
"tid": { "tid": {
...@@ -377,42 +577,30 @@ mod tests { ...@@ -377,42 +577,30 @@ mod tests {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrSub = sub_term.term.into();
} }
#[test] #[test]
fn extern_symbol_deserialization() { fn extern_symbol_deserialization() {
let _: ExternSymbol = serde_json::from_str( let symbol: ExternSymbol = serde_json::from_str(
r#" r#"
{ {
"tid": { "tid": {
"id": "sub_0010b020", "id": "sub_08048410",
"address": "0010b020" "address": "08048410"
}, },
"address": "0010b020", "address": "08048410",
"name": "strncmp", "name": "atoi",
"calling_convention": "__stdcall", "calling_convention": "__cdecl",
"arguments": [ "arguments": [
{ {
"var": { "location": {
"name": "RDI", "mnemonic": "LOAD",
"size": 8, "input0": {
"is_virtual": false "address": "0x4",
}, "size": 4,
"intent": "INPUT"
},
{
"var": {
"name": "RSI",
"size": 8,
"is_virtual": false
},
"intent": "INPUT"
},
{
"var": {
"name": "RDX",
"size": 8,
"is_virtual": false "is_virtual": false
}
}, },
"intent": "INPUT" "intent": "INPUT"
}, },
...@@ -424,16 +612,18 @@ mod tests { ...@@ -424,16 +612,18 @@ mod tests {
}, },
"intent": "OUTPUT" "intent": "OUTPUT"
} }
] ],
"no_return": false
} }
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrExternSymbol = symbol.into();
} }
#[test] #[test]
fn program_deserialization() { fn program_deserialization() {
let _: Term<Program> = serde_json::from_str( let program_term: Term<Program> = serde_json::from_str(
r#" r#"
{ {
"tid": { "tid": {
...@@ -449,5 +639,73 @@ mod tests { ...@@ -449,5 +639,73 @@ mod tests {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrProgram = program_term.term.into();
}
/// Check that a minimal project can be deserialized and converted to the internally used IR.
#[test]
fn project_deserialization() {
    // Note: `size` is measured in bytes, so the 32-bit register `ESP` has size 4.
    let project: Project = serde_json::from_str(
        r#"
        {
            "program": {
                "tid": {
                    "id": "prog_08048000",
                    "address": "08048000"
                },
                "term": {
                    "subs": [],
                    "extern_symbols": [],
                    "entry_points":[]
                }
            },
            "stack_pointer_register": {
                "name": "ESP",
                "size": 4,
                "is_virtual": false
            },
            "cpu_architecture": "x86_32"
        }
        "#,
    )
    .unwrap();
    let _: IrProject = project.into();
}
#[test]
fn add_load_defs_for_implicit_ram_access() {
let mut blk: Blk = Blk { defs: Vec::new(), jmps: Vec::new()};
blk.defs.push(serde_json::from_str(r#"
{
"tid": {
"id": "instr_001053f8_0",
"address": "001053f8"
},
"term": {
"lhs": {
"name": "EDI",
"value": null,
"address": null,
"size": 4,
"is_virtual": false
},
"rhs": {
"mnemonic": "COPY",
"input0": {
"name": null,
"value": null,
"address": "0010a018",
"size": 4,
"is_virtual": false
},
"input1": null,
"input2": null
}
}
}
"#).unwrap());
blk.add_load_defs_for_implicit_ram_access();
assert_eq!(blk.defs[0].term.lhs.as_ref().unwrap().name.as_ref().unwrap(), "$load_temp0");
assert_eq!(blk.defs[1].term.rhs.input0.as_ref().unwrap().name.as_ref().unwrap(), "$load_temp0");
assert_eq!(blk.defs.len(), 2);
} }
} }
use crate::bil::*; use crate::bil::*;
use crate::intermediate_representation::Arg as IrArg;
use crate::intermediate_representation::Blk as IrBlk; use crate::intermediate_representation::Blk as IrBlk;
use crate::intermediate_representation::Def as IrDef; use crate::intermediate_representation::Def as IrDef;
use crate::intermediate_representation::Expression as IrExpression; use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::Jmp as IrJmp; use crate::intermediate_representation::Jmp as IrJmp;
use crate::intermediate_representation::Program as IrProgram;
use crate::intermediate_representation::Project as IrProject;
use crate::intermediate_representation::Sub as IrSub; use crate::intermediate_representation::Sub as IrSub;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
...@@ -22,6 +25,14 @@ impl Tid { ...@@ -22,6 +25,14 @@ impl Tid {
address: "UNKNOWN".to_string(), address: "UNKNOWN".to_string(),
} }
} }
/// Add a suffix to the ID string and return the new `Tid`
pub fn with_id_suffix(self, suffix: &str) -> Self {
Tid {
id: self.id + suffix,
address: self.address,
}
}
} }
impl std::fmt::Display for Tid { impl std::fmt::Display for Tid {
...@@ -43,12 +54,14 @@ pub struct Def { ...@@ -43,12 +54,14 @@ pub struct Def {
} }
impl Def { impl Def {
fn to_ir_defs(self) -> Vec<IrDef> { fn into_ir_defs(self) -> Vec<IrDef> {
match self.rhs { match self.rhs {
Expression::Load { address, .. } => { Expression::Load { address, .. } => {
let (defs, cleaned_address, _) = extract_loads_from_expression(*address, 0); let (defs, cleaned_address, _) = extract_loads_from_expression(*address, 0);
let mut ir_defs: Vec<IrDef> = let mut ir_defs: Vec<IrDef> = defs
defs.into_iter().map(|def| def.to_ir_assignment()).collect(); .into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Load { ir_defs.push(IrDef::Load {
address: cleaned_address.into(), address: cleaned_address.into(),
var: self.lhs.into(), var: self.lhs.into(),
...@@ -61,8 +74,10 @@ impl Def { ...@@ -61,8 +74,10 @@ impl Def {
let (mut more_defs, cleaned_value, _) = let (mut more_defs, cleaned_value, _) =
extract_loads_from_expression(*value, counter); extract_loads_from_expression(*value, counter);
defs.append(&mut more_defs); defs.append(&mut more_defs);
let mut ir_defs: Vec<IrDef> = let mut ir_defs: Vec<IrDef> = defs
defs.into_iter().map(|def| def.to_ir_assignment()).collect(); .into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Store { ir_defs.push(IrDef::Store {
address: cleaned_address.into(), address: cleaned_address.into(),
value: cleaned_value.into(), value: cleaned_value.into(),
...@@ -93,8 +108,10 @@ impl Def { ...@@ -93,8 +108,10 @@ impl Def {
extract_loads_from_expression(*value, counter); extract_loads_from_expression(*value, counter);
defs.append(&mut more_defs); defs.append(&mut more_defs);
defs.append(&mut even_more_defs); defs.append(&mut even_more_defs);
let mut ir_defs: Vec<IrDef> = let mut ir_defs: Vec<IrDef> = defs
defs.into_iter().map(|def| def.to_ir_assignment()).collect(); .into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Store { ir_defs.push(IrDef::Store {
address: cleaned_adress.into(), address: cleaned_adress.into(),
value: IrExpression::Unknown { value: IrExpression::Unknown {
...@@ -104,11 +121,11 @@ impl Def { ...@@ -104,11 +121,11 @@ impl Def {
}); });
ir_defs ir_defs
} }
_ => vec![self.to_ir_assignment()], _ => vec![self.into_ir_assignment()],
} }
} }
fn to_ir_assignment(self) -> IrDef { fn into_ir_assignment(self) -> IrDef {
IrDef::Assign { IrDef::Assign {
var: self.lhs.into(), var: self.lhs.into(),
value: self.rhs.into(), value: self.rhs.into(),
...@@ -184,7 +201,7 @@ impl From<Blk> for IrBlk { ...@@ -184,7 +201,7 @@ impl From<Blk> for IrBlk {
fn from(blk: Blk) -> IrBlk { fn from(blk: Blk) -> IrBlk {
let mut ir_def_terms = Vec::new(); let mut ir_def_terms = Vec::new();
for def_term in blk.defs { for def_term in blk.defs {
let ir_defs = def_term.term.to_ir_defs(); let ir_defs = def_term.term.into_ir_defs();
assert!(!ir_defs.is_empty()); assert!(!ir_defs.is_empty());
if ir_defs.len() == 1 { if ir_defs.len() == 1 {
ir_def_terms.push(Term { ir_def_terms.push(Term {
...@@ -248,6 +265,28 @@ pub struct Program { ...@@ -248,6 +265,28 @@ pub struct Program {
pub entry_points: Vec<Tid>, pub entry_points: Vec<Tid>,
} }
impl From<Program> for IrProgram {
    /// Translate a program into the internally used IR.
    ///
    /// All subroutines and extern symbols get translated,
    /// while the term identifiers and the entry points are taken over unchanged.
    fn from(program: Program) -> IrProgram {
        let subs = program
            .subs
            .into_iter()
            .map(|Term { tid, term }| Term {
                tid,
                term: term.into(),
            })
            .collect();
        let extern_symbols = program
            .extern_symbols
            .into_iter()
            .map(Into::into)
            .collect();
        IrProgram {
            subs,
            extern_symbols,
            entry_points: program.entry_points,
        }
    }
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project { pub struct Project {
pub program: Term<Program>, pub program: Term<Program>,
...@@ -294,6 +333,20 @@ impl Project { ...@@ -294,6 +333,20 @@ impl Project {
} }
} }
impl From<Project> for IrProject {
    /// Translate a project into the internally used IR.
    ///
    /// The program and the stack pointer register get translated,
    /// the CPU architecture is taken over unchanged.
    fn from(project: Project) -> IrProject {
        let Term { tid, term } = project.program;
        IrProject {
            program: Term {
                tid,
                term: term.into(),
            },
            cpu_architecture: project.cpu_architecture,
            stack_pointer_register: project.stack_pointer_register.into(),
        }
    }
}
impl Label { impl Label {
/// Replace let-bindings inside the expression for `Indirect` labels. /// Replace let-bindings inside the expression for `Indirect` labels.
fn replace_let_bindings(&mut self) { fn replace_let_bindings(&mut self) {
...@@ -334,6 +387,40 @@ impl ArgIntent { ...@@ -334,6 +387,40 @@ impl ArgIntent {
} }
} }
impl From<Arg> for IrArg {
    /// Translate an argument into the internally used IR.
    ///
    /// A location that is a variable becomes a register argument.
    /// A location that is a load from an address of the form `variable + constant`
    /// becomes a stack argument with the constant as offset.
    ///
    /// Panics for every other form of location.
    fn from(arg: Arg) -> IrArg {
        let (address, bitsize) = match arg.location {
            Expression::Var(var) => return IrArg::Register(var.into()),
            Expression::Load { address, size, .. } => (address, size),
            _ => panic!(),
        };
        let offset = if let Expression::BinOp {
            op: BinOpType::PLUS,
            lhs,
            rhs,
        } = *address
        {
            assert!(matches!(*lhs, Expression::Var(_)));
            match *rhs {
                Expression::Const(bitvec) => bitvec.try_to_i64().unwrap(),
                _ => panic!(),
            }
        } else {
            panic!()
        };
        IrArg::Stack {
            offset,
            size: bitsize.into(),
        }
    }
}
fn extract_loads_from_expression(expr: Expression, counter: u64) -> (Vec<Def>, Expression, u64) { fn extract_loads_from_expression(expr: Expression, counter: u64) -> (Vec<Def>, Expression, u64) {
use Expression::*; use Expression::*;
match expr { match expr {
......
use super::Arg; use super::{Arg, ArgIntent};
use crate::bil::*; use crate::bil::*;
use crate::intermediate_representation::ExternSymbol as IrExternSymbol;
use crate::prelude::*; use crate::prelude::*;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
...@@ -51,6 +52,35 @@ impl ExternSymbol { ...@@ -51,6 +52,35 @@ impl ExternSymbol {
} }
} }
impl From<ExternSymbol> for IrExternSymbol {
    /// Translate an extern symbol into the internally used IR.
    ///
    /// Arguments with intent `Input` become parameters,
    /// arguments with intent `Output` become return values,
    /// and arguments with intent `Both` or `Unknown` are added to both lists.
    fn from(symbol: ExternSymbol) -> IrExternSymbol {
        let mut parameters = Vec::new();
        let mut return_values = Vec::new();
        for arg in symbol.arguments {
            let is_parameter = matches!(
                arg.intent,
                ArgIntent::Input | ArgIntent::Both | ArgIntent::Unknown
            );
            let is_return_value = matches!(
                arg.intent,
                ArgIntent::Output | ArgIntent::Both | ArgIntent::Unknown
            );
            if is_parameter {
                parameters.push(arg.clone().into());
            }
            if is_return_value {
                return_values.push(arg.into());
            }
        }
        IrExternSymbol {
            tid: symbol.tid,
            name: symbol.name,
            calling_convention: symbol.calling_convention,
            parameters,
            return_values,
            no_return: false, // Last time I checked BAP had an attribute for non-returning functions, but did not actually set it.
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment