Commit c1e142eb by Enkelmann Committed by Enkelmann

Finish P-Code to internal IR deserialization.

parent b70f9910
...@@ -266,7 +266,7 @@ impl From<Expression> for IrExpression { ...@@ -266,7 +266,7 @@ impl From<Expression> for IrExpression {
}, },
HIGH => { HIGH => {
assert!(width % 8 == 0); assert!(width % 8 == 0);
let low_byte = (arg.bitsize() - BitSize::from(width)).into(); let low_byte = (arg.bitsize() - width).into();
IrExpression::Subpiece { IrExpression::Subpiece {
arg: Box::new(IrExpression::from(*arg)), arg: Box::new(IrExpression::from(*arg)),
low_byte, low_byte,
......
use crate::prelude::*; use crate::prelude::*;
use crate::term::{Term, Tid};
use derive_more::*; use derive_more::*;
use std::convert::TryFrom; use std::convert::TryFrom;
...@@ -60,3 +59,17 @@ impl From<ByteSize> for apint::BitWidth { ...@@ -60,3 +59,17 @@ impl From<ByteSize> for apint::BitWidth {
apint::BitWidth::from((u64::from(bytesize) * 8) as usize) apint::BitWidth::from((u64::from(bytesize) * 8) as usize)
} }
} }
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts a size of eight bits to a `ByteSize` and back again,
    /// checking the value after each conversion step.
    #[test]
    fn check_bit_to_byte_conversion() {
        let eight_bits: BitSize = 8;
        let one_byte = ByteSize::from(eight_bits);
        assert_eq!(u64::from(one_byte), 1);
        let round_tripped = BitSize::from(one_byte);
        assert_eq!(round_tripped, 8);
    }
}
...@@ -52,3 +52,33 @@ pub struct Sub { ...@@ -52,3 +52,33 @@ pub struct Sub {
pub name: String, pub name: String,
pub blocks: Vec<Term<Blk>>, pub blocks: Vec<Term<Blk>>,
} }
/// Location of a single argument value; used for the `parameters` and `return_values` of `ExternSymbol`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Arg {
/// The value is held in the given variable.
/// NOTE(review): the variant name suggests this is always a CPU register — confirm.
Register(Variable),
/// The value is described by a signed `offset` and a `size` in bytes.
/// NOTE(review): presumably a stack slot addressed relative to the stack pointer;
/// the base that `offset` refers to is not visible in this definition — confirm.
Stack { offset: i64, size: ByteSize },
}
/// Description of an extern symbol: its identifier, name, calling convention and argument locations.
/// NOTE(review): presumably this models an externally linked (e.g. library) function — confirm.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
/// Term identifier of the symbol.
pub tid: Tid,
/// Name of the symbol.
pub name: String,
/// Name of the calling convention, or `None` if no calling convention is recorded.
pub calling_convention: Option<String>,
/// Locations of the parameters of the symbol.
pub parameters: Vec<Arg>,
/// Locations of the return values of the symbol.
pub return_values: Vec<Arg>,
/// Flag for non-returning symbols.
/// NOTE(review): presumably `true` means a call to the symbol never returns to the caller — confirm.
pub no_return: bool,
}
/// A program, consisting of its subroutines, its extern symbols and its entry points.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
/// The subroutine terms contained in the program.
pub subs: Vec<Term<Sub>>,
/// The extern symbols recorded for the program.
pub extern_symbols: Vec<ExternSymbol>,
/// Term identifiers of the entry points.
/// NOTE(review): presumably each identifier refers to one of the terms in `subs` — confirm.
pub entry_points: Vec<Tid>,
}
/// A project: a program term together with architecture-level information about it.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
/// The program term of the project.
pub program: Term<Program>,
/// The CPU architecture, stored as a plain string.
/// NOTE(review): the set of valid architecture names is not visible here — confirm against the producer of this value.
pub cpu_architecture: String,
/// The variable that represents the stack pointer register.
pub stack_pointer_register: Variable,
}
use super::Def;
use crate::intermediate_representation::BinOpType as IrBinOpType; use crate::intermediate_representation::BinOpType as IrBinOpType;
use crate::intermediate_representation::ByteSize; use crate::intermediate_representation::ByteSize;
use crate::intermediate_representation::CastOpType as IrCastOpType; use crate::intermediate_representation::CastOpType as IrCastOpType;
...@@ -5,17 +6,19 @@ use crate::intermediate_representation::Expression as IrExpression; ...@@ -5,17 +6,19 @@ use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::UnOpType as IrUnOpType; use crate::intermediate_representation::UnOpType as IrUnOpType;
use crate::intermediate_representation::Variable as IrVariable; use crate::intermediate_representation::Variable as IrVariable;
use crate::prelude::*; use crate::prelude::*;
use crate::term::{Term, Tid};
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Variable { pub struct Variable {
pub name: Option<String>, pub name: Option<String>,
pub value: Option<String>, pub value: Option<String>,
pub address: Option<String>,
pub size: ByteSize, pub size: ByteSize,
pub is_virtual: bool, pub is_virtual: bool,
} }
impl From<Variable> for IrVariable { impl From<Variable> for IrVariable {
/// Translate a P-Code variable into a register variable of the internally used IR.
/// Panic if the variable does not represent a register.
fn from(pcode_var: Variable) -> IrVariable { fn from(pcode_var: Variable) -> IrVariable {
IrVariable { IrVariable {
name: pcode_var.name.unwrap(), name: pcode_var.name.unwrap(),
...@@ -26,12 +29,24 @@ impl From<Variable> for IrVariable { ...@@ -26,12 +29,24 @@ impl From<Variable> for IrVariable {
} }
impl From<Variable> for IrExpression { impl From<Variable> for IrExpression {
/// Translate a P-Code variable into a `Var` or `Const` expression of the internally used IR.
/// Panics if the translation fails.
fn from(pcode_var: Variable) -> IrExpression { fn from(pcode_var: Variable) -> IrExpression {
match (&pcode_var.name, &pcode_var.value) { match (&pcode_var.name, &pcode_var.value) {
(Some(_name), None) => IrExpression::Var(pcode_var.into()), (Some(_name), None) => IrExpression::Var(pcode_var.into()),
(None, Some(hex_value)) => { (None, Some(_hex_value)) => IrExpression::Const(pcode_var.parse_to_bitvector()),
_ => panic!("Conversion failed:\n{:?}", pcode_var),
}
}
}
impl Variable {
/// Parses a variable representing a concrete value or a concrete address to a bitvector containing the value or address.
pub fn parse_to_bitvector(&self) -> Bitvector {
match (&self.value, &self.address) {
(Some(hex_value), None) | (None, Some(hex_value)) => {
// TODO: Implement parsing for large hex values. // TODO: Implement parsing for large hex values.
if u64::from(pcode_var.size) > 8 { if u64::from(self.size) > 8 {
panic!( panic!(
"Parsing of immediates greater than 8 bytes not yet implemented: {}", "Parsing of immediates greater than 8 bytes not yet implemented: {}",
hex_value hex_value
...@@ -39,25 +54,58 @@ impl From<Variable> for IrExpression { ...@@ -39,25 +54,58 @@ impl From<Variable> for IrExpression {
} }
let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap(); let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap();
let mut bitvector: Bitvector = Bitvector::from_u64(val); let mut bitvector: Bitvector = Bitvector::from_u64(val);
bitvector.truncate(pcode_var.size).unwrap(); bitvector.truncate(self.size).unwrap();
IrExpression::Const(bitvector) bitvector
} }
_ => panic!(), _ => panic!(),
} }
} }
}
impl From<Variable> for ByteSize { /// Generate a virtual variable with the given name and size.
fn from(pcode_var: Variable) -> ByteSize { pub fn new_virtual(name: impl Into<String>, size: ByteSize) -> Variable {
match (&pcode_var.name, &pcode_var.value) { Variable {
name: Some(name.into()),
value: None,
address: None,
size,
is_virtual: true,
}
}
/// Generate a variable representing a constant
pub fn new_const(value_string: impl Into<String>, size: ByteSize) -> Variable {
Variable {
name: None,
value: Some(value_string.into()),
address: None,
size,
is_virtual: false,
}
}
/// Create a LOAD instruction out of a variable representing a load from a constant address into a virtual register.
///
/// Note that the address pointer size gets set to zero, since the function does not know the correct size for pointers.
pub fn to_load_def(&self, target_register_name: impl Into<String>) -> Def {
Def {
lhs: Some(Variable::new_virtual(target_register_name, self.size)),
rhs: Expression {
mnemonic: ExpressionType::LOAD,
input0: None,
input1: Some(Variable::new_const(
self.address.as_ref().unwrap(),
ByteSize::from(0 as u64), // We do not know the correct pointer size here.
)),
input2: None,
},
}
}
/// Translates a variable into the byte size that it represents. Panics on error.
pub fn parse_to_bytesize(self) -> ByteSize {
match (&self.name, &self.value) {
(None, Some(hex_value)) => { (None, Some(hex_value)) => {
// TODO: Implement parsing for large hex values. assert!(u64::from(self.size) <= 8);
if u64::from(pcode_var.size) > 8 {
panic!(
"Parsing of immediates greater than 8 bytes not yet implemented: {}",
hex_value
);
}
let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap(); let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap();
val.into() val.into()
} }
...@@ -144,6 +192,8 @@ pub enum ExpressionType { ...@@ -144,6 +192,8 @@ pub enum ExpressionType {
} }
impl From<ExpressionType> for IrBinOpType { impl From<ExpressionType> for IrBinOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrBinOpType { fn from(expr_type: ExpressionType) -> IrBinOpType {
use ExpressionType::*; use ExpressionType::*;
use IrBinOpType::*; use IrBinOpType::*;
...@@ -195,6 +245,8 @@ impl From<ExpressionType> for IrBinOpType { ...@@ -195,6 +245,8 @@ impl From<ExpressionType> for IrBinOpType {
} }
impl From<ExpressionType> for IrUnOpType { impl From<ExpressionType> for IrUnOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrUnOpType { fn from(expr_type: ExpressionType) -> IrUnOpType {
use ExpressionType::*; use ExpressionType::*;
match expr_type { match expr_type {
...@@ -214,6 +266,8 @@ impl From<ExpressionType> for IrUnOpType { ...@@ -214,6 +266,8 @@ impl From<ExpressionType> for IrUnOpType {
} }
impl From<ExpressionType> for IrCastOpType { impl From<ExpressionType> for IrCastOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrCastOpType { fn from(expr_type: ExpressionType) -> IrCastOpType {
use ExpressionType::*; use ExpressionType::*;
match expr_type { match expr_type {
...@@ -249,20 +303,20 @@ mod tests { ...@@ -249,20 +303,20 @@ mod tests {
fn expression_deserialization() { fn expression_deserialization() {
let _: Expression = serde_json::from_str( let _: Expression = serde_json::from_str(
r#" r#"
{ {
"mnemonic": "INT_SUB", "mnemonic": "INT_SLESS",
"input0": { "input0": {
"name": "RSP", "name": "EAX",
"size": 8, "size": 4,
"is_virtual": false "is_virtual": false
}, },
"input1": { "input1": {
"name": "00000008", "value": "00000000",
"size": 8, "size": 4,
"is_virtual": false "is_virtual": false
}
} }
} "#,
"#,
) )
.unwrap(); .unwrap();
} }
......
use crate::prelude::*;
use crate::term::{Term, Tid};
use derive_more::*;
use std::convert::TryFrom;
mod expressions; mod expressions;
pub use expressions::*; pub use expressions::*;
mod term; mod term;
......
use crate::bil::*; use crate::bil::*;
use crate::intermediate_representation::Arg as IrArg;
use crate::intermediate_representation::Blk as IrBlk; use crate::intermediate_representation::Blk as IrBlk;
use crate::intermediate_representation::Def as IrDef; use crate::intermediate_representation::Def as IrDef;
use crate::intermediate_representation::Expression as IrExpression; use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::Jmp as IrJmp; use crate::intermediate_representation::Jmp as IrJmp;
use crate::intermediate_representation::Program as IrProgram;
use crate::intermediate_representation::Project as IrProject;
use crate::intermediate_representation::Sub as IrSub; use crate::intermediate_representation::Sub as IrSub;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
...@@ -22,6 +25,14 @@ impl Tid { ...@@ -22,6 +25,14 @@ impl Tid {
address: "UNKNOWN".to_string(), address: "UNKNOWN".to_string(),
} }
} }
/// Add a suffix to the ID string and return the new `Tid`
pub fn with_id_suffix(self, suffix: &str) -> Self {
Tid {
id: self.id + suffix,
address: self.address,
}
}
} }
impl std::fmt::Display for Tid { impl std::fmt::Display for Tid {
...@@ -43,12 +54,14 @@ pub struct Def { ...@@ -43,12 +54,14 @@ pub struct Def {
} }
impl Def { impl Def {
fn to_ir_defs(self) -> Vec<IrDef> { fn into_ir_defs(self) -> Vec<IrDef> {
match self.rhs { match self.rhs {
Expression::Load { address, .. } => { Expression::Load { address, .. } => {
let (defs, cleaned_address, _) = extract_loads_from_expression(*address, 0); let (defs, cleaned_address, _) = extract_loads_from_expression(*address, 0);
let mut ir_defs: Vec<IrDef> = let mut ir_defs: Vec<IrDef> = defs
defs.into_iter().map(|def| def.to_ir_assignment()).collect(); .into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Load { ir_defs.push(IrDef::Load {
address: cleaned_address.into(), address: cleaned_address.into(),
var: self.lhs.into(), var: self.lhs.into(),
...@@ -61,8 +74,10 @@ impl Def { ...@@ -61,8 +74,10 @@ impl Def {
let (mut more_defs, cleaned_value, _) = let (mut more_defs, cleaned_value, _) =
extract_loads_from_expression(*value, counter); extract_loads_from_expression(*value, counter);
defs.append(&mut more_defs); defs.append(&mut more_defs);
let mut ir_defs: Vec<IrDef> = let mut ir_defs: Vec<IrDef> = defs
defs.into_iter().map(|def| def.to_ir_assignment()).collect(); .into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Store { ir_defs.push(IrDef::Store {
address: cleaned_address.into(), address: cleaned_address.into(),
value: cleaned_value.into(), value: cleaned_value.into(),
...@@ -93,8 +108,10 @@ impl Def { ...@@ -93,8 +108,10 @@ impl Def {
extract_loads_from_expression(*value, counter); extract_loads_from_expression(*value, counter);
defs.append(&mut more_defs); defs.append(&mut more_defs);
defs.append(&mut even_more_defs); defs.append(&mut even_more_defs);
let mut ir_defs: Vec<IrDef> = let mut ir_defs: Vec<IrDef> = defs
defs.into_iter().map(|def| def.to_ir_assignment()).collect(); .into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Store { ir_defs.push(IrDef::Store {
address: cleaned_adress.into(), address: cleaned_adress.into(),
value: IrExpression::Unknown { value: IrExpression::Unknown {
...@@ -104,11 +121,11 @@ impl Def { ...@@ -104,11 +121,11 @@ impl Def {
}); });
ir_defs ir_defs
} }
_ => vec![self.to_ir_assignment()], _ => vec![self.into_ir_assignment()],
} }
} }
fn to_ir_assignment(self) -> IrDef { fn into_ir_assignment(self) -> IrDef {
IrDef::Assign { IrDef::Assign {
var: self.lhs.into(), var: self.lhs.into(),
value: self.rhs.into(), value: self.rhs.into(),
...@@ -184,7 +201,7 @@ impl From<Blk> for IrBlk { ...@@ -184,7 +201,7 @@ impl From<Blk> for IrBlk {
fn from(blk: Blk) -> IrBlk { fn from(blk: Blk) -> IrBlk {
let mut ir_def_terms = Vec::new(); let mut ir_def_terms = Vec::new();
for def_term in blk.defs { for def_term in blk.defs {
let ir_defs = def_term.term.to_ir_defs(); let ir_defs = def_term.term.into_ir_defs();
assert!(!ir_defs.is_empty()); assert!(!ir_defs.is_empty());
if ir_defs.len() == 1 { if ir_defs.len() == 1 {
ir_def_terms.push(Term { ir_def_terms.push(Term {
...@@ -248,6 +265,28 @@ pub struct Program { ...@@ -248,6 +265,28 @@ pub struct Program {
pub entry_points: Vec<Tid>, pub entry_points: Vec<Tid>,
} }
impl From<Program> for IrProgram {
fn from(program: Program) -> IrProgram {
let subs = program
.subs
.into_iter()
.map(|sub_term| Term {
tid: sub_term.tid,
term: sub_term.term.into(),
})
.collect();
IrProgram {
subs,
extern_symbols: program
.extern_symbols
.into_iter()
.map(|symbol| symbol.into())
.collect(),
entry_points: program.entry_points,
}
}
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project { pub struct Project {
pub program: Term<Program>, pub program: Term<Program>,
...@@ -294,6 +333,20 @@ impl Project { ...@@ -294,6 +333,20 @@ impl Project {
} }
} }
impl From<Project> for IrProject {
    /// Translate a project into a project of the internally used IR.
    ///
    /// The program term keeps its `tid` while its content is converted.
    /// The CPU architecture string is taken over unchanged and the
    /// stack pointer register is converted into an IR variable.
    fn from(project: Project) -> IrProject {
        let Term {
            tid,
            term: source_program,
        } = project.program;
        IrProject {
            program: Term {
                tid,
                term: source_program.into(),
            },
            cpu_architecture: project.cpu_architecture,
            stack_pointer_register: project.stack_pointer_register.into(),
        }
    }
}
impl Label { impl Label {
/// Replace let-bindings inside the expression for `Indirect` labels. /// Replace let-bindings inside the expression for `Indirect` labels.
fn replace_let_bindings(&mut self) { fn replace_let_bindings(&mut self) {
...@@ -334,6 +387,40 @@ impl ArgIntent { ...@@ -334,6 +387,40 @@ impl ArgIntent {
} }
} }
impl From<Arg> for IrArg {
    /// Translate an argument into an argument of the internally used IR.
    ///
    /// - A location that is a plain variable becomes `IrArg::Register`.
    /// - A location that is a load from an address of the form `variable + constant`
    ///   becomes `IrArg::Stack`, with the constant as `offset` and the size of the load as `size`.
    ///
    /// NOTE(review): the base variable of the address is only checked to be a variable,
    /// not to be the stack pointer register — confirm that this is intended.
    ///
    /// # Panics
    ///
    /// Panics if the location is neither a variable nor a load,
    /// if the load address is not an addition of a variable and a constant,
    /// or if the constant cannot be represented as an `i64`.
    fn from(arg: Arg) -> IrArg {
        match arg.location {
            Expression::Var(var) => IrArg::Register(var.into()),
            Expression::Load {
                address,
                size: bitsize,
                ..
            } => {
                let offset = match *address {
                    Expression::BinOp {
                        op: BinOpType::PLUS,
                        lhs,
                        rhs,
                    } => {
                        assert!(
                            matches!(*lhs, Expression::Var(_)),
                            "Argument conversion failed: base of the load address is not a variable"
                        );
                        match *rhs {
                            Expression::Const(bitvec) => bitvec.try_to_i64().expect(
                                "Argument conversion failed: address offset does not fit into an i64",
                            ),
                            _ => panic!(
                                "Argument conversion failed: offset of the load address is not a constant"
                            ),
                        }
                    }
                    _ => panic!(
                        "Argument conversion failed: load address is not of the form `variable + constant`"
                    ),
                };
                IrArg::Stack {
                    offset,
                    size: bitsize.into(),
                }
            }
            _ => panic!(
                "Argument conversion failed: location is neither a variable nor a memory load"
            ),
        }
    }
}
fn extract_loads_from_expression(expr: Expression, counter: u64) -> (Vec<Def>, Expression, u64) { fn extract_loads_from_expression(expr: Expression, counter: u64) -> (Vec<Def>, Expression, u64) {
use Expression::*; use Expression::*;
match expr { match expr {
......
use super::Arg; use super::{Arg, ArgIntent};
use crate::bil::*; use crate::bil::*;
use crate::intermediate_representation::ExternSymbol as IrExternSymbol;
use crate::prelude::*; use crate::prelude::*;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
...@@ -51,6 +52,35 @@ impl ExternSymbol { ...@@ -51,6 +52,35 @@ impl ExternSymbol {
} }
} }
impl From<ExternSymbol> for IrExternSymbol {
    /// Translate an extern symbol into an extern symbol of the internally used IR.
    ///
    /// The arguments of the symbol are split up by their intent:
    /// - `Input`, `Both` and `Unknown` arguments are added to the parameters.
    /// - `Output`, `Both` and `Unknown` arguments are added to the return values.
    ///
    /// Thus `Both` and `Unknown` arguments end up in both lists.
    /// The `no_return` flag of the result is always `false`.
    fn from(symbol: ExternSymbol) -> IrExternSymbol {
        let mut parameters = Vec::new();
        let mut return_values = Vec::new();
        for arg in symbol.arguments {
            let is_parameter = matches!(
                arg.intent,
                ArgIntent::Input | ArgIntent::Both | ArgIntent::Unknown
            );
            let is_return_value = matches!(
                arg.intent,
                ArgIntent::Output | ArgIntent::Both | ArgIntent::Unknown
            );
            // Only clone the argument if it really has to be stored in both lists.
            match (is_parameter, is_return_value) {
                (true, true) => {
                    parameters.push(arg.clone().into());
                    return_values.push(arg.into());
                }
                (true, false) => parameters.push(arg.into()),
                (false, true) => return_values.push(arg.into()),
                (false, false) => {}
            }
        }
        IrExternSymbol {
            tid: symbol.tid,
            name: symbol.name,
            calling_convention: symbol.calling_convention,
            parameters,
            return_values,
            // Last time this was checked, BAP had an attribute for non-returning functions
            // but did not actually set it, so the flag is hard-coded here.
            no_return: false,
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment