Commit c1e142eb by Enkelmann Committed by Enkelmann

Finish P-Code to internal IR deserialization.

parent b70f9910
......@@ -266,7 +266,7 @@ impl From<Expression> for IrExpression {
},
HIGH => {
assert!(width % 8 == 0);
let low_byte = (arg.bitsize() - BitSize::from(width)).into();
let low_byte = (arg.bitsize() - width).into();
IrExpression::Subpiece {
arg: Box::new(IrExpression::from(*arg)),
low_byte,
......
use crate::prelude::*;
use crate::term::{Term, Tid};
use derive_more::*;
use std::convert::TryFrom;
......@@ -60,3 +59,17 @@ impl From<ByteSize> for apint::BitWidth {
apint::BitWidth::from((u64::from(bytesize) * 8) as usize)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converting a bit size to a byte size and back must round-trip:
    /// 8 bits correspond to 1 byte and 1 byte corresponds to 8 bits.
    #[test]
    fn check_bit_to_byte_conversion() {
        let bit_count: BitSize = 8;
        let byte_count: ByteSize = bit_count.into();
        assert_eq!(u64::from(byte_count), 1);

        let round_trip: BitSize = byte_count.into();
        assert_eq!(round_trip, 8);
    }
}
......@@ -52,3 +52,33 @@ pub struct Sub {
pub name: String,
pub blocks: Vec<Term<Blk>>,
}
/// The location of a single parameter or return value
/// (see the `parameters` and `return_values` fields of `ExternSymbol`).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Arg {
/// The value is held in the given register variable.
Register(Variable),
/// The value lies on the stack at the given offset and has the given size in bytes.
/// NOTE(review): the offset is presumably relative to the stack pointer register — confirm.
Stack { offset: i64, size: ByteSize },
}
/// Description of an extern symbol, i.e. its identifier, name and calling interface.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
/// The term identifier of the symbol.
pub tid: Tid,
/// The name of the symbol.
pub name: String,
/// The name of the calling convention, if one is known.
pub calling_convention: Option<String>,
/// The locations of the input parameters.
pub parameters: Vec<Arg>,
/// The locations of the return values.
pub return_values: Vec<Arg>,
/// Flag for non-returning functions.
/// NOTE(review): presumably `true` means the function never returns to its caller — confirm.
pub no_return: bool,
}
/// The program part of a project: its subroutines, extern symbols and entry points.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
/// The subroutine terms of the program.
pub subs: Vec<Term<Sub>>,
/// The extern symbols known for the program.
pub extern_symbols: Vec<ExternSymbol>,
/// Term identifiers of the entry points.
/// NOTE(review): presumably these refer to elements of `subs` — confirm.
pub entry_points: Vec<Tid>,
}
/// Top-level container: a program term together with architecture information.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
/// The program term of the project.
pub program: Term<Program>,
/// The CPU architecture, given as a string.
pub cpu_architecture: String,
/// The register variable that is used as the stack pointer.
pub stack_pointer_register: Variable,
}
use super::Def;
use crate::intermediate_representation::BinOpType as IrBinOpType;
use crate::intermediate_representation::ByteSize;
use crate::intermediate_representation::CastOpType as IrCastOpType;
......@@ -5,17 +6,19 @@ use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::UnOpType as IrUnOpType;
use crate::intermediate_representation::Variable as IrVariable;
use crate::prelude::*;
use crate::term::{Term, Tid};
/// A variable as it appears in the (de)serialized P-Code.
///
/// Judging by the conversions in this file, a variable stands for a register (`name` is set),
/// a constant (`value` is set) or a concrete address (`address` is set).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Variable {
/// The register name. The conversion to a register variable of the internal IR panics if it is `None`.
pub name: Option<String>,
/// A constant value as a hexadecimal string (it is parsed with radix 16).
pub value: Option<String>,
/// A concrete address as a hexadecimal string (it is parsed with radix 16).
pub address: Option<String>,
/// The size of the variable in bytes.
pub size: ByteSize,
/// Set to `true` by `Variable::new_virtual` and to `false` by `Variable::new_const`.
/// NOTE(review): presumably marks temporary (non-physical) registers — confirm.
pub is_virtual: bool,
}
impl From<Variable> for IrVariable {
/// Translate a P-Code variable into a register variable of the internally used IR.
/// Panic if the variable does not represent a register.
fn from(pcode_var: Variable) -> IrVariable {
IrVariable {
name: pcode_var.name.unwrap(),
......@@ -26,12 +29,24 @@ impl From<Variable> for IrVariable {
}
impl From<Variable> for IrExpression {
/// Translate a P-Code variable into a `Var` or `Const` expression of the internally used IR.
/// Panics if the translation fails.
fn from(pcode_var: Variable) -> IrExpression {
match (&pcode_var.name, &pcode_var.value) {
(Some(_name), None) => IrExpression::Var(pcode_var.into()),
(None, Some(hex_value)) => {
(None, Some(_hex_value)) => IrExpression::Const(pcode_var.parse_to_bitvector()),
_ => panic!("Conversion failed:\n{:?}", pcode_var),
}
}
}
impl Variable {
/// Parses a variable representing a concrete value or a concrete address to a bitvector containing the value or address.
pub fn parse_to_bitvector(&self) -> Bitvector {
match (&self.value, &self.address) {
(Some(hex_value), None) | (None, Some(hex_value)) => {
// TODO: Implement parsing for large hex values.
if u64::from(pcode_var.size) > 8 {
if u64::from(self.size) > 8 {
panic!(
"Parsing of immediates greater than 8 bytes not yet implemented: {}",
hex_value
......@@ -39,25 +54,58 @@ impl From<Variable> for IrExpression {
}
let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap();
let mut bitvector: Bitvector = Bitvector::from_u64(val);
bitvector.truncate(pcode_var.size).unwrap();
IrExpression::Const(bitvector)
bitvector.truncate(self.size).unwrap();
bitvector
}
_ => panic!(),
}
}
}
impl From<Variable> for ByteSize {
fn from(pcode_var: Variable) -> ByteSize {
match (&pcode_var.name, &pcode_var.value) {
/// Build a virtual variable that carries only a name and a size.
///
/// The `value` and `address` fields are left empty and `is_virtual` is set.
pub fn new_virtual(name: impl Into<String>, size: ByteSize) -> Variable {
    let register_name: String = name.into();
    Variable {
        name: Some(register_name),
        value: None,
        address: None,
        size,
        is_virtual: true,
    }
}
/// Build a variable that represents a constant.
///
/// Only the `value` string and the size are set; the variable has no name,
/// no address and is not virtual.
pub fn new_const(value_string: impl Into<String>, size: ByteSize) -> Variable {
    let constant: String = value_string.into();
    Variable {
        name: None,
        value: Some(constant),
        address: None,
        size,
        is_virtual: false,
    }
}
/// Turn a variable that represents a load from a constant address into a LOAD
/// instruction whose target is a virtual register with the given name.
///
/// The size of the address constant is set to zero,
/// because this function has no way of knowing the correct pointer size.
///
/// Panics if the variable has no address.
pub fn to_load_def(&self, target_register_name: impl Into<String>) -> Def {
    let target = Variable::new_virtual(target_register_name, self.size);
    let address_string = self.address.as_ref().unwrap();
    // The correct pointer size is not known here, so zero is used as a placeholder.
    let address_const = Variable::new_const(address_string, ByteSize::from(0u64));
    let load_expression = Expression {
        mnemonic: ExpressionType::LOAD,
        input0: None,
        input1: Some(address_const),
        input2: None,
    };
    Def {
        lhs: Some(target),
        rhs: load_expression,
    }
}
/// Translates a variable into the byte size that it represents. Panics on error.
pub fn parse_to_bytesize(self) -> ByteSize {
match (&self.name, &self.value) {
(None, Some(hex_value)) => {
// TODO: Implement parsing for large hex values.
if u64::from(pcode_var.size) > 8 {
panic!(
"Parsing of immediates greater than 8 bytes not yet implemented: {}",
hex_value
);
}
assert!(u64::from(self.size) <= 8);
let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap();
val.into()
}
......@@ -144,6 +192,8 @@ pub enum ExpressionType {
}
impl From<ExpressionType> for IrBinOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrBinOpType {
use ExpressionType::*;
use IrBinOpType::*;
......@@ -195,6 +245,8 @@ impl From<ExpressionType> for IrBinOpType {
}
impl From<ExpressionType> for IrUnOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrUnOpType {
use ExpressionType::*;
match expr_type {
......@@ -214,6 +266,8 @@ impl From<ExpressionType> for IrUnOpType {
}
impl From<ExpressionType> for IrCastOpType {
/// Translates expression types.
/// Panics when given a type not representable by the target type.
fn from(expr_type: ExpressionType) -> IrCastOpType {
use ExpressionType::*;
match expr_type {
......@@ -249,20 +303,20 @@ mod tests {
fn expression_deserialization() {
let _: Expression = serde_json::from_str(
r#"
{
"mnemonic": "INT_SUB",
"input0": {
"name": "RSP",
"size": 8,
"is_virtual": false
},
"input1": {
"name": "00000008",
"size": 8,
"is_virtual": false
{
"mnemonic": "INT_SLESS",
"input0": {
"name": "EAX",
"size": 4,
"is_virtual": false
},
"input1": {
"value": "00000000",
"size": 4,
"is_virtual": false
}
}
}
"#,
"#,
)
.unwrap();
}
......
use crate::prelude::*;
use crate::term::{Term, Tid};
use derive_more::*;
use std::convert::TryFrom;
mod expressions;
pub use expressions::*;
mod term;
......
use crate::bil::*;
use crate::intermediate_representation::Arg as IrArg;
use crate::intermediate_representation::Blk as IrBlk;
use crate::intermediate_representation::Def as IrDef;
use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::Jmp as IrJmp;
use crate::intermediate_representation::Program as IrProgram;
use crate::intermediate_representation::Project as IrProject;
use crate::intermediate_representation::Sub as IrSub;
use serde::{Deserialize, Serialize};
......@@ -22,6 +25,14 @@ impl Tid {
address: "UNKNOWN".to_string(),
}
}
/// Consume the `Tid` and return it with `suffix` appended to its ID string.
/// The address is carried over unchanged.
pub fn with_id_suffix(self, suffix: &str) -> Self {
    let Tid { mut id, address } = self;
    id.push_str(suffix);
    Tid { id, address }
}
}
impl std::fmt::Display for Tid {
......@@ -43,12 +54,14 @@ pub struct Def {
}
impl Def {
fn to_ir_defs(self) -> Vec<IrDef> {
fn into_ir_defs(self) -> Vec<IrDef> {
match self.rhs {
Expression::Load { address, .. } => {
let (defs, cleaned_address, _) = extract_loads_from_expression(*address, 0);
let mut ir_defs: Vec<IrDef> =
defs.into_iter().map(|def| def.to_ir_assignment()).collect();
let mut ir_defs: Vec<IrDef> = defs
.into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Load {
address: cleaned_address.into(),
var: self.lhs.into(),
......@@ -61,8 +74,10 @@ impl Def {
let (mut more_defs, cleaned_value, _) =
extract_loads_from_expression(*value, counter);
defs.append(&mut more_defs);
let mut ir_defs: Vec<IrDef> =
defs.into_iter().map(|def| def.to_ir_assignment()).collect();
let mut ir_defs: Vec<IrDef> = defs
.into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Store {
address: cleaned_address.into(),
value: cleaned_value.into(),
......@@ -93,8 +108,10 @@ impl Def {
extract_loads_from_expression(*value, counter);
defs.append(&mut more_defs);
defs.append(&mut even_more_defs);
let mut ir_defs: Vec<IrDef> =
defs.into_iter().map(|def| def.to_ir_assignment()).collect();
let mut ir_defs: Vec<IrDef> = defs
.into_iter()
.map(|def| def.into_ir_assignment())
.collect();
ir_defs.push(IrDef::Store {
address: cleaned_adress.into(),
value: IrExpression::Unknown {
......@@ -104,11 +121,11 @@ impl Def {
});
ir_defs
}
_ => vec![self.to_ir_assignment()],
_ => vec![self.into_ir_assignment()],
}
}
fn to_ir_assignment(self) -> IrDef {
fn into_ir_assignment(self) -> IrDef {
IrDef::Assign {
var: self.lhs.into(),
value: self.rhs.into(),
......@@ -184,7 +201,7 @@ impl From<Blk> for IrBlk {
fn from(blk: Blk) -> IrBlk {
let mut ir_def_terms = Vec::new();
for def_term in blk.defs {
let ir_defs = def_term.term.to_ir_defs();
let ir_defs = def_term.term.into_ir_defs();
assert!(!ir_defs.is_empty());
if ir_defs.len() == 1 {
ir_def_terms.push(Term {
......@@ -248,6 +265,28 @@ pub struct Program {
pub entry_points: Vec<Tid>,
}
impl From<Program> for IrProgram {
    /// Translate a program into the internally used IR.
    ///
    /// Every subroutine term keeps its term identifier while its content is converted.
    /// The extern symbols are converted one by one and the entry points are taken over as they are.
    fn from(program: Program) -> IrProgram {
        let ir_subs = program
            .subs
            .into_iter()
            .map(|Term { tid, term }| Term {
                tid,
                term: term.into(),
            })
            .collect();
        let ir_extern_symbols = program
            .extern_symbols
            .into_iter()
            .map(Into::into)
            .collect();
        IrProgram {
            subs: ir_subs,
            extern_symbols: ir_extern_symbols,
            entry_points: program.entry_points,
        }
    }
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
pub program: Term<Program>,
......@@ -294,6 +333,20 @@ impl Project {
}
}
impl From<Project> for IrProject {
    /// Translate a project into the internally used IR.
    ///
    /// The program term keeps its term identifier while its content is converted.
    /// The CPU architecture string is moved over and the stack pointer register is converted.
    fn from(project: Project) -> IrProject {
        let Term {
            tid,
            term: old_program,
        } = project.program;
        IrProject {
            program: Term {
                tid,
                term: old_program.into(),
            },
            cpu_architecture: project.cpu_architecture,
            stack_pointer_register: project.stack_pointer_register.into(),
        }
    }
}
impl Label {
/// Replace let-bindings inside the expression for `Indirect` labels.
fn replace_let_bindings(&mut self) {
......@@ -334,6 +387,40 @@ impl ArgIntent {
}
}
impl From<Arg> for IrArg {
    /// Translate the location of an argument into an argument of the internally used IR.
    ///
    /// A plain variable becomes a register argument.
    /// A load from an address of the form `variable + constant` becomes a stack argument
    /// with the constant as offset and the size of the load as size.
    ///
    /// Panics if the location has any other form
    /// or if the constant cannot be represented as an `i64`.
    fn from(arg: Arg) -> IrArg {
        // Register arguments are finished right away; loads need a closer look at the address.
        let (address, bitsize) = match arg.location {
            Expression::Var(var) => return IrArg::Register(var.into()),
            Expression::Load { address, size, .. } => (address, size),
            _ => panic!(),
        };
        // The address has to be an addition ...
        let (lhs, rhs) = match *address {
            Expression::BinOp {
                op: BinOpType::PLUS,
                lhs,
                rhs,
            } => (lhs, rhs),
            _ => panic!(),
        };
        // ... of a variable on the left-hand side ...
        assert!(matches!(*lhs, Expression::Var(_)));
        // ... and a constant on the right-hand side, which yields the offset.
        let offset = match *rhs {
            Expression::Const(bitvec) => bitvec.try_to_i64().unwrap(),
            _ => panic!(),
        };
        IrArg::Stack {
            offset,
            size: bitsize.into(),
        }
    }
}
fn extract_loads_from_expression(expr: Expression, counter: u64) -> (Vec<Def>, Expression, u64) {
use Expression::*;
match expr {
......
use super::Arg;
use super::{Arg, ArgIntent};
use crate::bil::*;
use crate::intermediate_representation::ExternSymbol as IrExternSymbol;
use crate::prelude::*;
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
......@@ -51,6 +52,35 @@ impl ExternSymbol {
}
}
impl From<ExternSymbol> for IrExternSymbol {
    /// Translate an extern symbol into the internally used IR.
    ///
    /// The arguments are sorted by their intent:
    /// `Input` arguments become parameters, `Output` arguments become return values,
    /// and arguments with intent `Both` or `Unknown` are added to both lists.
    /// The `no_return` flag is always set to `false`.
    fn from(symbol: ExternSymbol) -> IrExternSymbol {
        let mut parameters = Vec::new();
        let mut return_values = Vec::new();
        for arg in symbol.arguments {
            let is_parameter = matches!(
                arg.intent,
                ArgIntent::Input | ArgIntent::Both | ArgIntent::Unknown
            );
            let is_return_value = matches!(
                arg.intent,
                ArgIntent::Output | ArgIntent::Both | ArgIntent::Unknown
            );
            if is_parameter {
                parameters.push(arg.clone().into());
            }
            if is_return_value {
                return_values.push(arg.into());
            }
        }
        IrExternSymbol {
            tid: symbol.tid,
            name: symbol.name,
            calling_convention: symbol.calling_convention,
            parameters,
            return_values,
            // BAP had an attribute for non-returning functions the last time this was checked,
            // but it did not actually set it.
            no_return: false,
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment