Commit 263c1498 by Enkelmann Committed by Enkelmann

Implement expression deserialization for Pcode

parent d6b95643
...@@ -2,6 +2,7 @@ use std::process::Command; ...@@ -2,6 +2,7 @@ use std::process::Command;
use structopt::StructOpt; use structopt::StructOpt;
// TODO: Add validation function for `--partial=???` parameter. // TODO: Add validation function for `--partial=???` parameter.
// TODO: `--partial` option needs better documentation on how to specify the list of checks to run.
// TODO: Add module version printing function // TODO: Add module version printing function
#[derive(Debug, StructOpt)] #[derive(Debug, StructOpt)]
...@@ -45,8 +46,7 @@ fn main() { ...@@ -45,8 +46,7 @@ fn main() {
if cmdline_args.module_versions { if cmdline_args.module_versions {
println!("printing module versions"); println!("printing module versions");
todo!(); todo!(); // TODO: implement!
return;
} else if let Some(exit_code) = build_bap_command(&cmdline_args).status().unwrap().code() { } else if let Some(exit_code) = build_bap_command(&cmdline_args).status().unwrap().code() {
std::process::exit(exit_code); std::process::exit(exit_code);
} }
......
use super::ByteSize;
use super::Variable;
use crate::prelude::*;
/// An expression of the intermediate representation.
///
/// Operands of compound expressions are boxed, so expressions can nest arbitrarily deep.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Expression {
/// A variable used as a value.
Var(Variable),
/// A constant given as a bitvector.
Const(Bitvector),
/// The binary operation `op` applied to the operands `lhs` and `rhs`.
BinOp {
op: BinOpType,
lhs: Box<Expression>,
rhs: Box<Expression>,
},
/// The unary operation `op` applied to the operand `arg`.
UnOp {
op: UnOpType,
arg: Box<Expression>,
},
/// The cast operation `op` applied to `arg`, together with a `size`.
/// The P-code conversion in this crate sets `size` to the size of the assigned variable.
Cast {
op: CastOpType,
size: ByteSize,
arg: Box<Expression>,
},
/// An expression that is only identified by a textual `description` and a `size`.
/// NOTE(review): nothing in view constructs this variant; confirm its intended use.
Unknown {
description: String,
size: ByteSize,
},
/// A sub-range of the bytes of `arg`.
/// Presumably `low_byte` is the offset of the first extracted byte and `size` the number of
/// extracted bytes, mirroring the P-code `SUBPIECE` operation; confirm against the consumers.
Subpiece {
low_byte: ByteSize,
size: ByteSize,
arg: Box<Expression>,
},
}
/// The operator of an [`Expression::BinOp`].
///
/// Each variant is the counterpart of the equally named binary P-code mnemonic
/// (e.g. `IntAdd` for `INT_ADD`); the P-code module converts its mnemonics into this type.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BinOpType {
Piece,
IntEqual,
IntNotEqual,
IntLess,
IntSLess,
IntAdd,
IntSub,
IntCarry,
IntSCarry,
IntSBorrow,
IntXOr,
IntAnd,
IntOr,
IntLeft,
IntRight,
IntSRight,
IntMult,
IntDiv,
IntRem,
IntSDiv,
IntSRem,
BoolXOr,
BoolAnd,
BoolOr,
FloatEqual,
FloatNotEqual,
FloatLess,
FloatLessEqual,
FloatAdd,
FloatSub,
FloatMult,
FloatDiv,
}
/// The operator of an [`Expression::Cast`].
///
/// Each variant is the counterpart of the equally named P-code mnemonic
/// (`INT_ZEXT`, `INT_SEXT`, `INT2FLOAT`, `FLOAT2FLOAT`, `TRUNC`).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum CastOpType {
IntZExt,
IntSExt,
Int2Float,
Float2Float,
Trunc,
}
/// The operator of an [`Expression::UnOp`].
///
/// Each variant is the counterpart of the equally named unary P-code mnemonic
/// (e.g. `Int2Comp` for `INT_2COMP`).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum UnOpType {
IntNegate,
Int2Comp,
BoolNegate,
FloatNegate,
FloatAbs,
FloatSqrt,
FloatCeil,
FloatFloor,
FloatRound,
FloatNaN,
}
use crate::prelude::*;
use crate::term::{Term, Tid};
use derive_more::*;
use std::convert::TryFrom;
mod variable;
pub use variable::*;
mod expression;
pub use expression::*;
mod term;
pub use term::*;
// TODO: move ByteSize and BitSize into their own module
/// A size measured in bytes, stored as a `u64`.
///
/// Because of `#[serde(transparent)]` a `ByteSize` is (de)serialized as the bare integer.
/// The formatting, conversion (`From`/`Into` for `u64`) and arithmetic traits in the derive
/// list are presumably provided by `derive_more`, which is glob-imported in this module.
#[derive(
Serialize,
Deserialize,
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Hash,
Clone,
Copy,
Display,
Binary,
Octal,
LowerHex,
UpperHex,
From,
Into,
Not,
Add,
Sub,
Mul,
Div,
Rem,
Shr,
Shl,
AddAssign,
SubAssign,
MulAssign,
DivAssign,
RemAssign,
ShrAssign,
ShlAssign,
Sum,
)]
#[serde(transparent)]
pub struct ByteSize(u64);
impl From<ByteSize> for BitSize {
    /// Converts a size in bytes into the same size in bits.
    ///
    /// # Panics
    ///
    /// Panics if the number of bits does not fit into a `BitSize`. The multiplication is
    /// checked so that an overflow cannot silently wrap around to a small, wrong bit size
    /// in release builds.
    fn from(bytesize: ByteSize) -> BitSize {
        u64::from(bytesize)
            .checked_mul(8)
            .and_then(|bits| u16::try_from(bits).ok())
            .unwrap_or_else(|| {
                panic!(
                    "ByteSize of {} bytes does not fit into a BitSize",
                    bytesize
                )
            })
    }
}
impl From<ByteSize> for apint::BitWidth {
fn from(bytesize: ByteSize) -> apint::BitWidth {
apint::BitWidth::from((u64::from(bytesize) * 8) as usize)
}
}
\ No newline at end of file
use crate::prelude::*;
use crate::term::{Term, Tid};
use super::{Variable, Expression, ByteSize};
/// A definition of the intermediate representation: a memory load, a memory store or an
/// assignment to a variable.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Def {
/// A memory load: `var` receives the value read from the location given by `address`.
Load {
var: Variable,
address: Expression,
},
/// A memory store: `value` is written to the location given by `address`.
Store {
address: Expression,
value: Expression,
},
/// An assignment of the expression `value` to the variable `var`.
Assign {
var: Variable,
value: Expression,
},
}
\ No newline at end of file
use crate::prelude::*;
use super::ByteSize;
/// A variable of the intermediate representation.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Variable {
/// The name of the variable.
pub name: String,
/// The size of the variable.
pub size: ByteSize,
/// Flag for temporary variables; the P-code conversion fills it from `is_virtual`.
pub is_temp: bool,
}
\ No newline at end of file
...@@ -13,6 +13,8 @@ pub mod bil; ...@@ -13,6 +13,8 @@ pub mod bil;
pub mod ffi; pub mod ffi;
pub mod term; pub mod term;
pub mod utils; pub mod utils;
pub mod pcode;
pub mod intermediate_representation;
mod prelude { mod prelude {
pub use apint::Width; pub use apint::Width;
......
use crate::prelude::*;
use crate::term::{Term, Tid};
use crate::intermediate_representation::ByteSize;
use crate::intermediate_representation::Variable as IrVariable;
use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::BinOpType as IrBinOpType;
use crate::intermediate_representation::UnOpType as IrUnOpType;
use crate::intermediate_representation::CastOpType as IrCastOpType;
/// A P-code operand as deserialized from JSON: either a named variable or a constant.
///
/// The conversions implemented for this type treat `name: Some(_), value: None` as a variable
/// and `name: None, value: Some(_)` as a constant; they panic on any other combination.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Variable {
/// The name of the variable; expected to be `None` for constants.
pub name: Option<String>,
/// The value of a constant as a hexadecimal string; expected to be `None` for variables.
pub value: Option<String>,
/// The size of the operand.
pub size: ByteSize,
/// Converted to the `is_temp` flag of the intermediate representation.
pub is_virtual: bool,
}
impl From<Variable> for IrVariable {
    /// Translates a named P-code variable into a variable of the intermediate representation.
    ///
    /// # Panics
    ///
    /// Panics if `pcode_var` has no name, i.e. if it does not denote a variable.
    fn from(pcode_var: Variable) -> IrVariable {
        let name = match pcode_var.name {
            Some(name) => name,
            // State what went wrong instead of a bare `unwrap` failure.
            None => panic!(
                "P-code operand without a name cannot be converted to a variable (value: {:?})",
                pcode_var.value
            ),
        };
        IrVariable {
            name,
            size: pcode_var.size,
            is_temp: pcode_var.is_virtual, // TODO: rename `pcode_var.is_virtual` to `is_temp`
        }
    }
}
impl From<Variable> for IrExpression {
fn from(pcode_var: Variable) -> IrExpression {
match (&pcode_var.name, &pcode_var.value) {
(Some(_name), None) => IrExpression::Var(pcode_var.into()),
(None, Some(hex_value)) => {
// TODO: Implement parsing for large hex values.
if pcode_var.size > 8.into() {
panic!("Parsing of immediates greater than 8 bytes not yet implemented: {}", hex_value);
}
let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap();
let mut bitvector: Bitvector = Bitvector::from_u64(val);
bitvector.truncate(pcode_var.size).unwrap();
IrExpression::Const(bitvector)
},
_ => panic!(),
}
}
}
impl From<Variable> for ByteSize {
fn from(pcode_var: Variable) -> ByteSize {
match (&pcode_var.name, &pcode_var.value) {
(None, Some(hex_value)) => {
// TODO: Implement parsing for large hex values.
if pcode_var.size > 8.into() {
panic!("Parsing of immediates greater than 8 bytes not yet implemented: {}", hex_value);
}
let val: u64 = u64::from_str_radix(&hex_value, 16).unwrap();
val.into()
},
_ => panic!(),
}
}
}
/// A P-code operation as deserialized from JSON: a mnemonic plus up to three input operands.
///
/// Which inputs are present depends on the mnemonic; the conversion of definitions reads
/// e.g. `input0` and `input1` for binary operations and `input1`/`input2` for `STORE`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Expression {
/// The kind of operation.
pub mnemonic: ExpressionType,
/// The first input operand, if any.
pub input0: Option<Variable>,
/// The second input operand, if any.
pub input1: Option<Variable>,
/// The third input operand, if any.
pub input2: Option<Variable>,
}
impl From<Expression> for IrExpression {
fn from(expr: Expression) -> IrExpression {
match expr.mnemonic {
_ => todo!(),
}
}
}
/// The mnemonic of a P-code operation.
///
/// The variants are spelled exactly like the mnemonics in the JSON input (e.g. `"INT_SUB"`),
/// so that the derived `Deserialize` implementation matches them without renaming.
/// This is why the camel case lint is disabled for this type.
#[allow(non_camel_case_types)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum ExpressionType {
COPY,
LOAD,
STORE,
PIECE,
SUBPIECE,
INT_EQUAL,
INT_NOTEQUAL,
INT_LESS,
INT_SLESS,
INT_ADD,
INT_SUB,
INT_CARRY,
INT_SCARRY,
INT_SBORROW,
INT_XOR,
INT_AND,
INT_OR,
INT_LEFT,
INT_RIGHT,
INT_SRIGHT,
INT_MULT,
INT_DIV,
INT_REM,
INT_SDIV,
INT_SREM,
BOOL_XOR,
BOOL_AND,
BOOL_OR,
FLOAT_EQUAL,
FLOAT_NOTEQUAL,
FLOAT_LESS,
FLOAT_LESSEQUAL,
FLOAT_ADD,
FLOAT_SUB,
FLOAT_MULT,
FLOAT_DIV,
INT_NEGATE,
INT_2COMP,
BOOL_NEGATE,
FLOAT_NEGATE,
FLOAT_ABS,
FLOAT_SQRT,
FLOAT_CEIL,
FLOAT_FLOOR,
FLOAT_ROUND,
FLOAT_NAN,
INT_ZEXT,
INT_SEXT,
INT2FLOAT,
FLOAT2FLOAT,
TRUNC,
}
impl From<ExpressionType> for IrBinOpType {
    /// Maps the mnemonic of a binary P-code operation to the matching IR operator.
    ///
    /// # Panics
    ///
    /// Panics if `expr_type` is not the mnemonic of a binary operation.
    fn from(expr_type: ExpressionType) -> IrBinOpType {
        use ExpressionType::*;
        match expr_type {
            PIECE => IrBinOpType::Piece,
            INT_EQUAL => IrBinOpType::IntEqual,
            INT_NOTEQUAL => IrBinOpType::IntNotEqual,
            INT_LESS => IrBinOpType::IntLess,
            INT_SLESS => IrBinOpType::IntSLess,
            INT_ADD => IrBinOpType::IntAdd,
            INT_SUB => IrBinOpType::IntSub,
            INT_CARRY => IrBinOpType::IntCarry,
            INT_SCARRY => IrBinOpType::IntSCarry,
            INT_SBORROW => IrBinOpType::IntSBorrow,
            INT_XOR => IrBinOpType::IntXOr,
            INT_AND => IrBinOpType::IntAnd,
            INT_OR => IrBinOpType::IntOr,
            INT_LEFT => IrBinOpType::IntLeft,
            INT_RIGHT => IrBinOpType::IntRight,
            INT_SRIGHT => IrBinOpType::IntSRight,
            INT_MULT => IrBinOpType::IntMult,
            INT_DIV => IrBinOpType::IntDiv,
            INT_REM => IrBinOpType::IntRem,
            INT_SDIV => IrBinOpType::IntSDiv,
            INT_SREM => IrBinOpType::IntSRem,
            BOOL_XOR => IrBinOpType::BoolXOr,
            BOOL_AND => IrBinOpType::BoolAnd,
            BOOL_OR => IrBinOpType::BoolOr,
            FLOAT_EQUAL => IrBinOpType::FloatEqual,
            FLOAT_NOTEQUAL => IrBinOpType::FloatNotEqual,
            FLOAT_LESS => IrBinOpType::FloatLess,
            FLOAT_LESSEQUAL => IrBinOpType::FloatLessEqual,
            FLOAT_ADD => IrBinOpType::FloatAdd,
            FLOAT_SUB => IrBinOpType::FloatSub,
            FLOAT_MULT => IrBinOpType::FloatMult,
            FLOAT_DIV => IrBinOpType::FloatDiv,
            // Name the offending mnemonic so the panic can be traced to its input.
            other => panic!("P-code mnemonic {:?} is not a binary operation", other),
        }
    }
}
impl From<ExpressionType> for IrUnOpType {
    /// Maps the mnemonic of a unary P-code operation to the matching IR operator.
    ///
    /// # Panics
    ///
    /// Panics if `expr_type` is not the mnemonic of a unary operation.
    fn from(expr_type: ExpressionType) -> IrUnOpType {
        use ExpressionType::*;
        match expr_type {
            INT_NEGATE => IrUnOpType::IntNegate,
            INT_2COMP => IrUnOpType::Int2Comp,
            BOOL_NEGATE => IrUnOpType::BoolNegate,
            FLOAT_NEGATE => IrUnOpType::FloatNegate,
            FLOAT_ABS => IrUnOpType::FloatAbs,
            FLOAT_SQRT => IrUnOpType::FloatSqrt,
            FLOAT_CEIL => IrUnOpType::FloatCeil,
            FLOAT_FLOOR => IrUnOpType::FloatFloor,
            FLOAT_ROUND => IrUnOpType::FloatRound,
            FLOAT_NAN => IrUnOpType::FloatNaN,
            // Name the offending mnemonic so the panic can be traced to its input.
            other => panic!("P-code mnemonic {:?} is not a unary operation", other),
        }
    }
}
impl From<ExpressionType> for IrCastOpType {
    /// Maps the mnemonic of a P-code cast operation to the matching IR operator.
    ///
    /// # Panics
    ///
    /// Panics if `expr_type` is not the mnemonic of a cast operation.
    fn from(expr_type: ExpressionType) -> IrCastOpType {
        use ExpressionType::*;
        match expr_type {
            INT_ZEXT => IrCastOpType::IntZExt,
            INT_SEXT => IrCastOpType::IntSExt,
            INT2FLOAT => IrCastOpType::Int2Float,
            FLOAT2FLOAT => IrCastOpType::Float2Float,
            TRUNC => IrCastOpType::Trunc,
            // Name the offending mnemonic so the panic can be traced to its input.
            other => panic!("P-code mnemonic {:?} is not a cast operation", other),
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
/// A named register operand without a `value` key must deserialize.
#[test]
fn variable_deserialization() {
    let json = r#"
{
"name": "RSP",
"size": 8,
"is_virtual": false
}
"#;
    let _parsed = serde_json::from_str::<Variable>(json).unwrap();
}
/// An operation with two inputs and no `input2` key must deserialize.
#[test]
fn expression_deserialization() {
    let json = r#"
{
"mnemonic": "INT_SUB",
"input0": {
"name": "RSP",
"size": 8,
"is_virtual": false
},
"input1": {
"name": "00000008",
"size": 8,
"is_virtual": false
}
}
"#;
    let _parsed = serde_json::from_str::<Expression>(json).unwrap();
}
}
\ No newline at end of file
use crate::prelude::*;
use crate::term::{Term, Tid};
use derive_more::*;
use std::convert::TryFrom;
mod expressions;
pub use expressions::*;
mod term;
pub use term::*;
use super::{Expression, Variable};
use crate::intermediate_representation::Def as IrDef;
use crate::intermediate_representation::Expression as IrExpression;
use crate::prelude::*;
use crate::term::{Term, Tid};
/// The call-specific part of a jump: the call target and an optional return label.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Call {
/// The target of the call.
pub target: Label,
/// The label to return to, if any.
/// NOTE(review): without a serde rename this field is read from the JSON key `return_`,
/// while the `jmp_deserialization` test fixture uses the key `return` - confirm which key
/// the JSON producer emits.
pub return_: Option<Label>,
}
/// A P-code jump as deserialized from JSON.
///
/// Presumably `goto`, `call` and `condition` are only present for the jump types that need
/// them; the type itself does not enforce any combination.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Jmp {
/// The kind of jump.
pub mnemonic: JmpType,
/// The jump target, if given.
pub goto: Option<Label>,
/// The call data, if given.
pub call: Option<Call>,
/// The jump condition, if given.
pub condition: Option<Expression>,
}
/// The mnemonic of a P-code jump, spelled as in the JSON input (e.g. `"CALLIND"`).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum JmpType {
BRANCH,
CBRANCH,
BRANCHIND,
CALL,
CALLIND,
RETURN,
}
// TODO: Remove since code duplication?
/// The target of a jump or call.
///
/// Deserialized from a JSON object whose single key is the variant name,
/// e.g. `{"Direct": {...}}`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Label {
/// A target that is known by its term identifier.
Direct(Tid),
/// A target that is given by a P-code operand.
Indirect(Variable),
}
/// A P-code definition as deserialized from JSON: the operation `rhs` with output `lhs`.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Def {
/// The output operand of the operation.
pub lhs: Variable,
/// The operation together with its input operands.
pub rhs: Expression,
}
impl From<Def> for IrDef {
    /// Translates a P-code definition into a definition of the intermediate representation.
    ///
    /// - `COPY` becomes an assignment of `input0` to the output variable.
    /// - `LOAD` becomes a load from the address in `input1`.
    /// - `STORE` becomes a store of `input2` to the address in `input1`.
    /// - All other operations become an assignment of the corresponding expression, where
    ///   `SUBPIECE` and cast expressions take their size from the output variable.
    ///
    /// # Panics
    ///
    /// Panics if an input operand required by the operation is missing or if an operand
    /// cannot be converted (see the conversions implemented for the P-code `Variable`).
    fn from(def: Def) -> IrDef {
        use super::ExpressionType::*;
        // The size is `Copy`: read it up front so that `def.lhs` can be moved into the
        // result without cloning it first.
        let output_size = def.lhs.size;
        match def.rhs.mnemonic {
            // Take the source operand directly instead of converting the whole P-code
            // expression, so that copies do not depend on the generic expression conversion.
            COPY => IrDef::Assign {
                var: def.lhs.into(),
                value: def.rhs.input0.expect("COPY is missing input0").into(),
            },
            LOAD => IrDef::Load {
                var: def.lhs.into(),
                address: def.rhs.input1.expect("LOAD is missing input1").into(),
            },
            STORE => IrDef::Store {
                address: def.rhs.input1.expect("STORE is missing input1").into(),
                value: def.rhs.input2.expect("STORE is missing input2").into(),
            },
            PIECE | INT_EQUAL | INT_NOTEQUAL | INT_LESS | INT_SLESS | INT_ADD | INT_SUB
            | INT_CARRY | INT_SCARRY | INT_SBORROW | INT_XOR | INT_AND | INT_OR | INT_LEFT
            | INT_RIGHT | INT_SRIGHT | INT_MULT | INT_DIV | INT_REM | INT_SDIV | INT_SREM
            | BOOL_XOR | BOOL_AND | BOOL_OR | FLOAT_EQUAL | FLOAT_NOTEQUAL | FLOAT_LESS
            | FLOAT_LESSEQUAL | FLOAT_ADD | FLOAT_SUB | FLOAT_MULT | FLOAT_DIV => IrDef::Assign {
                var: def.lhs.into(),
                value: IrExpression::BinOp {
                    op: def.rhs.mnemonic.into(),
                    lhs: Box::new(
                        def.rhs
                            .input0
                            .expect("Binary operation is missing input0")
                            .into(),
                    ),
                    rhs: Box::new(
                        def.rhs
                            .input1
                            .expect("Binary operation is missing input1")
                            .into(),
                    ),
                },
            },
            SUBPIECE => IrDef::Assign {
                var: def.lhs.into(),
                value: IrExpression::Subpiece {
                    low_byte: def.rhs.input1.expect("SUBPIECE is missing input1").into(),
                    size: output_size,
                    arg: Box::new(def.rhs.input0.expect("SUBPIECE is missing input0").into()),
                },
            },
            INT_NEGATE | INT_2COMP | BOOL_NEGATE | FLOAT_NEGATE | FLOAT_ABS | FLOAT_SQRT
            | FLOAT_CEIL | FLOAT_FLOOR | FLOAT_ROUND | FLOAT_NAN => IrDef::Assign {
                var: def.lhs.into(),
                value: IrExpression::UnOp {
                    op: def.rhs.mnemonic.into(),
                    arg: Box::new(
                        def.rhs
                            .input0
                            .expect("Unary operation is missing input0")
                            .into(),
                    ),
                },
            },
            INT_ZEXT | INT_SEXT | INT2FLOAT | FLOAT2FLOAT | TRUNC => IrDef::Assign {
                var: def.lhs.into(),
                value: IrExpression::Cast {
                    op: def.rhs.mnemonic.into(),
                    size: output_size,
                    arg: Box::new(
                        def.rhs
                            .input0
                            .expect("Cast operation is missing input0")
                            .into(),
                    ),
                },
            },
        }
    }
}
/// A block as deserialized from JSON: a list of definitions and a list of jumps.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Blk {
/// The definitions of the block.
pub defs: Vec<Term<Def>>,
/// The jumps of the block.
pub jmps: Vec<Term<Jmp>>,
}
// TODO: We need a unit test for stack parameter (that use location instead of var)!
/// An argument of an extern symbol.
///
/// Presumably exactly one of `var` (register parameter) and `location` (e.g. stack
/// parameter) is set; the type itself does not enforce this.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Arg {
/// The variable holding the argument, if it is given as a variable.
pub var: Option<Variable>,
/// The expression describing where the argument is located, if it is given as a location.
pub location: Option<Expression>,
/// Whether the argument is an input, an output or both.
pub intent: ArgIntent,
}
/// The data flow direction of an argument, spelled as in the JSON input (e.g. `"INPUT"`).
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum ArgIntent {
INPUT,
OUTPUT,
BOTH,
}
/// A subroutine as deserialized from JSON: a name and a list of blocks.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Sub {
/// The name of the subroutine.
pub name: String,
/// The blocks of the subroutine.
pub blocks: Vec<Term<Blk>>,
}
/// An extern symbol as deserialized from JSON.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
/// The term identifier of the symbol.
pub tid: Tid,
/// The address of the symbol as a string (a hexadecimal string in the test fixture).
pub address: String,
/// The name of the symbol, e.g. `strncmp`.
pub name: String,
/// The name of the calling convention, if known (e.g. `__stdcall` in the test fixture).
pub calling_convention: Option<String>,
/// The arguments of the symbol, including outputs (see `ArgIntent`).
pub arguments: Vec<Arg>,
}
/// A program as deserialized from JSON.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
/// The subroutines of the program.
pub subs: Vec<Term<Sub>>,
/// The extern symbols of the program.
pub extern_symbols: Vec<ExternSymbol>,
/// The term identifiers of the entry points of the program.
pub entry_points: Vec<Tid>,
}
#[cfg(test)]
mod tests {
use super::*;
/// A binary operation must deserialize and convert into an IR definition without panicking.
#[test]
fn def_deserialization() {
    let json = r#"
{
"lhs": {
"name": "CF",
"size": 1,
"is_virtual": false
},
"rhs": {
"mnemonic": "INT_CARRY",
"input0": {
"name": "RDX",
"size": 8,
"is_virtual": false
},
"input1": {
"name": "RDI",
"size": 8,
"is_virtual": false
}
}
}
"#;
    let pcode_def = serde_json::from_str::<Def>(json).unwrap();
    let _ir_def = IrDef::from(pcode_def);
}
/// Both label variants must deserialize from their externally tagged JSON form.
#[test]
fn label_deserialization() {
    let direct_json = r#"
{
"Direct": {
"id": "blk_00103901",
"address": "00103901"
}
}
"#;
    let _direct = serde_json::from_str::<Label>(direct_json).unwrap();

    let indirect_json = r#"
{
"Indirect": {
"name": "00109ef0",
"size": 8,
"is_virtual": false
}
}
"#;
    let _indirect = serde_json::from_str::<Label>(indirect_json).unwrap();
}
/// An indirect call must deserialize as a jump term.
///
/// The return label of the fixture uses the variant name `Direct`, which is the spelling
/// that `Label` declares.
///
/// NOTE(review): `Call` declares the field `return_` without a serde rename, so the `"return"`
/// key of this fixture does not match it and its content is currently not validated by
/// deserialization; the same holds for `"type_"`, which `Jmp` does not declare. Confirm which
/// keys the JSON producer emits and align the structs or this fixture accordingly.
#[test]
fn jmp_deserialization() {
    let _: Term<Jmp> = serde_json::from_str(
        r#"
{
"tid": {
"id": "instr_00102014_2",
"address": "00102014"
},
"term": {
"type_": "CALL",
"mnemonic": "CALLIND",
"call": {
"target": {
"Indirect": {
"name": "RAX",
"size": 8,
"is_virtual": false
}
},
"return": {
"Direct": {
"id": "blk_00102016",
"address": "00102016"
}
}
}
}
}
"#,
    )
    .unwrap();
}
/// An empty block term must deserialize.
#[test]
fn blk_deserialization() {
    let json = r#"
{
"tid": {
"id": "blk_00101000",
"address": "00101000"
},
"term": {
"defs": [],
"jmps": []
}
}
"#;
    let _parsed = serde_json::from_str::<Term<Blk>>(json).unwrap();
}
/// A register argument without a `location` key must deserialize.
#[test]
fn arg_deserialization() {
    let json = r#"
{
"var": {
"name": "RDI",
"size": 8,
"is_virtual": false
},
"intent": "INPUT"
}
"#;
    let _parsed = serde_json::from_str::<Arg>(json).unwrap();
}
/// A subroutine term without blocks must deserialize.
#[test]
fn sub_deserialization() {
    let json = r#"
{
"tid": {
"id": "sub_00101000",
"address": "00101000"
},
"term": {
"name": "sub_name",
"blocks": []
}
}
"#;
    let _parsed = serde_json::from_str::<Term<Sub>>(json).unwrap();
}
/// An extern symbol with three input arguments and one output argument must deserialize.
#[test]
fn extern_symbol_deserialization() {
    let json = r#"
{
"tid": {
"id": "sub_0010b020",
"address": "0010b020"
},
"address": "0010b020",
"name": "strncmp",
"calling_convention": "__stdcall",
"arguments": [
{
"var": {
"name": "RDI",
"size": 8,
"is_virtual": false
},
"intent": "INPUT"
},
{
"var": {
"name": "RSI",
"size": 8,
"is_virtual": false
},
"intent": "INPUT"
},
{
"var": {
"name": "RDX",
"size": 8,
"is_virtual": false
},
"intent": "INPUT"
},
{
"var": {
"name": "EAX",
"size": 4,
"is_virtual": false
},
"intent": "OUTPUT"
}
]
}
"#;
    let _parsed = serde_json::from_str::<ExternSymbol>(json).unwrap();
}
/// An empty program term must deserialize.
#[test]
fn program_deserialization() {
    let json = r#"
{
"tid": {
"id": "prog_00101000",
"address": "00101000"
},
"term": {
"subs": [],
"extern_symbols": [],
"entry_points":[]
}
}
"#;
    let _parsed = serde_json::from_str::<Term<Program>>(json).unwrap();
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment