Unverified Commit aeeea7f8 by Enkelmann Committed by GitHub

Remove BAP parsing code (#149)

parent 11ca1728
......@@ -48,50 +48,12 @@ struct CmdlineArgs {
/// The current behavior of this flag is unstable and subject to change.
#[structopt(long, hidden = true)]
debug: bool,
/// Use BAP as backend (instead of Ghidra). Requires BAP and the cwe_checker-BAP-plugin to be installed.
#[structopt(long, hidden = true)]
bap: bool,
}
/// Program entry point: parse the command line arguments and dispatch to the selected backend.
fn main() {
let cmdline_args = CmdlineArgs::from_args();
if cmdline_args.bap {
// Use BAP as backend
// If the BAP process reports an exit code, it is forwarded as the exit code of this process.
// Failing to run the command at all panics (via `unwrap`).
if let Some(exit_code) = build_bap_command(&cmdline_args).status().unwrap().code() {
std::process::exit(exit_code);
}
} else {
// Use Ghidra as backend
run_with_ghidra(cmdline_args);
}
}
/// Build the BAP command corresponding to the given command line arguments.
///
/// The binary path is passed as the first argument (this panics if `args.binary` is `None`),
/// followed by `--pass=cwe-checker` and one `--cwe-checker-*` option
/// for each command line option that is set.
fn build_bap_command(args: &CmdlineArgs) -> Command {
let mut command = Command::new("bap");
command.arg(args.binary.as_ref().unwrap());
command.arg("--pass=cwe-checker");
// Options that carry a value are forwarded in the form `--cwe-checker-<name>=<value>`.
if let Some(ref string) = args.config {
command.arg("--cwe-checker-config=".to_string() + string);
}
if let Some(ref string) = args.out {
command.arg("--cwe-checker-out=".to_string() + string);
}
if let Some(ref string) = args.partial {
command.arg("--cwe-checker-partial=".to_string() + string);
}
// Boolean flags are forwarded as plain switches.
if args.json {
command.arg("--cwe-checker-json");
}
if args.quiet {
command.arg("--cwe-checker-no-logging");
}
if args.module_versions {
command.arg("--cwe-checker-module-versions");
}
command
// NOTE(review): the following line looks like the post-change body of `main`
// that the diff rendering interleaved here; presumably it is not part of this function — confirm.
run_with_ghidra(cmdline_args);
}
/// Check the existence of a file
......
......@@ -9,7 +9,6 @@ apint = "0.2"
serde = {version = "1.0", features = ["derive", "rc"]}
serde_json = "1.0"
serde_yaml = "0.8"
ocaml = "0.9.2"
petgraph = { version = "0.5", features = ["default", "serde-1"] }
fnv = "1.0" # a faster hash function for small keys like integers
anyhow = "1.0" # for easy error types
......
use super::{AbstractDomain, HasTop, RegisterDomain, SizedDomain};
use crate::bil::BitSize;
use crate::intermediate_representation::*;
use crate::prelude::*;
......@@ -64,17 +63,14 @@ impl RegisterDomain for BitvectorDomain {
match (self, rhs) {
(BitvectorDomain::Value(lhs_bitvec), BitvectorDomain::Value(rhs_bitvec)) => match op {
Piece => {
let new_bitwidth = BitSize::from(self.bytesize() + rhs.bytesize());
let new_bitwidth = (self.bytesize() + rhs.bytesize()).as_bit_length();
let upper_bits = lhs_bitvec
.clone()
.into_zero_extend(new_bitwidth as usize)
.into_zero_extend(new_bitwidth)
.unwrap()
.into_checked_shl(BitSize::from(rhs.bytesize()) as usize)
.unwrap();
let lower_bits = rhs_bitvec
.clone()
.into_zero_extend(new_bitwidth as usize)
.into_checked_shl(rhs.bytesize().as_bit_length())
.unwrap();
let lower_bits = rhs_bitvec.clone().into_zero_extend(new_bitwidth).unwrap();
BitvectorDomain::Value(upper_bits | &lower_bits)
}
IntAdd => BitvectorDomain::Value(lhs_bitvec + rhs_bitvec),
......@@ -259,9 +255,9 @@ impl RegisterDomain for BitvectorDomain {
BitvectorDomain::Value(
bitvec
.clone()
.into_checked_lshr(BitSize::from(low_byte) as usize)
.into_checked_lshr(low_byte.as_bit_length())
.unwrap()
.into_truncate(BitSize::from(size) as usize)
.into_truncate(size.as_bit_length())
.unwrap(),
)
} else {
......@@ -342,7 +338,7 @@ impl std::convert::TryFrom<&BitvectorDomain> for Bitvector {
impl std::fmt::Display for BitvectorDomain {
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Top(bytesize) => write!(formatter, "Top:u{}", BitSize::from(*bytesize)),
Self::Top(bytesize) => write!(formatter, "Top:u{}", bytesize.as_bit_length()),
Self::Value(bitvector) => write!(
formatter,
"0x{:016x}:u{:?}",
......
use super::{AbstractDomain, HasTop, SizedDomain};
use crate::bil::Bitvector;
use crate::intermediate_representation::ByteSize;
use crate::prelude::*;
use apint::{Int, Width};
use derive_more::Deref;
use serde::{Deserialize, Serialize};
......@@ -238,7 +238,6 @@ impl<T: AbstractDomain + SizedDomain + HasTop + std::fmt::Debug> MemRegionData<T
mod tests {
use super::*;
use crate::abstract_domain::RegisterDomain;
use crate::bil::Bitvector;
use crate::intermediate_representation::*;
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash, PartialOrd, Ord)]
......
use crate::{bil::Bitvector, intermediate_representation::*};
use crate::intermediate_representation::*;
use super::{create_computation, mock_context, NodeValue};
......
use super::Data;
use crate::abstract_domain::*;
use crate::bil::Bitvector;
use crate::prelude::*;
use derive_more::Deref;
use serde::{Deserialize, Serialize};
......
use super::object::*;
use super::Data;
use crate::abstract_domain::*;
use crate::bil::Bitvector;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};
......
use crate::intermediate_representation::BinOpType as IrBinOpType;
use crate::intermediate_representation::ByteSize;
use crate::intermediate_representation::CastOpType as IrCastOpType;
use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::UnOpType as IrUnOpType;
use apint::Width;
use serde::{Deserialize, Serialize};
pub mod variable;
pub use variable::*;
/// A bitvector value. This is just an alias for the `apint::ApInt` type.
pub type Bitvector = apint::ApInt;
/// A size measured in bits, stored as a `u16`.
pub type BitSize = u16;
impl From<BitSize> for ByteSize {
/// Convert to `ByteSize`, while always rounding up to the nearest full byte.
fn from(bitsize: BitSize) -> ByteSize {
((bitsize as u64 + 7) / 8).into()
}
}
/// An expression of the BAP intermediate representation.
///
/// The type derives `Serialize`/`Deserialize`, so it can be read from and written to JSON.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Expression {
/// A reference to a variable.
Var(Variable),
/// A constant bitvector value.
Const(Bitvector),
/// A load of `size` bits from `memory` at `address`.
Load {
memory: Box<Expression>,
address: Box<Expression>,
endian: Endianness,
size: BitSize,
},
/// A store of `value` (`size` bits) to `memory` at `address`.
Store {
memory: Box<Expression>,
address: Box<Expression>,
value: Box<Expression>,
endian: Endianness,
size: BitSize,
},
/// A binary operation applied to `lhs` and `rhs`.
BinOp {
op: BinOpType,
lhs: Box<Expression>,
rhs: Box<Expression>,
},
/// A unary operation applied to `arg`.
UnOp {
op: UnOpType,
arg: Box<Expression>,
},
/// A cast of `arg` to a value of `width` bits.
Cast {
kind: CastType,
width: BitSize,
arg: Box<Expression>,
},
/// A let-binding: inside `body_exp` the variable `var` stands for `bound_exp`.
Let {
var: Variable,
bound_exp: Box<Expression>,
body_exp: Box<Expression>,
},
/// An expression with unknown semantics. Only a description and the result type are known.
Unknown {
description: String,
type_: Type,
},
/// A conditional expression selecting between `true_exp` and `false_exp`.
IfThenElse {
condition: Box<Expression>,
true_exp: Box<Expression>,
false_exp: Box<Expression>,
},
/// An extraction of a bit range from `arg`.
/// The conversion to the internal IR treats both `low_bit` and `high_bit` as inclusive bounds.
Extract {
low_bit: BitSize,
high_bit: BitSize,
arg: Box<Expression>,
},
/// The concatenation of two values. It is translated to a `Piece` operation of the internal IR.
Concat {
left: Box<Expression>,
right: Box<Expression>,
},
}
impl Expression {
    /// Resolve all let-bindings inside an expression to create an equivalent expression without usage of let-bindings.
    ///
    /// For a `Let` expression both the bound expression and the body are resolved recursively
    /// before every occurrence of the bound variable in the body is replaced by the bound expression.
    /// Afterwards the expression tree contains no `Let` node anymore.
    pub fn replace_let_bindings(&mut self) {
        use Expression::*;
        match self {
            Var(_) | Const(_) | Unknown { .. } => (),
            Load {
                memory, address, ..
            } => {
                memory.replace_let_bindings();
                address.replace_let_bindings();
            }
            Store {
                memory,
                address,
                value,
                ..
            } => {
                memory.replace_let_bindings();
                address.replace_let_bindings();
                value.replace_let_bindings();
            }
            BinOp { lhs, rhs, .. } => {
                lhs.replace_let_bindings();
                rhs.replace_let_bindings();
            }
            UnOp { arg, .. } | Cast { arg, .. } | Extract { arg, .. } => {
                arg.replace_let_bindings()
            }
            Let {
                var,
                bound_exp,
                body_exp,
            } => {
                // The bound expression may itself contain let-bindings.
                // Resolve them first, or they would be copied into the result unresolved.
                bound_exp.replace_let_bindings();
                body_exp.replace_let_bindings();
                let to_replace = Expression::Var(var.clone());
                body_exp.substitute(&to_replace, bound_exp);
                *self = *body_exp.clone();
            }
            IfThenElse {
                condition,
                true_exp,
                false_exp,
            } => {
                condition.replace_let_bindings();
                true_exp.replace_let_bindings();
                false_exp.replace_let_bindings();
            }
            Concat { left, right } => {
                left.replace_let_bindings();
                right.replace_let_bindings();
            }
        }
    }

    /// Substitutes all subexpressions equal to `to_replace` with the expression `replace_with`.
    ///
    /// A matching subexpression is replaced as a whole; the replacement itself is not searched again.
    fn substitute(&mut self, to_replace: &Expression, replace_with: &Expression) {
        use Expression::*;
        if self == to_replace {
            *self = replace_with.clone();
            return;
        }
        match self {
            Var(_) | Const(_) | Unknown { .. } => (),
            Load {
                memory, address, ..
            } => {
                memory.substitute(to_replace, replace_with);
                address.substitute(to_replace, replace_with);
            }
            Store {
                memory,
                address,
                value,
                ..
            } => {
                memory.substitute(to_replace, replace_with);
                address.substitute(to_replace, replace_with);
                value.substitute(to_replace, replace_with);
            }
            BinOp { lhs, rhs, .. } => {
                lhs.substitute(to_replace, replace_with);
                rhs.substitute(to_replace, replace_with);
            }
            UnOp { arg, .. } | Cast { arg, .. } | Extract { arg, .. } => {
                arg.substitute(to_replace, replace_with)
            }
            Let {
                bound_exp,
                body_exp,
                ..
            } => {
                bound_exp.substitute(to_replace, replace_with);
                body_exp.substitute(to_replace, replace_with);
            }
            IfThenElse {
                condition,
                true_exp,
                false_exp,
            } => {
                condition.substitute(to_replace, replace_with);
                true_exp.substitute(to_replace, replace_with);
                false_exp.substitute(to_replace, replace_with);
            }
            Concat { left, right } => {
                left.substitute(to_replace, replace_with);
                right.substitute(to_replace, replace_with);
            }
        }
    }

    /// Compute the bitsize of the value that the expression computes.
    /// Return zero for `Store` expressions.
    ///
    /// # Panics
    ///
    /// Panics on `Let` expressions (resolve them with `replace_let_bindings` first)
    /// and on variables or unknown expressions whose type is not an immediate type.
    pub fn bitsize(&self) -> BitSize {
        use Expression::*;
        match self {
            Var(var) => var.bitsize().unwrap(),
            Const(bitvector) => bitvector.width().to_usize() as u16,
            Load { size, .. } => *size,
            Store { .. } => 0,
            BinOp { op, lhs, .. } => {
                use BinOpType::*;
                match op {
                    // Comparisons yield a single bit.
                    EQ | NEQ | LT | LE | SLT | SLE => 1,
                    _ => lhs.bitsize(),
                }
            }
            UnOp { arg, .. } => arg.bitsize(),
            Cast { width, .. } => *width,
            Let { .. } => panic!("bitsize() called on an expression with unresolved let-bindings"),
            Unknown { type_, .. } => type_.bitsize().unwrap(),
            IfThenElse { true_exp, .. } => true_exp.bitsize(),
            // Both bounds are inclusive, matching the conversion to the internal IR.
            Extract {
                low_bit, high_bit, ..
            } => high_bit - low_bit + 1,
            Concat { left, right } => left.bitsize() + right.bitsize(),
        }
    }
}
impl From<Expression> for IrExpression {
/// Convert a BAP IR expression to an internal IR expression.
/// Panics on expressions that are not expressions in the internal IR.
/// Replaces `IfThenElse` expressions with `Unknown` expressions (thus losing some information).
fn from(expr: Expression) -> IrExpression {
use Expression::*;
match expr {
Var(var) => IrExpression::Var(var.into()),
Const(bitvector) => {
// The internal IR expects everything to be byte-sized, so we have to extend the bitvector if necessary.
let size: ByteSize = bitvector.width().into();
IrExpression::Const(
bitvector
.into_zero_extend(apint::BitWidth::from(size))
.unwrap(),
)
}
Load { .. } | Store { .. } | Let { .. } => panic!(),
IfThenElse { true_exp, .. } => IrExpression::Unknown {
description: "BAP-IfThenElse-expression".into(),
size: true_exp.bitsize().into(),
},
Unknown { description, type_ } => IrExpression::Unknown {
description,
size: type_.bitsize().unwrap().into(),
},
BinOp { op, lhs, rhs } => IrExpression::BinOp {
op: op.into(),
lhs: Box::new(IrExpression::from(*lhs)),
rhs: Box::new(IrExpression::from(*rhs)),
},
UnOp { op, arg } => IrExpression::UnOp {
op: op.into(),
arg: Box::new(IrExpression::from(*arg)),
},
Cast { kind, width, arg } => {
use CastType::*;
match kind {
UNSIGNED => {
if width % 8 == 0 {
IrExpression::Cast {
arg: Box::new(IrExpression::from(*arg)),
op: IrCastOpType::IntZExt,
size: width.into(),
}
} else {
IrExpression::Unknown {
description: serde_json::to_string(&Cast { kind, width, arg })
.unwrap(),
size: width.into(),
}
}
}
SIGNED => {
if width % 8 == 0 {
IrExpression::Cast {
arg: Box::new(IrExpression::from(*arg)),
op: IrCastOpType::IntSExt,
size: width.into(),
}
} else {
IrExpression::Unknown {
description: serde_json::to_string(&Cast { kind, width, arg })
.unwrap(),
size: width.into(),
}
}
}
HIGH => {
if width == 1 {
IrExpression::BinOp {
op: IrBinOpType::IntSLess,
lhs: Box::new(IrExpression::Const(Bitvector::zero(
(arg.bitsize() as usize).into(),
))),
rhs: Box::new(IrExpression::from(*arg)),
}
} else if width % 8 == 0 {
let low_byte = (arg.bitsize() - width).into();
IrExpression::Subpiece {
arg: Box::new(IrExpression::from(*arg)),
low_byte,
size: width.into(),
}
} else {
IrExpression::Unknown {
description: serde_json::to_string(&Cast { kind, width, arg })
.unwrap(),
size: width.into(),
}
}
}
LOW => {
if width == 1 {
IrExpression::Subpiece {
low_byte: ByteSize::new(0),
size: ByteSize::new(1),
arg: Box::new(IrExpression::BinOp {
op: IrBinOpType::IntAnd,
lhs: Box::new(IrExpression::Const(Bitvector::one(
(arg.bitsize() as usize).into(),
))),
rhs: Box::new(IrExpression::from(*arg)),
}),
}
} else if width % 8 == 0 {
IrExpression::Subpiece {
arg: Box::new(IrExpression::from(*arg)),
low_byte: (0u64).into(),
size: width.into(),
}
} else {
IrExpression::Unknown {
description: serde_json::to_string(&Cast { kind, width, arg })
.unwrap(),
size: width.into(),
}
}
}
}
}
Extract {
low_bit,
high_bit,
arg,
} => {
if low_bit % 8 == 0 && (high_bit + 1) % 8 == 0 {
IrExpression::Subpiece {
size: (high_bit - low_bit + 1).into(),
low_byte: low_bit.into(),
arg: Box::new(IrExpression::from(*arg)),
}
} else {
IrExpression::Unknown {
description: serde_json::to_string(&Extract {
low_bit,
high_bit,
arg,
})
.unwrap(),
size: (high_bit - low_bit + 1).into(),
}
}
}
Concat { left, right } => IrExpression::BinOp {
op: IrBinOpType::Piece,
lhs: Box::new(IrExpression::from(*left)),
rhs: Box::new(IrExpression::from(*right)),
},
}
}
}
/// The kinds of casts in BAP expressions.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum CastType {
/// Translated to a zero extension (`IntZExt`) in the internal IR.
UNSIGNED,
/// Translated to a sign extension (`IntSExt`) in the internal IR.
SIGNED,
/// Take the highest bits of the argument.
HIGH,
/// Take the lowest bits of the argument.
LOW,
}
/// The binary operation types of BAP expressions.
///
/// Each variant has a direct counterpart in the internal IR (see the `From` implementation).
/// The comparison operators `EQ`, `NEQ`, `LT`, `LE`, `SLT` and `SLE` produce a value of bitsize 1.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BinOpType {
PLUS,
MINUS,
TIMES,
DIVIDE,
SDIVIDE,
MOD,
SMOD,
LSHIFT,
RSHIFT,
ARSHIFT,
AND,
OR,
XOR,
EQ,
NEQ,
LT,
LE,
SLT,
SLE,
}
impl From<BinOpType> for IrBinOpType {
/// Translate binary operation types.
fn from(op: BinOpType) -> IrBinOpType {
use BinOpType::*;
use IrBinOpType::*;
match op {
PLUS => IntAdd,
MINUS => IntSub,
TIMES => IntMult,
DIVIDE => IntDiv,
SDIVIDE => IntSDiv,
MOD => IntRem,
SMOD => IntSRem,
LSHIFT => IntLeft,
RSHIFT => IntRight,
ARSHIFT => IntSRight,
AND => IntAnd,
OR => IntOr,
XOR => IntXOr,
EQ => IntEqual,
NEQ => IntNotEqual,
LT => IntLess,
LE => IntLessEqual,
SLT => IntSLess,
SLE => IntSLessEqual,
}
}
}
/// The unary operation types of BAP expressions.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum UnOpType {
/// Translated to `Int2Comp` in the internal IR.
NEG,
/// Translated to `IntNegate` in the internal IR.
NOT,
}
impl From<UnOpType> for IrUnOpType {
/// Translate unary operation types.
fn from(op: UnOpType) -> IrUnOpType {
use UnOpType::*;
match op {
NEG => IrUnOpType::Int2Comp,
NOT => IrUnOpType::IntNegate,
}
}
}
/// The endianness annotation carried by `Load` and `Store` expressions.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Endianness {
LittleEndian,
BigEndian,
}
#[cfg(test)]
mod tests {
use super::*;
/// Create a non-temporary 64-bit register variable with the given name.
fn register(name: &str) -> Variable {
Variable {
name: name.into(),
type_: Type::Immediate(64),
is_temp: false,
}
}
/// Unit-like enum variants are deserialized from plain JSON strings.
#[test]
fn variant_deserialization() {
let string = "\"UNSIGNED\"";
assert_eq!(CastType::UNSIGNED, serde_json::from_str(string).unwrap());
let string = "\"NEG\"";
assert_eq!(UnOpType::NEG, serde_json::from_str(string).unwrap());
}
/// A bitvector is deserialized from an object with a `digits` and a `width` array.
#[test]
fn bitvector_deserialization() {
let bitv = Bitvector::from_u64(234);
// The serialized form is only printed for manual inspection.
let string = serde_json::to_string(&bitv).unwrap();
println!("{}", string);
println!("{:?}", bitv);
let string = "{\"digits\":[234],\"width\":[64]}";
assert_eq!(bitv, serde_json::from_str(string).unwrap());
}
/// A nested expression is deserialized from its JSON form.
#[test]
fn expression_deserialization() {
let string = "{\"BinOp\":{\"lhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}},\"op\":\"PLUS\",\"rhs\":{\"Const\":{\"digits\":[234],\"width\":[8]}}}}";
let bitv = Bitvector::from_u8(234);
let exp = Expression::BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Expression::Const(bitv.clone())),
rhs: Box::new(Expression::Const(bitv)),
};
println!("{}", serde_json::to_string(&exp).unwrap());
assert_eq!(exp, serde_json::from_str(string).unwrap())
}
/// `let x = 12 in x + 42` is resolved to `12 + 42`.
#[test]
fn replace_let_bindings() {
let mut source_exp = Expression::Let {
var: register("x"),
bound_exp: Box::new(Expression::Const(Bitvector::from_u64(12))),
body_exp: Box::new(Expression::BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Expression::Var(register("x"))),
rhs: Box::new(Expression::Const(Bitvector::from_u64(42))),
}),
};
let target_exp = Expression::BinOp {
op: BinOpType::PLUS,
lhs: Box::new(Expression::Const(Bitvector::from_u64(12))),
rhs: Box::new(Expression::Const(Bitvector::from_u64(42))),
};
source_exp.replace_let_bindings();
assert_eq!(source_exp, target_exp);
}
}
use super::BitSize;
use crate::intermediate_representation::Variable as IrVariable;
use crate::prelude::*;
use serde::{Deserialize, Serialize};
/// A variable of the BAP intermediate representation.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub struct Variable {
/// The name of the variable.
pub name: String,
/// The type of the variable. Only `Type::Immediate` variables have a bitsize.
pub type_: Type,
/// Flag marking the variable as temporary. It is passed on unchanged to the internal IR.
pub is_temp: bool,
}
/// The type of a variable or of an unknown expression.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
pub enum Type {
/// A value with the given size in bits.
Immediate(BitSize),
/// A memory type described by the sizes of its addresses and of its elements.
Memory {
addr_size: BitSize,
elem_size: BitSize,
},
/// The type is not known.
Unknown,
}
impl Type {
    /// Return the size in bits of an immediate type.
    ///
    /// Every other type yields an error, since only immediate types have a bitsize.
    pub fn bitsize(&self) -> Result<BitSize, Error> {
        match self {
            Type::Immediate(bitsize) => Ok(*bitsize),
            Type::Memory { .. } | Type::Unknown => Err(anyhow!("Not a register type")),
        }
    }
}
impl Variable {
/// Return the size in bits of the variable.
/// Returns an error if the type of the variable is not `Type::Immediate`.
pub fn bitsize(&self) -> Result<BitSize, Error> {
self.type_.bitsize()
}
}
impl From<Variable> for IrVariable {
fn from(var: Variable) -> IrVariable {
let size = if let Type::Immediate(bitsize) = var.type_ {
bitsize.into()
} else {
panic!()
};
IrVariable {
name: var.name,
size,
is_temp: var.is_temp,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Every variant of `Type` can be serialized to JSON and deserialized again.
/// Only the success of both steps is checked, not the equality of the values.
#[test]
fn type_deserialization() {
let typ = Type::Immediate(64);
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
let typ = Type::Memory {
addr_size: 64,
elem_size: 8,
};
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
let typ = Type::Unknown;
let string = serde_json::to_string_pretty(&typ).expect("Serialization failed");
println!("{}", &string);
let _: Type = serde_json::from_str(&string).expect("Deserialization failed");
}
/// A fixed JSON string for a memory type is deserialized to the expected value.
#[test]
fn var_type_from_ocaml() {
let json_string = "{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}";
let typ = Type::Memory {
addr_size: 64,
elem_size: 8,
};
assert_eq!(typ, serde_json::from_str(json_string).unwrap())
}
/// A fixed JSON string for a variable is deserialized to the expected value.
#[test]
fn var_from_ocaml() {
let json_string = "{\"is_temp\":false,\"name\":\"RAX\",\"type_\":{\"Memory\":{\"addr_size\":64,\"elem_size\":8}}}";
let var = Variable {
name: "RAX".to_string(),
type_: Type::Memory {
addr_size: 64,
elem_size: 8,
},
is_temp: false,
};
assert_eq!(var, serde_json::from_str(json_string).unwrap())
}
}
......@@ -4,7 +4,6 @@ use crate::analysis::backward_interprocedural_fixpoint::Context as BackwardConte
use crate::{
abstract_domain::{BitvectorDomain, DataDomain, PointerDomain, SizedDomain},
analysis::pointer_inference::{Data, State as PointerInferenceState},
bil::Bitvector,
intermediate_representation::{Expression, Variable},
};
......
use super::serde::JsonBuilder;
use super::OcamlSendable;
use crate::utils::log::CweWarning;
use crate::{analysis::pointer_inference::PointerInference, term::*};
use super::failwith_on_panic;
/// Deserialize a project from the given `JsonBuilder` value and convert it to the internal IR.
///
/// The pointer inference analysis itself is no longer run for the BAP backend:
/// after the project has been converted and the configuration has been read, this function panics.
/// Everything after the `panic!` is unreachable and only kept so that the function still type-checks,
/// which is why the lints for unreachable code and unused variables are silenced.
#[allow(unreachable_code)]
#[allow(unused_variables)]
fn run_pointer_inference(program_jsonbuilder_val: ocaml::Value) -> (Vec<CweWarning>, Vec<String>) {
// The caller has to ensure that the Ocaml value really wraps a `JsonBuilder` object.
let json_builder = unsafe { JsonBuilder::from_ocaml(&program_jsonbuilder_val) };
let program_json = serde_json::Value::from(json_builder);
let mut project: Project =
serde_json::from_value(program_json).expect("Project deserialization failed");
project.replace_let_bindings();
let mut project: crate::intermediate_representation::Project = project.into();
let all_logs = project.normalize();
// The configuration of the analysis is read from the "Memory" section of `config.json`.
let config: crate::analysis::pointer_inference::Config =
serde_json::from_value(crate::utils::read_config_file("config.json")["Memory"].clone())
.unwrap();
// let pi_analysis = crate::analysis::pointer_inference::run(&project, config, false);
let pi_analysis: PointerInference = panic!("Running the pointer inference analysis with the BAP backend is deprecated. Please use the Ghidra backend for this analysis instead.");
let (mut logs, cwes) = pi_analysis.collected_logs;
all_logs.append(&mut logs);
(
cwes,
all_logs
.into_iter()
.map(|log| format! {"{}", log})
.collect(),
)
}
// FFI export: the collected CWE warnings and log messages are returned to Ocaml as one JSON string.
caml!(rs_run_pointer_inference(program_jsonbuilder_val) {
failwith_on_panic( || {
let cwe_warnings_and_log = run_pointer_inference(program_jsonbuilder_val);
let cwe_warnings_and_log_json = serde_json::to_string(&cwe_warnings_and_log).unwrap();
let ocaml_string = ocaml::Str::from(&cwe_warnings_and_log_json as &str);
ocaml::Value::from(ocaml_string)
})
});
/// Deserialize a project from the given `JsonBuilder` value and convert it to the internal IR.
///
/// Like `run_pointer_inference`, this function panics after the conversion,
/// because running the pointer inference analysis with the BAP backend is deprecated.
#[allow(unused_variables)]
fn run_pointer_inference_and_print_debug(program_jsonbuilder_val: ocaml::Value) {
// The caller has to ensure that the Ocaml value really wraps a `JsonBuilder` object.
let json_builder = unsafe { JsonBuilder::from_ocaml(&program_jsonbuilder_val) };
let program_json = serde_json::Value::from(json_builder);
let mut project: Project =
serde_json::from_value(program_json).expect("Project deserialization failed");
project.replace_let_bindings();
let mut project: crate::intermediate_representation::Project = project.into();
let _ = project.normalize();
// The configuration of the analysis is read from the "Memory" section of `config.json`.
let config: crate::analysis::pointer_inference::Config =
serde_json::from_value(crate::utils::read_config_file("config.json")["Memory"].clone())
.unwrap();
panic!("Running the pointer inference analysis with the BAP backend is deprecated. Please use the Ghidra backend for this analysis instead.");
// crate::analysis::pointer_inference::run(&project, config, true); // Note: This discards all CweWarnings and log messages.
}
// FFI export: returns the Ocaml unit value.
caml!(rs_run_pointer_inference_and_print_debug(program_jsonbuilder_val) {
failwith_on_panic( || {
run_pointer_inference_and_print_debug(program_jsonbuilder_val);
ocaml::Value::unit()
})
});
/*!
# Foreign Function Interface
This module contains all functions that interact with Ocaml via the foreign function interface.
*/
use std::rc::Rc;
pub mod analysis;
pub mod serde;
/// Run `closure` and convert an unwinding panic into an Ocaml failwith exception.
///
/// This helper guards the ffi-border:
/// unwinding the stack through a panic across a ffi-boundary is undefined behaviour,
/// and as of Rust 1.41 panics are still not caught at ffi-borders by default,
/// since that would break crates depending on the undefined behaviour.
/// Raising an Ocaml failwith exception instead allows stack unwinding and better error messages.
///
/// Note that the Ocaml exception should *not* be caught,
/// since recovering from it may lead to undefined behavior on the Rust side.
fn failwith_on_panic<F, T>(closure: F) -> T
where
    F: FnOnce() -> T,
{
    let guarded_closure = std::panic::AssertUnwindSafe(closure);
    std::panic::catch_unwind(guarded_closure).unwrap_or_else(|_| {
        // Throw an Ocaml failwith-exception.
        // This may not be safe if the exception is caught and recovered from on the Ocaml side!
        // We assume that these errors are only caught for error printing but not for recovering from it.
        ocaml::runtime::failwith("Rust-Panic catched at FFI-boundary");
        std::process::abort();
    })
}
/// This is a convenience trait for objects that may be sent as opaque objects across the ffi-boundary to Ocaml.
/// For that they are wrapped as Rc<T>.
/// Note that this trait does not prevent memory leaks in itself!
/// Whenever such an object is created and sent across the ffi-boundary,
/// the finalizer must be attached to it on the Ocaml side!
trait OcamlSendable: std::marker::Sized {
/// Pack the object into an Ocaml value
///
/// The object is moved into a new `Rc` and the raw pointer of that `Rc` is stored as an Ocaml nativeint.
/// The reference count is not decreased here, so the object stays alive until `ocaml_finalize` is called.
fn into_ocaml(self) -> ocaml::Value {
let boxed_val = Rc::new(self);
ocaml::Value::nativeint(Rc::into_raw(boxed_val) as isize)
}
/// Unpack an object that is stored as a `Rc<T>` wrapped in an Ocaml value.
///
/// Note that the caller has to ensure that the wrapped object has the correct type.
/// Panics if the stored pointer is a null pointer.
unsafe fn from_ocaml(ocaml_val: &ocaml::Value) -> &Self {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
ptr.as_ref().unwrap()
}
/// Unpack a `Rc<T>` object wrapped in an Ocaml value and return a clone of it.
///
/// Note that the caller has to ensure that the wrapped object has the correct type.
unsafe fn from_ocaml_rc(ocaml_val: &ocaml::Value) -> Rc<Self> {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
// Temporarily rebuild the `Rc` from the raw pointer to be able to clone it.
let rc_box = Rc::from_raw(ptr);
let rc_clone = rc_box.clone(); // Increasing the reference count by 1
let _ = Rc::into_raw(rc_box); // Do not decrease the reference count when rc_box goes out of scope!
rc_clone
}
/// Release the reference that was handed to Ocaml by `into_ocaml`.
///
/// The `Rc` is rebuilt from the stored raw pointer and dropped immediately,
/// which decreases the reference count by 1.
// NOTE(review): presumably this must be called exactly once per value created by `into_ocaml`
// and only with values of the correct type — confirm against the Ocaml side.
fn ocaml_finalize(ocaml_val: ocaml::Value) {
let ptr: *const Self = ocaml_val.nativeint_val() as *const Self;
let _ = unsafe { Rc::from_raw(ptr) };
}
}
/*!
# FFI-functions for generating serde_json objects
This module defines functions for generating opaque serde_json::Value objects in Ocaml
which can then be deserialized with Serde on the Rust side. Signatures of the provided functions:
```Ocaml
type serde_json = nativeint (* This stores pointers, so treat this as an opaque type! *)
external build_null: unit -> serde_json = "rs_build_serde_null"
external build_bool: bool -> serde_json = "rs_build_serde_bool"
external build_number: int -> serde_json = "rs_build_serde_number"
external build_string: string -> serde_json = "rs_build_serde_string"
external build_array: serde_json list -> serde_json = "rs_build_serde_array_from_list"
external build_object: (string * serde_json) list -> serde_json = "rs_build_serde_object"
external build_bitvector: string -> serde_json = "rs_build_serde_bitvector"
(* Convert a serde_json object to a json string (used for unit tests). *)
external to_string: serde_json -> string = "rs_convert_json_to_string"
```
*/
use super::OcamlSendable;
use ocaml::{FromValue, ToValue};
use std::rc::Rc;
use std::str::FromStr;
use super::failwith_on_panic;
/// A builder type for serde_json::Value objects.
///
/// Hiding the recursive nature of the data type behind reference counts prevents unnecessary
/// deep copies when creating json objects from Ocaml, which would lead to a runtime quadratic in the size of the json object.
/// However, when converting to serde_json::Value, one deep copy is still necessary.
#[derive(Clone, Debug)]
pub enum JsonBuilder {
/// The JSON `null` value.
Null,
/// A JSON boolean.
Bool(bool),
/// A (possibly negative) JSON number.
Number(isize),
PositiveNumber(u64), // currently used only for deserialization of bitvector
/// A JSON string.
String(String),
/// A JSON array. The elements are shared via reference counting.
Array(Vec<Rc<JsonBuilder>>),
/// A JSON object given as a list of key-value pairs. The values are shared via reference counting.
Object(Vec<(String, Rc<JsonBuilder>)>),
}
// The default methods of `OcamlSendable` are used to send `JsonBuilder` objects to Ocaml.
impl OcamlSendable for JsonBuilder {}
/// Creating a serde_json::Value performing deep copy.
impl From<&JsonBuilder> for serde_json::Value {
fn from(builder: &JsonBuilder) -> serde_json::Value {
match builder {
JsonBuilder::Null => serde_json::Value::Null,
JsonBuilder::Bool(val) => serde_json::Value::Bool(*val),
JsonBuilder::Number(val) => serde_json::Value::Number(serde_json::Number::from(*val)),
JsonBuilder::PositiveNumber(val) => {
serde_json::Value::Number(serde_json::Number::from(*val))
}
JsonBuilder::String(val) => serde_json::Value::String(val.to_string()),
JsonBuilder::Array(elem_vec) => elem_vec
.iter()
.map(|rc_elem| serde_json::Value::from(&**rc_elem))
.collect(),
JsonBuilder::Object(tuple_vec) => serde_json::Value::Object(
tuple_vec
.iter()
.map(|(string_ref, json_builder)| {
(
string_ref.to_string(),
serde_json::Value::from(&**json_builder),
)
})
.collect(),
),
}
}
}
// FFI export of the finalizer for `JsonBuilder` objects.
// It drops the reference held by the given Ocaml value and returns the Ocaml unit value.
caml!(rs_finalize_json_builder(builder_val) {
failwith_on_panic( || {
JsonBuilder::ocaml_finalize(builder_val);
ocaml::Value::unit()
})
});
/// Build JsonBuilder::Null as Ocaml value
fn build_serde_null() -> ocaml::Value {
JsonBuilder::Null.into_ocaml()
}
// FFI export of `build_serde_null`. The unit argument passed from Ocaml is ignored.
caml!(rs_build_serde_null(_unit) {
failwith_on_panic( || {
build_serde_null()
})
});
/// Wrap an Ocaml boolean into a `JsonBuilder::Bool` and return it as an Ocaml value.
fn build_serde_bool(bool_val: ocaml::Value) -> ocaml::Value {
    JsonBuilder::Bool(bool::from_value(bool_val)).into_ocaml()
}
caml!(rs_build_serde_bool(bool_val) {
    failwith_on_panic( || {
        build_serde_bool(bool_val)
    })
});
/// Wrap an Ocaml integer into a `JsonBuilder::Number` and return it as an Ocaml value.
fn build_serde_number(num: ocaml::Value) -> ocaml::Value {
    let number: isize = num.isize_val();
    let builder = JsonBuilder::Number(number);
    builder.into_ocaml()
}
caml!(rs_build_serde_number(number) {
    failwith_on_panic( || {
        build_serde_number(number)
    })
});
/// Build JsonBuilder::Object representing a bitvector from a string generated by `Bitvector.to_string` in Ocaml
///
/// The string is split at `:`. The first part contains the value as hexadecimal digits
/// (with an optional `0x` prefix), the second part contains the bit width followed by one extra character.
/// The result is an object with the two keys `digits` (an array of 64-bit digits) and `width`
/// (an array containing the width as its only element).
///
/// Panics if the string does not have this format or if the width is zero.
fn build_serde_bitvector(bitvector_string_val: ocaml::Value) -> ocaml::Value {
let string = <&str>::from_value(bitvector_string_val);
let elements: Vec<&str> = string.split(':').collect();
// The last character of the width part is dropped before parsing
// (presumably a type suffix of the Ocaml string representation — TODO confirm).
let width = usize::from_str(&elements[1][0..(elements[1].len() - 1)])
.expect("Bitvector width parsing failed");
assert!(width > 0);
let mut num_list = Vec::new();
let mut number_slice: &str = elements[0];
if number_slice.starts_with("0x") {
number_slice = &number_slice[2..];
}
// Parse the hexadecimal string from its end in chunks of 16 hex digits (= 64 bits),
// so the least significant digit is pushed first.
while !number_slice.is_empty() {
if number_slice.len() > 16 {
let digit = u64::from_str_radix(&number_slice[(number_slice.len() - 16)..], 16)
.expect("Bitvector value parsing failed");
num_list.push(Rc::new(JsonBuilder::PositiveNumber(digit)));
number_slice = &number_slice[..(number_slice.len() - 16)];
} else {
let digit =
u64::from_str_radix(&number_slice, 16).expect("Bitvector value parsing failed");
num_list.push(Rc::new(JsonBuilder::PositiveNumber(digit)));
number_slice = "";
};
}
// Pad with zero digits until there is one 64-bit digit for every started 64 bits of `width`.
while num_list.len() <= (width - 1) / 64 {
num_list.push(Rc::new(JsonBuilder::PositiveNumber(0)));
}
num_list.reverse(); // since the digits were parsed in reverse order
let mut width_list = Vec::new();
width_list.push(Rc::new(JsonBuilder::Number(width as isize)));
let result = JsonBuilder::Object(vec![
("digits".to_string(), Rc::new(JsonBuilder::Array(num_list))),
("width".to_string(), Rc::new(JsonBuilder::Array(width_list))),
]);
result.into_ocaml()
}
// FFI export of `build_serde_bitvector`.
caml!(rs_build_serde_bitvector(bitvector_string) {
failwith_on_panic( || {
build_serde_bitvector(bitvector_string)
})
});
/// Wrap an Ocaml string into a `JsonBuilder::String` and return it as an Ocaml value.
fn build_serde_string(string_val: ocaml::Value) -> ocaml::Value {
    JsonBuilder::String(String::from_value(string_val)).into_ocaml()
}
caml!(rs_build_serde_string(string_val) {
    failwith_on_panic( || {
        build_serde_string(string_val)
    })
});
/// Build JsonBuilder::Array as Ocaml value from an Ocaml list
fn build_serde_array_from_list(list_val: ocaml::Value) -> ocaml::Value {
let ocaml_list = ocaml::List::from(list_val);
let value_vec = ocaml_list.to_vec();
let vec = value_vec
.into_iter()
.map(|ocaml_val| unsafe { JsonBuilder::from_ocaml_rc(&ocaml_val) })
.collect();
JsonBuilder::Array(vec).into_ocaml()
}
caml!(rs_build_serde_array_from_list(list_val) {
failwith_on_panic( || {
build_serde_array_from_list(list_val)
})
});
/// Build JsonBuilder::Object as Ocaml value from an Ocaml list of tuples
fn build_serde_object(tuple_list_val: ocaml::Value) -> ocaml::Value {
let ocaml_list = ocaml::List::from(tuple_list_val);
let pairs_vec = ocaml_list.to_vec();
let pairs = pairs_vec
.into_iter()
.map(|ocaml_tuple| {
let tuple = ocaml::Tuple::from(ocaml_tuple);
let key_ocaml = tuple
.get(0)
.expect("Error: Ocaml tuple contains no element");
let key = String::from_value(key_ocaml);
let value_ocaml: ocaml::Value = tuple
.get(1)
.expect("Error: Ocaml tuple contains not enough elements");
let data = unsafe { JsonBuilder::from_ocaml_rc(&value_ocaml) };
(key, data)
})
.collect();
JsonBuilder::Object(pairs).into_ocaml()
}
caml!(rs_build_serde_object(tuple_list_val) {
failwith_on_panic( || {
build_serde_object(tuple_list_val)
})
});
/// Get the Json string corresponding to a JsonBuilder object and return it as an Ocaml value.
fn get_json_string(builder_val: ocaml::Value) -> ocaml::Value {
let builder = unsafe { JsonBuilder::from_ocaml(&builder_val) };
let json_string = serde_json::Value::from(builder).to_string();
ocaml::Str::from(&json_string as &str).to_value()
}
caml!(rs_convert_json_to_string(builder_val) {
failwith_on_panic( || {
get_json_string(builder_val)
})
});
......@@ -9,7 +9,6 @@
use crate::prelude::*;
use derive_more::*;
use std::convert::TryFrom;
mod variable;
pub use variable::*;
......@@ -18,6 +17,13 @@ pub use expression::*;
mod term;
pub use term::*;
/// A bitvector is a fixed-length vector of bits
/// with the semantics of a CPU register,
/// i.e. it supports two's complement modulo arithmetic.
///
/// Bitvector is just an alias for the [`apint::ApInt`] type.
pub type Bitvector = apint::ApInt;
/// An unsigned number of bytes.
///
/// Used to represent sizes of values in registers or in memory.
......@@ -61,12 +67,6 @@ pub use term::*;
#[serde(transparent)]
pub struct ByteSize(u64);
impl From<ByteSize> for BitSize {
    /// Convert a size in bytes to the equivalent size in bits (by multiplying with 8).
    ///
    /// # Panics
    ///
    /// Panics if the number of bits cannot be represented as a `BitSize`.
    fn from(bytesize: ByteSize) -> BitSize {
        u64::from(bytesize)
            // A plain `* 8` would wrap around silently in release builds
            // and could then slip through the range check below.
            .checked_mul(8)
            .and_then(|bits| u16::try_from(bits).ok())
            .expect("ByteSize is too large to be represented as a BitSize")
    }
}
impl From<ByteSize> for apint::BitWidth {
fn from(bytesize: ByteSize) -> apint::BitWidth {
apint::BitWidth::from((u64::from(bytesize) * 8) as usize)
......@@ -81,21 +81,31 @@ impl From<apint::BitWidth> for ByteSize {
}
impl ByteSize {
/// Create a new `ByteSize` object
pub fn new(value: u64) -> ByteSize {
ByteSize(value)
}
/// Convert to the equivalent size in bits (by multiplying with 8).
pub fn as_bit_length(self) -> usize {
(u64::from(self) * 8) as usize
}
}
#[cfg(test)]
mod tests {
use apint::BitWidth;
use super::*;
// Checks the conversions between sizes in bits and sizes in bytes
// using a size of 8 bits (1 byte) and a size of 2 bytes (16 bits).
#[test]
fn check_bit_to_byte_conversion() {
let bits: BitSize = 8;
let bits: BitWidth = BitWidth::new(8).unwrap();
// bits -> bytes
let bytes: ByteSize = bits.into();
assert_eq!(u64::from(bytes), 1);
// bytes -> bits
let bits: BitSize = bytes.into();
assert_eq!(bits, 8);
let bits: BitWidth = bytes.into();
assert_eq!(bits.to_usize(), 8);
assert_eq!(ByteSize::new(2).as_bit_length(), 16);
}
}
......@@ -4,9 +4,6 @@
Parts of the cwe_checker that are written in Rust.
*/
#[macro_use]
extern crate ocaml;
use crate::analysis::graph::Graph;
use crate::analysis::pointer_inference::PointerInference;
use crate::intermediate_representation::Project;
......@@ -15,20 +12,16 @@ use crate::utils::log::{CweWarning, LogMessage};
pub mod abstract_domain;
pub mod analysis;
pub mod bil;
pub mod checkers;
pub mod ffi;
pub mod intermediate_representation;
pub mod pcode;
pub mod term;
pub mod utils;
mod prelude {
pub use apint::Width;
pub use serde::{Deserialize, Serialize};
pub use crate::bil::{BitSize, Bitvector};
pub use crate::intermediate_representation::ByteSize;
pub use crate::intermediate_representation::{Bitvector, ByteSize};
pub use crate::intermediate_representation::{Term, Tid};
pub use crate::AnalysisResults;
pub use anyhow::{anyhow, Error};
......
use crate::bil::*;
use crate::intermediate_representation::Arg as IrArg;
use crate::intermediate_representation::Blk as IrBlk;
use crate::intermediate_representation::CallingConvention as IrCallingConvention;
use crate::intermediate_representation::Def as IrDef;
use crate::intermediate_representation::Expression as IrExpression;
use crate::intermediate_representation::Jmp as IrJmp;
use crate::intermediate_representation::Program as IrProgram;
use crate::intermediate_representation::Project as IrProject;
use crate::intermediate_representation::Sub as IrSub;
use crate::intermediate_representation::{Term, Tid};
use serde::{Deserialize, Serialize};
pub mod symbol;
use symbol::ExternSymbol;
/// A definition consisting of a left hand side variable and a right hand side expression.
///
/// Use `Def::into_ir_defs` to translate it to the internal IR.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Def {
/// The variable on the left hand side of the definition.
pub lhs: Variable,
/// The expression on the right hand side of the definition.
pub rhs: Expression,
}
impl Def {
/// Convert one `Def` into one or more `Def`s of the internal IR.
///
/// `Load` expressions get transferred to their own `Def`,
/// since they are not representable as expressions in the internal IR.
/// `IfThenElse` expressions are translated to `Unknown` expressions in the process,
/// thus resulting in possible information loss.
fn into_ir_defs(self) -> Vec<IrDef> {
match self.rhs {
Expression::Load { address, .. } => {
let (defs, cleaned_address, _) = extract_loads_from_expression(*address, 0);
let mut ir_defs: Vec<IrDef> =
defs.into_iter().map(|def| def.into_ir_load()).collect();
ir_defs.push(IrDef::Load {
address: cleaned_address.into(),
var: self.lhs.into(),
});
ir_defs
}
Expression::Store { address, value, .. } => {
let (mut defs, cleaned_address, counter) =
extract_loads_from_expression(*address, 0);
let (mut more_defs, cleaned_value, _) =
extract_loads_from_expression(*value, counter);
defs.append(&mut more_defs);
let mut ir_defs: Vec<IrDef> =
defs.into_iter().map(|def| def.into_ir_load()).collect();
ir_defs.push(IrDef::Store {
address: cleaned_address.into(),
value: cleaned_value.into(),
});
ir_defs
}
Expression::IfThenElse {
condition,
true_exp,
false_exp,
} => {
let (defs, cleaned_if_then_else, _) = extract_loads_from_expression(
Expression::IfThenElse {
condition,
true_exp,
false_exp,
},
0,
);
let mut ir_defs: Vec<IrDef> =
defs.into_iter().map(|def| def.into_ir_load()).collect();
if let Expression::IfThenElse {
condition: _,
true_exp,
false_exp,
} = cleaned_if_then_else
{
match (*true_exp, *false_exp) {
(Expression::Store { address, value, .. }, Expression::Var(var))
| (Expression::Var(var), Expression::Store { address, value, .. })
if var == self.lhs =>
{
// The IfThenElse-expression is a conditional store to memory
ir_defs.push(IrDef::Store {
address: IrExpression::from(*address),
value: IrExpression::Unknown {
description: "BAP conditional store".into(),
size: value.bitsize().into(),
},
});
}
_ => ir_defs.push(IrDef::Assign {
var: self.lhs.clone().into(),
value: IrExpression::Unknown {
description: "BAP IfThenElse expression".into(),
size: self.lhs.bitsize().unwrap().into(),
},
}),
}
ir_defs
} else {
panic!()
}
}
_ => {
let (defs, cleaned_rhs, _) = extract_loads_from_expression(self.rhs, 0);
let mut ir_defs: Vec<IrDef> =
defs.into_iter().map(|def| def.into_ir_load()).collect();
ir_defs.push(IrDef::Assign {
var: self.lhs.into(),
value: cleaned_rhs.into(),
});
ir_defs
}
}
}
/// Translate a `Load` into its internal IR representation.
/// Panics if right hand side expression is not a `Load`.
fn into_ir_load(self) -> IrDef {
if let Expression::Load { address, .. } = self.rhs {
IrDef::Load {
address: IrExpression::from(*address),
var: self.lhs.into(),
}
} else {
panic!()
}
}
}
/// A jump consisting of an optional condition and the kind of the jump.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Jmp {
/// The condition expression of the jump, if there is one.
pub condition: Option<Expression>,
/// The kind of the jump together with its target(s).
pub kind: JmpKind,
}
/// The different kinds of jumps.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum JmpKind {
/// A call, see `Call` for the target and the return target.
Call(Call),
/// A jump to the given label.
Goto(Label),
/// A return to the given label.
Return(Label),
/// An interrupt with the given value.
/// `return_addr` is the term identifier used as return target after the interrupt.
Interrupt { value: isize, return_addr: Tid },
}
impl From<Jmp> for IrJmp {
    /// Translate jump types.
    ///
    /// - `Goto` becomes `Branch` (direct target) or `BranchInd` (indirect target).
    /// - `Return` with an indirect target becomes `Return`.
    /// - `Call` becomes `Call` (direct target) or `CallInd` (indirect target).
    /// - `Interrupt` becomes `CallOther` with the description `Interrupt <value>`.
    ///
    /// NOTE(review): `jmp.condition` is not read by this conversion - confirm that
    /// dropping the condition is intended.
    ///
    /// # Panics
    ///
    /// Panics on a `Return` with a direct target
    /// and on a `Call` whose return target is indirect.
    fn from(jmp: Jmp) -> IrJmp {
        match jmp.kind {
            JmpKind::Goto(Label::Direct(tid)) => IrJmp::Branch(tid),
            JmpKind::Goto(Label::Indirect(expr)) => IrJmp::BranchInd(expr.into()),
            JmpKind::Return(Label::Indirect(expr)) => IrJmp::Return(expr.into()),
            JmpKind::Return(Label::Direct(_)) => {
                panic!("cannot translate a return jump with a direct target")
            }
            JmpKind::Call(call) => {
                let return_ = match call.return_ {
                    Some(Label::Direct(tid)) => Some(tid),
                    None => None,
                    Some(Label::Indirect(_)) => {
                        panic!("cannot translate a call with an indirect return target")
                    }
                };
                match call.target {
                    Label::Direct(tid) => IrJmp::Call {
                        target: tid,
                        return_,
                    },
                    Label::Indirect(expr) => IrJmp::CallInd {
                        target: expr.into(),
                        return_,
                    },
                }
            }
            JmpKind::Interrupt { value, return_addr } => IrJmp::CallOther {
                description: format!("Interrupt {}", value),
                return_: Some(return_addr),
            },
        }
    }
}
/// The target and the optional return target of a call.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Call {
/// The label of the call target.
pub target: Label,
/// The label of the return target, if there is one.
pub return_: Option<Label>,
}
/// A jump target.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum Label {
/// The target is given directly by a term identifier.
Direct(Tid),
/// The target is given by an expression.
Indirect(Expression),
}
/// A block consisting of a list of definitions and a list of jumps.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Blk {
/// The definitions of the block.
pub defs: Vec<Term<Def>>,
/// The jumps of the block.
pub jmps: Vec<Term<Jmp>>,
}
impl From<Blk> for IrBlk {
    /// Translates block types.
    ///
    /// A definition that translates to exactly one IR definition keeps its term identifier.
    /// If it translates to several IR definitions,
    /// each of them gets the identifier extended by the suffix `_<index>`.
    /// Loads extracted from jumps are appended to the definitions of the block,
    /// using the identifier of the jump extended by the suffix `_<index>`.
    fn from(blk: Blk) -> IrBlk {
        let mut def_terms = Vec::new();
        for Term { tid, term: def } in blk.defs {
            let mut translated = def.into_ir_defs();
            assert!(!translated.is_empty());
            if translated.len() == 1 {
                let ir_def = translated.pop().unwrap();
                def_terms.push(Term { tid, term: ir_def });
            } else {
                def_terms.extend(translated.into_iter().enumerate().map(|(index, ir_def)| {
                    Term {
                        tid: tid.clone().with_id_suffix(&format!("_{}", index)),
                        term: ir_def,
                    }
                }));
            }
        }
        let jmp_terms = blk
            .jmps
            .into_iter()
            .map(|Term { tid, term: jmp }| {
                let (cleaned_jmp, load_defs) = extract_loads_from_jump(jmp);
                // The index runs over all IR definitions generated for this jump.
                let extracted = load_defs.into_iter().flat_map(|def| def.into_ir_defs());
                for (index, ir_def) in extracted.enumerate() {
                    def_terms.push(Term {
                        tid: tid.clone().with_id_suffix(&format!("_{}", index)),
                        term: ir_def,
                    });
                }
                Term {
                    tid,
                    term: cleaned_jmp.into(),
                }
            })
            .collect();
        IrBlk {
            defs: def_terms,
            jmps: jmp_terms,
        }
    }
}
/// A subroutine consisting of a name and a list of blocks.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Sub {
/// The name of the subroutine.
pub name: String,
/// The blocks of the subroutine.
pub blocks: Vec<Term<Blk>>,
}
impl From<Sub> for IrSub {
/// Translate `Sub` types.
fn from(sub: Sub) -> IrSub {
let blocks = sub
.blocks
.into_iter()
.map(|block_term| Term {
tid: block_term.tid,
term: block_term.term.into(),
})
.collect();
IrSub {
name: sub.name,
blocks,
}
}
}
/// A program consisting of subroutines, extern symbols and entry points.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Program {
/// The subroutines of the program.
pub subs: Vec<Term<Sub>>,
/// The extern symbols of the program.
pub extern_symbols: Vec<ExternSymbol>,
/// The term identifiers of the entry points of the program.
pub entry_points: Vec<Tid>,
}
impl From<Program> for IrProgram {
/// Translate program types.
fn from(program: Program) -> IrProgram {
let subs = program
.subs
.into_iter()
.map(|sub_term| Term {
tid: sub_term.tid,
term: sub_term.term.into(),
})
.collect();
IrProgram {
subs,
extern_symbols: program
.extern_symbols
.into_iter()
.map(|symbol| symbol.into())
.collect(),
entry_points: program.entry_points,
address_base_offset: 0,
}
}
}
/// A project consisting of a program together with information about the CPU architecture.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Project {
/// The program of the project.
pub program: Term<Program>,
/// The name of the CPU architecture.
pub cpu_architecture: String,
/// The stack pointer register. Its bitsize is used as the bitsize of pointers.
pub stack_pointer_register: Variable,
/// The names of the callee-saved registers.
pub callee_saved_registers: Vec<String>,
/// The names of the parameter registers.
pub parameter_registers: Vec<String>,
/// The names of the return registers.
pub return_registers: Vec<String>,
}
impl Project {
/// Get the bitsize of pointer values for the architecture of the project.
pub fn get_pointer_bitsize(&self) -> BitSize {
self.stack_pointer_register.bitsize().unwrap()
}
/// Substitute all let-binding-expressions in the project with equivalent expressions,
/// that do not contain the let-bindings.
/// This way subsequent analyses do not have to handle expressions containing let-bindings.
pub fn replace_let_bindings(&mut self) {
for sub in self.program.term.subs.iter_mut() {
for blk in sub.term.blocks.iter_mut() {
for def in blk.term.defs.iter_mut() {
def.term.rhs.replace_let_bindings();
}
for jmp in blk.term.jmps.iter_mut() {
if let Some(ref mut condition) = jmp.term.condition {
condition.replace_let_bindings();
}
match &mut jmp.term.kind {
JmpKind::Call(call) => {
call.target.replace_let_bindings();
if let Some(ref mut return_target) = call.return_ {
return_target.replace_let_bindings();
}
}
JmpKind::Goto(label) | JmpKind::Return(label) => {
label.replace_let_bindings()
}
JmpKind::Interrupt { .. } => (),
}
}
}
}
}
}
impl From<Project> for IrProject {
/// Translate project types.
fn from(project: Project) -> IrProject {
let program = Term {
tid: project.program.tid,
term: project.program.term.into(),
};
let default_cconv = IrCallingConvention {
name: "default".to_string(),
parameter_register: project.parameter_registers,
return_register: project.return_registers,
callee_saved_register: project.callee_saved_registers,
};
IrProject {
program,
cpu_architecture: project.cpu_architecture,
stack_pointer_register: project.stack_pointer_register.into(),
calling_conventions: vec![default_cconv],
}
}
}
impl Label {
    /// Replace let-bindings inside the target expression of an `Indirect` label.
    /// `Direct` labels are left untouched.
    fn replace_let_bindings(&mut self) {
        match self {
            Label::Indirect(expression) => expression.replace_let_bindings(),
            Label::Direct(_) => (),
        }
    }
}
/// An argument of an extern symbol.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct Arg {
/// The variable of the argument.
pub var: Variable,
/// The expression describing the location of the argument.
/// See `Arg::into_ir_args` for the forms that can be translated to the internal IR.
pub location: Expression,
/// Whether the argument is an input, an output, both or unknown.
pub intent: ArgIntent,
}
/// The intent (input or output) of an argument.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub enum ArgIntent {
/// The argument is an input.
Input,
/// The argument is an output.
Output,
/// The argument is both an input and an output.
Both,
/// The intent is not known. Such arguments are treated as both input and output.
Unknown,
}
impl ArgIntent {
    /// Returns `true` for every intent except `Output`.
    pub fn is_input(&self) -> bool {
        matches!(self, Self::Input | Self::Both | Self::Unknown)
    }

    /// Returns `true` for every intent except `Input`.
    pub fn is_output(&self) -> bool {
        matches!(self, Self::Output | Self::Both | Self::Unknown)
    }
}
impl Arg {
    /// Translate extern symbol argument types.
    ///
    /// The supported forms of the argument location are:
    /// - a variable, which yields one register argument,
    /// - a concatenation of two variables, which yields two register arguments,
    /// - a load from a variable, which yields a stack argument with offset zero,
    /// - a load from `variable + constant`, which yields a stack argument
    ///   with the constant as offset.
    ///
    /// # Panics
    ///
    /// Panics if the location has any other form
    /// or if a constant offset does not fit into an `i64`.
    pub fn into_ir_args(self) -> Vec<IrArg> {
        match self.location {
            Expression::Var(var) => vec![IrArg::Register(var.into())],
            Expression::Concat { left, right } => match (*left, *right) {
                (Expression::Var(var_left), Expression::Var(var_right)) => vec![
                    IrArg::Register(var_left.into()),
                    IrArg::Register(var_right.into()),
                ],
                _ => panic!("concatenated argument location does not consist of two registers"),
            },
            Expression::Load {
                address,
                size: bitsize,
                ..
            } => match *address {
                Expression::Var(_) => vec![IrArg::Stack {
                    offset: 0,
                    size: bitsize.into(),
                }],
                Expression::BinOp {
                    op: BinOpType::PLUS,
                    lhs,
                    rhs,
                } => {
                    assert!(
                        matches!(*lhs, Expression::Var(_)),
                        "base of a stack argument address is not a register"
                    );
                    let offset = match *rhs {
                        Expression::Const(bitvec) => bitvec
                            .try_to_i64()
                            .expect("offset of a stack argument does not fit into an i64"),
                        _ => panic!("offset of a stack argument is not a constant"),
                    };
                    vec![IrArg::Stack {
                        offset,
                        size: bitsize.into(),
                    }]
                }
                _ => panic!("unsupported address expression for a stack argument"),
            },
            _ => panic!("unsupported argument location expression"),
        }
    }
}
/// Substitute each `Load` subexpression with a temporary variable
/// and a `Def` containing the `Load` into said variable.
///
/// The function is recursive and the counter is needed to keep track how many `Load` expressions
/// have already been extracted (which is used to generate unique names for the temporary variables).
///
/// Returns a tuple consisting of
/// - the `Def`s of the extracted loads in the order in which they were extracted,
/// - the expression with each extracted `Load` replaced by its temporary variable,
/// - the updated counter.
///
/// Panics if a `Let` expression is encountered.
fn extract_loads_from_expression(expr: Expression, counter: u64) -> (Vec<Def>, Expression, u64) {
use Expression::*;
match expr {
Load {
memory,
address,
endian,
size,
} => {
// Loads nested inside the address are extracted first,
// so their definitions precede the definition of this load.
let (mut defs, cleaned_address, mut counter) =
extract_loads_from_expression(*address, counter);
// The counter is incremented before it is used, so the first temporary variable is `temp_1`.
counter += 1;
let temp_var = Variable {
name: format!("temp_{}", counter),
type_: Type::Immediate(size),
is_temp: true,
};
defs.push(Def {
lhs: temp_var.clone(),
rhs: Load {
memory,
address: Box::new(cleaned_address),
endian,
size,
},
});
(defs, Var(temp_var), counter)
}
// Expressions without subexpressions are returned unchanged.
Var(_) | Const(_) | Unknown { .. } => (Vec::new(), expr, counter),
Let { .. } => panic!(),
// For all remaining expressions the subexpressions are processed one after another.
// The counter returned for one subexpression is passed on to the next one.
// Note that the `memory` fields are not searched for loads.
Store {
memory,
address,
value,
endian,
size,
} => {
let (mut defs, cleaned_address, counter) =
extract_loads_from_expression(*address, counter);
let (mut more_defs, cleaned_value, counter) =
extract_loads_from_expression(*value, counter);
defs.append(&mut more_defs);
(
defs,
Store {
address: Box::new(cleaned_address),
value: Box::new(cleaned_value),
memory,
endian,
size,
},
counter,
)
}
IfThenElse {
condition,
true_exp,
false_exp,
} => {
let (mut defs, cleaned_cond, counter) =
extract_loads_from_expression(*condition, counter);
let (mut defs_true, cleaned_true, counter) =
extract_loads_from_expression(*true_exp, counter);
let (mut defs_false, cleaned_false, counter) =
extract_loads_from_expression(*false_exp, counter);
defs.append(&mut defs_true);
defs.append(&mut defs_false);
(
defs,
IfThenElse {
condition: Box::new(cleaned_cond),
true_exp: Box::new(cleaned_true),
false_exp: Box::new(cleaned_false),
},
counter,
)
}
BinOp { op, lhs, rhs } => {
let (mut defs, cleaned_lhs, counter) = extract_loads_from_expression(*lhs, counter);
let (mut defs_rhs, cleaned_rhs, counter) = extract_loads_from_expression(*rhs, counter);
defs.append(&mut defs_rhs);
(
defs,
BinOp {
op,
lhs: Box::new(cleaned_lhs),
rhs: Box::new(cleaned_rhs),
},
counter,
)
}
UnOp { op, arg } => {
let (defs, cleaned_arg, counter) = extract_loads_from_expression(*arg, counter);
(
defs,
UnOp {
op,
arg: Box::new(cleaned_arg),
},
counter,
)
}
Cast { kind, width, arg } => {
let (defs, cleaned_arg, counter) = extract_loads_from_expression(*arg, counter);
(
defs,
Cast {
kind,
width,
arg: Box::new(cleaned_arg),
},
counter,
)
}
Extract {
low_bit,
high_bit,
arg,
} => {
let (defs, cleaned_arg, counter) = extract_loads_from_expression(*arg, counter);
(
defs,
Extract {
low_bit,
high_bit,
arg: Box::new(cleaned_arg),
},
counter,
)
}
Concat { left, right } => {
let (mut defs, cleaned_left, counter) = extract_loads_from_expression(*left, counter);
let (mut defs_right, cleaned_right, counter) =
extract_loads_from_expression(*right, counter);
defs.append(&mut defs_right);
(
defs,
Concat {
left: Box::new(cleaned_left),
right: Box::new(cleaned_right),
},
counter,
)
}
}
}
/// Substitutes each `Load` expression in the target or conditition fields of a jump
/// with a temporary variable and a `Def` containing the `Load` into said variable.
fn extract_loads_from_jump(mut jmp: Jmp) -> (Jmp, Vec<Def>) {
let mut counter = 0;
let mut defs = Vec::new();
if let Some(condition) = jmp.condition {
let (mut new_defs, cleaned_condition, new_counter) =
extract_loads_from_expression(condition, counter);
counter = new_counter;
defs.append(&mut new_defs);
jmp.condition = Some(cleaned_condition);
}
match jmp.kind {
JmpKind::Goto(Label::Indirect(ref mut target)) => {
let (mut new_defs, cleaned_target, _) =
extract_loads_from_expression(target.clone(), counter);
defs.append(&mut new_defs);
*target = cleaned_target;
}
JmpKind::Call(ref mut call) => {
if let Label::Indirect(ref mut target) = call.target {
let (mut new_defs, cleaned_target, _) =
extract_loads_from_expression(target.clone(), counter);
defs.append(&mut new_defs);
*target = cleaned_target;
}
}
_ => (),
}
(jmp, defs)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty block term can be deserialized from its Json representation.
    #[test]
    fn term_deserialization() {
        let expected = Term {
            tid: Tid::new("@block".to_string()),
            term: Blk {
                defs: Vec::new(),
                jmps: Vec::new(),
            },
        };
        let json = "{\"term\":{\"defs\":[],\"jmps\":[]},\"tid\":{\"id\":\"@block\",\"address\":\"UNKNOWN\"}}";
        println!("{}", serde_json::to_string(&expected).unwrap());
        assert_eq!(expected, serde_json::from_str(&json).unwrap());
    }
}
use super::{Arg, ArgIntent};
use crate::bil::*;
use crate::intermediate_representation::ExternSymbol as IrExternSymbol;
use crate::prelude::*;
/// An extern symbol together with its arguments.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct ExternSymbol {
/// The term identifier of the symbol.
pub tid: Tid,
/// The address of the symbol as a string.
pub address: String,
/// The name of the symbol.
pub name: String,
/// The name of the calling convention, if there is one.
pub calling_convention: Option<String>,
/// The arguments of the symbol. Each argument carries its own intent (input, output or both).
pub arguments: Vec<Arg>,
}
impl ExternSymbol {
    /// Returns the return register of an extern symbol.
    ///
    /// # Errors
    ///
    /// Returns an error if the function has not exactly one return argument
    /// or if the return argument is not a register.
    pub fn get_unique_return_register(&self) -> Result<&crate::bil::variable::Variable, Error> {
        let return_args: Vec<_> = self
            .arguments
            .iter()
            .filter(|arg| arg.intent.is_output())
            .collect();
        if return_args.len() != 1 {
            return Err(anyhow!(
                "Wrong number of return register: Got {}, expected 1",
                return_args.len()
            ));
        }
        match &return_args[0].location {
            Expression::Var(var) => Ok(var),
            _ => Err(anyhow!("Return location is not a register")),
        }
    }

    /// Returns the parameter expression of an extern symbol.
    ///
    /// # Errors
    ///
    /// Returns an error if the function has not exactly one parameter argument.
    pub fn get_unique_parameter(&self) -> Result<&crate::bil::Expression, Error> {
        let param_args: Vec<_> = self
            .arguments
            .iter()
            .filter(|arg| arg.intent.is_input())
            .collect();
        if param_args.len() != 1 {
            // This message previously talked about return registers (copy-paste error).
            return Err(anyhow!(
                "Wrong number of parameters: Got {}, expected 1",
                param_args.len()
            ));
        }
        Ok(&param_args[0].location)
    }
}
impl From<ExternSymbol> for IrExternSymbol {
    /// Translate extern symbol types.
    ///
    /// Arguments with intent `Input` become parameters,
    /// arguments with intent `Output` become return values
    /// and arguments with intent `Both` or `Unknown` become both.
    /// The calling convention of the symbol is not taken over
    /// and the symbol is always marked as returning.
    fn from(symbol: ExternSymbol) -> IrExternSymbol {
        let mut parameters = Vec::new();
        let mut return_values = Vec::new();
        for arg in symbol.arguments {
            // The argument is only cloned if it is needed for both lists.
            match arg.intent {
                ArgIntent::Input => parameters.extend(arg.into_ir_args()),
                ArgIntent::Output => return_values.extend(arg.into_ir_args()),
                ArgIntent::Both | ArgIntent::Unknown => {
                    parameters.extend(arg.clone().into_ir_args());
                    return_values.extend(arg.into_ir_args());
                }
            }
        }
        IrExternSymbol {
            tid: symbol.tid,
            addresses: vec![symbol.address],
            name: symbol.name,
            // We do not parse more than one calling convention from BAP at the moment.
            // So we assume everything uses the standard one.
            calling_convention: None,
            parameters,
            return_values,
            // Last time I checked BAP had an attribute for non-returning functions,
            // but did not actually set it.
            no_return: false,
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Serializes an extern symbol without arguments to Json
// and checks that the result can be deserialized again.
#[test]
fn extern_symbol_serialization() {
let symbol = ExternSymbol {
tid: Tid::new("Tid"),
address: "Somewhere".to_string(),
name: "extern_fn".to_string(),
calling_convention: Some("cconv".to_string()),
arguments: Vec::new(),
};
let json: String = serde_json::to_string_pretty(&symbol).unwrap();
// The Json is printed to ease debugging if the deserialization below fails.
println!("{}", json);
let _symbol: ExternSymbol = serde_json::from_str(&json).unwrap();
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment