Unverified Commit 41088680 by Enkelmann Committed by GitHub

fix address size of implicit stores (#170)

parent a7dccec4
...@@ -41,17 +41,17 @@ impl From<Variable> for IrExpression { ...@@ -41,17 +41,17 @@ impl From<Variable> for IrExpression {
fn from(pcode_var: Variable) -> IrExpression { fn from(pcode_var: Variable) -> IrExpression {
match (&pcode_var.name, &pcode_var.value) { match (&pcode_var.name, &pcode_var.value) {
(Some(_name), None) => IrExpression::Var(pcode_var.into()), (Some(_name), None) => IrExpression::Var(pcode_var.into()),
(None, Some(_hex_value)) => IrExpression::Const(pcode_var.parse_to_bitvector()), (None, Some(_hex_value)) => IrExpression::Const(pcode_var.parse_const_to_bitvector()),
_ => panic!("Conversion failed:\n{:?}", pcode_var), _ => panic!("Conversion failed:\n{:?}", pcode_var),
} }
} }
} }
impl Variable { impl Variable {
/// Parses a variable representing a concrete value or a concrete address to a bitvector containing the value or address. /// Parses a variable representing a concrete value to a bitvector containing the value.
pub fn parse_to_bitvector(&self) -> Bitvector { pub fn parse_const_to_bitvector(&self) -> Bitvector {
match (&self.value, &self.address) { match &self.value {
(Some(hex_value), None) | (None, Some(hex_value)) => { Some(hex_value) => {
let mut bitvector = Bitvector::from_str_radix(16, hex_value).unwrap(); let mut bitvector = Bitvector::from_str_radix(16, hex_value).unwrap();
match bitvector.width().cmp(&self.size.into()) { match bitvector.width().cmp(&self.size.into()) {
std::cmp::Ordering::Greater => bitvector.truncate(self.size).unwrap(), std::cmp::Ordering::Greater => bitvector.truncate(self.size).unwrap(),
...@@ -64,6 +64,26 @@ impl Variable { ...@@ -64,6 +64,26 @@ impl Variable {
} }
} }
/// Parses a variable representing an address to a pointer-sized bitvector containing the address.
///
/// The hexadecimal string stored in `self.address` is parsed and the result is
/// truncated or zero-extended so that its width equals `generic_pointer_size`.
/// The `size` field of the variable is not read by this function:
/// the width of the returned bitvector is determined by `generic_pointer_size` alone.
///
/// # Panics
///
/// Panics if the variable has no address, if the address is not a valid hexadecimal string,
/// or if resizing the parsed value to `generic_pointer_size` fails.
pub fn parse_address_to_bitvector(&self, generic_pointer_size: ByteSize) -> Bitvector {
    let hex_value = match &self.address {
        Some(hex_value) => hex_value,
        // Report the offending variable, mirroring the diagnostics of the
        // `Variable` to `IrExpression` conversion.
        None => panic!(
            "Address conversion failed: variable has no address:\n{:?}",
            self
        ),
    };
    let mut bitvector = Bitvector::from_str_radix(16, hex_value).unwrap_or_else(|err| {
        panic!(
            "Address conversion failed: invalid hexadecimal address ({:?}):\n{:?}",
            err, self
        )
    });
    // Normalize the width to the pointer size of the target architecture.
    match bitvector.width().cmp(&generic_pointer_size.into()) {
        std::cmp::Ordering::Greater => bitvector.truncate(generic_pointer_size).unwrap(),
        std::cmp::Ordering::Less => bitvector.zero_extend(generic_pointer_size).unwrap(),
        std::cmp::Ordering::Equal => (),
    }
    bitvector
}
/// Generate a virtual variable with the given name and size. /// Generate a virtual variable with the given name and size.
pub fn new_virtual(name: impl Into<String>, size: ByteSize) -> Variable { pub fn new_virtual(name: impl Into<String>, size: ByteSize) -> Variable {
Variable { Variable {
...@@ -415,24 +435,36 @@ mod tests { ...@@ -415,24 +435,36 @@ mod tests {
size: ByteSize::new(8), size: ByteSize::new(8),
is_virtual: false, is_virtual: false,
}; };
assert_eq!(var.parse_to_bitvector(), Bitvector::from_u64(0)); assert_eq!(var.parse_const_to_bitvector(), Bitvector::from_u64(0));
var.value = Some("0010f".to_string()); var.value = Some("0010f".to_string());
assert_eq!(var.parse_to_bitvector(), Bitvector::from_u64(271)); assert_eq!(var.parse_const_to_bitvector(), Bitvector::from_u64(271));
var.value = Some("1ff".to_string()); var.value = Some("1ff".to_string());
var.size = ByteSize::new(1); var.size = ByteSize::new(1);
assert_eq!(var.parse_to_bitvector(), Bitvector::from_u8(255)); assert_eq!(var.parse_const_to_bitvector(), Bitvector::from_u8(255));
var.size = ByteSize::new(16); var.size = ByteSize::new(16);
assert_eq!(var.parse_to_bitvector(), Bitvector::from_u128(511)); assert_eq!(var.parse_const_to_bitvector(), Bitvector::from_u128(511));
var.value = Some("00_ffffffffffffffff_ffffffffffffffff".to_string()); var.value = Some("00_ffffffffffffffff_ffffffffffffffff".to_string());
var.size = ByteSize::new(16); var.size = ByteSize::new(16);
assert_eq!(var.parse_to_bitvector(), Bitvector::from_i128(-1)); assert_eq!(var.parse_const_to_bitvector(), Bitvector::from_i128(-1));
var.size = ByteSize::new(10); var.size = ByteSize::new(10);
assert_eq!( assert_eq!(
var.parse_to_bitvector(), var.parse_const_to_bitvector(),
Bitvector::from_i128(-1) Bitvector::from_i128(-1)
.into_truncate(ByteSize::new(10)) .into_truncate(ByteSize::new(10))
.unwrap() .unwrap()
); );
let var = Variable {
name: None,
value: None,
address: Some("000010f".to_string()),
size: ByteSize::new(1), // Note that this size is not the size of a pointer!
is_virtual: false,
};
assert_eq!(
var.parse_address_to_bitvector(ByteSize::new(8)),
Bitvector::from_u64(271)
);
} }
} }
...@@ -134,46 +134,48 @@ pub struct Def { ...@@ -134,46 +134,48 @@ pub struct Def {
pub rhs: Expression, pub rhs: Expression,
} }
impl From<Def> for IrDef { impl Def {
/// Convert a P-Code instruction to the internally used IR. /// Convert a P-Code instruction to the internally used IR.
fn from(def: Def) -> IrDef { pub fn into_ir_def(self, generic_pointer_size: ByteSize) -> IrDef {
use super::ExpressionType::*; use super::ExpressionType::*;
match def.rhs.mnemonic { match self.rhs.mnemonic {
LOAD => IrDef::Load { LOAD => IrDef::Load {
var: def.lhs.unwrap().into(), var: self.lhs.unwrap().into(),
address: def.rhs.input1.unwrap().into(), address: self.rhs.input1.unwrap().into(),
}, },
STORE => IrDef::Store { STORE => IrDef::Store {
address: def.rhs.input1.unwrap().into(), address: self.rhs.input1.unwrap().into(),
value: def.rhs.input2.unwrap().into(), value: self.rhs.input2.unwrap().into(),
}, },
SUBPIECE => IrDef::Assign { SUBPIECE => IrDef::Assign {
var: def.lhs.clone().unwrap().into(), var: self.lhs.clone().unwrap().into(),
value: IrExpression::Subpiece { value: IrExpression::Subpiece {
low_byte: def.rhs.input1.unwrap().parse_to_bytesize(), low_byte: self.rhs.input1.unwrap().parse_to_bytesize(),
size: def.lhs.unwrap().size, size: self.lhs.unwrap().size,
arg: Box::new(def.rhs.input0.unwrap().into()), arg: Box::new(self.rhs.input0.unwrap().into()),
}, },
}, },
INT_ZEXT | INT_SEXT | INT2FLOAT | FLOAT2FLOAT | TRUNC | POPCOUNT => IrDef::Assign { INT_ZEXT | INT_SEXT | INT2FLOAT | FLOAT2FLOAT | TRUNC | POPCOUNT => IrDef::Assign {
var: def.lhs.clone().unwrap().into(), var: self.lhs.clone().unwrap().into(),
value: IrExpression::Cast { value: IrExpression::Cast {
op: def.rhs.mnemonic.into(), op: self.rhs.mnemonic.into(),
size: def.lhs.unwrap().size, size: self.lhs.unwrap().size,
arg: Box::new(def.rhs.input0.unwrap().into()), arg: Box::new(self.rhs.input0.unwrap().into()),
}, },
}, },
_ => { _ => {
let target_var = def.lhs.unwrap(); let target_var = self.lhs.unwrap();
if target_var.address.is_some() { if target_var.address.is_some() {
IrDef::Store { IrDef::Store {
address: IrExpression::Const(target_var.parse_to_bitvector()), address: IrExpression::Const(
value: def.rhs.into(), target_var.parse_address_to_bitvector(generic_pointer_size),
),
value: self.rhs.into(),
} }
} else { } else {
IrDef::Assign { IrDef::Assign {
var: target_var.into(), var: target_var.into(),
value: def.rhs.into(), value: self.rhs.into(),
} }
} }
} }
...@@ -190,23 +192,23 @@ pub struct Blk { ...@@ -190,23 +192,23 @@ pub struct Blk {
pub jmps: Vec<Term<Jmp>>, pub jmps: Vec<Term<Jmp>>,
} }
impl From<Blk> for IrBlk { impl Blk {
/// Convert a P-Code block to the internally used IR. /// Convert a P-Code block to the internally used IR.
fn from(blk: Blk) -> IrBlk { pub fn into_ir_blk(self, generic_pointer_size: ByteSize) -> IrBlk {
let defs: Vec<Term<IrDef>> = blk let defs: Vec<Term<IrDef>> = self
.defs .defs
.into_iter() .into_iter()
.map(|def_term| Term { .map(|def_term| Term {
tid: def_term.tid, tid: def_term.tid,
term: def_term.term.into(), term: def_term.term.into_ir_def(generic_pointer_size),
}) })
.collect(); .collect();
let indirect_jmp_targets = blk let indirect_jmp_targets = self
.jmps .jmps
.iter() .iter()
.find_map(|jmp_term| jmp_term.term.target_hints.clone()) .find_map(|jmp_term| jmp_term.term.target_hints.clone())
.unwrap_or_default(); .unwrap_or_default();
let jmps: Vec<Term<IrJmp>> = blk let jmps: Vec<Term<IrJmp>> = self
.jmps .jmps
.into_iter() .into_iter()
.map(|jmp_term| Term { .map(|jmp_term| Term {
...@@ -327,42 +329,42 @@ pub struct Sub { ...@@ -327,42 +329,42 @@ pub struct Sub {
pub blocks: Vec<Term<Blk>>, pub blocks: Vec<Term<Blk>>,
} }
impl From<Term<Sub>> for Term<IrSub> { impl Term<Sub> {
/// Convert a `Sub` term in the P-Code representation to a `Sub` term in the intermediate representation. /// Convert a `Sub` term in the P-Code representation to a `Sub` term in the intermediate representation.
/// The conversion also repairs the order of the basic blocks in the `blocks` array of the `Sub` /// The conversion also repairs the order of the basic blocks in the `blocks` array of the `Sub`
/// in the sense that the first block of the array is required to also be the function entry point /// in the sense that the first block of the array is required to also be the function entry point
/// after the conversion. /// after the conversion.
fn from(mut sub: Term<Sub>) -> Term<IrSub> { pub fn into_ir_sub_term(mut self, generic_pointer_size: ByteSize) -> Term<IrSub> {
// Since the intermediate representation expects that the first block of a function is its entry point, // Since the intermediate representation expects that the first block of a function is its entry point,
// we have to make sure that this actually holds. // we have to make sure that this actually holds.
if !sub.term.blocks.is_empty() && sub.tid.address != sub.term.blocks[0].tid.address { if !self.term.blocks.is_empty() && self.tid.address != self.term.blocks[0].tid.address {
let mut start_block_index = None; let mut start_block_index = None;
for (i, block) in sub.term.blocks.iter().enumerate() { for (i, block) in self.term.blocks.iter().enumerate() {
if block.tid.address == sub.tid.address { if block.tid.address == self.tid.address {
start_block_index = Some(i); start_block_index = Some(i);
break; break;
} }
} }
if let Some(start_block_index) = start_block_index { if let Some(start_block_index) = start_block_index {
sub.term.blocks.swap(0, start_block_index); self.term.blocks.swap(0, start_block_index);
} else { } else {
panic!("Non-empty function without correct starting block encountered. Name: {}, TID: {}", sub.term.name, sub.tid); panic!("Non-empty function without correct starting block encountered. Name: {}, TID: {}", self.term.name, self.tid);
} }
} }
let blocks = sub let blocks = self
.term .term
.blocks .blocks
.into_iter() .into_iter()
.map(|block_term| Term { .map(|block_term| Term {
tid: block_term.tid, tid: block_term.tid,
term: block_term.term.into(), term: block_term.term.into_ir_blk(generic_pointer_size),
}) })
.collect(); .collect();
Term { Term {
tid: sub.tid, tid: self.tid,
term: IrSub { term: IrSub {
name: sub.term.name, name: self.term.name,
blocks, blocks,
}, },
} }
...@@ -457,8 +459,16 @@ impl Program { ...@@ -457,8 +459,16 @@ impl Program {
/// It is needed to detect whether Ghidra added a constant offset to all addresses of the memory address. /// It is needed to detect whether Ghidra added a constant offset to all addresses of the memory address.
/// E.g. if the `binary_base_address` is 0 for shared object files, /// E.g. if the `binary_base_address` is 0 for shared object files,
/// Ghidra adds an offset so that the memory image does not actually start at address 0. /// Ghidra adds an offset so that the memory image does not actually start at address 0.
pub fn into_ir_program(self, binary_base_address: u64) -> IrProgram { pub fn into_ir_program(
let subs = self.subs.into_iter().map(|sub| sub.into()).collect(); self,
binary_base_address: u64,
generic_pointer_size: ByteSize,
) -> IrProgram {
let subs = self
.subs
.into_iter()
.map(|sub| sub.into_ir_sub_term(generic_pointer_size))
.collect();
let extern_symbols = self let extern_symbols = self
.extern_symbols .extern_symbols
.into_iter() .into_iter()
...@@ -525,7 +535,10 @@ impl Project { ...@@ -525,7 +535,10 @@ impl Project {
pub fn into_ir_project(self, binary_base_address: u64) -> IrProject { pub fn into_ir_project(self, binary_base_address: u64) -> IrProject {
let mut program: Term<IrProgram> = Term { let mut program: Term<IrProgram> = Term {
tid: self.program.tid, tid: self.program.tid,
term: self.program.term.into_ir_program(binary_base_address), term: self
.program
.term
.into_ir_program(binary_base_address, self.stack_pointer_register.size),
}; };
let register_map: HashMap<&String, &RegisterProperties> = self let register_map: HashMap<&String, &RegisterProperties> = self
.register_properties .register_properties
......
...@@ -396,7 +396,7 @@ fn def_deserialization() { ...@@ -396,7 +396,7 @@ fn def_deserialization() {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrDef = def.into(); let _: IrDef = def.into_ir_def(ByteSize::new(8));
let def: Def = serde_json::from_str( let def: Def = serde_json::from_str(
r#" r#"
{ {
...@@ -422,7 +422,7 @@ fn def_deserialization() { ...@@ -422,7 +422,7 @@ fn def_deserialization() {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrDef = def.into(); let _: IrDef = def.into_ir_def(ByteSize::new(8));
} }
#[test] #[test]
...@@ -463,7 +463,7 @@ fn jmp_deserialization() { ...@@ -463,7 +463,7 @@ fn jmp_deserialization() {
fn blk_deserialization() { fn blk_deserialization() {
let setup = Setup::new(); let setup = Setup::new();
let block_term: Term<Blk> = setup.blk_t.clone(); let block_term: Term<Blk> = setup.blk_t.clone();
let _: IrBlk = block_term.term.into(); let _: IrBlk = block_term.term.into_ir_blk(ByteSize::new(8));
} }
#[test] #[test]
...@@ -503,7 +503,7 @@ fn arg_deserialization() { ...@@ -503,7 +503,7 @@ fn arg_deserialization() {
fn sub_deserialization() { fn sub_deserialization() {
let setup = Setup::new(); let setup = Setup::new();
let sub_term: Term<Sub> = setup.sub_t.clone(); let sub_term: Term<Sub> = setup.sub_t.clone();
let _: Term<IrSub> = sub_term.into(); let _: Term<IrSub> = sub_term.into_ir_sub_term(ByteSize::new(8));
let sub_term: Term<Sub> = serde_json::from_str( let sub_term: Term<Sub> = serde_json::from_str(
r#" r#"
{ {
...@@ -542,7 +542,7 @@ fn sub_deserialization() { ...@@ -542,7 +542,7 @@ fn sub_deserialization() {
.unwrap(); .unwrap();
// Example has special case where the starting block has to be corrected // Example has special case where the starting block has to be corrected
assert!(sub_term.tid.address != sub_term.term.blocks[0].tid.address); assert!(sub_term.tid.address != sub_term.term.blocks[0].tid.address);
let ir_sub: Term<IrSub> = sub_term.into(); let ir_sub: Term<IrSub> = sub_term.into_ir_sub_term(ByteSize::new(8));
assert_eq!(ir_sub.tid.address, ir_sub.term.blocks[0].tid.address); assert_eq!(ir_sub.tid.address, ir_sub.term.blocks[0].tid.address);
} }
...@@ -608,7 +608,7 @@ fn program_deserialization() { ...@@ -608,7 +608,7 @@ fn program_deserialization() {
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrProgram = program_term.term.into_ir_program(10000); let _: IrProgram = program_term.term.into_ir_program(10000, ByteSize::new(8));
} }
#[test] #[test]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment