From 43b56cd38d9dd13e9345eb63ed6f3d7fc1e31bd1 Mon Sep 17 00:00:00 2001 From: Enkelmann <46347022+Enkelmann@users.noreply.github.com> Date: Wed, 24 Mar 2021 10:19:11 +0100 Subject: [PATCH] Bugfix function entry points (#160) --- src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs | 13 +++++++++---- src/cwe_checker_lib/src/abstract_domain/interval/tests.rs | 4 ++++ src/cwe_checker_lib/src/pcode/term.rs | 47 +++++++++++++++++++++++++++++++++-------------- src/cwe_checker_lib/src/pcode/term/tests.rs | 42 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 87 insertions(+), 19 deletions(-) diff --git a/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs b/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs index c0150fd..6ef50bf 100644 --- a/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs +++ b/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs @@ -147,10 +147,15 @@ impl IntervalDomain { /// The result is only exact if the `rhs` interval contains exactly one value. pub fn shift_left(&self, rhs: &Self) -> Self { if rhs.interval.start == rhs.interval.end { - let multiplicator = Bitvector::one(self.bytesize().into()) - .into_checked_shl(rhs.interval.start.try_to_u64().unwrap() as usize) - .unwrap(); - self.signed_mul(&multiplicator.into()) + let shift_amount = rhs.interval.start.try_to_u64().unwrap() as usize; + if shift_amount < self.bytesize().as_bit_length() { + let multiplicator = Bitvector::one(self.bytesize().into()) + .into_checked_shl(shift_amount) + .unwrap(); + self.signed_mul(&multiplicator.into()) + } else { + Bitvector::zero(self.bytesize().into()).into() + } } else { Self::new_top(self.bytesize()) } diff --git a/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs b/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs index b2bc6be..b8d1ee3 100644 --- a/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs +++ b/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs @@ -310,4 +310,8 @@ fn shift_left() { result, IntervalDomain::mock_i8_with_bounds(None, 6, 8, None) ); + let lhs = IntervalDomain::mock_with_bounds(Some(2), 3, 4, Some(64)); + let rhs = IntervalDomain::mock_i8_with_bounds(None, 127, 127, None); + let result = lhs.bin_op(BinOpType::IntLeft, &rhs); + assert_eq!(result, IntervalDomain::mock(0, 0)); } diff --git a/src/cwe_checker_lib/src/pcode/term.rs b/src/cwe_checker_lib/src/pcode/term.rs index e2e3817..ea39aef 100644 --- a/src/cwe_checker_lib/src/pcode/term.rs +++ b/src/cwe_checker_lib/src/pcode/term.rs @@ -318,13 +318,36 @@ pub struct Sub { /// The name of the function. pub name: String, /// The basic blocks of the function. - /// The first block of the array is also the entry point into the function. + /// + /// Note that the first block of the array may *not* be the function entry point! pub blocks: Vec<Term<Blk>>, } -impl From<Sub> for IrSub { - fn from(sub: Sub) -> IrSub { +impl From<Term<Sub>> for Term<IrSub> { + /// Convert a `Sub` term in the P-Code representation to a `Sub` term in the intermediate representation. + /// The conversion also repairs the order of the basic blocks in the `blocks` array of the `Sub` + /// in the sense that the first block of the array is required to also be the function entry point + /// after the conversion. + fn from(mut sub: Term<Sub>) -> Term<IrSub> { + // Since the intermediate representation expects that the first block of a function is its entry point, + // we have to make sure that this actually holds. + if !sub.term.blocks.is_empty() && sub.tid.address != sub.term.blocks[0].tid.address { + let mut start_block_index = None; + for (i, block) in sub.term.blocks.iter().enumerate() { + if block.tid.address == sub.tid.address { + start_block_index = Some(i); + break; + } + } + if let Some(start_block_index) = start_block_index { + sub.term.blocks.swap(0, start_block_index); + } else { + panic!("Non-empty function without correct starting block encountered. Name: {}, TID: {}", sub.term.name, sub.tid); + } + } + let blocks = sub + .term .blocks .into_iter() .map(|block_term| Term { @@ -332,9 +355,12 @@ impl From<Sub> for IrSub { term: block_term.term.into(), }) .collect(); - IrSub { - name: sub.name, - blocks, + Term { + tid: sub.tid, + term: IrSub { + name: sub.term.name, + blocks, + }, } } } @@ -428,14 +454,7 @@ impl Program { /// E.g. if the `binary_base_address` is 0 for shared object files, /// Ghidra adds an offset so that the memory image does not actually start at address 0. pub fn into_ir_program(self, binary_base_address: u64) -> IrProgram { - let subs = self - .subs - .into_iter() - .map(|sub_term| Term { - tid: sub_term.tid, - term: sub_term.term.into(), - }) - .collect(); + let subs = self.subs.into_iter().map(|sub| sub.into()).collect(); let extern_symbols = self .extern_symbols .into_iter() diff --git a/src/cwe_checker_lib/src/pcode/term/tests.rs b/src/cwe_checker_lib/src/pcode/term/tests.rs index 5b4a939..5270089 100644 --- a/src/cwe_checker_lib/src/pcode/term/tests.rs +++ b/src/cwe_checker_lib/src/pcode/term/tests.rs @@ -503,7 +503,47 @@ fn arg_deserialization() { fn sub_deserialization() { let setup = Setup::new(); let sub_term: Term<Sub> = setup.sub_t.clone(); - let _: IrSub = sub_term.term.into(); + let _: Term<IrSub> = sub_term.into(); + let sub_term: Term<Sub> = serde_json::from_str( + r#" + { + "tid": { + "id": "sub_00101000", + "address": "00101000" + }, + "term": { + "name": "sub_name", + "blocks": [ + { + "tid": { + "id": "blk_0010030", + "address": "00100030" + }, + "term": { + "defs": [], + "jmps": [] + } + }, + { + "tid": { + "id": "blk_00101000", + "address": "00101000" + }, + "term": { + "defs": [], + "jmps": [] + } + } + ] + } + } + "#, + ) + .unwrap(); + // Example has special case where the starting block has to be corrected + assert!(sub_term.tid.address != sub_term.term.blocks[0].tid.address); + let ir_sub: Term<IrSub> = sub_term.into(); + assert_eq!(ir_sub.tid.address, ir_sub.term.blocks[0].tid.address); } #[test] -- libgit2 0.26.0