Bugfix function entry points (#160)

43b56cd3 · Enkelmann · GitHub · c1b99c33 · 43b56cd3 · 43b56cd3
Unverified commit 43b56cd3, authored Mar 24, 2021 by Enkelmann; committed by GitHub on Mar 24, 2021
4 changed files
--- a/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs
@@ -147,11 +147,16 @@ impl IntervalDomain {
    /// The result is only exact if the `rhs` interval contains exactly one value.
    pub fn shift_left(&self, rhs: &Self) -> Self {
        if rhs.interval.start == rhs.interval.end {
+            let shift_amount = rhs.interval.start.try_to_u64().unwrap() as usize;
+            if shift_amount < self.bytesize().as_bit_length() {
                let multiplicator = Bitvector::one(self.bytesize().into())
-                .into_checked_shl(rhs.interval.start.try_to_u64().unwrap() as usize)
+                    .into_checked_shl(shift_amount)
                    .unwrap();
                self.signed_mul(&multiplicator.into())
            } else {
+                Bitvector::zero(self.bytesize().into()).into()
+            }
+        } else {
            Self::new_top(self.bytesize())
        }
    }

--- a/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs
@@ -310,4 +310,8 @@ fn shift_left() {
        result,
        IntervalDomain::mock_i8_with_bounds(None, 6, 8, None)
    );
+    let lhs = IntervalDomain::mock_with_bounds(Some(2), 3, 4, Some(64));
+    let rhs = IntervalDomain::mock_i8_with_bounds(None, 127, 127, None);
+    let result = lhs.bin_op(BinOpType::IntLeft, &rhs);
+    assert_eq!(result, IntervalDomain::mock(0, 0));
 }
--- a/src/cwe_checker_lib/src/pcode/term.rs
+++ b/src/cwe_checker_lib/src/pcode/term.rs
@@ -318,13 +318,36 @@ pub struct Sub {
    /// The name of the function.
    pub name: String,
    /// The basic blocks of the function.
-    /// The first block of the array is also the entry point into the function.
+    ///
+    /// Note that the first block of the array may *not* be the function entry point!
    pub blocks: Vec<Term<Blk>>,
 }
-impl From<Sub> for IrSub {
+impl From<Term<Sub>> for Term<IrSub> {
-    fn from(sub: Sub) -> IrSub {
+    /// Convert a `Sub` term in the P-Code representation to a `Sub` term in the intermediate representation.
+    /// The conversion also repairs the order of the basic blocks in the `blocks` array of the `Sub`
+    /// in the sense that the first block of the array is required to also be the function entry point
+    /// after the conversion.
+    fn from(mut sub: Term<Sub>) -> Term<IrSub> {
+        // Since the intermediate representation expects that the first block of a function is its entry point,
+        // we have to make sure that this actually holds.
+        if !sub.term.blocks.is_empty() && sub.tid.address != sub.term.blocks[0].tid.address {
+            let mut start_block_index = None;
+            for (i, block) in sub.term.blocks.iter().enumerate() {
+                if block.tid.address == sub.tid.address {
+                    start_block_index = Some(i);
+                    break;
+                }
+            }
+            if let Some(start_block_index) = start_block_index {
+                sub.term.blocks.swap(0, start_block_index);
+            } else {
+                panic!("Non-empty function without correct starting block encountered. Name: {}, TID: {}", sub.term.name, sub.tid);
+            }
+        }
        let blocks = sub
+            .term
            .blocks
            .into_iter()
            .map(|block_term| Term {
@@ -332,9 +355,12 @@ impl From<Sub> for IrSub {
                term: block_term.term.into(),
            })
            .collect();
-        IrSub {
+        Term {
-            name: sub.name,
+            tid: sub.tid,
+            term: IrSub {
+                name: sub.term.name,
                blocks,
+            },
        }
    }
 }
@@ -428,14 +454,7 @@ impl Program {
    /// E.g. if the `binary_base_address` is 0 for shared object files,
    /// Ghidra adds an offset so that the memory image does not actually start at address 0.
    pub fn into_ir_program(self, binary_base_address: u64) -> IrProgram {
-        let subs = self
+        let subs = self.subs.into_iter().map(|sub| sub.into()).collect();
-            .subs
-            .into_iter()
-            .map(|sub_term| Term {
-                tid: sub_term.tid,
-                term: sub_term.term.into(),
-            })
-            .collect();
        let extern_symbols = self
            .extern_symbols
            .into_iter()

--- a/src/cwe_checker_lib/src/pcode/term/tests.rs
+++ b/src/cwe_checker_lib/src/pcode/term/tests.rs
@@ -503,7 +503,47 @@ fn arg_deserialization() {
 fn sub_deserialization() {
    let setup = Setup::new();
    let sub_term: Term<Sub> = setup.sub_t.clone();
-    let _: IrSub = sub_term.term.into();
+    let _: Term<IrSub> = sub_term.into();
+    let sub_term: Term<Sub> = serde_json::from_str(
+        r#"
+          {
+          "tid": {
+              "id": "sub_00101000",
+              "address": "00101000"
+          },
+          "term": {
+              "name": "sub_name",
+              "blocks": [
+                {
+                  "tid": {
+                      "id": "blk_0010030",
+                      "address": "00100030"
+                  },
+                  "term": {
+                      "defs": [],
+                      "jmps": []
+                  }
+                },
+                {
+                  "tid": {
+                      "id": "blk_00101000",
+                      "address": "00101000"
+                  },
+                  "term": {
+                      "defs": [],
+                      "jmps": []
+                  }
+                }
+              ]
+          }
+          }
+          "#,
+    )
+    .unwrap();
+    // Example has special case where the starting block has to be corrected
+    assert!(sub_term.tid.address != sub_term.term.blocks[0].tid.address);
+    let ir_sub: Term<IrSub> = sub_term.into();
+    assert_eq!(ir_sub.tid.address, ir_sub.term.blocks[0].tid.address);
 }
 #[test]