From 43b56cd38d9dd13e9345eb63ed6f3d7fc1e31bd1 Mon Sep 17 00:00:00 2001
From: Enkelmann <46347022+Enkelmann@users.noreply.github.com>
Date: Wed, 24 Mar 2021 10:19:11 +0100
Subject: [PATCH] Bugfix function entry points (#160)

---
 src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs | 13 +++++++++----
 src/cwe_checker_lib/src/abstract_domain/interval/tests.rs   |  4 ++++
 src/cwe_checker_lib/src/pcode/term.rs                       | 47 +++++++++++++++++++++++++++++++++--------------
 src/cwe_checker_lib/src/pcode/term/tests.rs                 | 42 +++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 87 insertions(+), 19 deletions(-)

diff --git a/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs b/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs
index c0150fd..6ef50bf 100644
--- a/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/interval/bin_ops.rs
@@ -147,10 +147,15 @@ impl IntervalDomain {
     /// The result is only exact if the `rhs` interval contains exactly one value.
     pub fn shift_left(&self, rhs: &Self) -> Self {
         if rhs.interval.start == rhs.interval.end {
-            let multiplicator = Bitvector::one(self.bytesize().into())
-                .into_checked_shl(rhs.interval.start.try_to_u64().unwrap() as usize)
-                .unwrap();
-            self.signed_mul(&multiplicator.into())
+            let shift_amount = rhs.interval.start.try_to_u64().unwrap() as usize;
+            if shift_amount < self.bytesize().as_bit_length() {
+                let multiplicator = Bitvector::one(self.bytesize().into())
+                    .into_checked_shl(shift_amount)
+                    .unwrap();
+                self.signed_mul(&multiplicator.into())
+            } else {
+                Bitvector::zero(self.bytesize().into()).into()
+            }
         } else {
             Self::new_top(self.bytesize())
         }
diff --git a/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs b/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs
index b2bc6be..b8d1ee3 100644
--- a/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs
@@ -310,4 +310,8 @@ fn shift_left() {
         result,
         IntervalDomain::mock_i8_with_bounds(None, 6, 8, None)
     );
+    let lhs = IntervalDomain::mock_with_bounds(Some(2), 3, 4, Some(64));
+    let rhs = IntervalDomain::mock_i8_with_bounds(None, 127, 127, None);
+    let result = lhs.bin_op(BinOpType::IntLeft, &rhs);
+    assert_eq!(result, IntervalDomain::mock(0, 0));
 }
diff --git a/src/cwe_checker_lib/src/pcode/term.rs b/src/cwe_checker_lib/src/pcode/term.rs
index e2e3817..ea39aef 100644
--- a/src/cwe_checker_lib/src/pcode/term.rs
+++ b/src/cwe_checker_lib/src/pcode/term.rs
@@ -318,13 +318,36 @@ pub struct Sub {
     /// The name of the function.
     pub name: String,
     /// The basic blocks of the function.
-    /// The first block of the array is also the entry point into the function.
+    ///
+    /// Note that the first block of the array may *not* be the function entry point!
     pub blocks: Vec<Term<Blk>>,
 }
 
-impl From<Sub> for IrSub {
-    fn from(sub: Sub) -> IrSub {
+impl From<Term<Sub>> for Term<IrSub> {
+    /// Convert a `Sub` term in the P-Code representation to a `Sub` term in the intermediate representation.
+    /// The conversion also repairs the order of the basic blocks in the `blocks` array of the `Sub`
+    /// in the sense that the first block of the array is required to also be the function entry point
+    /// after the conversion.
+    fn from(mut sub: Term<Sub>) -> Term<IrSub> {
+        // Since the intermediate representation expects that the first block of a function is its entry point,
+        // we have to make sure that this actually holds.
+        if !sub.term.blocks.is_empty() && sub.tid.address != sub.term.blocks[0].tid.address {
+            let mut start_block_index = None;
+            for (i, block) in sub.term.blocks.iter().enumerate() {
+                if block.tid.address == sub.tid.address {
+                    start_block_index = Some(i);
+                    break;
+                }
+            }
+            if let Some(start_block_index) = start_block_index {
+                sub.term.blocks.swap(0, start_block_index);
+            } else {
+                panic!("Non-empty function without correct starting block encountered. Name: {}, TID: {}", sub.term.name, sub.tid);
+            }
+        }
+
         let blocks = sub
+            .term
             .blocks
             .into_iter()
             .map(|block_term| Term {
@@ -332,9 +355,12 @@ impl From<Sub> for IrSub {
                 term: block_term.term.into(),
             })
             .collect();
-        IrSub {
-            name: sub.name,
-            blocks,
+        Term {
+            tid: sub.tid,
+            term: IrSub {
+                name: sub.term.name,
+                blocks,
+            },
         }
     }
 }
@@ -428,14 +454,7 @@ impl Program {
     /// E.g. if the `binary_base_address` is 0 for shared object files,
     /// Ghidra adds an offset so that the memory image does not actually start at address 0.
     pub fn into_ir_program(self, binary_base_address: u64) -> IrProgram {
-        let subs = self
-            .subs
-            .into_iter()
-            .map(|sub_term| Term {
-                tid: sub_term.tid,
-                term: sub_term.term.into(),
-            })
-            .collect();
+        let subs = self.subs.into_iter().map(|sub| sub.into()).collect();
         let extern_symbols = self
             .extern_symbols
             .into_iter()
diff --git a/src/cwe_checker_lib/src/pcode/term/tests.rs b/src/cwe_checker_lib/src/pcode/term/tests.rs
index 5b4a939..5270089 100644
--- a/src/cwe_checker_lib/src/pcode/term/tests.rs
+++ b/src/cwe_checker_lib/src/pcode/term/tests.rs
@@ -503,7 +503,47 @@ fn arg_deserialization() {
 fn sub_deserialization() {
     let setup = Setup::new();
     let sub_term: Term<Sub> = setup.sub_t.clone();
-    let _: IrSub = sub_term.term.into();
+    let _: Term<IrSub> = sub_term.into();
+    let sub_term: Term<Sub> = serde_json::from_str(
+        r#"
+          {
+          "tid": {
+              "id": "sub_00101000",
+              "address": "00101000"
+          },
+          "term": {
+              "name": "sub_name",
+              "blocks": [
+                {
+                  "tid": {
+                      "id": "blk_0010030",
+                      "address": "00100030"
+                  },
+                  "term": {
+                      "defs": [],
+                      "jmps": []
+                  }
+                },
+                {
+                  "tid": {
+                      "id": "blk_00101000",
+                      "address": "00101000"
+                  },
+                  "term": {
+                      "defs": [],
+                      "jmps": []
+                  }
+                }
+              ]
+          }
+          }
+          "#,
+    )
+    .unwrap();
+    // Example has special case where the starting block has to be corrected
+    assert!(sub_term.tid.address != sub_term.term.blocks[0].tid.address);
+    let ir_sub: Term<IrSub> = sub_term.into();
+    assert_eq!(ir_sub.tid.address, ir_sub.term.blocks[0].tid.address);
 }
 
 #[test]
--
libgit2 0.26.0