Bugfixes (#165)

2ba249af · Enkelmann · GitHub · edd68498 · 2ba249af · 2ba249af
Unverified Commit 2ba249af authored Apr 19, 2021 by Enkelmann Committed by GitHub Apr 19, 2021
6 changed files
--- a/src/caller/src/main.rs
+++ b/src/caller/src/main.rs
@@ -113,9 +113,9 @@ fn run_with_ghidra(args: CmdlineArgs) {
            binary_file_path.display()
        )
    });
-    let mut project = get_project_from_ghidra(&binary_file_path, &binary[..], args.quiet);
+    let (mut project, mut all_logs) = get_project_from_ghidra(&binary_file_path, &binary[..]);
    // Normalize the project and gather log messages generated from it.
-    let mut all_logs = project.normalize();
+    all_logs.append(&mut project.normalize());
    // Generate the representation of the runtime memory image of the binary
    let mut runtime_memory_image = RuntimeMemoryImage::new(&binary).unwrap_or_else(|err| {
@@ -206,7 +206,7 @@ fn filter_modules_for_partial_run(
 }
 /// Execute the `p_code_extractor` plugin in ghidra and parse its output into the `Project` data structure.
-fn get_project_from_ghidra(file_path: &Path, binary: &[u8], quiet_flag: bool) -> Project {
+fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<LogMessage>) {
    let ghidra_path: std::path::PathBuf =
        serde_json::from_value(read_config_file("ghidra.json")["ghidra_path"].clone())
            .expect("Path to Ghidra not configured.");
@@ -298,14 +298,11 @@ fn get_project_from_ghidra(file_path: &Path, binary: &[u8], quiet_flag: bool) ->
    let mut project_pcode: cwe_checker_lib::pcode::Project =
        serde_json::from_reader(std::io::BufReader::new(file)).unwrap();
-    project_pcode.normalize();
+    let mut log_messages = project_pcode.normalize();
    let project: Project = match cwe_checker_lib::utils::get_binary_base_address(binary) {
        Ok(binary_base_address) => project_pcode.into_ir_project(binary_base_address),
        Err(_err) => {
-            if !quiet_flag {
+            log_messages.push(LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback."));
-                let log = LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback.");
-                println!("{}", log);
-            }
            let mut project = project_pcode.into_ir_project(0);
            // Setting the address_base_offset to zero is a hack, which worked for the tested PE files.
            // But this hack will probably not work in general!
@@ -320,5 +317,5 @@ fn get_project_from_ghidra(file_path: &Path, binary: &[u8], quiet_flag: bool) ->
    std::fs::remove_file(fifo_path).unwrap();
-    project
+    (project, log_messages)
 }
--- a/src/cwe_checker_lib/src/abstract_domain/bitvector.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/bitvector.rs
@@ -80,11 +80,14 @@ impl RegisterDomain for BitvectorDomain {
        if let BitvectorDomain::Value(bitvec) = self {
            match bitvec.un_op(op) {
                Ok(val) => BitvectorDomain::Value(val),
-                Err(_) => BitvectorDomain::new_top(self.bytesize()),
+                Err(_) => match op {
+                    BoolNegate | FloatNaN => BitvectorDomain::new_top(ByteSize::new(1)),
+                    _ => BitvectorDomain::new_top(self.bytesize()),
+                },
            }
        } else {
            match op {
-                BoolNegate => BitvectorDomain::new_top(ByteSize::new(1)),
+                BoolNegate | FloatNaN => BitvectorDomain::new_top(ByteSize::new(1)),
                _ => BitvectorDomain::new_top(self.bytesize()),
            }
        }
@@ -280,4 +283,12 @@ mod tests {
            BitvectorDomain::Value(Bitvector::from_i64(-1))
        );
    }
+    #[test]
+    fn float_nan_bytesize() {
+        let top_value = BitvectorDomain::new_top(ByteSize::new(8));
+        let result = top_value.un_op(UnOpType::FloatNaN);
+        assert!(result.is_top());
+        assert_eq!(result.bytesize(), ByteSize::new(1));
+    }
 }
--- a/src/cwe_checker_lib/src/abstract_domain/data.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/data.rs
@@ -201,7 +201,10 @@ impl<T: RegisterDomain> RegisterDomain for DataDomain<T> {
        if let Self::Value(value) = self {
            Self::Value(value.un_op(op))
        } else {
-            Self::new_top(self.bytesize())
+            match op {
+                UnOpType::BoolNegate | UnOpType::FloatNaN => Self::new_top(ByteSize::new(1)),
+                _ => Self::new_top(self.bytesize()),
+            }
        }
    }
@@ -423,4 +426,12 @@ mod tests {
        data.remove_ids(&ids_to_remove);
        assert_eq!(data, bv(42).into());
    }
+    #[test]
+    fn float_nan_bytesize() {
+        let top_value: DataDomain<BitvectorDomain> = DataDomain::new_top(ByteSize::new(8));
+        let result = top_value.un_op(UnOpType::FloatNaN);
+        assert!(result.is_top());
+        assert_eq!(result.bytesize(), ByteSize::new(1));
+    }
 }
--- a/src/cwe_checker_lib/src/abstract_domain/interval.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/interval.rs
@@ -440,9 +440,10 @@ impl RegisterDomain for IntervalDomain {
                    IntervalDomain::new_top(self.bytesize())
                }
            }
-            FloatAbs | FloatCeil | FloatFloor | FloatNaN | FloatNegate | FloatRound | FloatSqrt => {
+            FloatAbs | FloatCeil | FloatFloor | FloatNegate | FloatRound | FloatSqrt => {
                IntervalDomain::new_top(self.bytesize())
            }
+            FloatNaN => IntervalDomain::new_top(ByteSize::new(1)),
        }
    }

--- a/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs
+++ b/src/cwe_checker_lib/src/abstract_domain/interval/tests.rs
@@ -503,3 +503,11 @@ fn intersection() {
    );
    assert!(interval1.intersect(&IntervalDomain::mock(50, 55)).is_err());
 }
+#[test]
+fn float_nan_bytesize() {
+    let top_value = IntervalDomain::new_top(ByteSize::new(8));
+    let result = top_value.un_op(UnOpType::FloatNaN);
+    assert!(result.is_top());
+    assert_eq!(result.bytesize(), ByteSize::new(1));
+}
--- a/src/cwe_checker_lib/src/pcode/term.rs
+++ b/src/cwe_checker_lib/src/pcode/term.rs
@@ -13,6 +13,7 @@ use crate::intermediate_representation::Program as IrProgram;
 use crate::intermediate_representation::Project as IrProject;
 use crate::intermediate_representation::Sub as IrSub;
 use crate::prelude::*;
+use crate::utils::log::LogMessage;
 // TODO: Handle the case where an indirect tail call is represented by CALLIND plus RETURN
@@ -641,7 +642,18 @@ impl Project {
    ///
    /// Ghidra generates implicit loads for memory accesses, whose address is a constant.
    /// The pass converts them to explicit `LOAD` instructions.
-    pub fn normalize(&mut self) {
+    ///
+    /// ### Remove basic blocks of functions without correct starting block
+    ///
+    /// Sometimes Ghidra generates a (correct) function start inside another function.
+    /// But if the function start is not also the start of a basic block,
+    /// we cannot handle it correctly (yet) as this would need splitting of basic blocks.
+    /// So instead we generate a log message and handle the function as a function without code,
+    /// i.e. a dead end in the control flow graph.
+    #[must_use]
+    pub fn normalize(&mut self) -> Vec<LogMessage> {
+        let mut log_messages = Vec::new();
        // Insert explicit `LOAD` instructions for implicit memory loads in P-Code.
        let generic_pointer_size = self.stack_pointer_register.size;
        for sub in self.program.term.subs.iter_mut() {
@@ -654,6 +666,27 @@ impl Project {
                }
            }
        }
+        // remove all blocks from functions that have no correct starting block and generate a log-message.
+        for sub in self.program.term.subs.iter_mut() {
+            if !sub.term.blocks.is_empty()
+                && sub.tid.address != sub.term.blocks[0].tid.address
+                && sub
+                    .term
+                    .blocks
+                    .iter()
+                    .find(|block| block.tid.address == sub.tid.address)
+                    .is_none()
+            {
+                log_messages.push(LogMessage::new_error(format!(
+                    "Starting block of function {} ({}) not found.",
+                    sub.term.name, sub.tid
+                )));
+                sub.term.blocks = Vec::new();
+            }
+        }
+        log_messages
    }
 }