Support analysis of bare metal binaries (#203)

531e2bc1 · Enkelmann · GitHub · 7f37f6af · 531e2bc1 · 531e2bc1
Unverified Commit 531e2bc1 authored Jul 19, 2021 by Enkelmann Committed by GitHub Jul 19, 2021
8 changed files
--- a/CHANGES.md
+++ b/CHANGES.md
+0.6-dev
+====
+
+-   Add support for analysis of bare-metal binaries (PR #203)
+
 0.5 (2021-07)
 ====


--- a/README.md
+++ b/README.md
@@ -69,6 +69,16 @@ For information about other available command line flags you can pass the `--hel

 If you use the stable version, you can also look at the [online documentation](https://fkie-cad.github.io/cwe_checker/index.html) for more information.

+### For Bare-Metal Binaries ###
+
+The cwe_checker offers experimental support for analyzing bare-metal binaries.
+To use it, provide a bare-metal configuration file via the `--bare-metal-config` command line option.
+An example of such a configuration file can be found at `bare_metal/stm32f407vg.json`
+(which was created and tested for an STM32F407VG MCU).
+
+For more information, build and read the documentation locally via `make documentation`.
+Note that this analysis mode is not yet included in the stable version of the cwe_checker.
+
 ## Documentation and Tests ##

 The test binaries for our test suite can be built with `make compile_test_files` (needs Docker to be installed!). The test suite can then be run with `make test`.

--- a/bare_metal/stm32f407vg.json
+++ b/bare_metal/stm32f407vg.json
+{
+    "_comment": "The CPU architecture of the chip. Valid values are those that Ghidra accepts as processor IDs.",
+    "processor_id": "ARM:LE:32:v8",
+    "_comment_1": "The base address, where the contents of the binary would be mapped on the chip, as a hexadecimal number.",
+    "flash_base_address": "0x08000000",
+    "_comment_2": "The base address of the RAM memory region as a hexadecimal number.",
+    "ram_base_address": "0x20000000",
+    "_comment_3": "The size of the RAM memory region (in bytes) as a hexadecimal number.",
+    "ram_size": "0x00030000"
+}
\ No newline at end of file
--- a/src/caller/src/main.rs
+++ b/src/caller/src/main.rs
@@ -4,7 +4,7 @@
 extern crate cwe_checker_lib; // Needed for the docstring-link to work

 use cwe_checker_lib::analysis::graph;
-use cwe_checker_lib::utils::binary::RuntimeMemoryImage;
+use cwe_checker_lib::utils::binary::{BareMetalConfig, RuntimeMemoryImage};
 use cwe_checker_lib::utils::log::print_all_messages;
 use cwe_checker_lib::utils::{get_ghidra_plugin_path, read_config_file};
 use cwe_checker_lib::AnalysisResults;
@@ -46,6 +46,12 @@ struct CmdlineArgs {
    #[structopt(long, short)]
    quiet: bool,

+    /// Path to a configuration file for analysis of bare metal binaries.
+    ///
+    /// If this option is set then the input binary is treated as a bare metal binary regardless of its format.
+    #[structopt(long)]
+    bare_metal_config: Option<String>,
+
    /// Prints out the version numbers of all known modules.
    #[structopt(long)]
    module_versions: bool,
@@ -59,7 +65,7 @@ struct CmdlineArgs {
 fn main() {
    let cmdline_args = CmdlineArgs::from_args();

-    run_with_ghidra(cmdline_args);
+    run_with_ghidra(&cmdline_args);
 }

 /// Check the existence of a file
@@ -75,7 +81,7 @@ fn check_file_existence(file_path: String) -> Result<(), String> {
 }

 /// Run the cwe_checker with Ghidra as its backend.
-fn run_with_ghidra(args: CmdlineArgs) {
+fn run_with_ghidra(args: &CmdlineArgs) {
    let mut modules = cwe_checker_lib::get_modules();
    if args.module_versions {
        // Only print the module versions and then quit.
@@ -87,13 +93,21 @@ fn run_with_ghidra(args: CmdlineArgs) {
    }

    // Get the configuration file
-    let config: serde_json::Value = if let Some(config_path) = args.config {
+    let config: serde_json::Value = if let Some(ref config_path) = args.config {
        let file = std::io::BufReader::new(std::fs::File::open(config_path).unwrap());
        serde_json::from_reader(file).expect("Parsing of the configuration file failed")
    } else {
        read_config_file("config.json")
    };

+    // Get the bare metal configuration file if it is provided
+    let bare_metal_config_opt: Option<BareMetalConfig> =
+        args.bare_metal_config.as_ref().map(|config_path| {
+            let file = std::io::BufReader::new(std::fs::File::open(config_path).unwrap());
+            serde_json::from_reader(file)
+                .expect("Parsing of the bare metal configuration file failed")
+        });
+
    // Filter the modules to be executed if the `--partial` parameter is set.
    if let Some(ref partial_module_list) = args.partial {
        filter_modules_for_partial_run(&mut modules, partial_module_list);
@@ -106,21 +120,31 @@ fn run_with_ghidra(args: CmdlineArgs) {
            .collect();
    }

-    let binary_file_path = PathBuf::from(args.binary.unwrap());
+    let binary_file_path = PathBuf::from(args.binary.clone().unwrap());
    let binary: Vec<u8> = std::fs::read(&binary_file_path).unwrap_or_else(|_| {
        panic!(
            "Error: Could not read from file path {}",
            binary_file_path.display()
        )
    });
-    let (mut project, mut all_logs) = get_project_from_ghidra(&binary_file_path, &binary[..]);
+    let (mut project, mut all_logs) = get_project_from_ghidra(
+        &binary_file_path,
+        &binary[..],
+        bare_metal_config_opt.clone(),
+    );
    // Normalize the project and gather log messages generated from it.
    all_logs.append(&mut project.normalize());

    // Generate the representation of the runtime memory image of the binary
-    let mut runtime_memory_image = RuntimeMemoryImage::new(&binary).unwrap_or_else(|err| {
-        panic!("Error while generating runtime memory image: {}", err);
-    });
+    let mut runtime_memory_image = if let Some(bare_metal_config) = bare_metal_config_opt.as_ref() {
+        RuntimeMemoryImage::new_from_bare_metal(&binary, &bare_metal_config).unwrap_or_else(|err| {
+            panic!("Error while generating runtime memory image: {}", err);
+        })
+    } else {
+        RuntimeMemoryImage::new(&binary).unwrap_or_else(|err| {
+            panic!("Error while generating runtime memory image: {}", err);
+        })
+    };
    if project.program.term.address_base_offset != 0 {
        // We adjust the memory addresses once globally
        // so that other analyses do not have to adjust their addresses.
@@ -206,7 +230,14 @@ fn filter_modules_for_partial_run(
 }

 /// Execute the `p_code_extractor` plugin in ghidra and parse its output into the `Project` data structure.
-fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<LogMessage>) {
+fn get_project_from_ghidra(
+    file_path: &Path,
+    binary: &[u8],
+    bare_metal_config_opt: Option<BareMetalConfig>,
+) -> (Project, Vec<LogMessage>) {
+    let bare_metal_base_address_opt = bare_metal_config_opt
+        .as_ref()
+        .map(|config| config.parse_binary_base_address());
    let ghidra_path: std::path::PathBuf =
        serde_json::from_value(read_config_file("ghidra.json")["ghidra_path"].clone())
            .expect("Path to Ghidra not configured.");
@@ -255,7 +286,8 @@ fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<Log
    // Execute Ghidra in a new thread and return a Join Handle, so that the thread is only joined
    // after the output has been read into the cwe_checker
    let ghidra_subprocess = thread::spawn(move || {
-        let output = match Command::new(&headless_path)
+        let mut ghidra_command = Command::new(&headless_path);
+        ghidra_command
            .arg(&thread_tmp_folder) // The folder where temporary files should be stored
            .arg(format!("PcodeExtractor_{}_{}", filename, timestamp_suffix)) // The name of the temporary Ghidra Project.
            .arg("-import") // Import a file into the Ghidra project
@@ -267,8 +299,21 @@ fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<Log
            .arg(ghidra_plugin_path) // Path to the folder containing the PcodeExtractor.java (so that the other java files can be found.)
            .arg("-deleteProject") // Delete the temporary project after the script finished
            .arg("-analysisTimeoutPerFile") // Set a timeout for how long the standard analysis can run before getting aborted
-            .arg("3600") // Timeout of one hour (=3600 seconds) // TODO: The post-script can detect that the timeout fired and react accordingly.
-            .output() // Execute the command and catch its output.
+            .arg("3600"); // Timeout of one hour (=3600 seconds) // TODO: The post-script can detect that the timeout fired and react accordingly.
+        if let Some(bare_metal_config) = bare_metal_config_opt {
+            let mut base_address: &str = &bare_metal_config.flash_base_address;
+            if let Some(stripped_address) = base_address.strip_prefix("0x") {
+                base_address = stripped_address;
+            }
+            ghidra_command
+                .arg("-loader") // Tell Ghidra to use a specific loader
+                .arg("BinaryLoader") // Use the BinaryLoader for bare metal binaries
+                .arg("-loader-baseAddr") // Provide the base address where the binary should be mapped in memory
+                .arg(base_address)
+                .arg("-processor") // Provide the processor type ID, for which the binary was compiled.
+                .arg(bare_metal_config.processor_id);
+        }
+        let output = match ghidra_command.output() // Execute the command and catch its output.
        {
            Ok(output) => output,
            Err(err) => {
@@ -318,12 +363,18 @@ fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<Log
    let project: Project = match cwe_checker_lib::utils::get_binary_base_address(binary) {
        Ok(binary_base_address) => project_pcode.into_ir_project(binary_base_address),
        Err(_err) => {
-            log_messages.push(LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback."));
-            let mut project = project_pcode.into_ir_project(0);
-            // Setting the address_base_offset to zero is a hack, which worked for the tested PE files.
-            // But this hack will probably not work in general!
-            project.program.term.address_base_offset = 0;
-            project
+            if let Some(binary_base_address) = bare_metal_base_address_opt {
+                let mut project = project_pcode.into_ir_project(binary_base_address);
+                project.program.term.address_base_offset = 0;
+                project
+            } else {
+                log_messages.push(LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback."));
+                let mut project = project_pcode.into_ir_project(0);
+                // For PE files setting the address_base_offset to zero is a hack, which worked for the tested PE files.
+                // But this hack will probably not work in general!
+                project.program.term.address_base_offset = 0;
+                project
+            }
        }
    };


--- a/src/cwe_checker_lib/src/lib.rs
+++ b/src/cwe_checker_lib/src/lib.rs
@@ -30,6 +30,17 @@ through the `--config` command line option.
 Start by taking a look at the standard configuration file located at `src/config.json`
 and read the [check-specific documentation](crate::checkers) for more details about each field in the configuration file.

+## For bare-metal binaries
+
+The cwe_checker offers experimental support for analyzing bare-metal binaries.
+To use it, provide a bare-metal configuration file via the `--bare-metal-config` command line option.
+An example of such a configuration file can be found at `bare_metal/stm32f407vg.json`
+(which was created and tested for an STM32F407VG MCU).
+
+For more information on the necessary fields of the configuration file
+and the assumed memory model when analyzing bare metal binaries
+see the [configuration struct documentation](crate::utils::binary::BareMetalConfig).
+
 # Integration into other tools

 ### Integration into Ghidra

--- a/src/cwe_checker_lib/src/utils/binary.rs
+++ b/src/cwe_checker_lib/src/utils/binary.rs
@@ -7,6 +7,56 @@ use goblin::elf;
 use goblin::pe;
 use goblin::Object;

+/// Contains all information parsed out of the bare metal configuration JSON file.
+///
+/// The content is information that is necessary for handling bare metal binaries
+/// and that the cwe_checker cannot automatically deduce from the binary itself.
+///
+/// When handling bare metal binaries
+/// we assume that the corresponding MCU uses a very simple memory layout
+/// consisting of exactly one region of non-volatile (flash) memory
+/// and exactly one region of volatile memory (RAM).
+/// Furthermore, we assume that the binary itself is just a dump of the non-volatile memory region.
+///
+/// All addresses and sizes are stored as strings containing hexadecimal numbers.
+/// A leading `0x` prefix is optional.
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
+pub struct BareMetalConfig {
+    /// The CPU type.
+    ///
+    /// The string has to match the `processor_id` that Ghidra uses for the specific CPU type,
+    /// as it is forwarded to Ghidra to identify the CPU.
+    ///
+    /// The ID is expected to consist of colon-separated parts (e.g. `ARM:LE:32:v8`).
+    /// When generating the runtime memory image
+    /// the second part is read as the endianness (`LE` or `BE`)
+    /// and the third part is parsed as the address size in bits.
+    pub processor_id: String,
+    /// The base address of the non-volatile memory (usually flash memory) used by the chip.
+    /// The string is parsed as a hexadecimal number.
+    ///
+    /// We assume that the size of the non-volatile memory equals the size of the input binary.
+    /// In other words, we assume
+    /// that the input binary is a complete dump of the contents of the non-volatile memory of the chip.
+    pub flash_base_address: String,
+    /// The base address of the volatile memory (RAM) used by the chip.
+    /// The string is parsed as a hexadecimal number.
+    pub ram_base_address: String,
+    /// The size of the volatile memory (RAM) used by the chip in bytes.
+    /// The string is parsed as a hexadecimal number.
+    ///
+    /// If the exact size is unknown, then one can try to use an upper approximation instead.
+    pub ram_size: String,
+}
+
+impl BareMetalConfig {
+    /// Return the base address of the binary as an integer.
+    ///
+    /// The address is obtained by parsing the `flash_base_address` field as a hexadecimal number.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `flash_base_address` cannot be parsed as a hexadecimal number.
+    pub fn parse_binary_base_address(&self) -> u64 {
+        parse_hex_string_to_u64(&self.flash_base_address)
+            .expect("Parsing of the binary base address failed.")
+    }
+}
+
+/// Parse a string containing a hexadecimal number into a `u64`.
+///
+/// A leading `0x` prefix is optional and gets removed before parsing.
+/// An error is returned if the remaining string is not a valid hexadecimal `u64`.
+fn parse_hex_string_to_u64(string: &str) -> Result<u64, Error> {
+    let digits = string.strip_prefix("0x").unwrap_or(string);
+    let value = u64::from_str_radix(digits, 16)?;
+    Ok(value)
+}
+
 /// A representation of the runtime image of a binary after being loaded into memory by the loader.
 #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
 pub struct RuntimeMemoryImage {
@@ -67,6 +117,31 @@ impl MemorySegment {
            execute_flag: (section_header.characteristics & 0x20000000) != 0,
        }
    }
+
+    /// Create the memory segment holding the contents of a bare metal binary.
+    ///
+    /// The bytes of `binary` are copied into the segment,
+    /// which starts at `base_address` and is exactly as large as `binary`.
+    /// The read, write and execute flags of the segment are all set.
+    pub fn from_bare_metal_file(binary: &[u8], base_address: u64) -> MemorySegment {
+        let bytes = Vec::from(binary);
+        MemorySegment {
+            bytes,
+            base_address,
+            read_flag: true,
+            write_flag: true,
+            execute_flag: true,
+        }
+    }
+
+    /// Create a zero-filled memory segment with the given base address and size.
+    ///
+    /// The segment can be read and written, but it is not marked as executable.
+    ///
+    /// Note that `size` is converted with `as usize`,
+    /// so the value gets truncated on targets where `usize` is narrower than 64 bits.
+    pub fn new_bare_metal_ram_segment(base_address: u64, size: u64) -> MemorySegment {
+        let byte_count = size as usize;
+        let bytes = vec![0; byte_count];
+        MemorySegment {
+            bytes,
+            base_address,
+            read_flag: true,
+            write_flag: true,
+            execute_flag: false,
+        }
+    }
 }

 impl RuntimeMemoryImage {
@@ -114,6 +189,49 @@ impl RuntimeMemoryImage {
        }
    }

+    /// Generate a runtime memory image for a bare metal binary.
+    ///
+    /// The generated runtime memory image contains:
+    /// * one memory region corresponding to non-volatile memory
+    /// * one memory region corresponding to volatile memory (RAM)
+    ///
+    /// See [`BareMetalConfig`] for more information about the assumed memory layout for bare metal binaries.
+    ///
+    /// The endianness and the address size are read from the second and third
+    /// colon-separated part of the processor ID (e.g. `LE` and `32` for `ARM:LE:32:v8`).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if
+    /// * the processor ID has fewer than three parts, an unknown endianness or a non-numeric address size,
+    /// * one of the addresses or sizes of the configuration is not a valid hexadecimal number,
+    /// * the RAM size is not representable as `usize` on the host,
+    /// * the binary does not fit into the address space when mapped at the flash base address.
+    pub fn new_from_bare_metal(
+        binary: &[u8],
+        bare_metal_config: &BareMetalConfig,
+    ) -> Result<Self, Error> {
+        let processor_id_parts: Vec<&str> = bare_metal_config.processor_id.split(':').collect();
+        if processor_id_parts.len() < 3 {
+            return Err(anyhow!("Could not parse processor ID."));
+        }
+        let is_little_endian = match processor_id_parts[1] {
+            "LE" => true,
+            "BE" => false,
+            _ => return Err(anyhow!("Could not parse endianness of the processor ID.")),
+        };
+        let flash_base_address = parse_hex_string_to_u64(&bare_metal_config.flash_base_address)?;
+        let ram_base_address = parse_hex_string_to_u64(&bare_metal_config.ram_base_address)?;
+        let ram_size = parse_hex_string_to_u64(&bare_metal_config.ram_size)?;
+        // The RAM segment is backed by a vector, so its size has to be representable as `usize`.
+        // Without this check the size would be silently truncated on hosts with a narrower `usize`.
+        if ram_size > usize::MAX as u64 {
+            return Err(anyhow!("RAM size too large for the host platform"));
+        }
+        // Check that the whole binary is contained in addressable space.
+        let address_bit_length = processor_id_parts[2].parse::<u32>()?;
+        let end_address = match flash_base_address.checked_add(binary.len() as u64) {
+            Some(end_address) => end_address,
+            None => return Err(anyhow!("Binary too large for given base address")),
+        };
+        // A plain `>>` overflows if the address size is 64 bits or more:
+        // it panics in debug builds and shifts by a wrapped amount in release builds.
+        // `checked_shr` returns `None` in that case,
+        // which means that every `u64` address lies inside the address space.
+        if end_address.checked_shr(address_bit_length).unwrap_or(0) != 0 {
+            return Err(anyhow!("Binary too large for given base address"));
+        }
+
+        Ok(RuntimeMemoryImage {
+            memory_segments: vec![
+                MemorySegment::from_bare_metal_file(binary, flash_base_address),
+                MemorySegment::new_bare_metal_ram_segment(ram_base_address, ram_size),
+            ],
+            is_little_endian,
+        })
+    }
+
    /// Return whether values in the memory image should be interpreted in little-endian
    /// or big-endian byte order.
    pub fn is_little_endian_byte_order(&self) -> bool {

--- a/test/bare_metal_samples/test_sample.bin
+++ b/test/bare_metal_samples/test_sample.bin
--- a/test/src/lib.rs
+++ b/test/src/lib.rs
@@ -170,6 +170,38 @@ mod tests {

    #[test]
    #[ignore]
+    fn bare_metal() {
+        // Run the memory check on the bare metal sample using the STM32F407VG configuration.
+        let filepath = "bare_metal_samples/test_sample.bin";
+        let mut command = Command::new("cwe_checker");
+        command.arg(filepath).args(&[
+            "--partial",
+            "Memory",
+            "--quiet",
+            "--bare-metal-config",
+            "../bare_metal/stm32f407vg.json",
+        ]);
+        let output = command.output().unwrap();
+        let stdout = String::from_utf8(output.stdout).unwrap();
+        let num_cwes = stdout
+            .lines()
+            .filter(|line| line.starts_with("[CWE125]"))
+            .count();
+        // The number of found CWEs is only checked approximately,
+        // so that minor result changes do not break this test.
+        // The results are not yet reliable enough for a stricter check.
+        if !(1..=10).contains(&num_cwes) {
+            println!("{} \t {}", filepath, "[FAILED]".red());
+            panic!(
+                "Expected occurrences: Between 1 and 10. Found: {}",
+                num_cwes
+            );
+        }
+        println!("{} \t {}", filepath, "[OK]".green());
+    }
+
+    #[test]
+    #[ignore]
    fn cwe_78() {
        let mut error_log = Vec::new();
        let mut tests = all_test_cases("cwe_78", "CWE78");