Unverified Commit 531e2bc1 by Enkelmann Committed by GitHub

Support analysis of bare metal binaries (#203)

parent 7f37f6af
0.6-dev
====
- Add support for analysis of bare-metal binaries (PR #203)
0.5 (2021-07)
====
......
......@@ -69,6 +69,16 @@ For information about other available command line flags you can pass the `--hel
If you use the stable version, you can also look at the [online documentation](https://fkie-cad.github.io/cwe_checker/index.html) for more information.
### For Bare-Metal Binaries ###
The cwe_checker offers experimental support for analyzing bare-metal binaries.
To do so, provide a bare-metal configuration file via the `--bare-metal-config` command line option.
An example for such a configuration file can be found at `bare_metal/stm32f407vg.json`
(which was created and tested for an STM32F407VG MCU).
For more information, build and read the documentation locally via `make documentation`.
Note that this analysis mode is not yet included in the stable version of the cwe_checker.
## Documentation and Tests ##
The test binaries for our test suite can be built with `make compile_test_files` (needs Docker to be installed!). The test suite can then be run with `make test`.
......
{
"_comment": "The CPU architecture of the chip. Valid values are those that Ghidra accepts as processor IDs.",
"processor_id": "ARM:LE:32:v8",
"_comment_1": "The base address, where the contents of the binary would be mapped on the chip, as a hexadecimal number.",
"flash_base_address": "0x08000000",
"_comment_2": "The base address of the RAM memory region as a hexadecimal number.",
"ram_base_address": "0x20000000",
"_comment_3": "The size of the RAM memory region (in bytes) as a hexadecimal number.",
"ram_size": "0x00030000"
}
\ No newline at end of file
......@@ -4,7 +4,7 @@
extern crate cwe_checker_lib; // Needed for the docstring-link to work
use cwe_checker_lib::analysis::graph;
use cwe_checker_lib::utils::binary::RuntimeMemoryImage;
use cwe_checker_lib::utils::binary::{BareMetalConfig, RuntimeMemoryImage};
use cwe_checker_lib::utils::log::print_all_messages;
use cwe_checker_lib::utils::{get_ghidra_plugin_path, read_config_file};
use cwe_checker_lib::AnalysisResults;
......@@ -46,6 +46,12 @@ struct CmdlineArgs {
#[structopt(long, short)]
quiet: bool,
/// Path to a configuration file for analysis of bare metal binaries.
///
/// If this option is set then the input binary is treated as a bare metal binary regardless of its format.
#[structopt(long)]
bare_metal_config: Option<String>,
/// Prints out the version numbers of all known modules.
#[structopt(long)]
module_versions: bool,
......@@ -59,7 +65,7 @@ struct CmdlineArgs {
fn main() {
let cmdline_args = CmdlineArgs::from_args();
run_with_ghidra(cmdline_args);
run_with_ghidra(&cmdline_args);
}
/// Check the existence of a file
......@@ -75,7 +81,7 @@ fn check_file_existence(file_path: String) -> Result<(), String> {
}
/// Run the cwe_checker with Ghidra as its backend.
fn run_with_ghidra(args: CmdlineArgs) {
fn run_with_ghidra(args: &CmdlineArgs) {
let mut modules = cwe_checker_lib::get_modules();
if args.module_versions {
// Only print the module versions and then quit.
......@@ -87,13 +93,21 @@ fn run_with_ghidra(args: CmdlineArgs) {
}
// Get the configuration file
let config: serde_json::Value = if let Some(config_path) = args.config {
let config: serde_json::Value = if let Some(ref config_path) = args.config {
let file = std::io::BufReader::new(std::fs::File::open(config_path).unwrap());
serde_json::from_reader(file).expect("Parsing of the configuration file failed")
} else {
read_config_file("config.json")
};
// Get the bare metal configuration file if it is provided
let bare_metal_config_opt: Option<BareMetalConfig> =
args.bare_metal_config.as_ref().map(|config_path| {
let file = std::io::BufReader::new(std::fs::File::open(config_path).unwrap());
serde_json::from_reader(file)
.expect("Parsing of the bare metal configuration file failed")
});
// Filter the modules to be executed if the `--partial` parameter is set.
if let Some(ref partial_module_list) = args.partial {
filter_modules_for_partial_run(&mut modules, partial_module_list);
......@@ -106,21 +120,31 @@ fn run_with_ghidra(args: CmdlineArgs) {
.collect();
}
let binary_file_path = PathBuf::from(args.binary.unwrap());
let binary_file_path = PathBuf::from(args.binary.clone().unwrap());
let binary: Vec<u8> = std::fs::read(&binary_file_path).unwrap_or_else(|_| {
panic!(
"Error: Could not read from file path {}",
binary_file_path.display()
)
});
let (mut project, mut all_logs) = get_project_from_ghidra(&binary_file_path, &binary[..]);
let (mut project, mut all_logs) = get_project_from_ghidra(
&binary_file_path,
&binary[..],
bare_metal_config_opt.clone(),
);
// Normalize the project and gather log messages generated from it.
all_logs.append(&mut project.normalize());
// Generate the representation of the runtime memory image of the binary
let mut runtime_memory_image = RuntimeMemoryImage::new(&binary).unwrap_or_else(|err| {
panic!("Error while generating runtime memory image: {}", err);
});
let mut runtime_memory_image = if let Some(bare_metal_config) = bare_metal_config_opt.as_ref() {
RuntimeMemoryImage::new_from_bare_metal(&binary, &bare_metal_config).unwrap_or_else(|err| {
panic!("Error while generating runtime memory image: {}", err);
})
} else {
RuntimeMemoryImage::new(&binary).unwrap_or_else(|err| {
panic!("Error while generating runtime memory image: {}", err);
})
};
if project.program.term.address_base_offset != 0 {
// We adjust the memory addresses once globally
// so that other analyses do not have to adjust their addresses.
......@@ -206,7 +230,14 @@ fn filter_modules_for_partial_run(
}
/// Execute the `p_code_extractor` plugin in ghidra and parse its output into the `Project` data structure.
fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<LogMessage>) {
fn get_project_from_ghidra(
file_path: &Path,
binary: &[u8],
bare_metal_config_opt: Option<BareMetalConfig>,
) -> (Project, Vec<LogMessage>) {
let bare_metal_base_address_opt = bare_metal_config_opt
.as_ref()
.map(|config| config.parse_binary_base_address());
let ghidra_path: std::path::PathBuf =
serde_json::from_value(read_config_file("ghidra.json")["ghidra_path"].clone())
.expect("Path to Ghidra not configured.");
......@@ -255,7 +286,8 @@ fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<Log
// Execute Ghidra in a new thread and return a Join Handle, so that the thread is only joined
// after the output has been read into the cwe_checker
let ghidra_subprocess = thread::spawn(move || {
let output = match Command::new(&headless_path)
let mut ghidra_command = Command::new(&headless_path);
ghidra_command
.arg(&thread_tmp_folder) // The folder where temporary files should be stored
.arg(format!("PcodeExtractor_{}_{}", filename, timestamp_suffix)) // The name of the temporary Ghidra Project.
.arg("-import") // Import a file into the Ghidra project
......@@ -267,8 +299,21 @@ fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<Log
.arg(ghidra_plugin_path) // Path to the folder containing the PcodeExtractor.java (so that the other java files can be found.)
.arg("-deleteProject") // Delete the temporary project after the script finished
.arg("-analysisTimeoutPerFile") // Set a timeout for how long the standard analysis can run before getting aborted
.arg("3600") // Timeout of one hour (=3600 seconds) // TODO: The post-script can detect that the timeout fired and react accordingly.
.output() // Execute the command and catch its output.
.arg("3600"); // Timeout of one hour (=3600 seconds) // TODO: The post-script can detect that the timeout fired and react accordingly.
if let Some(bare_metal_config) = bare_metal_config_opt {
let mut base_address: &str = &bare_metal_config.flash_base_address;
if let Some(stripped_address) = base_address.strip_prefix("0x") {
base_address = stripped_address;
}
ghidra_command
.arg("-loader") // Tell Ghidra to use a specific loader
.arg("BinaryLoader") // Use the BinaryLoader for bare metal binaries
.arg("-loader-baseAddr") // Provide the base address where the binary should be mapped in memory
.arg(base_address)
.arg("-processor") // Provide the processor type ID, for which the binary was compiled.
.arg(bare_metal_config.processor_id);
}
let output = match ghidra_command.output() // Execute the command and catch its output.
{
Ok(output) => output,
Err(err) => {
......@@ -318,12 +363,18 @@ fn get_project_from_ghidra(file_path: &Path, binary: &[u8]) -> (Project, Vec<Log
let project: Project = match cwe_checker_lib::utils::get_binary_base_address(binary) {
Ok(binary_base_address) => project_pcode.into_ir_project(binary_base_address),
Err(_err) => {
log_messages.push(LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback."));
let mut project = project_pcode.into_ir_project(0);
// Setting the address_base_offset to zero is a hack, which worked for the tested PE files.
// But this hack will probably not work in general!
project.program.term.address_base_offset = 0;
project
if let Some(binary_base_address) = bare_metal_base_address_opt {
let mut project = project_pcode.into_ir_project(binary_base_address);
project.program.term.address_base_offset = 0;
project
} else {
log_messages.push(LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback."));
let mut project = project_pcode.into_ir_project(0);
// For PE files setting the address_base_offset to zero is a hack, which worked for the tested PE files.
// But this hack will probably not work in general!
project.program.term.address_base_offset = 0;
project
}
}
};
......
......@@ -30,6 +30,17 @@ through the `--config` command line option.
Start by taking a look at the standard configuration file located at `src/config.json`
and read the [check-specific documentation](crate::checkers) for more details about each field in the configuration file.
## For bare-metal binaries
The cwe_checker offers experimental support for analyzing bare-metal binaries.
To do so, provide a bare-metal configuration file via the `--bare-metal-config` command line option.
An example for such a configuration file can be found at `bare_metal/stm32f407vg.json`
(which was created and tested for an STM32F407VG MCU).
For more information on the necessary fields of the configuration file
and the assumed memory model when analyzing bare metal binaries
see the [configuration struct documentation](crate::utils::binary::BareMetalConfig).
# Integration into other tools
### Integration into Ghidra
......
......@@ -7,6 +7,56 @@ use goblin::elf;
use goblin::pe;
use goblin::Object;
/// The settings read from the bare metal configuration JSON file.
///
/// These are the facts about the target chip that are needed for handling a bare metal binary
/// but that the cwe_checker cannot automatically deduce from the binary itself.
///
/// The memory model assumed for bare metal binaries is deliberately simple:
/// * the MCU has exactly one region of non-volatile (flash) memory,
/// * the MCU has exactly one region of volatile memory (RAM),
/// * the input binary is just a dump of the non-volatile memory region.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct BareMetalConfig {
    /// Identifier of the CPU type.
    ///
    /// The value is forwarded to Ghidra to identify the CPU,
    /// so it has to match the `processor_id` that Ghidra uses for the specific CPU type.
    pub processor_id: String,
    /// Start address of the non-volatile memory (usually flash memory) of the chip,
    /// given as a string that is parsed as a hexadecimal number.
    ///
    /// The size of the non-volatile memory is assumed to equal the size of the input binary,
    /// i.e. the input binary is assumed to be a complete dump
    /// of the contents of the non-volatile memory of the chip.
    pub flash_base_address: String,
    /// Start address of the volatile memory (RAM) of the chip,
    /// given as a string that is parsed as a hexadecimal number.
    pub ram_base_address: String,
    /// Size of the volatile memory (RAM) of the chip,
    /// given as a string that is parsed as a hexadecimal number.
    ///
    /// If the exact size is unknown, then one can try to use an upper approximation instead.
    pub ram_size: String,
}
impl BareMetalConfig {
    /// Parse the `flash_base_address` field,
    /// i.e. the address at which the binary is mapped, into an integer.
    ///
    /// # Panics
    ///
    /// Panics if the `flash_base_address` string cannot be parsed as a hexadecimal number.
    pub fn parse_binary_base_address(&self) -> u64 {
        let parsed_address = parse_hex_string_to_u64(self.flash_base_address.as_str());
        parsed_address.expect("Parsing of the binary base address failed.")
    }
}
/// Parse a string of hexadecimal digits into a `u64`.
///
/// A single leading `0x` (lower case only) is removed before parsing, if present.
/// An error is returned if the remaining string is not a valid hexadecimal number
/// that fits into a `u64`.
fn parse_hex_string_to_u64(mut string: &str) -> Result<u64, Error> {
    if let Some(digits) = string.strip_prefix("0x") {
        string = digits;
    }
    let value = u64::from_str_radix(string, 16)?;
    Ok(value)
}
/// A representation of the runtime image of a binary after being loaded into memory by the loader.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct RuntimeMemoryImage {
......@@ -67,6 +117,31 @@ impl MemorySegment {
execute_flag: (section_header.characteristics & 0x20000000) != 0,
}
}
/// Build the memory segment that holds the contents of a bare metal binary.
///
/// The bytes of `binary` are copied into the segment, which starts at `base_address`
/// and therefore has the same size as `binary`.
/// The segment is flagged as readable, writeable and executable.
pub fn from_bare_metal_file(binary: &[u8], base_address: u64) -> MemorySegment {
    let content = Vec::from(binary);
    MemorySegment {
        base_address,
        bytes: content,
        execute_flag: true,
        write_flag: true,
        read_flag: true,
    }
}
/// Build a zero-initialized memory segment representing the RAM of a bare metal chip.
///
/// The segment starts at `base_address` and consists of `size` bytes that are all set to zero.
/// It is flagged as readable and writeable, but not as executable.
pub fn new_bare_metal_ram_segment(base_address: u64, size: u64) -> MemorySegment {
    // Note: `as` truncates `size` on targets where `usize` is narrower than 64 bits.
    let byte_count = size as usize;
    let zeroed_content = vec![0; byte_count];
    MemorySegment {
        base_address,
        bytes: zeroed_content,
        execute_flag: false,
        write_flag: true,
        read_flag: true,
    }
}
}
impl RuntimeMemoryImage {
......@@ -114,6 +189,49 @@ impl RuntimeMemoryImage {
}
}
/// Generate a runtime memory image for a bare metal binary.
///
/// The generated runtime memory image contains:
/// * one memory region corresponding to non-volatile memory,
///   which starts at the configured flash base address and holds the contents of `binary`
/// * one memory region corresponding to volatile memory (RAM),
///   which has the configured RAM base address and size and is filled with zeroes
///
/// The endianness and the number of address bits are read from the second and third
/// `:`-separated component of the processor ID (e.g. `LE` and `32` for `ARM:LE:32:v8`).
///
/// See [`BareMetalConfig`] for more information about the assumed memory layout for bare metal binaries.
///
/// # Errors
///
/// Returns an error if
/// * the processor ID has fewer than three components,
///   its endianness component is neither `LE` nor `BE`
///   or its address size component is not a number,
/// * one of the addresses or sizes in the configuration is not a valid hexadecimal number,
/// * the binary does not fit into the addressable space when mapped at the flash base address.
pub fn new_from_bare_metal(
    binary: &[u8],
    bare_metal_config: &BareMetalConfig,
) -> Result<Self, Error> {
    let processor_id_parts: Vec<&str> = bare_metal_config.processor_id.split(':').collect();
    if processor_id_parts.len() < 3 {
        return Err(anyhow!("Could not parse processor ID."));
    }
    let is_little_endian = match processor_id_parts[1] {
        "LE" => true,
        "BE" => false,
        _ => return Err(anyhow!("Could not parse endianness of the processor ID.")),
    };
    let flash_base_address = parse_hex_string_to_u64(&bare_metal_config.flash_base_address)?;
    let ram_base_address = parse_hex_string_to_u64(&bare_metal_config.ram_base_address)?;
    let ram_size = parse_hex_string_to_u64(&bare_metal_config.ram_size)?;
    // Check that the whole binary is contained in addressable space.
    let address_bit_length = processor_id_parts[2].parse::<u64>()?;
    // Addresses are stored as `u64`, so at most 64 address bits are usable.
    // Capping the value also guarantees that the shift below cannot overflow,
    // which matters for processor IDs that declare 64 address bits.
    let usable_address_bits = address_bit_length.min(64) as u32;
    let address_space_size = 1u128 << usable_address_bits;
    // The end address is exclusive (one past the last byte of the binary).
    // It is computed as `u128` so that a binary reaching up to the very top
    // of the address space is still representable and accepted.
    let flash_end_address = u128::from(flash_base_address) + binary.len() as u128;
    if flash_end_address > address_space_size {
        return Err(anyhow!("Binary too large for given base address"));
    }

    Ok(RuntimeMemoryImage {
        memory_segments: vec![
            MemorySegment::from_bare_metal_file(binary, flash_base_address),
            MemorySegment::new_bare_metal_ram_segment(ram_base_address, ram_size),
        ],
        is_little_endian,
    })
}
/// Return whether values in the memory image should be interpreted in little-endian
/// or big-endian byte order.
pub fn is_little_endian_byte_order(&self) -> bool {
......
......@@ -170,6 +170,38 @@ mod tests {
/// Run the cwe_checker on the bare metal test sample
/// and check the number of CWE warnings that the `Memory` check reports for it.
///
/// Only lines of the standard output starting with `[CWE125]` are counted.
#[test]
#[ignore]
fn bare_metal() {
    let filepath = "bare_metal_samples/test_sample.bin";
    let mut checker_run = Command::new("cwe_checker");
    checker_run.arg(filepath).args(&[
        "--partial",
        "Memory",
        "--quiet",
        "--bare-metal-config",
        "../bare_metal/stm32f407vg.json",
    ]);
    let output = checker_run.output().unwrap();
    let stdout = String::from_utf8(output.stdout).unwrap();
    let num_cwes = stdout
        .lines()
        .filter(|line| line.starts_with("[CWE125]"))
        .count();
    // The number of found CWEs is only checked against a range,
    // so that minor changes in the analysis results do not break this test.
    // The results are not yet reliable enough for a stricter check.
    if !(1..=10).contains(&num_cwes) {
        println!("{} \t {}", filepath, "[FAILED]".red());
        panic!(
            "Expected occurrences: Between 1 and 10. Found: {}",
            num_cwes
        );
    }
    println!("{} \t {}", filepath, "[OK]".green());
}
#[test]
#[ignore]
fn cwe_78() {
let mut error_log = Vec::new();
let mut tests = all_test_cases("cwe_78", "CWE78");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment