Unverified Commit 2741dba3 by Enkelmann Committed by GitHub

Refactor some of the caller code into the Lib crate (#371)

parent a1c1026d
......@@ -143,10 +143,10 @@ dependencies = [
name = "cwe_checker"
version = "0.7.0-dev"
dependencies = [
"anyhow",
"clap",
"cwe_checker_lib",
"directories",
"nix",
"serde_json",
]
......@@ -175,6 +175,7 @@ dependencies = [
"gcd",
"goblin",
"itertools",
"nix",
"petgraph",
"regex",
"serde",
......@@ -330,9 +331,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.134"
version = "0.2.139"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "329c933548736bc49fd575ee68c89e8be4d260064184389a5b77517cddd99ffb"
checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
[[package]]
name = "log"
......@@ -357,25 +358,25 @@ checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memoffset"
version = "0.6.5"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
dependencies = [
"autocfg",
]
[[package]]
name = "nix"
version = "0.25.0"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e322c04a9e3440c327fca7b6c8a63e6890a32fa2ad689db972425f07e0d22abb"
checksum = "46a58d1d356c6597d08cde02c2f09d785b09e28711837b1ed667dc652c08a694"
dependencies = [
"autocfg",
"bitflags",
"cfg-if",
"libc",
"memoffset",
"pin-utils",
"static_assertions",
]
[[package]]
......@@ -660,6 +661,12 @@ dependencies = [
]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
......
......@@ -9,4 +9,4 @@ clap = { version = "4.0", features = ["derive"] }
cwe_checker_lib = { path = "../cwe_checker_lib" }
serde_json = "1.0"
directories = "4.0.1"
nix = "0.25.0"
\ No newline at end of file
anyhow = "1.0"
\ No newline at end of file
......@@ -3,19 +3,16 @@
extern crate cwe_checker_lib; // Needed for the docstring-link to work
use anyhow::Context;
use anyhow::Error;
use clap::Parser;
use cwe_checker_lib::analysis::graph;
use cwe_checker_lib::intermediate_representation::RuntimeMemoryImage;
use cwe_checker_lib::pipeline::{disassemble_binary, AnalysisResults};
use cwe_checker_lib::utils::binary::BareMetalConfig;
use cwe_checker_lib::utils::log::{print_all_messages, LogLevel};
use cwe_checker_lib::utils::{get_ghidra_plugin_path, read_config_file};
use cwe_checker_lib::AnalysisResults;
use cwe_checker_lib::{intermediate_representation::Project, utils::log::LogMessage};
use nix::{sys::stat, unistd};
use cwe_checker_lib::utils::read_config_file;
use std::collections::{BTreeSet, HashSet};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::thread;
use std::path::PathBuf;
#[derive(Debug, Parser)]
#[command(version, about)]
......@@ -73,10 +70,10 @@ struct CmdlineArgs {
debug: bool,
}
fn main() {
fn main() -> Result<(), Error> {
let cmdline_args = CmdlineArgs::parse();
run_with_ghidra(&cmdline_args);
run_with_ghidra(&cmdline_args)
}
/// Return `Ok(file_path)` only if `file_path` points to an existing file.
......@@ -92,7 +89,7 @@ fn check_file_existence(file_path: &str) -> Result<String, String> {
}
/// Run the cwe_checker with Ghidra as its backend.
fn run_with_ghidra(args: &CmdlineArgs) {
fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
let mut modules = cwe_checker_lib::get_modules();
if args.module_versions {
// Only print the module versions and then quit.
......@@ -100,15 +97,15 @@ fn run_with_ghidra(args: &CmdlineArgs) {
for module in modules.iter() {
println!("{}", module);
}
return;
return Ok(());
}
// Get the configuration file
let config: serde_json::Value = if let Some(ref config_path) = args.config {
let file = std::io::BufReader::new(std::fs::File::open(config_path).unwrap());
serde_json::from_reader(file).expect("Parsing of the configuration file failed")
serde_json::from_reader(file).context("Parsing of the configuration file failed")?
} else {
read_config_file("config.json")
read_config_file("config.json")?
};
// Get the bare metal configuration file if it is provided
......@@ -128,37 +125,9 @@ fn run_with_ghidra(args: &CmdlineArgs) {
modules.retain(|module| module.name != "CWE78");
}
let binary_file_path = PathBuf::from(args.binary.clone().unwrap());
let binary: Vec<u8> = std::fs::read(&binary_file_path).unwrap_or_else(|_| {
panic!(
"Error: Could not read from file path {}",
binary_file_path.display()
)
});
let (mut project, mut all_logs) = get_project_from_ghidra(
&binary_file_path,
&binary[..],
bare_metal_config_opt.clone(),
);
// Normalize the project and gather log messages generated from it.
all_logs.append(&mut project.normalize());
// Generate the representation of the runtime memory image of the binary
let mut runtime_memory_image = if let Some(bare_metal_config) = bare_metal_config_opt.as_ref() {
RuntimeMemoryImage::new_from_bare_metal(&binary, bare_metal_config).unwrap_or_else(|err| {
panic!("Error while generating runtime memory image: {}", err);
})
} else {
RuntimeMemoryImage::new(&binary).unwrap_or_else(|err| {
panic!("Error while generating runtime memory image: {}", err);
})
};
if project.program.term.address_base_offset != 0 {
// We adjust the memory addresses once globally
// so that other analyses do not have to adjust their addresses.
runtime_memory_image.add_global_memory_offset(project.program.term.address_base_offset);
}
project.runtime_memory_image = runtime_memory_image;
let (binary, project, mut all_logs) =
disassemble_binary(&binary_file_path, bare_metal_config_opt, args.verbose)?;
// Generate the control flow graph of the program
let extern_sub_tids = project
......@@ -225,7 +194,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
true,
false,
);
return;
return Ok(());
}
// Execute the modules and collect their logs and CWE-warnings.
......@@ -248,6 +217,7 @@ fn run_with_ghidra(args: &CmdlineArgs) {
}
}
print_all_messages(all_logs, all_cwes, args.out.as_deref(), args.json);
Ok(())
}
/// Only keep the modules specified by the `--partial` parameter in the `modules` list.
......@@ -270,161 +240,3 @@ fn filter_modules_for_partial_run(
})
.collect();
}
/// Execute the `p_code_extractor` plugin in ghidra and parse its output into the `Project` data structure.
fn get_project_from_ghidra(
file_path: &Path,
binary: &[u8],
bare_metal_config_opt: Option<BareMetalConfig>,
) -> (Project, Vec<LogMessage>) {
let bare_metal_base_address_opt = bare_metal_config_opt
.as_ref()
.map(|config| config.parse_binary_base_address());
let ghidra_path: std::path::PathBuf =
serde_json::from_value(read_config_file("ghidra.json")["ghidra_path"].clone())
.expect("Path to Ghidra not configured.");
let headless_path = ghidra_path.join("support/analyzeHeadless");
// Find the correct paths for temporary files.
let project_dirs = directories::ProjectDirs::from("", "", "cwe_checker")
.expect("Could not determine path for temporary files");
let tmp_folder = if let Some(folder) = project_dirs.runtime_dir() {
folder
} else {
Path::new("/tmp/cwe_checker")
};
if !tmp_folder.exists() {
std::fs::create_dir(tmp_folder).expect("Unable to create temporary folder");
}
// We add a timestamp suffix to file names
// so that if two instances of the cwe_checker are running in parallel on the same file
// they do not interfere with each other.
let timestamp_suffix = format!(
"{:?}",
std::time::SystemTime::now()
.duration_since(std::time::SystemTime::UNIX_EPOCH)
.unwrap()
.as_millis()
);
let filename = file_path
.file_name()
.expect("Invalid file name")
.to_string_lossy()
.to_string();
let ghidra_plugin_path = get_ghidra_plugin_path("p_code_extractor");
// Create a unique name for the pipe
let fifo_path = tmp_folder.join(format!("pcode_{}.pipe", timestamp_suffix));
// Create a new fifo and give read and write rights to the owner
if let Err(err) = unistd::mkfifo(&fifo_path, stat::Mode::from_bits(0o600).unwrap()) {
eprintln!("Error creating FIFO pipe: {}", err);
std::process::exit(101);
}
let thread_fifo_path = fifo_path.clone();
let thread_file_path = file_path.to_path_buf();
let thread_tmp_folder = tmp_folder.to_path_buf();
// Execute Ghidra in a new thread and return a Join Handle, so that the thread is only joined
// after the output has been read into the cwe_checker
let ghidra_subprocess = thread::spawn(move || {
let mut ghidra_command = Command::new(&headless_path);
ghidra_command
.arg(&thread_tmp_folder) // The folder where temporary files should be stored
.arg(format!("PcodeExtractor_{}_{}", filename, timestamp_suffix)) // The name of the temporary Ghidra Project.
.arg("-import") // Import a file into the Ghidra project
.arg(thread_file_path) // File import path
.arg("-postScript") // Execute a script after standard analysis by Ghidra finished
.arg(ghidra_plugin_path.join("PcodeExtractor.java")) // Path to the PcodeExtractor.java
.arg(thread_fifo_path) // The path to the named pipe (fifo)
.arg("-scriptPath") // Add a folder containing additional script files to the Ghidra script file search paths
.arg(ghidra_plugin_path) // Path to the folder containing the PcodeExtractor.java (so that the other java files can be found.)
.arg("-deleteProject") // Delete the temporary project after the script finished
.arg("-analysisTimeoutPerFile") // Set a timeout for how long the standard analysis can run before getting aborted
.arg("3600"); // Timeout of one hour (=3600 seconds) // TODO: The post-script can detect that the timeout fired and react accordingly.
if let Some(bare_metal_config) = bare_metal_config_opt {
let mut base_address: &str = &bare_metal_config.flash_base_address;
if let Some(stripped_address) = base_address.strip_prefix("0x") {
base_address = stripped_address;
}
ghidra_command
.arg("-loader") // Tell Ghidra to use a specific loader
.arg("BinaryLoader") // Use the BinaryLoader for bare metal binaries
.arg("-loader-baseAddr") // Provide the base address where the binary should be mapped in memory
.arg(base_address)
.arg("-processor") // Provide the processor type ID, for which the binary was compiled.
.arg(bare_metal_config.processor_id);
}
let output = match ghidra_command.output() // Execute the command and catch its output.
{
Ok(output) => output,
Err(err) => {
eprintln!("Error: Ghidra could not be executed:\n{}", err);
std::process::exit(101);
}
};
match String::from_utf8(output.stdout.clone()) {
Ok(standard_out) => {
if !standard_out.contains("Pcode was successfully extracted!") {
eprintln!("Execution of Ghidra plugin failed: Process was terminated.");
let error_message: String =
standard_out.lines().rev().collect::<Vec<&str>>()[..2].join("\n");
eprintln!("{}", error_message);
std::process::exit(101);
}
}
Err(_) => {
eprintln!("Execution of Ghidra plugin failed: Process was terminated.");
std::process::exit(101);
}
}
if !output.status.success() {
match output.status.code() {
Some(code) => {
eprintln!("{}", String::from_utf8(output.stdout).unwrap());
eprintln!("{}", String::from_utf8(output.stderr).unwrap());
eprintln!("Execution of Ghidra plugin failed with exit code {}", code);
std::process::exit(101);
}
None => {
eprintln!("Execution of Ghidra plugin failed: Process was terminated.");
std::process::exit(101);
}
}
}
});
// Open the FIFO
let file = std::fs::File::open(fifo_path.clone()).expect("Could not open FIFO.");
let mut project_pcode: cwe_checker_lib::pcode::Project =
serde_json::from_reader(std::io::BufReader::new(file)).unwrap();
let mut log_messages = project_pcode.normalize();
let project: Project = match cwe_checker_lib::utils::get_binary_base_address(binary) {
Ok(binary_base_address) => project_pcode.into_ir_project(binary_base_address),
Err(_err) => {
if let Some(binary_base_address) = bare_metal_base_address_opt {
let mut project = project_pcode.into_ir_project(binary_base_address);
project.program.term.address_base_offset = 0;
project
} else {
log_messages.push(LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback."));
let mut project = project_pcode.into_ir_project(0);
// For PE files setting the address_base_offset to zero is a hack, which worked for the tested PE files.
// But this hack will probably not work in general!
project.program.term.address_base_offset = 0;
project
}
}
};
ghidra_subprocess
.join()
.expect("The Ghidra thread to be joined has panicked!");
std::fs::remove_file(fifo_path).unwrap();
(project, log_messages)
}
......@@ -19,6 +19,7 @@ directories = "4.0.1"
goblin = "0.5.1"
itertools = "0.10.3"
gcd = "2.1.0"
nix = "0.26.1"
[lib]
name = "cwe_checker_lib"
use super::Context;
use crate::abstract_domain::*;
use crate::analysis::pointer_inference::Data;
use crate::analysis::vsa_results::VsaResult;
use crate::intermediate_representation::*;
use crate::{abstract_domain::*, AnalysisResults};
use crate::pipeline::AnalysisResults;
use std::collections::{BTreeMap, HashMap, HashSet};
impl<'a> Context<'a> {
......
......@@ -28,11 +28,11 @@ use crate::abstract_domain::TryToInterval;
use crate::analysis::pointer_inference::PointerInference;
use crate::analysis::vsa_results::*;
use crate::intermediate_representation::*;
use crate::pipeline::AnalysisResults;
use crate::utils::log::CweWarning;
use crate::utils::log::LogMessage;
use crate::utils::symbol_utils::get_callsites;
use crate::utils::symbol_utils::get_symbol_map;
use crate::AnalysisResults;
use crate::CweModule;
use serde::Deserialize;
use serde::Serialize;
......
......@@ -60,22 +60,14 @@ You can find out more information about each check, including known false positi
by reading the check-specific module documentation in the [`checkers`] module.
*/
use std::collections::BTreeMap;
pub mod abstract_domain;
pub mod analysis;
pub mod checkers;
pub mod intermediate_representation;
pub mod pcode;
pub mod pipeline;
pub mod utils;
use abstract_domain::BricksDomain;
use analysis::function_signature::FunctionSignature;
use analysis::graph::Graph;
use analysis::pointer_inference::PointerInference;
use analysis::string_abstraction::StringAbstraction;
use intermediate_representation::Project;
use utils::log::{CweWarning, LogMessage};
mod prelude {
......@@ -84,7 +76,8 @@ mod prelude {
pub use crate::intermediate_representation::{Bitvector, BitvectorExtended, ByteSize};
pub use crate::intermediate_representation::{Term, Tid};
pub use crate::AnalysisResults;
pub use crate::pipeline::AnalysisResults;
pub use anyhow::Context as _;
pub use anyhow::{anyhow, Error};
}
use prelude::*;
......@@ -134,138 +127,3 @@ pub fn get_modules() -> Vec<&'static CweModule> {
&crate::analysis::pointer_inference::CWE_MODULE,
]
}
/// A struct containing pointers to all known analysis results
/// that may be needed as input for other analyses and CWE checks.
#[derive(Clone, Copy)]
pub struct AnalysisResults<'a> {
/// The content of the binary file
pub binary: &'a [u8],
/// The computed control flow graph of the program.
pub control_flow_graph: &'a Graph<'a>,
/// A pointer to the project struct
pub project: &'a Project,
/// The results of the function signature analysis if already computed.
pub function_signatures: Option<&'a BTreeMap<Tid, FunctionSignature>>,
/// The result of the pointer inference analysis if already computed.
pub pointer_inference: Option<&'a PointerInference<'a>>,
/// The result of the string abstraction if already computed.
pub string_abstraction: Option<&'a StringAbstraction<'a, BricksDomain>>,
}
impl<'a> AnalysisResults<'a> {
/// Create a new `AnalysisResults` struct with only the project itself known.
pub fn new(
binary: &'a [u8],
control_flow_graph: &'a Graph<'a>,
project: &'a Project,
) -> AnalysisResults<'a> {
AnalysisResults {
binary,
control_flow_graph,
project,
function_signatures: None,
pointer_inference: None,
string_abstraction: None,
}
}
/// Compute the function signatures for internal functions.
pub fn compute_function_signatures(
&self,
) -> (BTreeMap<Tid, FunctionSignature>, Vec<LogMessage>) {
analysis::function_signature::compute_function_signatures(
self.project,
self.control_flow_graph,
)
}
/// Create a new `AnalysisResults` struct containing the given function signature analysis results.
pub fn with_function_signatures(
self,
function_signatures: Option<&'a BTreeMap<Tid, FunctionSignature>>,
) -> AnalysisResults<'a> {
AnalysisResults {
function_signatures,
..self
}
}
/// Compute the pointer inference analysis.
/// The result gets returned, but not saved to the `AnalysisResults` struct itself.
pub fn compute_pointer_inference(
&'a self,
config: &serde_json::Value,
print_stats: bool,
) -> PointerInference<'a> {
crate::analysis::pointer_inference::run(
self,
serde_json::from_value(config.clone()).unwrap(),
false,
print_stats,
)
}
/// Create a new `AnalysisResults` struct containing the given pointer inference analysis results.
pub fn with_pointer_inference<'b: 'a>(
self,
pi_results: Option<&'b PointerInference<'a>>,
) -> AnalysisResults<'b> {
AnalysisResults {
pointer_inference: pi_results,
..self
}
}
/// Compute the string abstraction.
/// As the string abstraction depends on the pointer inference, the
/// pointer inference is also computed and put into the `AnalysisResults` struct.
/// The result gets returned, but not saved to the `AnalysisResults` struct itself.
pub fn compute_string_abstraction(
&'a self,
config: &serde_json::Value,
pi_results: Option<&'a PointerInference<'a>>,
) -> StringAbstraction<BricksDomain> {
crate::analysis::string_abstraction::run(
self.project,
self.control_flow_graph,
pi_results.unwrap(),
serde_json::from_value(config.clone()).unwrap(),
)
}
/// Create a new `AnalysisResults` struct containing the given string abstraction results.
pub fn with_string_abstraction<'b: 'a>(
self,
string_abstraction: Option<&'b StringAbstraction<'a, BricksDomain>>,
) -> AnalysisResults<'b> {
AnalysisResults {
string_abstraction,
..self
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::analysis::graph::get_program_cfg;
use std::collections::HashSet;
impl<'a> AnalysisResults<'a> {
/// Mocks the `AnalysisResults` struct with a given project.
/// Note that the function leaks memory!
pub fn mock_from_project(project: &'a Project) -> AnalysisResults<'a> {
let extern_subs =
HashSet::from_iter(project.program.term.extern_symbols.keys().cloned());
let graph = Box::new(get_program_cfg(&project.program, extern_subs));
let graph: &'a Graph = Box::leak(graph);
let binary: &'a Vec<u8> = Box::leak(Box::new(Vec::new()));
let analysis_results = AnalysisResults::new(binary, graph, project);
let (fn_sigs, _) = analysis_results.compute_function_signatures();
let fn_sigs: &'a BTreeMap<_, _> = Box::leak(Box::new(fn_sigs));
let analysis_results = analysis_results.with_function_signatures(Some(fn_sigs));
analysis_results
}
}
}
//! This module contains functions and structs helpful for building a complete analysis pipeline
//! starting from the binary file path.
mod results;
pub use results::AnalysisResults;
use crate::intermediate_representation::{Project, RuntimeMemoryImage};
use crate::prelude::*;
use crate::utils::log::LogMessage;
use crate::utils::{binary::BareMetalConfig, ghidra::get_project_from_ghidra};
use std::path::Path;
/// Disassemble the given binary and parse it to a [`Project`](crate::intermediate_representation::Project) struct.
///
/// If successful, returns the binary file (as a byte vector), the parsed project struct,
/// and a vector of log messages generated during the process.
pub fn disassemble_binary(
binary_file_path: &Path,
bare_metal_config_opt: Option<BareMetalConfig>,
verbose_flag: bool,
) -> Result<(Vec<u8>, Project, Vec<LogMessage>), Error> {
let binary: Vec<u8> =
std::fs::read(binary_file_path).context("Could not read from binary file path {}")?;
let (mut project, mut all_logs) = get_project_from_ghidra(
binary_file_path,
&binary[..],
bare_metal_config_opt.clone(),
verbose_flag,
)?;
// Normalize the project and gather log messages generated from it.
all_logs.append(&mut project.normalize());
// Generate the representation of the runtime memory image of the binary
let mut runtime_memory_image = if let Some(bare_metal_config) = bare_metal_config_opt.as_ref() {
RuntimeMemoryImage::new_from_bare_metal(&binary, bare_metal_config)
.context("Error while generating runtime memory image.")?
} else {
RuntimeMemoryImage::new(&binary).context("Error while generating runtime memory image.")?
};
if project.program.term.address_base_offset != 0 {
// We adjust the memory addresses once globally
// so that other analyses do not have to adjust their addresses.
runtime_memory_image.add_global_memory_offset(project.program.term.address_base_offset);
}
project.runtime_memory_image = runtime_memory_image;
Ok((binary, project, all_logs))
}
use crate::abstract_domain::BricksDomain;
use crate::analysis::function_signature::FunctionSignature;
use crate::analysis::graph::Graph;
use crate::analysis::pointer_inference::PointerInference;
use crate::analysis::string_abstraction::StringAbstraction;
use crate::intermediate_representation::Project;
use crate::prelude::*;
use crate::utils::log::LogMessage;
use std::collections::BTreeMap;
/// A struct containing pointers to all known analysis results
/// that may be needed as input for other analyses and CWE checks.
///
/// The struct only holds references and is `Copy`,
/// so the `with_*` methods return an updated copy instead of mutating in place.
#[derive(Clone, Copy)]
pub struct AnalysisResults<'a> {
    /// The content of the binary file
    pub binary: &'a [u8],
    /// The computed control flow graph of the program.
    pub control_flow_graph: &'a Graph<'a>,
    /// A pointer to the project struct
    pub project: &'a Project,
    /// The results of the function signature analysis if already computed.
    pub function_signatures: Option<&'a BTreeMap<Tid, FunctionSignature>>,
    /// The result of the pointer inference analysis if already computed.
    pub pointer_inference: Option<&'a PointerInference<'a>>,
    /// The result of the string abstraction if already computed.
    pub string_abstraction: Option<&'a StringAbstraction<'a, BricksDomain>>,
}

impl<'a> AnalysisResults<'a> {
    /// Create a new `AnalysisResults` struct with only the project itself known.
    ///
    /// All optional analysis results are initialized to `None`.
    pub fn new(
        binary: &'a [u8],
        control_flow_graph: &'a Graph<'a>,
        project: &'a Project,
    ) -> AnalysisResults<'a> {
        AnalysisResults {
            binary,
            control_flow_graph,
            project,
            function_signatures: None,
            pointer_inference: None,
            string_abstraction: None,
        }
    }

    /// Compute the function signatures for internal functions.
    ///
    /// Returns the computed signatures together with the log messages generated by the analysis.
    /// The result gets returned, but not saved to the `AnalysisResults` struct itself.
    pub fn compute_function_signatures(
        &self,
    ) -> (BTreeMap<Tid, FunctionSignature>, Vec<LogMessage>) {
        crate::analysis::function_signature::compute_function_signatures(
            self.project,
            self.control_flow_graph,
        )
    }

    /// Create a new `AnalysisResults` struct containing the given function signature analysis results.
    pub fn with_function_signatures(
        self,
        function_signatures: Option<&'a BTreeMap<Tid, FunctionSignature>>,
    ) -> AnalysisResults<'a> {
        AnalysisResults {
            function_signatures,
            ..self
        }
    }

    /// Compute the pointer inference analysis.
    /// The result gets returned, but not saved to the `AnalysisResults` struct itself.
    ///
    /// # Panics
    ///
    /// Panics if `config` cannot be deserialized into the configuration
    /// expected by the pointer inference analysis.
    pub fn compute_pointer_inference(
        &'a self,
        config: &serde_json::Value,
        print_stats: bool,
    ) -> PointerInference<'a> {
        crate::analysis::pointer_inference::run(
            self,
            serde_json::from_value(config.clone())
                .expect("Invalid configuration for the pointer inference analysis"),
            false,
            print_stats,
        )
    }

    /// Create a new `AnalysisResults` struct containing the given pointer inference analysis results.
    pub fn with_pointer_inference<'b: 'a>(
        self,
        pi_results: Option<&'b PointerInference<'a>>,
    ) -> AnalysisResults<'b> {
        AnalysisResults {
            pointer_inference: pi_results,
            ..self
        }
    }

    /// Compute the string abstraction.
    ///
    /// The string abstraction depends on the pointer inference.
    /// The pointer inference is *not* computed by this function:
    /// its results have to be provided by the caller through `pi_results`.
    /// The result gets returned, but not saved to the `AnalysisResults` struct itself.
    ///
    /// # Panics
    ///
    /// Panics if `pi_results` is `None` or if `config` cannot be deserialized
    /// into the configuration expected by the string abstraction analysis.
    pub fn compute_string_abstraction(
        &'a self,
        config: &serde_json::Value,
        pi_results: Option<&'a PointerInference<'a>>,
    ) -> StringAbstraction<'a, BricksDomain> {
        crate::analysis::string_abstraction::run(
            self.project,
            self.control_flow_graph,
            pi_results.expect("The string abstraction requires pointer inference results"),
            serde_json::from_value(config.clone())
                .expect("Invalid configuration for the string abstraction analysis"),
        )
    }

    /// Create a new `AnalysisResults` struct containing the given string abstraction results.
    pub fn with_string_abstraction<'b: 'a>(
        self,
        string_abstraction: Option<&'b StringAbstraction<'a, BricksDomain>>,
    ) -> AnalysisResults<'b> {
        AnalysisResults {
            string_abstraction,
            ..self
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::analysis::graph::get_program_cfg;
    use std::collections::HashSet;

    impl<'a> AnalysisResults<'a> {
        /// Mocks the `AnalysisResults` struct with a given project.
        ///
        /// The mock uses an empty binary, the control flow graph computed from the project
        /// and the function signatures computed from that graph.
        /// Note that the function leaks memory!
        /// The graph and the function signatures are leaked to obtain references with lifetime `'a`.
        pub fn mock_from_project(project: &'a Project) -> AnalysisResults<'a> {
            let extern_subs =
                HashSet::from_iter(project.program.term.extern_symbols.keys().cloned());
            let graph: &'a Graph =
                Box::leak(Box::new(get_program_cfg(&project.program, extern_subs)));
            // The mocked binary is empty, so a static empty slice suffices and nothing has to be leaked for it.
            let binary: &'a [u8] = &[];
            let analysis_results = AnalysisResults::new(binary, graph, project);
            let (fn_sigs, _) = analysis_results.compute_function_signatures();
            let fn_sigs: &'a BTreeMap<_, _> = Box::leak(Box::new(fn_sigs));
            analysis_results.with_function_signatures(Some(fn_sigs))
        }
    }
}
//! Utility functions for executing Ghidra and extracting P-Code from the output.
use crate::prelude::*;
use crate::utils::binary::BareMetalConfig;
use crate::utils::{get_ghidra_plugin_path, read_config_file};
use crate::{intermediate_representation::Project, utils::log::LogMessage};
use directories::ProjectDirs;
use nix::{sys::stat, unistd};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::thread;
/// Execute the `p_code_extractor` plugin in Ghidra and parse its output into the `Project` data structure.
///
/// Return an error if the creation of the project failed.
pub fn get_project_from_ghidra(
file_path: &Path,
binary: &[u8],
bare_metal_config_opt: Option<BareMetalConfig>,
verbose_flag: bool,
) -> Result<(Project, Vec<LogMessage>), Error> {
let tmp_folder = get_tmp_folder()?;
// We add a timestamp suffix to file names
// so that if two instances of the cwe_checker are running in parallel on the same file
// they do not interfere with each other.
let timestamp_suffix = format!(
"{:?}",
std::time::SystemTime::now()
.duration_since(std::time::SystemTime::UNIX_EPOCH)
.unwrap()
.as_millis()
);
// Create a unique name for the pipe
let fifo_path = tmp_folder.join(format!("pcode_{}.pipe", timestamp_suffix));
let ghidra_command = generate_ghidra_call_command(
file_path,
&fifo_path,
&timestamp_suffix,
&bare_metal_config_opt,
)?;
let pcode_project = execute_ghidra(ghidra_command, &fifo_path, verbose_flag)?;
parse_pcode_project_to_ir_project(pcode_project, binary, &bare_metal_config_opt)
}
/// Normalize the given P-Code project
/// and then convert it into a project struct of the internally used intermediate representation.
///
/// The base address used for the conversion is chosen as follows:
/// - If a base address can be determined from `binary`, it is used.
/// - Otherwise the base address of the bare metal configuration is used if one is provided.
/// - Otherwise `0` is used and an info log message is generated.
///
/// In the two fallback cases the `address_base_offset` of the resulting project is reset to zero.
/// Returns the project together with the log messages collected along the way.
fn parse_pcode_project_to_ir_project(
    mut pcode_project: crate::pcode::Project,
    binary: &[u8],
    bare_metal_config_opt: &Option<BareMetalConfig>,
) -> Result<(Project, Vec<LogMessage>), Error> {
    let configured_base_address = bare_metal_config_opt
        .as_ref()
        .map(|bare_metal_config| bare_metal_config.parse_binary_base_address());
    let mut collected_logs = pcode_project.normalize();

    let ir_project: Project =
        if let Ok(detected_base_address) = crate::utils::get_binary_base_address(binary) {
            pcode_project.into_ir_project(detected_base_address)
        } else {
            let fallback_base_address = match configured_base_address {
                Some(base_address) => base_address,
                None => {
                    collected_logs.push(LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback."));
                    0
                }
            };
            let mut fallback_project = pcode_project.into_ir_project(fallback_base_address);
            // For PE files setting the address_base_offset to zero is a hack, which worked for the tested PE files.
            // But this hack will probably not work in general!
            fallback_project.program.term.address_base_offset = 0;
            fallback_project
        };

    Ok((ir_project, collected_logs))
}
/// Execute Ghidra with the P-Code plugin and return the parsed P-Code project.
///
/// Note that this function will abort the program is the Ghidra execution does not succeed.
fn execute_ghidra(
mut ghidra_command: Command,
fifo_path: &PathBuf,
verbose_flag: bool,
) -> Result<crate::pcode::Project, Error> {
// Create a new fifo and give read and write rights to the owner
unistd::mkfifo(fifo_path, stat::Mode::from_bits(0o600).unwrap())
.context("Error creating FIFO pipe")?;
// Execute Ghidra in a new thread and return a Join Handle, so that the thread is only joined
// after the output has been read into the cwe_checker
let ghidra_subprocess = thread::spawn(move || {
// Execute the command and catch its output.
let output = match ghidra_command.output() {
Ok(output) => output,
Err(err) => {
eprintln!("Ghidra could not be executed: {}", err);
std::process::exit(101);
}
};
if let Ok(stdout) = String::from_utf8(output.stdout.clone()) {
if stdout.contains("Pcode was successfully extracted!") && output.status.success() {
return;
}
}
if verbose_flag {
eprintln!("{}", String::from_utf8(output.stdout).unwrap());
eprintln!("{}", String::from_utf8(output.stderr).unwrap());
if let Some(code) = output.status.code() {
eprintln!("Ghidra plugin failed with exit code {}", code);
}
eprintln!("Execution of Ghidra plugin failed.");
} else {
eprintln!("Execution of Ghidra plugin failed. Use the --verbose flag to print Ghidra output for troubleshooting.");
}
std::process::exit(101)
});
// Open the FIFO
let file = std::fs::File::open(fifo_path.clone()).expect("Could not open FIFO.");
let pcode_parsing_result = serde_json::from_reader(std::io::BufReader::new(file));
ghidra_subprocess
.join()
.expect("The Ghidra thread to be joined has panicked!");
// Clean up the FIFO pipe and propagate errors from the JSON parsing.
std::fs::remove_file(fifo_path).context("Could not clean up FIFO pipe")?;
Ok(pcode_parsing_result?)
}
/// Generate the command that is used to call Ghidra and execute the P-Code-Extractor plugin in it.
fn generate_ghidra_call_command(
file_path: &Path,
fifo_path: &Path,
timestamp_suffix: &str,
bare_metal_config_opt: &Option<BareMetalConfig>,
) -> Result<Command, Error> {
let ghidra_path: std::path::PathBuf =
serde_json::from_value(read_config_file("ghidra.json")?["ghidra_path"].clone())
.context("Path to Ghidra not configured.")?;
let headless_path = ghidra_path.join("support/analyzeHeadless");
let tmp_folder = get_tmp_folder()?;
let filename = file_path
.file_name()
.ok_or_else(|| anyhow!("Invalid file name"))?
.to_string_lossy()
.to_string();
let ghidra_plugin_path = get_ghidra_plugin_path("p_code_extractor");
let mut ghidra_command = Command::new(headless_path);
ghidra_command
.arg(&tmp_folder) // The folder where temporary files should be stored
.arg(format!("PcodeExtractor_{}_{}", filename, timestamp_suffix)) // The name of the temporary Ghidra Project.
.arg("-import") // Import a file into the Ghidra project
.arg(file_path) // File import path
.arg("-postScript") // Execute a script after standard analysis by Ghidra finished
.arg(ghidra_plugin_path.join("PcodeExtractor.java")) // Path to the PcodeExtractor.java
.arg(fifo_path) // The path to the named pipe (fifo)
.arg("-scriptPath") // Add a folder containing additional script files to the Ghidra script file search paths
.arg(ghidra_plugin_path) // Path to the folder containing the PcodeExtractor.java (so that the other java files can be found.)
.arg("-deleteProject") // Delete the temporary project after the script finished
.arg("-analysisTimeoutPerFile") // Set a timeout for how long the standard analysis can run before getting aborted
.arg("3600"); // Timeout of one hour (=3600 seconds) // TODO: The post-script can detect that the timeout fired and react accordingly.
if let Some(bare_metal_config) = bare_metal_config_opt {
let mut base_address: &str = &bare_metal_config.flash_base_address;
if let Some(stripped_address) = base_address.strip_prefix("0x") {
base_address = stripped_address;
}
ghidra_command
.arg("-loader") // Tell Ghidra to use a specific loader
.arg("BinaryLoader") // Use the BinaryLoader for bare metal binaries
.arg("-loader-baseAddr") // Provide the base address where the binary should be mapped in memory
.arg(base_address)
.arg("-processor") // Provide the processor type ID, for which the binary was compiled.
.arg(bare_metal_config.processor_id.clone());
}
Ok(ghidra_command)
}
/// Get the folder where temporary files should be stored for the program.
///
/// The runtime directory of the `cwe_checker` project directories is used if it is available.
/// Otherwise `/tmp/cwe_checker` is used as a fallback.
/// The folder (including missing parent folders) is created if it does not exist yet.
///
/// # Errors
///
/// Returns an error if the project directories cannot be determined
/// or if the folder cannot be created.
fn get_tmp_folder() -> Result<PathBuf, Error> {
    let project_dirs = ProjectDirs::from("", "", "cwe_checker")
        .context("Could not determine path for temporary files")?;
    let tmp_folder = project_dirs
        .runtime_dir()
        .unwrap_or_else(|| Path::new("/tmp/cwe_checker"));
    // `create_dir_all` succeeds if the folder already exists.
    // This avoids a check-then-create race between instances of the cwe_checker running in parallel.
    std::fs::create_dir_all(tmp_folder).context("Unable to create temporary folder")?;
    Ok(tmp_folder.to_path_buf())
}
......@@ -2,6 +2,7 @@
pub mod arguments;
pub mod binary;
pub mod ghidra;
pub mod graph_utils;
pub mod log;
pub mod symbol_utils;
......@@ -9,14 +10,14 @@ pub mod symbol_utils;
use crate::prelude::*;
/// Get the contents of a configuration file.
pub fn read_config_file(filename: &str) -> serde_json::Value {
pub fn read_config_file(filename: &str) -> Result<serde_json::Value, Error> {
let project_dirs = directories::ProjectDirs::from("", "", "cwe_checker")
.expect("Could not discern location of configuration files.");
.context("Could not discern location of configuration files.")?;
let config_dir = project_dirs.config_dir();
let config_path = config_dir.join(filename);
let config_file =
std::fs::read_to_string(config_path).expect("Could not read configuration file");
serde_json::from_str(&config_file).unwrap()
std::fs::read_to_string(config_path).context("Could not read configuration file")?;
Ok(serde_json::from_str(&config_file)?)
}
/// Get the folder path to a Ghidra plugin bundled with the cwe_checker.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment