Unverified Commit 9a53c27b by Enkelmann Committed by GitHub

Add address_base_offset field to Program struct (#129)

parent 05c371dc
use cwe_checker_rs::intermediate_representation::Project;
use cwe_checker_rs::utils::log::print_all_messages;
use cwe_checker_rs::utils::{get_ghidra_plugin_path, read_config_file};
use cwe_checker_rs::AnalysisResults;
use cwe_checker_rs::{intermediate_representation::Project, utils::log::LogMessage};
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::process::Command;
......@@ -132,7 +132,7 @@ fn run_with_ghidra(args: CmdlineArgs) {
binary_file_path.display()
)
});
let mut project = get_project_from_ghidra(&binary_file_path);
let mut project = get_project_from_ghidra(&binary_file_path, &binary[..], args.quiet);
// Normalize the project and gather log messages generated from it.
let mut all_logs = project.normalize();
let mut analysis_results = AnalysisResults::new(&binary, &project);
......@@ -197,7 +197,7 @@ fn filter_modules_for_partial_run(
}
/// Execute the `p_code_extractor` plugin in ghidra and parse its output into the `Project` data structure.
fn get_project_from_ghidra(file_path: &Path) -> Project {
fn get_project_from_ghidra(file_path: &Path, binary: &[u8], quiet_flag: bool) -> Project {
let ghidra_path: std::path::PathBuf =
serde_json::from_value(read_config_file("ghidra.json")["ghidra_path"].clone())
.expect("Path to Ghidra not configured.");
......@@ -263,7 +263,20 @@ fn get_project_from_ghidra(file_path: &Path) -> Project {
let mut project_pcode: cwe_checker_rs::pcode::Project =
serde_json::from_reader(std::io::BufReader::new(file)).unwrap();
project_pcode.normalize();
let project: Project = project_pcode.into();
let project: Project = match cwe_checker_rs::utils::get_binary_base_address(binary) {
Ok(binary_base_address) => project_pcode.into_ir_project(binary_base_address),
Err(_err) => {
if !quiet_flag {
let log = LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback.");
println!("{}", log);
}
let mut project = project_pcode.into_ir_project(0);
// Setting the address_base_offset to zero is a hack, which worked for the tested PE files.
// But this hack will probably not work in general!
project.program.term.address_base_offset = 0;
project
}
};
// delete the temporary file again.
std::fs::remove_file(output_path).unwrap();
project
......
......@@ -126,6 +126,7 @@ fn mock_program() -> Term<Program> {
subs: vec![sub1, sub2],
extern_symbols: Vec::new(),
entry_points: Vec::new(),
address_base_offset: 0,
},
};
program
......
......@@ -500,6 +500,7 @@ mod tests {
subs: vec![sub1, sub2],
extern_symbols: Vec::new(),
entry_points: Vec::new(),
address_base_offset: 0,
},
};
program
......
......@@ -76,6 +76,7 @@ fn mock_project() -> (Project, Config) {
mock_extern_symbol("other"),
],
entry_points: Vec::new(),
address_base_offset: 0,
};
let program_term = Term {
tid: Tid::new("program"),
......
......@@ -307,6 +307,14 @@ pub struct Program {
/// Entry points into the binary,
/// i.e. the term identifiers of functions that may be called from outside of the binary.
pub entry_points: Vec<Tid>,
/// An offset that has been added to all addresses in the program compared to the addresses
/// as specified in the binary file.
///
/// In certain cases, e.g. if the binary specifies a segment to be loaded at address 0,
/// the Ghidra backend may shift the whole binary image by a constant value in memory.
/// Thus addresses as specified by the binary and addresses as reported by Ghidra may differ by a constant offset,
/// which is stored in this value.
pub address_base_offset: u64,
}
impl Program {
......@@ -503,6 +511,7 @@ mod tests {
subs: Vec::new(),
extern_symbols: Vec::new(),
entry_points: Vec::new(),
address_base_offset: 0,
}
}
}
......
......@@ -354,12 +354,19 @@ pub struct Program {
pub subs: Vec<Term<Sub>>,
pub extern_symbols: Vec<ExternSymbol>,
pub entry_points: Vec<Tid>,
pub image_base: String,
}
impl From<Program> for IrProgram {
impl Program {
/// Convert a program parsed from Ghidra to the internally used IR.
fn from(program: Program) -> IrProgram {
let subs = program
///
/// The `binary_base_address` denotes the base address of the memory image of the binary
/// according to the program headers of the binary.
/// It is needed to detect whether Ghidra added a constant offset to all addresses of the memory image.
/// E.g. if the `binary_base_address` is 0 for shared object files,
/// Ghidra adds an offset so that the memory image does not actually start at address 0.
pub fn into_ir_program(self, binary_base_address: u64) -> IrProgram {
let subs = self
.subs
.into_iter()
.map(|sub_term| Term {
......@@ -367,14 +374,18 @@ impl From<Program> for IrProgram {
term: sub_term.term.into(),
})
.collect();
IrProgram {
subs,
extern_symbols: program
let extern_symbols = self
.extern_symbols
.into_iter()
.map(|symbol| symbol.into())
.collect(),
entry_points: program.entry_points,
.collect();
let address_base_offset =
u64::from_str_radix(&self.image_base, 16).unwrap() - binary_base_address;
IrProgram {
subs,
extern_symbols,
entry_points: self.entry_points,
address_base_offset,
}
}
}
......@@ -409,14 +420,17 @@ pub struct Project {
pub register_calling_convention: Vec<CallingConvention>,
}
impl From<Project> for IrProject {
impl Project {
/// Convert a project parsed from Ghidra to the internally used IR.
fn from(project: Project) -> IrProject {
///
/// The `binary_base_address` denotes the base address of the memory image of the binary
/// according to the program headers of the binary.
pub fn into_ir_project(self, binary_base_address: u64) -> IrProject {
let mut program: Term<IrProgram> = Term {
tid: project.program.tid,
term: project.program.term.into(),
tid: self.program.tid,
term: self.program.term.into_ir_program(binary_base_address),
};
let register_map: HashMap<&String, &RegisterProperties> = project
let register_map: HashMap<&String, &RegisterProperties> = self
.register_properties
.iter()
.map(|p| (&p.register, p))
......@@ -511,9 +525,9 @@ impl From<Project> for IrProject {
}
IrProject {
program,
cpu_architecture: project.cpu_architecture,
stack_pointer_register: project.stack_pointer_register.into(),
calling_conventions: project
cpu_architecture: self.cpu_architecture,
stack_pointer_register: self.stack_pointer_register.into(),
calling_conventions: self
.register_calling_convention
.into_iter()
.map(|cconv| cconv.into())
......
......@@ -28,7 +28,8 @@ impl Setup {
"term": {
"subs": [],
"extern_symbols": [],
"entry_points":[]
"entry_points":[],
"image_base": "10000"
}
},
"stack_pointer_register": {
......@@ -560,20 +561,21 @@ fn program_deserialization() {
"term": {
"subs": [],
"extern_symbols": [],
"entry_points":[]
"entry_points":[],
"image_base": "10000"
}
}
"#,
)
.unwrap();
let _: IrProgram = program_term.term.into();
let _: IrProgram = program_term.term.into_ir_program(10000);
}
#[test]
fn project_deserialization() {
let setup = Setup::new();
let project: Project = setup.project.clone();
let _: IrProject = project.into();
let _: IrProject = project.into_ir_project(10000);
}
#[test]
......@@ -660,7 +662,7 @@ fn from_project_to_ir_project() {
sub.term.blocks.push(blk);
mock_project.program.term.subs.push(sub.clone());
let ir_program = IrProject::from(mock_project).program.term;
let ir_program = mock_project.into_ir_project(10000).program.term;
let ir_rdi_var = IrVariable {
name: String::from("RDI"),
size: ByteSize::new(8),
......
......@@ -298,6 +298,7 @@ impl From<Program> for IrProgram {
.map(|symbol| symbol.into())
.collect(),
entry_points: program.entry_points,
address_base_offset: 0,
}
}
}
......
......@@ -2,6 +2,8 @@ pub mod graph_utils;
pub mod log;
pub mod symbol_utils;
use crate::prelude::*;
/// Get the contents of a configuration file.
pub fn read_config_file(filename: &str) -> serde_json::Value {
let project_dirs = directories::ProjectDirs::from("", "", "cwe_checker")
......@@ -20,3 +22,22 @@ pub fn get_ghidra_plugin_path(plugin_name: &str) -> std::path::PathBuf {
let data_dir = project_dirs.data_dir();
data_dir.join("ghidra").join(plugin_name)
}
/// Get the base address for the image of a binary when loaded into memory.
///
/// Only ELF binaries are supported at the moment.
/// The base address is the virtual address (`p_vaddr`) of the first `PT_LOAD` program header
/// that occupies a non-zero amount of memory.
///
/// # Errors
///
/// Returns an error if the binary cannot be parsed,
/// if the binary is not an ELF file,
/// or if the ELF file contains no loadable segment of non-zero size.
pub fn get_binary_base_address(binary: &[u8]) -> Result<u64, Error> {
    use goblin::elf::program_header::PT_LOAD;
    use goblin::Object;

    match Object::parse(binary)? {
        Object::Elf(elf_file) => elf_file
            .program_headers
            .iter()
            // The loadable segments have to occur in order in the program header table.
            // So the start address of the first loadable segment is the base offset of the binary.
            .find(|header| header.p_type == PT_LOAD && header.p_memsz != 0)
            // Read the 64-bit header field directly instead of going through `vm_range()`:
            // that range is expressed in `usize`, so casting it back to `u64` would be lossy
            // on hosts where `usize` is narrower than 64 bits.
            .map(|header| header.p_vaddr)
            .ok_or_else(|| anyhow!("No loadable segment bounds found.")),
        _ => Err(anyhow!("Binary type not yet supported")),
    }
}
......@@ -29,7 +29,8 @@ public class TermCreator {
*/
public static Term<Program> createProgramTerm() {
Tid progTid = new Tid(String.format("prog_%s", HelperFunctions.ghidraProgram.getMinAddress().toString()), HelperFunctions.ghidraProgram.getMinAddress().toString());
return new Term<Program>(progTid, new Program(new ArrayList<Term<Sub>>(), HelperFunctions.addEntryPoints(symTab)));
String imageBase = HelperFunctions.ghidraProgram.getImageBase().toString();
return new Term<Program>(progTid, new Program(new ArrayList<Term<Sub>>(), HelperFunctions.addEntryPoints(symTab), imageBase));
}
......
......@@ -14,6 +14,8 @@ public class Program {
private ArrayList<ExternSymbol> externSymbols;
@SerializedName("entry_points")
private ArrayList<Tid> entryPoints;
@SerializedName("image_base")
private String imageBase;
public Program() {
}
......@@ -22,9 +24,10 @@ public class Program {
this.setSubs(subs);
}
public Program(ArrayList<Term<Sub>> subs, ArrayList<Tid> entryPoints) {
public Program(ArrayList<Term<Sub>> subs, ArrayList<Tid> entryPoints, String imageBase) {
this.setSubs(subs);
this.setEntryPoints(entryPoints);
this.setImageBase(imageBase);
}
......@@ -55,4 +58,12 @@ public class Program {
/**
 * Replaces the list of entry points of this program.
 *
 * @param entryPoints the term identifiers to store as entry points
 */
public void setEntryPoints(ArrayList<Tid> entryPoints) {
this.entryPoints = entryPoints;
}
/**
 * Returns the image base of this program in its string form,
 * i.e. the value that is serialized under the key {@code image_base}.
 *
 * @return the stored image base string
 */
public String getImageBase() {
    return this.imageBase;
}
/**
 * Stores the image base of this program in its string form,
 * i.e. the value that is serialized under the key {@code image_base}.
 *
 * @param imageBase the image base string to store
 */
public void setImageBase(String imageBase) {
this.imageBase = imageBase;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment