Unverified Commit 9a53c27b by Enkelmann Committed by GitHub

Add address_base_offset field to Program struct (#129)

parent 05c371dc
use cwe_checker_rs::intermediate_representation::Project;
use cwe_checker_rs::utils::log::print_all_messages; use cwe_checker_rs::utils::log::print_all_messages;
use cwe_checker_rs::utils::{get_ghidra_plugin_path, read_config_file}; use cwe_checker_rs::utils::{get_ghidra_plugin_path, read_config_file};
use cwe_checker_rs::AnalysisResults; use cwe_checker_rs::AnalysisResults;
use cwe_checker_rs::{intermediate_representation::Project, utils::log::LogMessage};
use std::collections::HashSet; use std::collections::HashSet;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::process::Command; use std::process::Command;
...@@ -132,7 +132,7 @@ fn run_with_ghidra(args: CmdlineArgs) { ...@@ -132,7 +132,7 @@ fn run_with_ghidra(args: CmdlineArgs) {
binary_file_path.display() binary_file_path.display()
) )
}); });
let mut project = get_project_from_ghidra(&binary_file_path); let mut project = get_project_from_ghidra(&binary_file_path, &binary[..], args.quiet);
// Normalize the project and gather log messages generated from it. // Normalize the project and gather log messages generated from it.
let mut all_logs = project.normalize(); let mut all_logs = project.normalize();
let mut analysis_results = AnalysisResults::new(&binary, &project); let mut analysis_results = AnalysisResults::new(&binary, &project);
...@@ -197,7 +197,7 @@ fn filter_modules_for_partial_run( ...@@ -197,7 +197,7 @@ fn filter_modules_for_partial_run(
} }
/// Execute the `p_code_extractor` plugin in ghidra and parse its output into the `Project` data structure. /// Execute the `p_code_extractor` plugin in ghidra and parse its output into the `Project` data structure.
fn get_project_from_ghidra(file_path: &Path) -> Project { fn get_project_from_ghidra(file_path: &Path, binary: &[u8], quiet_flag: bool) -> Project {
let ghidra_path: std::path::PathBuf = let ghidra_path: std::path::PathBuf =
serde_json::from_value(read_config_file("ghidra.json")["ghidra_path"].clone()) serde_json::from_value(read_config_file("ghidra.json")["ghidra_path"].clone())
.expect("Path to Ghidra not configured."); .expect("Path to Ghidra not configured.");
...@@ -263,7 +263,20 @@ fn get_project_from_ghidra(file_path: &Path) -> Project { ...@@ -263,7 +263,20 @@ fn get_project_from_ghidra(file_path: &Path) -> Project {
let mut project_pcode: cwe_checker_rs::pcode::Project = let mut project_pcode: cwe_checker_rs::pcode::Project =
serde_json::from_reader(std::io::BufReader::new(file)).unwrap(); serde_json::from_reader(std::io::BufReader::new(file)).unwrap();
project_pcode.normalize(); project_pcode.normalize();
let project: Project = project_pcode.into(); let project: Project = match cwe_checker_rs::utils::get_binary_base_address(binary) {
Ok(binary_base_address) => project_pcode.into_ir_project(binary_base_address),
Err(_err) => {
if !quiet_flag {
let log = LogMessage::new_info("Could not determine binary base address. Using base address of Ghidra output as fallback.");
println!("{}", log);
}
let mut project = project_pcode.into_ir_project(0);
// Setting the address_base_offset to zero is a hack, which worked for the tested PE files.
// But this hack will probably not work in general!
project.program.term.address_base_offset = 0;
project
}
};
// delete the temporary file again. // delete the temporary file again.
std::fs::remove_file(output_path).unwrap(); std::fs::remove_file(output_path).unwrap();
project project
......
...@@ -126,6 +126,7 @@ fn mock_program() -> Term<Program> { ...@@ -126,6 +126,7 @@ fn mock_program() -> Term<Program> {
subs: vec![sub1, sub2], subs: vec![sub1, sub2],
extern_symbols: Vec::new(), extern_symbols: Vec::new(),
entry_points: Vec::new(), entry_points: Vec::new(),
address_base_offset: 0,
}, },
}; };
program program
......
...@@ -500,6 +500,7 @@ mod tests { ...@@ -500,6 +500,7 @@ mod tests {
subs: vec![sub1, sub2], subs: vec![sub1, sub2],
extern_symbols: Vec::new(), extern_symbols: Vec::new(),
entry_points: Vec::new(), entry_points: Vec::new(),
address_base_offset: 0,
}, },
}; };
program program
......
...@@ -76,6 +76,7 @@ fn mock_project() -> (Project, Config) { ...@@ -76,6 +76,7 @@ fn mock_project() -> (Project, Config) {
mock_extern_symbol("other"), mock_extern_symbol("other"),
], ],
entry_points: Vec::new(), entry_points: Vec::new(),
address_base_offset: 0,
}; };
let program_term = Term { let program_term = Term {
tid: Tid::new("program"), tid: Tid::new("program"),
......
...@@ -307,6 +307,14 @@ pub struct Program { ...@@ -307,6 +307,14 @@ pub struct Program {
/// Entry points into the binary, /// Entry points into the binary,
/// i.e. the term identifiers of functions that may be called from outside of the binary. /// i.e. the term identifiers of functions that may be called from outside of the binary.
pub entry_points: Vec<Tid>, pub entry_points: Vec<Tid>,
/// An offset that has been added to all addresses in the program compared to the addresses
/// as specified in the binary file.
///
/// In certain cases, e.g. if the binary specifies a segment to be loaded at address 0,
/// the Ghidra backend may shift the whole binary image by a constant value in memory.
/// Thus addresses as specified by the binary and addresses as reported by Ghidra may differ by a constant offset,
/// which is stored in this value.
pub address_base_offset: u64,
} }
impl Program { impl Program {
...@@ -503,6 +511,7 @@ mod tests { ...@@ -503,6 +511,7 @@ mod tests {
subs: Vec::new(), subs: Vec::new(),
extern_symbols: Vec::new(), extern_symbols: Vec::new(),
entry_points: Vec::new(), entry_points: Vec::new(),
address_base_offset: 0,
} }
} }
} }
......
...@@ -354,12 +354,19 @@ pub struct Program { ...@@ -354,12 +354,19 @@ pub struct Program {
pub subs: Vec<Term<Sub>>, pub subs: Vec<Term<Sub>>,
pub extern_symbols: Vec<ExternSymbol>, pub extern_symbols: Vec<ExternSymbol>,
pub entry_points: Vec<Tid>, pub entry_points: Vec<Tid>,
pub image_base: String,
} }
impl From<Program> for IrProgram { impl Program {
/// Convert a program parsed from Ghidra to the internally used IR. /// Convert a program parsed from Ghidra to the internally used IR.
fn from(program: Program) -> IrProgram { ///
let subs = program /// The `binary_base_address` denotes the base address of the memory image of the binary
/// according to the program headers of the binary.
/// It is needed to detect whether Ghidra added a constant offset to all addresses of the memory image.
/// E.g. if the `binary_base_address` is 0 for shared object files,
/// Ghidra adds an offset so that the memory image does not actually start at address 0.
pub fn into_ir_program(self, binary_base_address: u64) -> IrProgram {
let subs = self
.subs .subs
.into_iter() .into_iter()
.map(|sub_term| Term { .map(|sub_term| Term {
...@@ -367,14 +374,18 @@ impl From<Program> for IrProgram { ...@@ -367,14 +374,18 @@ impl From<Program> for IrProgram {
term: sub_term.term.into(), term: sub_term.term.into(),
}) })
.collect(); .collect();
let extern_symbols = self
.extern_symbols
.into_iter()
.map(|symbol| symbol.into())
.collect();
let address_base_offset =
u64::from_str_radix(&self.image_base, 16).unwrap() - binary_base_address;
IrProgram { IrProgram {
subs, subs,
extern_symbols: program extern_symbols,
.extern_symbols entry_points: self.entry_points,
.into_iter() address_base_offset,
.map(|symbol| symbol.into())
.collect(),
entry_points: program.entry_points,
} }
} }
} }
...@@ -409,14 +420,17 @@ pub struct Project { ...@@ -409,14 +420,17 @@ pub struct Project {
pub register_calling_convention: Vec<CallingConvention>, pub register_calling_convention: Vec<CallingConvention>,
} }
impl From<Project> for IrProject { impl Project {
/// Convert a project parsed from Ghidra to the internally used IR. /// Convert a project parsed from Ghidra to the internally used IR.
fn from(project: Project) -> IrProject { ///
/// The `binary_base_address` denotes the base address of the memory image of the binary
/// according to the program headers of the binary.
pub fn into_ir_project(self, binary_base_address: u64) -> IrProject {
let mut program: Term<IrProgram> = Term { let mut program: Term<IrProgram> = Term {
tid: project.program.tid, tid: self.program.tid,
term: project.program.term.into(), term: self.program.term.into_ir_program(binary_base_address),
}; };
let register_map: HashMap<&String, &RegisterProperties> = project let register_map: HashMap<&String, &RegisterProperties> = self
.register_properties .register_properties
.iter() .iter()
.map(|p| (&p.register, p)) .map(|p| (&p.register, p))
...@@ -511,9 +525,9 @@ impl From<Project> for IrProject { ...@@ -511,9 +525,9 @@ impl From<Project> for IrProject {
} }
IrProject { IrProject {
program, program,
cpu_architecture: project.cpu_architecture, cpu_architecture: self.cpu_architecture,
stack_pointer_register: project.stack_pointer_register.into(), stack_pointer_register: self.stack_pointer_register.into(),
calling_conventions: project calling_conventions: self
.register_calling_convention .register_calling_convention
.into_iter() .into_iter()
.map(|cconv| cconv.into()) .map(|cconv| cconv.into())
......
...@@ -28,7 +28,8 @@ impl Setup { ...@@ -28,7 +28,8 @@ impl Setup {
"term": { "term": {
"subs": [], "subs": [],
"extern_symbols": [], "extern_symbols": [],
"entry_points":[] "entry_points":[],
"image_base": "10000"
} }
}, },
"stack_pointer_register": { "stack_pointer_register": {
...@@ -560,20 +561,21 @@ fn program_deserialization() { ...@@ -560,20 +561,21 @@ fn program_deserialization() {
"term": { "term": {
"subs": [], "subs": [],
"extern_symbols": [], "extern_symbols": [],
"entry_points":[] "entry_points":[],
"image_base": "10000"
} }
} }
"#, "#,
) )
.unwrap(); .unwrap();
let _: IrProgram = program_term.term.into(); let _: IrProgram = program_term.term.into_ir_program(10000);
} }
#[test] #[test]
fn project_deserialization() { fn project_deserialization() {
let setup = Setup::new(); let setup = Setup::new();
let project: Project = setup.project.clone(); let project: Project = setup.project.clone();
let _: IrProject = project.into(); let _: IrProject = project.into_ir_project(10000);
} }
#[test] #[test]
...@@ -660,7 +662,7 @@ fn from_project_to_ir_project() { ...@@ -660,7 +662,7 @@ fn from_project_to_ir_project() {
sub.term.blocks.push(blk); sub.term.blocks.push(blk);
mock_project.program.term.subs.push(sub.clone()); mock_project.program.term.subs.push(sub.clone());
let ir_program = IrProject::from(mock_project).program.term; let ir_program = mock_project.into_ir_project(10000).program.term;
let ir_rdi_var = IrVariable { let ir_rdi_var = IrVariable {
name: String::from("RDI"), name: String::from("RDI"),
size: ByteSize::new(8), size: ByteSize::new(8),
......
...@@ -298,6 +298,7 @@ impl From<Program> for IrProgram { ...@@ -298,6 +298,7 @@ impl From<Program> for IrProgram {
.map(|symbol| symbol.into()) .map(|symbol| symbol.into())
.collect(), .collect(),
entry_points: program.entry_points, entry_points: program.entry_points,
address_base_offset: 0,
} }
} }
} }
......
...@@ -2,6 +2,8 @@ pub mod graph_utils; ...@@ -2,6 +2,8 @@ pub mod graph_utils;
pub mod log; pub mod log;
pub mod symbol_utils; pub mod symbol_utils;
use crate::prelude::*;
/// Get the contents of a configuration file. /// Get the contents of a configuration file.
pub fn read_config_file(filename: &str) -> serde_json::Value { pub fn read_config_file(filename: &str) -> serde_json::Value {
let project_dirs = directories::ProjectDirs::from("", "", "cwe_checker") let project_dirs = directories::ProjectDirs::from("", "", "cwe_checker")
...@@ -20,3 +22,22 @@ pub fn get_ghidra_plugin_path(plugin_name: &str) -> std::path::PathBuf { ...@@ -20,3 +22,22 @@ pub fn get_ghidra_plugin_path(plugin_name: &str) -> std::path::PathBuf {
let data_dir = project_dirs.data_dir(); let data_dir = project_dirs.data_dir();
data_dir.join("ghidra").join(plugin_name) data_dir.join("ghidra").join(plugin_name)
} }
/// Get the base address for the image of a binary when loaded into memory.
///
/// Only ELF files are supported at the moment.
/// For these, the base address is the virtual start address (`p_vaddr`)
/// of the first `PT_LOAD` program header with a non-zero memory size.
///
/// # Errors
///
/// Returns an error if
/// - the binary cannot be parsed,
/// - the binary is not an ELF file,
/// - or the ELF file contains no non-empty loadable segment.
pub fn get_binary_base_address(binary: &[u8]) -> Result<u64, Error> {
    use goblin::Object;
    match Object::parse(binary)? {
        Object::Elf(elf_file) => elf_file
            .program_headers
            .iter()
            // The loadable segments have to occur in order in the program header table.
            // So the start address of the first loadable segment is the base offset of the binary.
            .find(|header| {
                header.p_type == goblin::elf::program_header::PT_LOAD && header.p_memsz != 0
            })
            // Read the 64-bit `p_vaddr` field directly instead of `vm_range().start`,
            // since the latter is a `usize` and would truncate addresses of 64-bit binaries
            // when the analysis itself runs on a 32-bit host.
            .map(|header| header.p_vaddr)
            .ok_or_else(|| anyhow!("No loadable segment bounds found.")),
        _ => Err(anyhow!("Binary type not yet supported")),
    }
}
...@@ -29,7 +29,8 @@ public class TermCreator { ...@@ -29,7 +29,8 @@ public class TermCreator {
*/ */
public static Term<Program> createProgramTerm() { public static Term<Program> createProgramTerm() {
Tid progTid = new Tid(String.format("prog_%s", HelperFunctions.ghidraProgram.getMinAddress().toString()), HelperFunctions.ghidraProgram.getMinAddress().toString()); Tid progTid = new Tid(String.format("prog_%s", HelperFunctions.ghidraProgram.getMinAddress().toString()), HelperFunctions.ghidraProgram.getMinAddress().toString());
return new Term<Program>(progTid, new Program(new ArrayList<Term<Sub>>(), HelperFunctions.addEntryPoints(symTab))); String imageBase = HelperFunctions.ghidraProgram.getImageBase().toString();
return new Term<Program>(progTid, new Program(new ArrayList<Term<Sub>>(), HelperFunctions.addEntryPoints(symTab), imageBase));
} }
......
...@@ -14,6 +14,8 @@ public class Program { ...@@ -14,6 +14,8 @@ public class Program {
private ArrayList<ExternSymbol> externSymbols; private ArrayList<ExternSymbol> externSymbols;
@SerializedName("entry_points") @SerializedName("entry_points")
private ArrayList<Tid> entryPoints; private ArrayList<Tid> entryPoints;
@SerializedName("image_base")
private String imageBase;
public Program() { public Program() {
} }
...@@ -22,9 +24,10 @@ public class Program { ...@@ -22,9 +24,10 @@ public class Program {
this.setSubs(subs); this.setSubs(subs);
} }
public Program(ArrayList<Term<Sub>> subs, ArrayList<Tid> entryPoints) { public Program(ArrayList<Term<Sub>> subs, ArrayList<Tid> entryPoints, String imageBase) {
this.setSubs(subs); this.setSubs(subs);
this.setEntryPoints(entryPoints); this.setEntryPoints(entryPoints);
this.setImageBase(imageBase);
} }
...@@ -55,4 +58,12 @@ public class Program { ...@@ -55,4 +58,12 @@ public class Program {
public void setEntryPoints(ArrayList<Tid> entryPoints) { public void setEntryPoints(ArrayList<Tid> entryPoints) {
this.entryPoints = entryPoints; this.entryPoints = entryPoints;
} }
/**
 * Returns the image base of the program as a string.
 *
 * @return the value that is serialized under the JSON key "image_base"
 */
public String getImageBase() {
return imageBase;
}
/**
 * Sets the image base of the program.
 *
 * @param imageBase the image base as a string; it is serialized under the JSON key "image_base"
 */
public void setImageBase(String imageBase) {
this.imageBase = imageBase;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment