Unverified Commit f2048c23 by Enkelmann Committed by GitHub

Add runtime memory image representation (#131)

parent 9a53c27b
use cwe_checker_rs::utils::binary::RuntimeMemoryImage;
use cwe_checker_rs::utils::log::print_all_messages; use cwe_checker_rs::utils::log::print_all_messages;
use cwe_checker_rs::utils::{get_ghidra_plugin_path, read_config_file}; use cwe_checker_rs::utils::{get_ghidra_plugin_path, read_config_file};
use cwe_checker_rs::AnalysisResults; use cwe_checker_rs::AnalysisResults;
...@@ -135,7 +136,18 @@ fn run_with_ghidra(args: CmdlineArgs) { ...@@ -135,7 +136,18 @@ fn run_with_ghidra(args: CmdlineArgs) {
let mut project = get_project_from_ghidra(&binary_file_path, &binary[..], args.quiet); let mut project = get_project_from_ghidra(&binary_file_path, &binary[..], args.quiet);
// Normalize the project and gather log messages generated from it. // Normalize the project and gather log messages generated from it.
let mut all_logs = project.normalize(); let mut all_logs = project.normalize();
let mut analysis_results = AnalysisResults::new(&binary, &project);
// Generate the representation of the runtime memory image of the binary
let mut runtime_memory_image = RuntimeMemoryImage::new(&binary).unwrap_or_else(|err| {
panic!("Error while generating runtime memory image: {}", err);
});
if project.program.term.address_base_offset != 0 {
// We adjust the memory addresses once globally
// so that other analyses do not have to adjust their addresses.
runtime_memory_image.add_global_memory_offset(project.program.term.address_base_offset);
}
let mut analysis_results = AnalysisResults::new(&binary, &runtime_memory_image, &project);
let modules_depending_on_pointer_inference = vec!["CWE243", "CWE367", "CWE476", "Memory"]; let modules_depending_on_pointer_inference = vec!["CWE243", "CWE367", "CWE476", "Memory"];
let pointer_inference_results = if modules let pointer_inference_results = if modules
......
...@@ -9,6 +9,7 @@ extern crate ocaml; ...@@ -9,6 +9,7 @@ extern crate ocaml;
use crate::analysis::pointer_inference::PointerInference; use crate::analysis::pointer_inference::PointerInference;
use crate::intermediate_representation::Project; use crate::intermediate_representation::Project;
use crate::utils::binary::RuntimeMemoryImage;
use crate::utils::log::{CweWarning, LogMessage}; use crate::utils::log::{CweWarning, LogMessage};
pub mod abstract_domain; pub mod abstract_domain;
...@@ -75,6 +76,8 @@ pub fn get_modules() -> Vec<&'static CweModule> { ...@@ -75,6 +76,8 @@ pub fn get_modules() -> Vec<&'static CweModule> {
pub struct AnalysisResults<'a> { pub struct AnalysisResults<'a> {
/// The content of the binary file /// The content of the binary file
pub binary: &'a [u8], pub binary: &'a [u8],
/// A representation of the runtime memory image of the binary.
pub runtime_memory_image: &'a RuntimeMemoryImage,
/// A pointer to the project struct /// A pointer to the project struct
pub project: &'a Project, pub project: &'a Project,
/// The result of the pointer inference analysis if already computed. /// The result of the pointer inference analysis if already computed.
...@@ -83,9 +86,14 @@ pub struct AnalysisResults<'a> { ...@@ -83,9 +86,14 @@ pub struct AnalysisResults<'a> {
impl<'a> AnalysisResults<'a> { impl<'a> AnalysisResults<'a> {
/// Create a new `AnalysisResults` struct with only the project itself known. /// Create a new `AnalysisResults` struct with only the project itself known.
pub fn new(binary: &'a [u8], project: &'a Project) -> AnalysisResults<'a> { pub fn new(
binary: &'a [u8],
runtime_memory_image: &'a RuntimeMemoryImage,
project: &'a Project,
) -> AnalysisResults<'a> {
AnalysisResults { AnalysisResults {
binary, binary,
runtime_memory_image,
project, project,
pointer_inference: None, pointer_inference: None,
} }
......
//! Utility structs and functions which directly parse the binary file.
use crate::abstract_domain::BitvectorDomain;
use crate::abstract_domain::HasByteSize;
use crate::abstract_domain::RegisterDomain;
use crate::intermediate_representation::BinOpType;
use crate::prelude::*;
use goblin::elf;
use goblin::pe;
use goblin::Object;
/// A representation of the runtime image of a binary after being loaded into memory by the loader.
///
/// The image is a list of memory segments together with the byte order
/// that is used to interpret multi-byte values read from these segments.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
pub struct RuntimeMemoryImage {
/// The memory segments contained in the image.
/// For ELF files these are generated from the `PT_LOAD` program headers,
/// for PE files from all sections that are not marked as discardable.
memory_segments: Vec<MemorySegment>,
/// `true` if values in the image are to be interpreted in little-endian byte order,
/// `false` for big-endian byte order.
is_little_endian: bool,
}
/// A continuous segment in the memory image.
///
/// A segment consists of its contents, its base address
/// and the access permission flags read from the binary.
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)]
struct MemorySegment {
/// The contents of the segment.
/// If the segment is larger in memory than in the binary file,
/// the constructors pad the contents with null bytes up to the in-memory size.
pub bytes: Vec<u8>,
/// The base address, i.e. the address of the first byte of the segment
pub base_address: u64,
/// Is the segment readable
pub read_flag: bool,
/// Is the segment writeable
pub write_flag: bool,
/// Is the segment executable
pub execute_flag: bool,
}
impl MemorySegment {
    /// Build a memory segment from an ELF program header.
    ///
    /// The file contents covered by the header are copied into the segment.
    /// If the segment occupies more space in memory than in the file,
    /// the remaining space is filled with null bytes.
    ///
    /// # Panics
    ///
    /// Panics if the file range of the program header is not contained in `binary`.
    pub fn from_elf_segment(binary: &[u8], program_header: &elf::ProgramHeader) -> MemorySegment {
        let size_in_memory = program_header.vm_range().len();
        let mut contents: Vec<u8> = binary[program_header.file_range()].to_vec();
        if contents.len() < size_in_memory {
            // Memory not backed by file contents is zero-initialized.
            contents.resize(size_in_memory, 0u8);
        }
        MemorySegment {
            bytes: contents,
            base_address: program_header.p_vaddr,
            read_flag: program_header.is_read(),
            write_flag: program_header.is_write(),
            execute_flag: program_header.is_executable(),
        }
    }

    /// Build a memory segment from an entry of the section table of a PE file.
    ///
    /// The raw data of the section is copied into the segment.
    /// If the virtual size of the section exceeds the size of its raw data,
    /// the remaining space is filled with null bytes.
    ///
    /// # Panics
    ///
    /// Panics if the raw data range of the section is not contained in `binary`.
    pub fn from_pe_section(
        binary: &[u8],
        section_header: &pe::section_table::SectionTable,
    ) -> MemorySegment {
        let raw_start = section_header.pointer_to_raw_data as usize;
        let raw_end = raw_start + section_header.size_of_raw_data as usize;
        let mut contents: Vec<u8> = binary[raw_start..raw_end].to_vec();
        if section_header.size_of_raw_data < section_header.virtual_size {
            // Memory not backed by raw data is zero-initialized.
            contents.resize(section_header.virtual_size as usize, 0u8);
        }
        // Test whether a bit mask is set in the section characteristics.
        let has_characteristic = |mask: u32| (section_header.characteristics & mask) != 0;
        MemorySegment {
            bytes: contents,
            base_address: section_header.virtual_address as u64,
            read_flag: has_characteristic(0x40000000),
            write_flag: has_characteristic(0x80000000),
            execute_flag: has_characteristic(0x20000000),
        }
    }
}
impl RuntimeMemoryImage {
/// Generate a runtime memory image for a given binary.
///
/// The function can parse ELF and PE files as input.
pub fn new(binary: &[u8]) -> Result<Self, Error> {
let parsed_object = Object::parse(binary)?;
match parsed_object {
Object::Elf(elf_file) => {
let mut memory_segments = Vec::new();
for header in elf_file.program_headers.iter() {
if header.p_type == elf::program_header::PT_LOAD {
memory_segments.push(MemorySegment::from_elf_segment(binary, header));
}
}
if memory_segments.is_empty() {
return Err(anyhow!("No loadable segments found"));
}
Ok(RuntimeMemoryImage {
memory_segments,
is_little_endian: elf_file.header.endianness().unwrap().is_little(),
})
}
Object::PE(pe_file) => {
let mut memory_segments = Vec::new();
for header in pe_file.sections.iter() {
if (header.characteristics & 0x02000000) == 0 {
// Only load segments which are not discardable
memory_segments.push(MemorySegment::from_pe_section(binary, header));
}
}
if memory_segments.is_empty() {
return Err(anyhow!("No loadable segments found"));
}
let mut memory_image = RuntimeMemoryImage {
memory_segments,
is_little_endian: true,
};
memory_image.add_global_memory_offset(pe_file.image_base as u64);
Ok(memory_image)
}
_ => Err(anyhow!("Object type not supported.")),
}
}
/// Return whether values in the memory image should be interpreted in little-endian
/// or big-endian byte order.
pub fn is_little_endian_byte_order(&self) -> bool {
self.is_little_endian
}
/// Add a global offset to the base addresses of all memory segments.
/// Useful to align the addresses with those reported by Ghidra
/// if the Ghidra backend added such an offset to all addresses.
pub fn add_global_memory_offset(&mut self, offset: u64) {
for segment in self.memory_segments.iter_mut() {
segment.base_address += offset;
}
}
/// Read the contents of the memory image at the given address into a `BitvectorDomain`,
/// to emulate a read instruction to global data at runtime.
///
/// The read method is endian-aware,
/// i.e. values are interpreted with the endianness of the CPU architecture.
/// If the address points to a writeable segment, the returned value is a `Top` value,
/// since the data may change during program execution.
///
/// Returns an error if the address is not contained in the global data address range.
pub fn read(&self, address: &Bitvector, size: ByteSize) -> Result<BitvectorDomain, Error> {
let address = address.try_to_u64().unwrap();
for segment in self.memory_segments.iter() {
if address >= segment.base_address
&& address + u64::from(size) <= segment.base_address + segment.bytes.len() as u64
{
if segment.write_flag {
// The segment is writeable, thus we do not know the content at runtime.
return Ok(BitvectorDomain::new_top(size));
}
let index = (address - segment.base_address) as usize;
let mut bytes = segment.bytes[index..index + u64::from(size) as usize].to_vec();
if self.is_little_endian {
bytes = bytes.into_iter().rev().collect();
}
let mut bytes = bytes.into_iter();
let mut bitvector: BitvectorDomain =
Bitvector::from_u8(bytes.next().unwrap()).into();
for byte in bytes {
let new_byte: BitvectorDomain = Bitvector::from_u8(byte).into();
bitvector = bitvector.bin_op(BinOpType::Piece, &new_byte);
}
return Ok(bitvector);
}
}
// No segment fully contains the read.
Err(anyhow!("Address is not a valid global memory address."))
}
/// For an address to global read-only memory, return the memory segment it points to
/// and the index inside the segment, where the address points to.
///
/// Returns an error if the target memory segment is marked as writeable
/// or if the pointer does not point to global memory.
pub fn get_ro_data_pointer_at_address(
&self,
address: &Bitvector,
) -> Result<(&[u8], usize), Error> {
let address = address.try_to_u64().unwrap();
for segment in self.memory_segments.iter() {
if address >= segment.base_address
&& address < segment.base_address + segment.bytes.len() as u64
{
if segment.write_flag {
return Err(anyhow!("Target segment is writeable"));
} else {
return Ok((&segment.bytes, (address - segment.base_address) as usize));
}
}
}
Err(anyhow!("Pointer target not in global memory."))
}
}
#[cfg(test)]
pub mod tests {
    use super::*;

    impl RuntimeMemoryImage {
        /// Create a mock runtime memory image for unit tests.
        ///
        /// The image is little-endian and contains a single read-only segment
        /// holding the bytes `0xb0` to `0xb4` starting at address `0x1000`.
        pub fn mock() -> RuntimeMemoryImage {
            let read_only_segment = MemorySegment {
                bytes: vec![0xb0u8, 0xb1, 0xb2, 0xb3, 0xb4],
                base_address: 0x1000,
                read_flag: true,
                write_flag: false,
                execute_flag: false,
            };
            RuntimeMemoryImage {
                memory_segments: vec![read_only_segment],
                is_little_endian: true,
            }
        }
    }

    /// The same four bytes must yield different values
    /// depending on the byte order of the memory image.
    #[test]
    fn read_endianness() {
        let mut image = RuntimeMemoryImage::mock();
        let read_address = Bitvector::from_u32(0x1001);
        let read_size = ByteSize::new(4);

        let little_endian_value = image.read(&read_address, read_size).unwrap();
        assert_eq!(little_endian_value, Bitvector::from_u32(0xb4b3b2b1).into());

        image.is_little_endian = false;
        let big_endian_value = image.read(&read_address, read_size).unwrap();
        assert_eq!(big_endian_value, Bitvector::from_u32(0xb1b2b3b4).into());
    }

    /// A pointer into the read-only segment must resolve
    /// to the segment contents and the correct index.
    #[test]
    fn ro_data_pointer() {
        let image = RuntimeMemoryImage::mock();
        let pointer = Bitvector::from_u32(0x1002);
        let (segment_bytes, offset) = image.get_ro_data_pointer_at_address(&pointer).unwrap();
        assert_eq!(offset, 2);
        assert_eq!(&segment_bytes[offset..], &[0xb2u8, 0xb3, 0xb4]);
    }
}
pub mod binary;
pub mod graph_utils; pub mod graph_utils;
pub mod log; pub mod log;
pub mod symbol_utils; pub mod symbol_utils;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment