Unverified Commit 6b93b698 by Enkelmann Committed by GitHub

retarget nonexisting jump targets to dummy target (#103)

parent af4219f4
...@@ -135,7 +135,9 @@ fn run_with_ghidra(args: CmdlineArgs) { ...@@ -135,7 +135,9 @@ fn run_with_ghidra(args: CmdlineArgs) {
filter_modules_for_partial_run(&mut modules, partial_module_list); filter_modules_for_partial_run(&mut modules, partial_module_list);
} }
let project = get_project_from_ghidra(&Path::new(&args.binary.unwrap())); let mut project = get_project_from_ghidra(&Path::new(&args.binary.unwrap()));
// Normalize the project and gather log messages generated from it.
let mut all_logs = project.normalize();
// Print debug and then return. // Print debug and then return.
// Right now there is only one debug printing function. // Right now there is only one debug printing function.
...@@ -150,7 +152,6 @@ fn run_with_ghidra(args: CmdlineArgs) { ...@@ -150,7 +152,6 @@ fn run_with_ghidra(args: CmdlineArgs) {
} }
// Execute the modules and collect their logs and CWE-warnings. // Execute the modules and collect their logs and CWE-warnings.
let mut all_logs = Vec::new();
let mut all_cwes = Vec::new(); let mut all_cwes = Vec::new();
for module in modules { for module in modules {
let (mut logs, mut cwes) = (module.run)(&project, &config[&module.name]); let (mut logs, mut cwes) = (module.run)(&project, &config[&module.name]);
......
...@@ -12,13 +12,19 @@ fn run_pointer_inference(program_jsonbuilder_val: ocaml::Value) -> (Vec<CweWarni ...@@ -12,13 +12,19 @@ fn run_pointer_inference(program_jsonbuilder_val: ocaml::Value) -> (Vec<CweWarni
serde_json::from_value(program_json).expect("Project deserialization failed"); serde_json::from_value(program_json).expect("Project deserialization failed");
project.replace_let_bindings(); project.replace_let_bindings();
let mut project: crate::intermediate_representation::Project = project.into();
let mut all_logs = project.normalize();
let config: crate::analysis::pointer_inference::Config = let config: crate::analysis::pointer_inference::Config =
serde_json::from_value(crate::utils::read_config_file("config.json")["Memory"].clone()) serde_json::from_value(crate::utils::read_config_file("config.json")["Memory"].clone())
.unwrap(); .unwrap();
let (logs, cwes) = crate::analysis::pointer_inference::run(&project.into(), config, false); let (mut logs, cwes) = crate::analysis::pointer_inference::run(&project, config, false);
all_logs.append(&mut logs);
( (
cwes, cwes,
logs.into_iter().map(|log| format! {"{}", log}).collect(), all_logs
.into_iter()
.map(|log| format! {"{}", log})
.collect(),
) )
} }
...@@ -38,10 +44,12 @@ fn run_pointer_inference_and_print_debug(program_jsonbuilder_val: ocaml::Value) ...@@ -38,10 +44,12 @@ fn run_pointer_inference_and_print_debug(program_jsonbuilder_val: ocaml::Value)
serde_json::from_value(program_json).expect("Project deserialization failed"); serde_json::from_value(program_json).expect("Project deserialization failed");
project.replace_let_bindings(); project.replace_let_bindings();
let mut project: crate::intermediate_representation::Project = project.into();
let _ = project.normalize();
let config: crate::analysis::pointer_inference::Config = let config: crate::analysis::pointer_inference::Config =
serde_json::from_value(crate::utils::read_config_file("config.json")["Memory"].clone()) serde_json::from_value(crate::utils::read_config_file("config.json")["Memory"].clone())
.unwrap(); .unwrap();
crate::analysis::pointer_inference::run(&project.into(), config, true); // Note: This discard all CweWarnings and log messages. crate::analysis::pointer_inference::run(&project, config, true); // Note: This discard all CweWarnings and log messages.
} }
caml!(rs_run_pointer_inference_and_print_debug(program_jsonbuilder_val) { caml!(rs_run_pointer_inference_and_print_debug(program_jsonbuilder_val) {
......
use super::{ByteSize, Expression, Variable}; use super::{ByteSize, Expression, Variable};
use crate::prelude::*; use crate::prelude::*;
use crate::utils::log::LogMessage;
use std::collections::HashSet;
/// A term identifier consisting of an ID string (which is required to be unique) /// A term identifier consisting of an ID string (which is required to be unique)
/// and an address to indicate where the term is located. /// and an address to indicate where the term is located.
...@@ -115,6 +117,54 @@ pub enum Jmp { ...@@ -115,6 +117,54 @@ pub enum Jmp {
}, },
} }
impl Term<Jmp> {
/// If the TID of a jump target or return target is not contained in `known_tids`
/// replace it with a dummy TID and return an error message.
fn retarget_nonexisting_jump_targets_to_dummy_tid(
&mut self,
known_tids: &HashSet<Tid>,
dummy_sub_tid: &Tid,
dummy_blk_tid: &Tid,
) -> Result<(), LogMessage> {
use Jmp::*;
match &mut self.term {
BranchInd(_) => (),
Branch(tid) | CBranch { target: tid, .. } if known_tids.get(tid).is_none() => {
let error_msg = format!("Jump target at {} does not exist", tid.address);
let error_log = LogMessage::new_error(error_msg).location(self.tid.clone());
*tid = dummy_blk_tid.clone();
return Err(error_log);
}
Call { target, return_ } if known_tids.get(target).is_none() => {
let error_msg = format!("Call target at {} does not exist", target.address);
let error_log = LogMessage::new_error(error_msg).location(self.tid.clone());
*target = dummy_sub_tid.clone();
*return_ = None;
return Err(error_log);
}
Call {
return_: Some(return_tid),
..
}
| CallInd {
return_: Some(return_tid),
..
}
| CallOther {
return_: Some(return_tid),
..
} if known_tids.get(return_tid).is_none() => {
let error_msg = format!("Return target at {} does not exist", return_tid.address);
let error_log = LogMessage::new_error(error_msg).location(self.tid.clone());
*return_tid = dummy_blk_tid.clone();
return Err(error_log);
}
_ => (),
}
Ok(())
}
}
/// A basic block is a sequence of `Def` instructions followed by up to two `Jmp` instructions. /// A basic block is a sequence of `Def` instructions followed by up to two `Jmp` instructions.
/// ///
/// The `Def` instructions represent side-effectful operations that are executed in order when the block is entered. /// The `Def` instructions represent side-effectful operations that are executed in order when the block is entered.
...@@ -313,10 +363,92 @@ impl Project { ...@@ -313,10 +363,92 @@ impl Project {
} }
} }
/// Replace jumps to nonexisting TIDs with jumps to a dummy target
/// representing an artificial sink in the control flow graph.
/// Return a log message for each replaced jump target.
///
/// Nonexisting jump targets may be generated by the Ghidra backend
/// if the data at the target address is not a valid assembly instruction.
#[must_use]
fn remove_references_to_nonexisting_tids(&mut self) -> Vec<LogMessage> {
// Gather all existing jump targets
let mut jump_target_tids = HashSet::new();
for sub in self.program.term.subs.iter() {
jump_target_tids.insert(sub.tid.clone());
for block in sub.term.blocks.iter() {
jump_target_tids.insert(block.tid.clone());
}
}
for symbol in self.program.term.extern_symbols.iter() {
jump_target_tids.insert(symbol.tid.clone());
}
// Replace all jumps to non-existing jump targets with jumps to dummy targets
let dummy_sub_tid = Tid::new("Artificial Sink Sub");
let dummy_blk_tid = Tid::new("Artificial Sink Block");
let mut log_messages = Vec::new();
for sub in self.program.term.subs.iter_mut() {
for block in sub.term.blocks.iter_mut() {
for jmp in block.term.jmps.iter_mut() {
if let Err(log_msg) = jmp.retarget_nonexisting_jump_targets_to_dummy_tid(
&jump_target_tids,
&dummy_sub_tid,
&dummy_blk_tid,
) {
log_messages.push(log_msg);
}
}
}
}
// If at least one dummy jump was inserted, add the corresponding dummy sub and block to the program.
if !log_messages.is_empty() {
let dummy_sub: Term<Sub> = Term {
tid: dummy_sub_tid,
term: Sub {
name: "Artificial Sink Sub".to_string(),
blocks: vec![Term {
tid: dummy_blk_tid,
term: Blk {
defs: Vec::new(),
jmps: Vec::new(),
},
}],
},
};
self.program.term.subs.push(dummy_sub);
}
log_messages
}
/// Run some normalization passes over the project. /// Run some normalization passes over the project.
/// ///
/// Right now this only replaces trivial expressions like `a XOR a` with their result. /// Passes:
pub fn normalize(&mut self) { /// - Replace trivial expressions like `a XOR a` with their result.
/// - Replace jumps to nonexisting TIDs with jumps to an artificial sink target in the CFG.
#[must_use]
pub fn normalize(&mut self) -> Vec<LogMessage> {
self.substitute_trivial_expressions(); self.substitute_trivial_expressions();
self.remove_references_to_nonexisting_tids()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn retarget_nonexisting_jumps() {
let mut jmp_term = Term {
tid: Tid::new("jmp"),
term: Jmp::Branch(Tid::new("nonexisting_target")),
};
assert_eq!(jmp_term.term, Jmp::Branch(Tid::new("nonexisting_target")));
assert!(jmp_term
.retarget_nonexisting_jump_targets_to_dummy_tid(
&HashSet::new(),
&Tid::new("dummy_sub"),
&Tid::new("dummy_blk")
)
.is_err());
assert_eq!(jmp_term.term, Jmp::Branch(Tid::new("dummy_blk")));
} }
} }
...@@ -412,7 +412,7 @@ impl From<Project> for IrProject { ...@@ -412,7 +412,7 @@ impl From<Project> for IrProject {
tid: project.program.tid, tid: project.program.tid,
term: project.program.term.into(), term: project.program.term.into(),
}; };
let mut ir_project = IrProject { IrProject {
program, program,
cpu_architecture: project.cpu_architecture, cpu_architecture: project.cpu_architecture,
stack_pointer_register: project.stack_pointer_register.into(), stack_pointer_register: project.stack_pointer_register.into(),
...@@ -421,9 +421,7 @@ impl From<Project> for IrProject { ...@@ -421,9 +421,7 @@ impl From<Project> for IrProject {
.into_iter() .into_iter()
.map(|cconv| cconv.into()) .map(|cconv| cconv.into())
.collect(), .collect(),
}; }
ir_project.normalize();
ir_project
} }
} }
......
...@@ -362,14 +362,12 @@ impl From<Project> for IrProject { ...@@ -362,14 +362,12 @@ impl From<Project> for IrProject {
return_register: project.return_registers, return_register: project.return_registers,
callee_saved_register: project.callee_saved_registers, callee_saved_register: project.callee_saved_registers,
}; };
let mut ir_project = IrProject { IrProject {
program, program,
cpu_architecture: project.cpu_architecture, cpu_architecture: project.cpu_architecture,
stack_pointer_register: project.stack_pointer_register.into(), stack_pointer_register: project.stack_pointer_register.into(),
calling_conventions: vec![default_cconv], calling_conventions: vec![default_cconv],
}; }
ir_project.normalize();
ir_project
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment