Unverified Commit b20691f2 by Enkelmann Committed by GitHub

parse indirect jump targets from Ghidra (#151)

parent 03973e80
...@@ -71,6 +71,7 @@ fn mock_program() -> Term<Program> { ...@@ -71,6 +71,7 @@ fn mock_program() -> Term<Program> {
term: Blk { term: Blk {
defs: vec![def_term1], defs: vec![def_term1],
jmps: vec![call_term], jmps: vec![call_term],
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub1_blk2 = Term { let sub1_blk2 = Term {
...@@ -78,6 +79,7 @@ fn mock_program() -> Term<Program> { ...@@ -78,6 +79,7 @@ fn mock_program() -> Term<Program> {
term: Blk { term: Blk {
defs: vec![def_term5], defs: vec![def_term5],
jmps: vec![jmp_term], jmps: vec![jmp_term],
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub1 = Term { let sub1 = Term {
...@@ -104,6 +106,7 @@ fn mock_program() -> Term<Program> { ...@@ -104,6 +106,7 @@ fn mock_program() -> Term<Program> {
term: Blk { term: Blk {
defs: vec![def_term2, def_term3], defs: vec![def_term2, def_term3],
jmps: vec![cond_jump_term, jump_term_2], jmps: vec![cond_jump_term, jump_term_2],
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub2_blk2 = Term { let sub2_blk2 = Term {
...@@ -111,6 +114,7 @@ fn mock_program() -> Term<Program> { ...@@ -111,6 +114,7 @@ fn mock_program() -> Term<Program> {
term: Blk { term: Blk {
defs: vec![def_term4], defs: vec![def_term4],
jmps: vec![return_term], jmps: vec![return_term],
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub2 = Term { let sub2 = Term {
......
...@@ -203,6 +203,54 @@ impl<'a> GraphBuilder<'a> { ...@@ -203,6 +203,54 @@ impl<'a> GraphBuilder<'a> {
} }
} }
/// Add an intraprocedural jump edge from the `source` node to the block identified by `target_tid`.
///
/// The target node is looked up in `jump_targets` under the key
/// (`target_tid`, TID of the sub that the `source` node belongs to).
/// If no node for that combination exists yet, the target block is looked up in the program
/// and new nodes for the (target block, current sub) combination are created via `add_block`.
///
/// `jump` and `untaken_conditional` are stored unchanged in the created `Edge::Jump`.
///
/// # Panics
///
/// Panics if `source` is not a `Node::BlkEnd` node
/// or if the program contains no block with the given `target_tid`.
fn add_intraprocedural_edge(
    &mut self,
    source: NodeIndex,
    target_tid: &Tid,
    jump: &'a Term<Jmp>,
    untaken_conditional: Option<&'a Term<Jmp>>,
) {
    let sub_term = match self.graph[source] {
        Node::BlkEnd(_, sub_term) => sub_term,
        _ => panic!("source node of an intraprocedural jump edge must be a BlkEnd node"),
    };
    // Copy the node index out of the map so that the borrow of `jump_targets` ends
    // before `add_block` needs mutable access to the builder.
    let known_target = self
        .jump_targets
        .get(&(target_tid.clone(), sub_term.tid.clone()))
        .map(|(target_node, _)| *target_node);
    let target_node = match known_target {
        Some(target_node) => target_node,
        None => {
            let target_block = self
                .program
                .term
                .find_block(target_tid)
                .expect("target block of an intraprocedural jump does not exist in the program");
            self.add_block(target_block, sub_term).0
        }
    };
    self.graph
        .add_edge(source, target_node, Edge::Jump(jump, untaken_conditional));
}
/// Read in target hints for indirect intraprocedural jumps from the source block
/// and add intraprocedural jump edges for them to the graph.
///
/// For every address in the `indirect_jmp_targets` list of the block belonging to `source`,
/// the block ID at that address is generated and an edge to it is added
/// via `add_intraprocedural_edge`.
/// If the list is empty, no edge is added.
///
/// The function assumes (but does not check) that the `jump` is an intraprocedural indirect jump.
///
/// # Panics
///
/// Panics if `source` is not a `Node::BlkEnd` node.
/// `add_intraprocedural_edge` additionally panics if a target hint refers to a block
/// that does not exist in the program.
fn add_indirect_jumps(
    &mut self,
    source: NodeIndex,
    jump: &'a Term<Jmp>,
    untaken_conditional: Option<&'a Term<Jmp>>,
) {
    let source_block = match self.graph[source] {
        Node::BlkEnd(source_block, _) => source_block,
        _ => panic!("source node of an indirect jump must be a BlkEnd node"),
    };
    for target_address in &source_block.term.indirect_jmp_targets {
        let target_tid = Tid::blk_id_at_address(target_address);
        self.add_intraprocedural_edge(source, &target_tid, jump, untaken_conditional);
    }
}
/// add call edges and interprocedural jump edges for a specific jump term to the graph /// add call edges and interprocedural jump edges for a specific jump term to the graph
fn add_jump_edge( fn add_jump_edge(
&mut self, &mut self,
...@@ -220,22 +268,11 @@ impl<'a> GraphBuilder<'a> { ...@@ -220,22 +268,11 @@ impl<'a> GraphBuilder<'a> {
target: tid, target: tid,
condition: _, condition: _,
} => { } => {
if let Some((target_node, _)) = self.add_intraprocedural_edge(source, tid, jump, untaken_conditional);
self.jump_targets.get(&(tid.clone(), sub_term.tid.clone())) }
{ Jmp::BranchInd(_) => {
self.graph.add_edge( self.add_indirect_jumps(source, jump, untaken_conditional);
source,
*target_node,
Edge::Jump(jump, untaken_conditional),
);
} else {
let target_block = self.program.term.find_block(tid).unwrap();
let (target_node, _) = self.add_block(target_block, sub_term);
self.graph
.add_edge(source, target_node, Edge::Jump(jump, untaken_conditional));
}
} }
Jmp::BranchInd(_) => (), // TODO: add handling of indirect edges!
Jmp::Call { target, return_ } => { Jmp::Call { target, return_ } => {
// first make sure that the return block exists // first make sure that the return block exists
let return_to_node_option = if let Some(return_tid) = return_ { let return_to_node_option = if let Some(return_tid) = return_ {
...@@ -445,6 +482,7 @@ mod tests { ...@@ -445,6 +482,7 @@ mod tests {
term: Blk { term: Blk {
defs: Vec::new(), defs: Vec::new(),
jmps: vec![call_term], jmps: vec![call_term],
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub1_blk2 = Term { let sub1_blk2 = Term {
...@@ -452,6 +490,7 @@ mod tests { ...@@ -452,6 +490,7 @@ mod tests {
term: Blk { term: Blk {
defs: Vec::new(), defs: Vec::new(),
jmps: vec![jmp_term], jmps: vec![jmp_term],
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub1 = Term { let sub1 = Term {
...@@ -478,6 +517,7 @@ mod tests { ...@@ -478,6 +517,7 @@ mod tests {
term: Blk { term: Blk {
defs: Vec::new(), defs: Vec::new(),
jmps: vec![cond_jump_term, jump_term_2], jmps: vec![cond_jump_term, jump_term_2],
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub2_blk2 = Term { let sub2_blk2 = Term {
...@@ -485,6 +525,7 @@ mod tests { ...@@ -485,6 +525,7 @@ mod tests {
term: Blk { term: Blk {
defs: Vec::new(), defs: Vec::new(),
jmps: vec![return_term], jmps: vec![return_term],
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub2 = Term { let sub2 = Term {
...@@ -514,4 +555,38 @@ mod tests { ...@@ -514,4 +555,38 @@ mod tests {
assert_eq!(graph.node_count(), 16); assert_eq!(graph.node_count(), 16);
assert_eq!(graph.edge_count(), 20); assert_eq!(graph.edge_count(), 20);
} }
#[test]
fn add_indirect_jumps() {
    // Build a program consisting of one sub with one block at address "00001000".
    // The block ends in an indirect jump whose only target hint is the block itself.
    let mut block_tid = Tid::new("blk_00001000");
    block_tid.address = "00001000".to_string();
    let jump = Term {
        tid: Tid::new("indrect_jmp".to_string()),
        // The jump expression is irrelevant here: only the target hints of the block are used.
        term: Jmp::BranchInd(Expression::Const(Bitvector::from_u32(0x1000))),
    };
    let block = Term {
        tid: block_tid,
        term: Blk {
            defs: Vec::new(),
            jmps: vec![jump],
            indirect_jmp_targets: vec!["00001000".to_string()],
        },
    };
    let sub = Term {
        tid: Tid::new("sub"),
        term: Sub {
            name: "sub".to_string(),
            blocks: vec![block],
        },
    };
    let mut program = Program::mock_empty();
    program.subs.push(sub);
    let program_term = Term {
        tid: Tid::new("program".to_string()),
        term: program,
    };

    let cfg = get_program_cfg(&program_term, HashSet::new());

    // NOTE(review): presumably the two nodes are the start and end node of the single block,
    // and the two edges are the block-internal edge plus the indirect jump edge
    // looping back to the block start. Confirm against the graph construction.
    assert_eq!(cfg.node_count(), 2);
    assert_eq!(cfg.edge_count(), 2);
}
} }
...@@ -139,6 +139,7 @@ fn context_problem_implementation() { ...@@ -139,6 +139,7 @@ fn context_problem_implementation() {
term: Blk { term: Blk {
defs: Vec::new(), defs: Vec::new(),
jmps: Vec::new(), jmps: Vec::new(),
indirect_jmp_targets: Vec::new(),
}, },
}; };
let sub = Term { let sub = Term {
......
...@@ -14,6 +14,7 @@ fn mock_block(tid: &str) -> Term<Blk> { ...@@ -14,6 +14,7 @@ fn mock_block(tid: &str) -> Term<Blk> {
term: Blk { term: Blk {
defs: Vec::new(), defs: Vec::new(),
jmps: Vec::new(), jmps: Vec::new(),
indirect_jmp_targets: Vec::new(),
}, },
} }
} }
......
...@@ -32,6 +32,18 @@ impl Tid { ...@@ -32,6 +32,18 @@ impl Tid {
address: self.address, address: self.address,
} }
} }
/// Build the ID of the block that starts at `address`.
///
/// The ID consists of the prefix `blk_` followed by the address,
/// and the address itself is stored in the `address` field of the returned `Tid`.
///
/// No check is made that such a block actually exists.
/// For cases where one assembly instruction generates more than one block,
/// the returned block ID is the one that would be executed first if a jump to the given address happened.
pub fn blk_id_at_address(address: &str) -> Tid {
    let address = address.to_owned();
    let id = ["blk_", address.as_str()].concat();
    Tid { id, address }
}
} }
impl std::fmt::Display for Tid { impl std::fmt::Display for Tid {
...@@ -212,6 +224,10 @@ impl Term<Jmp> { ...@@ -212,6 +224,10 @@ impl Term<Jmp> {
/// - For two jumps, the first one has to be a conditional jump, /// - For two jumps, the first one has to be a conditional jump,
/// where the second unconditional jump is only taken if the condition of the first jump evaluates to false. /// where the second unconditional jump is only taken if the condition of the first jump evaluates to false.
/// ///
/// If one of the `Jmp` instructions is an indirect jump,
/// then the `indirect_jmp_targets` is a list of possible jump target addresses for that jump.
/// The list may not be complete and the entries are not guaranteed to be correct.
///
/// Basic blocks are *single entry, single exit*, i.e. a basic block is only entered at the beginning /// Basic blocks are *single entry, single exit*, i.e. a basic block is only entered at the beginning
/// and is only exited by the jump instructions at the end of the block. /// and is only exited by the jump instructions at the end of the block.
/// If a new control flow edge is discovered that would jump to the middle of a basic block, /// If a new control flow edge is discovered that would jump to the middle of a basic block,
...@@ -220,6 +236,41 @@ impl Term<Jmp> { ...@@ -220,6 +236,41 @@ impl Term<Jmp> {
pub struct Blk { pub struct Blk {
pub defs: Vec<Term<Def>>, pub defs: Vec<Term<Def>>,
pub jmps: Vec<Term<Jmp>>, pub jmps: Vec<Term<Jmp>>,
pub indirect_jmp_targets: Vec<String>,
}
impl Term<Blk> {
/// Remove indirect jump target addresses for which no corresponding target block exists.
///
/// An address is kept if the block ID generated for it by `Tid::blk_id_at_address`
/// is contained in `known_block_tids`.
/// The relative order of the kept addresses is preserved.
///
/// # Errors
///
/// If at least one address was removed, an error log message for each removed address is returned.
/// Each message has the TID of this block set as its location.
pub fn remove_nonexisting_indirect_jump_targets(
    &mut self,
    known_block_tids: &HashSet<Tid>,
) -> Result<(), Vec<LogMessage>> {
    let mut logs = Vec::new();
    // Borrow the TID on its own so that the closure does not capture all of `self`
    // while the target list is mutably borrowed by `retain`.
    let block_tid = &self.tid;
    // Filter in place instead of cloning every kept address into a new vector.
    self.term.indirect_jmp_targets.retain(|target_address| {
        let target_exists =
            known_block_tids.contains(&Tid::blk_id_at_address(target_address));
        if !target_exists {
            let error_msg =
                format!("Indirect jump target at {} does not exist", target_address);
            logs.push(LogMessage::new_error(error_msg).location(block_tid.clone()));
        }
        target_exists
    });
    if logs.is_empty() {
        Ok(())
    } else {
        Err(logs)
    }
}
} }
/// A `Sub` or subroutine represents a function with a given name and a list of basic blocks belonging to it. /// A `Sub` or subroutine represents a function with a given name and a list of basic blocks belonging to it.
...@@ -436,6 +487,11 @@ impl Project { ...@@ -436,6 +487,11 @@ impl Project {
let mut log_messages = Vec::new(); let mut log_messages = Vec::new();
for sub in self.program.term.subs.iter_mut() { for sub in self.program.term.subs.iter_mut() {
for block in sub.term.blocks.iter_mut() { for block in sub.term.blocks.iter_mut() {
if let Err(mut logs) =
block.remove_nonexisting_indirect_jump_targets(&jump_target_tids)
{
log_messages.append(&mut logs);
}
for jmp in block.term.jmps.iter_mut() { for jmp in block.term.jmps.iter_mut() {
if let Err(log_msg) = jmp.retarget_nonexisting_jump_targets_to_dummy_tid( if let Err(log_msg) = jmp.retarget_nonexisting_jump_targets_to_dummy_tid(
&jump_target_tids, &jump_target_tids,
...@@ -458,6 +514,7 @@ impl Project { ...@@ -458,6 +514,7 @@ impl Project {
term: Blk { term: Blk {
defs: Vec::new(), defs: Vec::new(),
jmps: Vec::new(), jmps: Vec::new(),
indirect_jmp_targets: Vec::new(),
}, },
}], }],
}, },
...@@ -492,6 +549,7 @@ mod tests { ...@@ -492,6 +549,7 @@ mod tests {
term: Blk { term: Blk {
defs: Vec::new(), defs: Vec::new(),
jmps: Vec::new(), jmps: Vec::new(),
indirect_jmp_targets: Vec::new(),
}, },
} }
} }
......
...@@ -30,6 +30,7 @@ pub struct Jmp { ...@@ -30,6 +30,7 @@ pub struct Jmp {
pub goto: Option<Label>, pub goto: Option<Label>,
pub call: Option<Call>, pub call: Option<Call>,
pub condition: Option<Variable>, pub condition: Option<Variable>,
pub target_hints: Option<Vec<String>>,
} }
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)] #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone, Copy)]
...@@ -199,6 +200,11 @@ impl From<Blk> for IrBlk { ...@@ -199,6 +200,11 @@ impl From<Blk> for IrBlk {
term: def_term.term.into(), term: def_term.term.into(),
}) })
.collect(); .collect();
let indirect_jmp_targets = blk
.jmps
.iter()
.find_map(|jmp_term| jmp_term.term.target_hints.clone())
.unwrap_or_default();
let jmps: Vec<Term<IrJmp>> = blk let jmps: Vec<Term<IrJmp>> = blk
.jmps .jmps
.into_iter() .into_iter()
...@@ -207,7 +213,11 @@ impl From<Blk> for IrBlk { ...@@ -207,7 +213,11 @@ impl From<Blk> for IrBlk {
term: jmp_term.term.into(), term: jmp_term.term.into(),
}) })
.collect(); .collect();
IrBlk { defs, jmps } IrBlk {
defs,
jmps,
indirect_jmp_targets,
}
} }
} }
......
...@@ -8,6 +8,8 @@ import ghidra.program.model.address.Address; ...@@ -8,6 +8,8 @@ import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Function; import ghidra.program.model.listing.Function;
import ghidra.program.model.pcode.PcodeOp; import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode; import ghidra.program.model.pcode.Varnode;
import ghidra.program.model.symbol.Reference;
import ghidra.program.model.symbol.RefType;
import ghidra.program.model.symbol.SymbolTable; import ghidra.program.model.symbol.SymbolTable;
import symbol.ExternSymbolCreator; import symbol.ExternSymbolCreator;
import term.*; import term.*;
...@@ -91,9 +93,12 @@ public class TermCreator { ...@@ -91,9 +93,12 @@ public class TermCreator {
case PcodeOp.CBRANCH: case PcodeOp.CBRANCH:
return handleConditionalBranches(jmpTid, intraJump); return handleConditionalBranches(jmpTid, intraJump);
case PcodeOp.BRANCH: case PcodeOp.BRANCH:
case PcodeOp.BRANCHIND:
jumps.add(new Term<Jmp>(jmpTid, new Jmp(ExecutionType.JmpType.GOTO, mnemonic, createLabel(null), PcodeBlockData.pcodeIndex))); jumps.add(new Term<Jmp>(jmpTid, new Jmp(ExecutionType.JmpType.GOTO, mnemonic, createLabel(null), PcodeBlockData.pcodeIndex)));
break; break;
case PcodeOp.BRANCHIND:
Jmp jump = createIndirectJump(mnemonic);
jumps.add(new Term<Jmp>(jmpTid, jump));
break;
case PcodeOp.RETURN: case PcodeOp.RETURN:
jumps.add(new Term<Jmp>(jmpTid, new Jmp(ExecutionType.JmpType.RETURN, mnemonic, createLabel(null), PcodeBlockData.pcodeIndex))); jumps.add(new Term<Jmp>(jmpTid, new Jmp(ExecutionType.JmpType.RETURN, mnemonic, createLabel(null), PcodeBlockData.pcodeIndex)));
break; break;
...@@ -102,6 +107,26 @@ public class TermCreator { ...@@ -102,6 +107,26 @@ public class TermCreator {
return jumps; return jumps;
} }
/**
 * Creates a jump term for an indirect jump and adds jump targets as computed by Ghidra to it as target hints.
 *
 * Only references from the current instruction whose type is COMPUTED_JUMP or
 * CONDITIONAL_COMPUTED_JUMP are added as hints.
 *
 * @param mnemonic The mnemonic of the jump term
 * @return The created indirect jump term
 */
public static Jmp createIndirectJump(String mnemonic) {
    Jmp jump = new Jmp(ExecutionType.JmpType.GOTO, mnemonic, createLabel(null), PcodeBlockData.pcodeIndex);
    ArrayList<String> targetHints = new ArrayList<String>();
    for (Reference reference : PcodeBlockData.instruction.getReferencesFrom()) {
        RefType referenceType = reference.getReferenceType();
        boolean isComputedJump = referenceType == RefType.COMPUTED_JUMP
                || referenceType == RefType.CONDITIONAL_COMPUTED_JUMP;
        if (isComputedJump) {
            targetHints.add(reference.getToAddress().toString());
        }
    }
    jump.setTargetHints(targetHints);
    return jump;
}
/** /**
* *
......
...@@ -3,6 +3,7 @@ package term; ...@@ -3,6 +3,7 @@ package term;
import bil.ExecutionType; import bil.ExecutionType;
import bil.Variable; import bil.Variable;
import java.util.ArrayList;
import com.google.gson.annotations.SerializedName; import com.google.gson.annotations.SerializedName;
public class Jmp { public class Jmp {
...@@ -19,6 +20,8 @@ public class Jmp { ...@@ -19,6 +20,8 @@ public class Jmp {
private Variable condition; private Variable condition;
@SerializedName("pcode_index") @SerializedName("pcode_index")
private int pcodeIndex; private int pcodeIndex;
@SerializedName("target_hints")
private ArrayList<String> targetHints;
public Jmp() { public Jmp() {
} }
...@@ -93,4 +96,11 @@ public class Jmp { ...@@ -93,4 +96,11 @@ public class Jmp {
this.pcodeIndex = pcodeIndex; this.pcodeIndex = pcodeIndex;
} }
/**
 * @return the target hints stored for this jump (serialized as "target_hints"); may be null if never set
 */
public ArrayList<String> getTargetHints() {
    return this.targetHints;
}
/**
 * Replaces the target hints stored for this jump.
 *
 * @param targetHints the new list of target hints; the list is stored as-is, not copied
 */
public void setTargetHints(ArrayList<String> targetHints) {
    this.targetHints = targetHints;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment