import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.HashMap; import java.util.stream.StreamSupport; import java.util.concurrent.TimeUnit; import bil.*; import term.*; import internal.*; import internal.PcodeBlockData; import internal.JumpProcessing; import internal.TermCreator; import internal.HelperFunctions; import symbol.ExternSymbol; import symbol.ExternSymbolCreator; import serializer.Serializer; import ghidra.app.script.GhidraScript; import ghidra.program.model.block.CodeBlock; import ghidra.program.model.block.CodeBlockIterator; import ghidra.program.model.block.CodeBlockReferenceIterator; import ghidra.program.model.block.SimpleBlockModel; import ghidra.program.model.lang.CompilerSpec; import ghidra.program.model.lang.PrototypeModel; import ghidra.program.model.lang.Register; import ghidra.program.model.listing.Function; import ghidra.program.model.listing.FunctionIterator; import ghidra.program.model.listing.Instruction; import ghidra.program.model.listing.InstructionIterator; import ghidra.program.model.listing.Listing; import ghidra.program.model.listing.VariableStorage; import ghidra.program.model.pcode.PcodeOp; import ghidra.program.util.VarnodeContext; import ghidra.util.exception.CancelledException; public class PcodeExtractor extends GhidraScript { /** * * Entry point to Ghidra Script. Calls serializer after processing of Terms. */ @Override protected void run() throws Exception { HelperFunctions.monitor = getMonitor(); HelperFunctions.ghidraProgram = currentProgram; HelperFunctions.funcMan = currentProgram.getFunctionManager(); HelperFunctions.context = new VarnodeContext(currentProgram, currentProgram.getProgramContext(), currentProgram.getProgramContext()); SimpleBlockModel simpleBM = new SimpleBlockModel(currentProgram); Listing listing = currentProgram.getListing(); TermCreator.symTab = currentProgram.getSymbolTable(); ExternSymbolCreator.createExternalSymbolMap(TermCreator.symTab); setFunctionEntryPoints(); Term<Program> program = TermCreator.createProgramTerm(); Project project = createProject(program); program = iterateFunctions(simpleBM, listing, program); program.getTerm().setExternSymbols(new ArrayList<ExternSymbol>(ExternSymbolCreator.externalSymbolMap.values())); String jsonPath = getScriptArgs()[0]; Serializer ser = new Serializer(project, jsonPath); ser.serializeProject(); TimeUnit.SECONDS.sleep(3); } /** * * @param simpleBM: Simple Block Model to iterate over blocks * @param listing: Listing to get assembly instructions * @param program: program term * @return: Processed Program Term * * Iterates over functions to create sub terms and calls the block iterator to add all block terms to each subroutine. */ protected Term<Program> iterateFunctions(SimpleBlockModel simpleBM, Listing listing, Term<Program> program) { FunctionIterator functions = HelperFunctions.funcMan.getFunctions(true); for (Function func : functions) { if (!ExternSymbolCreator.externalSymbolMap.containsKey(func.getName())){ Term<Sub> currentSub = TermCreator.createSubTerm(func); currentSub.getTerm().setBlocks(iterateBlocks(currentSub, simpleBM, listing)); program.getTerm().addSub(currentSub); } } return program; } /** * * @param currentSub: Current Sub Term to processed * @param simpleBM: Simple Block Model to iterate over blocks * @param listing: Listing to get assembly instructions * @return: new ArrayList of Blk Terms * * Iterates over all blocks and calls the instruction iterator to add def and jmp terms to each block. */ protected ArrayList<Term<Blk>> iterateBlocks(Term<Sub> currentSub, SimpleBlockModel simpleBM, Listing listing) { ArrayList<Term<Blk>> blockTerms = new ArrayList<Term<Blk>>(); try { CodeBlockIterator blockIter = simpleBM.getCodeBlocksContaining(currentSub.getTerm().getAddresses(), getMonitor()); while(blockIter.hasNext()) { CodeBlock currentBlock = blockIter.next(); ArrayList<Term<Blk>> newBlockTerms = iterateInstructions(TermCreator.createBlkTerm(currentBlock.getFirstStartAddress().toString(), null), listing, currentBlock); Term<Blk> lastBlockTerm = newBlockTerms.get(newBlockTerms.size() - 1); JumpProcessing.handlePossibleDefinitionAtEndOfBlock(lastBlockTerm, currentBlock); blockTerms.addAll(newBlockTerms); } } catch (CancelledException e) { System.out.printf("Could not retrieve all basic blocks comprised by function: %s\n", currentSub.getTerm().getName()); } return blockTerms; } /** * * @param block: Blk Term to be filled with instructions * @param listing: Assembly instructions * @param codeBlock: codeBlock for retrieving instructions * @return: new array of Blk Terms * * Iterates over assembly instructions and processes each of the pcode blocks. * Handles empty block by adding a jump Term with fallthrough address */ protected ArrayList<Term<Blk>> iterateInstructions(Term<Blk> block, Listing listing, CodeBlock codeBlock) { PcodeBlockData.instructionIndex = 0; InstructionIterator instructions = listing.getInstructions(codeBlock, true); PcodeBlockData.numberOfInstructionsInBlock = StreamSupport.stream(listing.getInstructions(codeBlock, true).spliterator(), false).count(); PcodeBlockData.blocks = new ArrayList<Term<Blk>>(); PcodeBlockData.blocks.add(block); for (Instruction instr : instructions) { PcodeBlockData.instruction = instr; analysePcodeBlockOfAssemblyInstruction(); PcodeBlockData.instructionIndex++; } if (PcodeBlockData.blocks.get(0).getTerm().getDefs().isEmpty() && PcodeBlockData.blocks.get(0).getTerm().getJmps().isEmpty()) { handleEmptyBlock(codeBlock); } return PcodeBlockData.blocks; } /** * * @param codeBlock: Current empty block * @return New jmp term containing fall through address * * Adds fallthrough address jump to empty block if available */ protected void handleEmptyBlock(CodeBlock codeBlock) { try { CodeBlockReferenceIterator destinations = codeBlock.getDestinations(getMonitor()); if(destinations.hasNext()) { Tid jmpTid = new Tid(String.format("instr_%s_%s", codeBlock.getFirstStartAddress().toString(), 0), codeBlock.getFirstStartAddress().toString()); Tid gotoTid = new Tid(); String destAddr = destinations.next().getDestinationBlock().getFirstStartAddress().toString(); gotoTid.setId(String.format("blk_%s", destAddr)); gotoTid.setAddress(destAddr); PcodeBlockData.blocks.get(0).getTerm().addJmp(new Term<Jmp>(jmpTid, new Jmp(ExecutionType.JmpType.GOTO, "BRANCH", new Label((Tid) gotoTid), 0))); } } catch (CancelledException e) { System.out.printf("Could not retrieve destinations for block at: %s\n", codeBlock.getFirstStartAddress().toString()); } } /** * * Checks whether the assembly instruction is a nop instruction and adds a jump to the block. * Checks whether a jump occured within a ghidra generated pcode block and fixes the control flow * by adding missing jumps between artificially generated blocks. * Checks whether an instruction is in a delay slot and, if so, ignores it * as Ghidra already includes the instruction before the jump */ protected void analysePcodeBlockOfAssemblyInstruction() { PcodeBlockData.ops = PcodeBlockData.instruction.getPcode(true); if(PcodeBlockData.instruction.isInDelaySlot()) { return; } if(PcodeBlockData.ops.length == 0) { JumpProcessing.addBranchToCurrentBlock(PcodeBlockData.blocks.get(PcodeBlockData.blocks.size()-1).getTerm(), PcodeBlockData.instruction.getAddress().toString(), PcodeBlockData.instruction.getFallThrough().toString()); if(PcodeBlockData.instructionIndex < PcodeBlockData.numberOfInstructionsInBlock - 1) { PcodeBlockData.blocks.add(TermCreator.createBlkTerm(PcodeBlockData.instruction.getFallThrough().toString(), null)); } return; } PcodeBlockData.temporaryDefStorage = new ArrayList<Term<Def>>(); Boolean intraInstructionJumpOccured = iteratePcode(); JumpProcessing.fixControlFlowWhenIntraInstructionJumpOccured(intraInstructionJumpOccured); if(!PcodeBlockData.temporaryDefStorage.isEmpty()) { PcodeBlockData.blocks.get(PcodeBlockData.blocks.size() - 1).getTerm().addMultipleDefs(PcodeBlockData.temporaryDefStorage); } } /** * * @return: indicator if jump occured within pcode block * * Iterates over the Pcode instructions of the current assembly instruction. */ protected Boolean iteratePcode() { int numberOfPcodeOps = PcodeBlockData.ops.length; Boolean intraInstructionJumpOccured = false; PcodeBlockData.pcodeIndex = 0; for(PcodeOp op : PcodeBlockData.ops) { PcodeBlockData.pcodeOp = op; String mnemonic = PcodeBlockData.pcodeOp.getMnemonic(); if (JumpProcessing.jumps.contains(mnemonic) || PcodeBlockData.pcodeOp.getOpcode() == PcodeOp.UNIMPLEMENTED) { intraInstructionJumpOccured = JumpProcessing.processJump(mnemonic, numberOfPcodeOps); } else { PcodeBlockData.temporaryDefStorage.add(TermCreator.createDefTerm()); } PcodeBlockData.pcodeIndex++; } return intraInstructionJumpOccured; } /** * @param program: program term * @return: new Project * * Creates the project object and adds the stack pointer register and program term. */ protected Project createProject(Term<Program> program) { Project project = new Project(); CompilerSpec comSpec = currentProgram.getCompilerSpec(); Register stackPointerRegister = comSpec.getStackPointer(); int stackPointerByteSize = (int) stackPointerRegister.getBitLength() / 8; Variable stackPointerVar = new Variable(stackPointerRegister.getName(), stackPointerByteSize, false); project.setProgram(program); project.setStackPointerRegister(stackPointerVar); project.setCpuArch(HelperFunctions.getCpuArchitecture()); try { HashMap<String, RegisterConvention> conventions = new HashMap<String, RegisterConvention>(); ParseCspecContent.parseSpecs(currentProgram, conventions); addParameterRegister(conventions); project.setRegisterConvention(new ArrayList<RegisterConvention>(conventions.values())); } catch (FileNotFoundException e) { System.out.println(e); } project.setRegisterProperties(HelperFunctions.getRegisterList()); return project; } /** * Adds parameter register to the RegisterCallingConvention object */ protected void addParameterRegister(HashMap<String, RegisterConvention> conventions) { PrototypeModel[] models = currentProgram.getCompilerSpec().getCallingConventions(); for(PrototypeModel model : models) { String cconv = model.getName(); if(conventions.get(cconv) != null) { ArrayList<String> parameters = conventions.get(cconv).getParameter(); for(VariableStorage storage : model.getPotentialInputRegisterStorage(currentProgram)) { parameters.add(storage.getRegister().getName()); } } } } /** * Adds all entry points of internal and external function to a global hash map * This will later speed up the cast of indirect Calls. */ protected void setFunctionEntryPoints() { // Add internal function addresses for(Function func : HelperFunctions.funcMan.getFunctions(true)) { String address = func.getEntryPoint().toString(); HelperFunctions.functionEntryPoints.put(address, new Tid(String.format("sub_%s", address), address)); } // Add thunk addresses for external functions for(ExternSymbol sym : ExternSymbolCreator.externalSymbolMap.values()){ for(String address : sym.getAddresses()) { HelperFunctions.functionEntryPoints.put(address, sym.getTid()); } } } }