Commit c3ebc2bd by devttys0

Improved opcode signature searches; consolidated filtering code to configuration module.

parent 89c5c63d
...@@ -7,8 +7,9 @@ class Display(object): ...@@ -7,8 +7,9 @@ class Display(object):
HEADER_WIDTH = 150 HEADER_WIDTH = 150
DEFAULT_FORMAT = "%s\n" DEFAULT_FORMAT = "%s\n"
def __init__(self, quiet=False, verbose=False, log=None, csv=False, fit_to_screen=False): def __init__(self, quiet=False, verbose=False, log=None, csv=False, fit_to_screen=False, filter=None):
self.quiet = quiet self.quiet = quiet
self.filter = filter
self.verbose = verbose self.verbose = verbose
self.fit_to_screen = fit_to_screen self.fit_to_screen = fit_to_screen
self.fp = None self.fp = None
...@@ -61,6 +62,7 @@ class Display(object): ...@@ -61,6 +62,7 @@ class Display(object):
def _fprint(self, fmt, columns, csv=True): def _fprint(self, fmt, columns, csv=True):
if not self.quiet: if not self.quiet:
line = fmt % tuple(columns) line = fmt % tuple(columns)
if filter and self.filter.valid_result(line):
sys.stdout.write(self._format_line(line.strip()) + "\n") sys.stdout.write(self._format_line(line.strip()) + "\n")
if self.fp and not (self.csv and not csv): if self.fp and not (self.csv and not csv):
......
...@@ -3,9 +3,9 @@ import binwalk.core.common as common ...@@ -3,9 +3,9 @@ import binwalk.core.common as common
from binwalk.core.smart import SmartSignature from binwalk.core.smart import SmartSignature
from binwalk.core.compat import * from binwalk.core.compat import *
class MagicFilter: class Filter:
''' '''
Class to filter libmagic results based on include/exclude rules and false positive detection. Class to filter results based on include/exclude rules and false positive detection.
An instance of this class is available via the Binwalk.filter object. An instance of this class is available via the Binwalk.filter object.
Note that all filter strings should be in lower case. Note that all filter strings should be in lower case.
''' '''
...@@ -112,16 +112,15 @@ class MagicFilter: ...@@ -112,16 +112,15 @@ class MagicFilter:
return self.FILTER_INCLUDE return self.FILTER_INCLUDE
def valid_magic_result(self, data): def valid_result(self, data):
''' '''
Checks if the given string contains invalid data. Checks if the given string contains invalid data.
Called internally by Binwalk.scan().
@data - String to validate. @data - String to validate.
Returns True if data is valid, False if invalid. Returns True if data is valid, False if invalid.
''' '''
# A result of 'data' is never ever valid. # A result of 'data' is never ever valid (for libmagic results)
if data == self.DATA_RESULT: if data == self.DATA_RESULT:
return False return False
......
...@@ -27,6 +27,7 @@ class Settings: ...@@ -27,6 +27,7 @@ class Settings:
PLUGINS = "plugins" PLUGINS = "plugins"
EXTRACT_FILE = "extract.conf" EXTRACT_FILE = "extract.conf"
BINWALK_MAGIC_FILE = "binwalk" BINWALK_MAGIC_FILE = "binwalk"
BINARCH_MAGIC_FILE = "binarch"
def __init__(self): def __init__(self):
''' '''
...@@ -45,11 +46,13 @@ class Settings: ...@@ -45,11 +46,13 @@ class Settings:
# Build the paths to all user-specific files # Build the paths to all user-specific files
self.paths['user'][self.BINWALK_MAGIC_FILE] = self._user_path(self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE) self.paths['user'][self.BINWALK_MAGIC_FILE] = self._user_path(self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE)
self.paths['user'][self.BINARCH_MAGIC_FILE] = self._user_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
self.paths['user'][self.EXTRACT_FILE] = self._user_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE) self.paths['user'][self.EXTRACT_FILE] = self._user_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE)
self.paths['user'][self.PLUGINS] = self._user_path(self.BINWALK_PLUGINS_DIR) self.paths['user'][self.PLUGINS] = self._user_path(self.BINWALK_PLUGINS_DIR)
# Build the paths to all system-wide files # Build the paths to all system-wide files
self.paths['system'][self.BINWALK_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE) self.paths['system'][self.BINWALK_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE)
self.paths['system'][self.BINARCH_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
self.paths['system'][self.EXTRACT_FILE] = self._system_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE) self.paths['system'][self.EXTRACT_FILE] = self._system_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE)
self.paths['system'][self.PLUGINS] = self._system_path(self.BINWALK_PLUGINS_DIR) self.paths['system'][self.PLUGINS] = self._system_path(self.BINWALK_PLUGINS_DIR)
......
...@@ -161,7 +161,7 @@ class SmartSignature: ...@@ -161,7 +161,7 @@ class SmartSignature:
Returns True if the string result is one of many. Returns True if the string result is one of many.
Returns False if the string result is not one of many. Returns False if the string result is not one of many.
''' '''
if self.filter.valid_magic_result(data): if self.filter.valid_result(data):
if self.last_one_of_many is not None and data.startswith(self.last_one_of_many): if self.last_one_of_many is not None and data.startswith(self.last_one_of_many):
return True return True
......
# MIPS prologue
# addiu $sp, -XX
# sw XX, XX($sp)
# 27 BD FF XX
# AF BX XX XX
0 string \xFF\xBD\x27 MIPSEL instructions, function prologue{offset-adjust:-1}
>6 byte !0xAF (invalid)
>5 byte&0xE0 !0xA0 (invalid)
0 string \x27\xBD\xFF MIPS instructions, function prologue
>4 byte !0xAF (invalid)
>5 byte&0xE0 !0xA0 (invalid)
# MIPS epilogue
# jr $ra
# addiu $sp, XX
#
# addiu $sp, XX
# jr $ra
0 belong 0x03e00008 MIPS instructions, function epilogue
>4 beshort !0x27BD (invalid)
0 beshort 0x27BD MIPS instructions, function epilogue
>2 belong !0x03e00008 (invalid)
0 lelong 0x03e00008 MIPSEL instructions, function epilogue
>6 leshort !0x27BD (invalid)
0 leshort 0x27BD MIPSEL instructions, function epilogue
>2 lelong !0x03e00008 (invalid)
# PowerPC prologue
# mflr r0
0 belong 0x7C0802A6 PowerPC big endian instructions, function prologue
0 lelong 0x7C0802A6 PowerPC little endian instructions, function prologue
# PowerPC epilogue
# blr
0 belong 0x4E800020 PowerPC big endian instructions, function epilogue
0 lelong 0x4E800020 PowerPC little endian instructions, function epilogue
# ARM prologue
# STMFD SP!, {XX}
# <any instruction whose opcode begins with 0xE>
0 beshort 0xE92D ARMEB instructions, function prologue
>5 byte&0xF0 !0xE0 (invalid)
>9 byte&0xF0 !0xE0 (invalid)
0 leshort 0xE92D ARM instructions, function prologue{offset-adjust:-2}
>5 byte&0xF0 !0xE0 (invalid)
>9 byte&0xF0 !0xE0 (invalid)
# ARM epilogue
# MOV R0, XX
# LDMFD SP!, {XX}
0 beshort 0xE1A0 ARMEB instructions, function epilogue
>4 beshort !0xE8BD (invalid)
0 leshort 0xE1A0 ARM instructions, function epilogue{offset-adjust:-2}
>4 leshort !0xE8BD (invalid)
# Ubicom32 prologue
# move.4 -4($sp)++, $ra
0 belong 0x02FF6125 Ubicom32 instructions, function prologue
# Ubicom32 epilogues
# calli $ra, 0($ra)
# ret ($sp)4++
0 belong 0xF0A000A0 Ubicom32 instructions, function epilogue
0 belong 0x000022E1 Ubicom32 instructions, function epilogue
# AVR8 prologue
# push r28
# push r29
0 belong 0x93CF93DF AVR8 instructions, function prologue
0 belong 0x93DF93CF AVR8 instructions, function prologue
# AVR32 prologue
# pushm r7,lr
# mov r7,sp
0 string \xEB\xCD\x40\x80\x1A\x97 AVR32 instructions, function prologue
# SPARC epilogue
# ret
# restore XX
0 string \x81\xC7\xE0\x08\x81\xE8 SPARC instructions, function epilogue
# x86 prologue
# push ebp
# mov ebp, esp
0 string \x55\x89\xE5 Intel x86 instructions, function prologue
...@@ -5,4 +5,3 @@ from binwalk.modules.hashmatch import HashMatch ...@@ -5,4 +5,3 @@ from binwalk.modules.hashmatch import HashMatch
from binwalk.modules.configuration import Configuration from binwalk.modules.configuration import Configuration
from binwalk.modules.extractor import Extractor from binwalk.modules.extractor import Extractor
from binwalk.modules.entropy import Entropy from binwalk.modules.entropy import Entropy
from binwalk.modules.opcodes import OpcodeValidator
import os import os
import sys import sys
import argparse import argparse
import binwalk.core.filter
import binwalk.core.common import binwalk.core.common
import binwalk.core.display import binwalk.core.display
import binwalk.core.settings import binwalk.core.settings
...@@ -34,6 +35,22 @@ class Configuration(Module): ...@@ -34,6 +35,22 @@ class Configuration(Module):
type=int, type=int,
kwargs={'swap_size' : 0}, kwargs={'swap_size' : 0},
description='Reverse every n bytes before scanning'), description='Reverse every n bytes before scanning'),
Option(short='I',
long='show-invalid',
kwargs={'show_invalid' : True},
description='Show results marked as invalid'),
Option(short='x',
long='exclude',
kwargs={'exclude_filters' : []},
type=list,
dtype=str.__name__,
description='Exclude results that match <str>'),
Option(short='y',
long='include',
kwargs={'include_filters' : []},
type=list,
dtype=str.__name__,
description='Only show results that match <str>'),
Option(long='log', Option(long='log',
short='f', short='f',
type=argparse.FileType, type=argparse.FileType,
...@@ -70,6 +87,9 @@ class Configuration(Module): ...@@ -70,6 +87,9 @@ class Configuration(Module):
Kwarg(name='offset', default=0), Kwarg(name='offset', default=0),
Kwarg(name='block', default=0), Kwarg(name='block', default=0),
Kwarg(name='swap_size', default=0), Kwarg(name='swap_size', default=0),
Kwarg(name='show_invalid', default=False),
Kwarg(name='include_filters', default=[]),
Kwarg(name='exclude_filters', default=[]),
Kwarg(name='log_file', default=None), Kwarg(name='log_file', default=None),
Kwarg(name='csv', default=False), Kwarg(name='csv', default=False),
Kwarg(name='format_to_terminal', default=False), Kwarg(name='format_to_terminal', default=False),
...@@ -86,11 +106,20 @@ class Configuration(Module): ...@@ -86,11 +106,20 @@ class Configuration(Module):
self._open_target_files() self._open_target_files()
self._set_verbosity() self._set_verbosity()
self.filter = binwalk.core.filter.Filter(self.show_invalid)
# Set any specified include/exclude filters
for regex in self.exclude_filters:
self.filter.exclude(regex)
for regex in self.include_filters:
self.filter.include(regex)
self.settings = binwalk.core.settings.Settings() self.settings = binwalk.core.settings.Settings()
self.display = binwalk.core.display.Display(log=self.log_file, self.display = binwalk.core.display.Display(log=self.log_file,
csv=self.csv, csv=self.csv,
quiet=self.quiet, quiet=self.quiet,
verbose=self.verbose, verbose=self.verbose,
filter=self.filter,
fit_to_screen=self.format_to_terminal) fit_to_screen=self.format_to_terminal)
if self.show_help: if self.show_help:
......
import sys
import inspect
import binwalk.core.common
from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg
class Operand(object):
    '''
    Plain attribute container describing a single instruction operand.

    A new object starts with valid=False, value=None and mnem=None. Every
    keyword argument passed to the constructor is then stored as an attribute
    of the same name, overriding a default or adding a new attribute.
    '''

    def __init__(self, **kwargs):
        # Defaults, assigned in the same order as the attributes are listed
        self.valid, self.value, self.mnem = False, None, None

        # iterator() is not defined in this module; presumably it is supplied
        # by the binwalk.core.compat star import and yields (key, value) pairs.
        for (name, setting) in iterator(kwargs):
            setattr(self, name, setting)
class Instruction(object):
    '''
    Plain attribute container describing one disassembled instruction.

    BIG and LITTLE are the two values used for the endianess attribute.
    A new object starts out invalid, with no opcode, mnemonic or endianess,
    an empty operand list and a size of 0. Every keyword argument passed to
    the constructor is then stored as an attribute of the same name.
    '''

    BIG = 'big'
    LITTLE = 'little'

    def __init__(self, **kwargs):
        # Defaults
        self.valid = False
        self.opcode = self.mnem = self.endianess = None
        self.operands = []
        self.size = 0

        # iterator() is not defined in this module; presumably it is supplied
        # by the binwalk.core.compat star import and yields (key, value) pairs.
        for (name, setting) in iterator(kwargs):
            setattr(self, name, setting)
class Disassembler(object):
    '''
    Base class for the per-architecture opcode recognizers.

    This is not a real disassembler: it looks at a single byte of each
    instruction, masks it, and checks whether the result is one of the
    opcodes the architecture is known to use.

    Subclasses customize the behavior through class attributes:

    MIN_INSTRUCTION_COUNT - Number of consecutive instructions callers group into one sequence.
    INSTRUCTION_SIZE      - Size, in bytes, of one instruction.
    OPCODE_INDEX          - Index (after pre-processing) of the byte that holds the opcode.
    OPCODE_MASK           - Bit mask applied to that byte before the lookup.
    OPCODES               - Masked opcode values that count as valid.
    ENDIANESS             - Instruction.BIG or Instruction.LITTLE.
    '''

    MIN_INSTRUCTION_COUNT = 6
    INSTRUCTION_SIZE = 4
    OPCODE_INDEX = 0
    OPCODE_MASK = 0
    # Empty default so that the base class simply recognizes nothing instead
    # of raising AttributeError when disassemble_opcode() looks up OPCODES.
    OPCODES = []
    ENDIANESS = Instruction.BIG

    def __init__(self):
        # Filled in by whoever scores this disassembler against a file
        self.confidence = 0.0

    def pre_processor(self, data):
        '''
        Normalizes the raw instruction bytes before the opcode is extracted.

        @data - The raw bytes of one instruction.

        Returns data reversed for little endian disassemblers, unchanged otherwise.
        '''
        if self.ENDIANESS == Instruction.LITTLE:
            return data[::-1]
        return data

    def validate(self, instruction):
        '''
        Hook for subclasses that want to inspect an instruction after its
        opcode has been decoded. The base implementation does nothing.

        @instruction - The Instruction object produced by disassemble().

        Returns None.
        '''
        return None

    def disassemble_opcode(self, ins, data):
        '''
        Extracts the masked opcode from data and records whether it is known.

        @ins  - Instruction object to update (opcode and valid are set).
        @data - Pre-processed instruction bytes.

        Returns None.
        '''
        if len(data) > self.OPCODE_INDEX:
            ins.opcode = ord(data[self.OPCODE_INDEX]) & self.OPCODE_MASK
            ins.valid = ins.opcode in self.OPCODES
        else:
            # Too short to contain the opcode byte; opcode is left untouched
            ins.valid = False

    def disassemble(self, data):
        '''
        Builds an Instruction object for the given bytes.

        @data - The raw bytes of one instruction; may be empty.

        Returns an Instruction whose valid attribute tells if the opcode was
        recognized. For empty data the instruction is returned as created,
        i.e. not valid.
        '''
        ins = Instruction(size=self.INSTRUCTION_SIZE, endianess=self.ENDIANESS)

        if data:
            data = self.pre_processor(data)
            self.disassemble_opcode(ins, data)
            self.validate(ins)

        return ins
class MIPS(Disassembler):
    '''
    Big endian MIPS opcode recognizer.

    The opcode is taken from the upper six bits of the opcode byte, so the
    mask and every table entry below are the 6-bit opcode number shifted
    left by two.
    '''

    OPCODE_MASK = 0xFC      # 0x3F << 2

    OPCODES = [
        0x10,   # beq   (0x04 << 2)
        0x14,   # bne   (0x05 << 2)
        0x24,   # addiu (0x09 << 2)
        0x20,   # addi  (0x08 << 2)
        0x34,   # ori   (0x0D << 2)
        0x8C,   # lw    (0x23 << 2)
        0xAC,   # sw    (0x2B << 2)
        0x3C,   # lui   (0x0F << 2)
    ]
class MIPSEL(MIPS):
    '''
    Little endian MIPS opcode recognizer.

    Uses the same mask and opcode table as MIPS; only the endianess differs,
    which makes the inherited pre_processor() reverse each instruction's
    bytes before the opcode byte is read.
    '''

    ENDIANESS = Instruction.LITTLE
class ARMEB(Disassembler):
    '''
    Big endian ARM opcode recognizer.

    An instruction is accepted when the upper nibble of its opcode byte
    is 0xE.
    '''

    # Keep only the upper nibble of the opcode byte
    OPCODE_MASK = 0xF0

    OPCODES = [0xE0]
class ARM(ARMEB):
    '''
    Little endian ARM opcode recognizer.

    Uses the same mask and opcode table as ARMEB; only the endianess differs,
    which makes the inherited pre_processor() reverse each instruction's
    bytes before the opcode byte is read.
    '''

    ENDIANESS = Instruction.LITTLE
class OpcodeValidator(Module):
    '''
    Module that scans target files for short runs of plausible machine
    instructions and ranks every Disassembler subclass defined in this file
    by its share of the matches.
    '''

    MIN_CONFIDENCE = 0.0

    TITLE = 'Opcode'

    CLI = [
        Option(short='A',
               long='opcodes',
               kwargs={'enabled' : True},
               description='Scan files for executable opcodes'),
        Option(short='a',
               long='unaligned',
               kwargs={'honor_instruction_alignment' : False},
               description='Scan for opcodes at unaligned offsets'),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='honor_instruction_alignment', default=True),
    ]

    def init(self):
        '''
        Creates one instance of every Disassembler subclass found in this
        module and selects the output columns for the current verbosity.

        Returns None.
        '''
        self.disassemblers = {}

        for (name, cls) in inspect.getmembers(sys.modules[__name__], inspect.isclass):
            # Check the class itself rather than instantiating every class
            # visible in this module (imported ones and this module class
            # included) just to find out what it is.
            if not issubclass(cls, Disassembler) or cls is Disassembler:
                continue

            try:
                self.disassemblers[cls()] = 0
            except TypeError:
                # A subclass that cannot be built without arguments is skipped
                pass

        if self.config.verbose:
            self.HEADER[-1] = 'EXECUTABLE CODE'
        else:
            self.HEADER = ['CONFIDENCE', 'FILE ARCHITECTURE']
            self.HEADER_FORMAT = '%s %s'
            self.RESULT = ['confidence', 'description']
            self.RESULT_FORMAT = '%-7.2f %s'

    def run(self):
        '''
        Scans every target file. In non-verbose mode one summary result is
        reported per disassembler whose confidence exceeds MIN_CONFIDENCE;
        in verbose mode the individual hits are displayed by search() itself.

        Returns None.
        '''
        for fp in self.config.target_files:
            self.header()

            for disassembler in self.search(fp):
                if not self.config.verbose and disassembler.confidence > self.MIN_CONFIDENCE:
                    desc = self.build_description_string(disassembler)
                    self.result(description=desc, confidence=disassembler.confidence, file=fp, plot=False)

            self.footer()

    def build_description_string(self, disassembler):
        '''
        Builds the description displayed for a disassembler.

        @disassembler - A Disassembler instance.

        Returns the class name of the disassembler followed by its endianess.
        '''
        return disassembler.__class__.__name__ + " executable code, endianess: " + disassembler.ENDIANESS

    def is_valid_sequence(self, disassembler, data):
        '''
        Checks that data consists solely of instructions the disassembler recognizes.

        @disassembler - The Disassembler instance to check the data with.
        @data         - The bytes to check, split into INSTRUCTION_SIZE pieces.

        Returns True if every piece disassembles to a valid instruction.
        Returns False otherwise, and for data made up of NULL bytes only
        (which includes empty data).
        '''
        # Ignore blocks of NULL bytes
        if data == "\x00" * len(data):
            return False

        j = 0
        while j < len(data):
            ins = disassembler.disassemble(data[j:j+disassembler.INSTRUCTION_SIZE])
            if not ins.valid:
                return False
            j += disassembler.INSTRUCTION_SIZE

        return True

    def search(self, fp):
        '''
        Scans one file for instruction sequences and scores each disassembler.

        Hits are counted per disassembler and per position inside a four byte
        group. A disassembler's confidence is the percentage of all hits at its
        best position that are its own.

        @fp - The file object to scan; must provide read_block() and tell().

        Returns the disassemblers sorted by confidence, highest first.
        Disassemblers without any hit are included with a confidence of 0.
        '''
        winners = {}
        results = {}
        total_hits = {}
        offset_range = range(0, 4)

        for j in offset_range:
            total_hits[j] = 0

        for disassembler in self.disassemblers:
            # The disassembler objects are shared by all target files; do not
            # let the score of a previously scanned file leak into this one.
            disassembler.confidence = 0.0
            results[disassembler] = {}
            for j in offset_range:
                results[disassembler][j] = 0

        while True:
            (data, dlen) = fp.read_block()
            if not data:
                break

            # Every block has to be walked from its first byte. The index used
            # to be a left-over of the initialization loops above, so the scan
            # started at 3 and later blocks of the same size were never scanned.
            i = 0
            while i < dlen:
                for j in offset_range:
                    offset = i + j

                    for disassembler in self.disassemblers:
                        if self.honor_instruction_alignment and (offset % disassembler.INSTRUCTION_SIZE):
                            continue

                        ins = disassembler.disassemble(data[offset:offset+disassembler.INSTRUCTION_SIZE])
                        if not ins.valid:
                            continue

                        # A single known opcode means little; require a whole run of them
                        sequence_size = disassembler.MIN_INSTRUCTION_COUNT * disassembler.INSTRUCTION_SIZE
                        if self.is_valid_sequence(disassembler, data[offset:offset+sequence_size]):
                            desc = self.build_description_string(disassembler)
                            # fp.tell() - dlen is used as the file offset at which this block starts
                            self.result(description=desc, offset=(fp.tell()-dlen+offset), file=fp, display=self.config.verbose)
                            results[disassembler][j] += 1
                            total_hits[j] += 1

                i += len(offset_range)

        for (disassembler, offset_results) in iterator(results):
            # The position with the most hits for this disassembler
            winning_offset = sorted(offset_results, key=offset_results.get, reverse=True)[0]

            if total_hits[winning_offset] > 0 and offset_results[winning_offset] > 0:
                disassembler.confidence = ((offset_results[winning_offset] / float(total_hits[winning_offset])) * 100)

            winners[disassembler] = disassembler.confidence

        return sorted(winners, key=winners.get, reverse=True)
import magic import magic
import binwalk.core.parser import binwalk.core.parser
import binwalk.core.filter
import binwalk.core.smart import binwalk.core.smart
from binwalk.core.compat import * from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg from binwalk.core.module import Module, Option, Kwarg
...@@ -14,63 +13,41 @@ class Signature(Module): ...@@ -14,63 +13,41 @@ class Signature(Module):
long='signature', long='signature',
kwargs={'enabled' : True}, kwargs={'enabled' : True},
description='Scan target file(s) for file signatures'), description='Scan target file(s) for file signatures'),
Option(short='R',
long='raw-bytes',
kwargs={'raw_bytes' : None},
type=str,
description='Scan target file(s) for the specified sequence of bytes'),
Option(short='A',
long='opcodes',
kwargs={'enabled' : True, 'search_for_opcodes' : True},
description='Scan target file(s) for common executable opcodes'),
Option(short='m', Option(short='m',
long='magic', long='magic',
kwargs={'magic_files' : []}, kwargs={'magic_files' : []},
type=list, type=list,
dtype='file', dtype='file',
description='Specify a custom magic file to use'), description='Specify a custom magic file to use'),
Option(short='R',
long='raw-bytes',
kwargs={'raw_bytes' : None},
type=str,
description='Specify a sequence of bytes to search for'),
Option(short='b', Option(short='b',
long='dumb', long='dumb',
kwargs={'dumb_scan' : True}, kwargs={'dumb_scan' : True},
description='Disable smart signature keywords'), description='Disable smart signature keywords'),
Option(short='I',
long='show-invalid',
kwargs={'show_invalid' : True},
description='Show results marked as invalid'),
Option(short='x',
long='exclude',
kwargs={'exclude_filters' : []},
type=list,
dtype=str.__name__,
description='Exclude results that match <str>'),
Option(short='y',
long='include',
kwargs={'include_filters' : []},
type=list,
dtype=str.__name__,
description='Only show results that match <str>'),
] ]
KWARGS = [ KWARGS = [
Kwarg(name='enabled', default=False), Kwarg(name='enabled', default=False),
Kwarg(name='dumb_scan', default=False),
Kwarg(name='show_invalid', default=False),
Kwarg(name='raw_bytes', default=None), Kwarg(name='raw_bytes', default=None),
Kwarg(name='search_for_opcodes', default=False),
Kwarg(name='dumb_scan', default=False),
Kwarg(name='magic_files', default=[]), Kwarg(name='magic_files', default=[]),
Kwarg(name='exclude_filters', default=[]),
Kwarg(name='include_filters', default=[]),
] ]
MAGIC_FLAGS = magic.MAGIC_NO_CHECK_TEXT | magic.MAGIC_NO_CHECK_ENCODING | magic.MAGIC_NO_CHECK_APPTYPE | magic.MAGIC_NO_CHECK_TOKENS MAGIC_FLAGS = magic.MAGIC_NO_CHECK_TEXT | magic.MAGIC_NO_CHECK_ENCODING | magic.MAGIC_NO_CHECK_APPTYPE | magic.MAGIC_NO_CHECK_TOKENS
def init(self): def init(self):
# Create SmartSignature and MagicParser class instances. These are mostly for internal use. # Create SmartSignature and MagicParser class instances. These are mostly for internal use.
self.filter = binwalk.core.filter.MagicFilter() self.smart = binwalk.core.smart.SmartSignature(self.config.filter, ignore_smart_signatures=self.dumb_scan)
self.smart = binwalk.core.smart.SmartSignature(self.filter, ignore_smart_signatures=self.dumb_scan) self.parser = binwalk.core.parser.MagicParser(self.config.filter, self.smart)
self.parser = binwalk.core.parser.MagicParser(self.filter, self.smart)
# Set any specified include/exclude filters
for regex in self.exclude_filters:
self.filter.exclude(regex)
for regex in self.include_filters:
self.filter.include(regex)
# If a raw byte sequence was specified, build a magic file from that instead of using the default magic files # If a raw byte sequence was specified, build a magic file from that instead of using the default magic files
if self.raw_bytes is not None: if self.raw_bytes is not None:
...@@ -78,6 +55,12 @@ class Signature(Module): ...@@ -78,6 +55,12 @@ class Signature(Module):
# Use the system default magic file if no other was specified # Use the system default magic file if no other was specified
if not self.magic_files: if not self.magic_files:
if self.search_for_opcodes:
self.magic_files = [
self.config.settings.paths['user'][self.config.settings.BINARCH_MAGIC_FILE],
self.config.settings.paths['system'][self.config.settings.BINARCH_MAGIC_FILE],
]
else:
# Append the user's magic file first so that those signatures take precedence # Append the user's magic file first so that those signatures take precedence
self.magic_files = [ self.magic_files = [
self.config.settings.paths['user'][self.config.settings.BINWALK_MAGIC_FILE], self.config.settings.paths['user'][self.config.settings.BINWALK_MAGIC_FILE],
...@@ -96,7 +79,6 @@ class Signature(Module): ...@@ -96,7 +79,6 @@ class Signature(Module):
''' '''
Called automatically by self.result. Called automatically by self.result.
''' '''
if not self.show_invalid:
if not r.description: if not r.description:
r.valid = False r.valid = False
...@@ -133,7 +115,7 @@ class Signature(Module): ...@@ -133,7 +115,7 @@ class Signature(Module):
# Pass the data to libmagic, and split out multiple results into a list # Pass the data to libmagic, and split out multiple results into a list
magic_result = self.magic.buffer(candidate_data) magic_result = self.magic.buffer(candidate_data)
if self.filter.valid_magic_result(magic_result): if self.config.filter.valid_result(magic_result):
# The smart filter parser returns a binwalk.core.module.Result object # The smart filter parser returns a binwalk.core.module.Result object
r = self.smart.parse(magic_result) r = self.smart.parse(magic_result)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment