Commit 5faae704 by devttys0

Performance enhancements for entropy analysis code

parent 61624fc0
...@@ -23,33 +23,26 @@ class SignatureTag(object): ...@@ -23,33 +23,26 @@ class SignatureTag(object):
for (k,v) in binwalk.core.compat.iterator(kwargs): for (k,v) in binwalk.core.compat.iterator(kwargs):
setattr(self, k, v) setattr(self, k, v)
class SignatureResult(object): class SignatureResult(binwalk.core.module.Result):
''' '''
Container class for signature results. Container class for signature results.
''' '''
def __init__(self, **kwargs): def __init__(self, **kwargs):
# These are set by signature keyword tags # These are set by signature keyword tags.
# Keyword tags can also set any other object attributes,
# including those in binwalk.core.module.Result.
self.jump = 0 self.jump = 0
self.many = False self.many = False
self.size = 0
self.name = None
self.offset = 0
self.adjust = 0 self.adjust = 0
self.strlen = 0 self.strlen = 0
self.string = False self.string = False
self.invalid = False self.invalid = False
self.extract = True
# These are set by code internally # These are set by code internally
self.id = 0 self.id = 0
self.file = None
self.valid = True
self.display = True
self.description = ""
# Kwargs overrides the defaults set above # Kwargs overrides the defaults set above
for (k,v) in binwalk.core.compat.iterator(kwargs): super(self.__class__, self).__init__(**kwargs)
setattr(self, k, v)
self.valid = (not self.invalid) self.valid = (not self.invalid)
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import os import os
import math import math
import zlib import zlib
import numpy as np
import binwalk.core.common import binwalk.core.common
from binwalk.core.compat import * from binwalk.core.compat import *
from binwalk.core.module import Module, Option, Kwarg from binwalk.core.module import Module, Option, Kwarg
...@@ -21,19 +22,21 @@ class Entropy(Module): ...@@ -21,19 +22,21 @@ class Entropy(Module):
COLORS = ['r', 'g', 'c', 'b', 'm'] COLORS = ['r', 'g', 'c', 'b', 'm']
DEFAULT_BLOCK_SIZE = 1024 DEFAULT_BLOCK_SIZE = 1024
DEFAULT_DATA_POINTS = 2048
TITLE = "Entropy Analysis" TITLE = "Entropy Analysis"
ORDER = 8 ORDER = 8
# TODO: Add --dpoints option to set the number of data points?
CLI = [ CLI = [
Option(short='E', Option(short='E',
long='entropy', long='entropy',
kwargs={'enabled' : True}, kwargs={'enabled' : True},
description='Calculate file entropy'), description='Calculate file entropy'),
Option(short='H', Option(short='H',
long='zlib', long='fast',
kwargs={'use_zlib' : True}, kwargs={'use_zlib' : True},
description='Use zlib compression ratios instead of Shannon algorithm'), description='Use faster, but less detailed, entropy analysis'),
Option(short='J', Option(short='J',
long='save', long='save',
kwargs={'save_plot' : True}, kwargs={'save_plot' : True},
...@@ -93,7 +96,7 @@ class Entropy(Module): ...@@ -93,7 +96,7 @@ class Entropy(Module):
if self.config.block: if self.config.block:
self.block_size = self.config.block self.block_size = self.config.block
else: else:
self.block_size = self.DEFAULT_BLOCK_SIZE self.block_size = None
def run(self): def run(self):
for fp in iter(self.next_file, None): for fp in iter(self.next_file, None):
...@@ -119,6 +122,16 @@ class Entropy(Module): ...@@ -119,6 +122,16 @@ class Entropy(Module):
# Clear results from any previously analyzed files # Clear results from any previously analyzed files
self.clear(results=True) self.clear(results=True)
# If -K was not specified, calculate the block size to create DEFAULT_DATA_POINTS data points
if self.block_size is None:
block_size = fp.size / self.DEFAULT_DATA_POINTS
# Round up to the nearest DEFAULT_BLOCK_SIZE (1024)
block_size = int(block_size + ((self.DEFAULT_BLOCK_SIZE - block_size) % self.DEFAULT_BLOCK_SIZE))
else:
block_size = self.block_size
binwalk.core.common.debug("Entropy block size (%d data points): %d" % (self.DEFAULT_DATA_POINTS, block_size))
while True: while True:
file_offset = fp.tell() file_offset = fp.tell()
...@@ -128,13 +141,13 @@ class Entropy(Module): ...@@ -128,13 +141,13 @@ class Entropy(Module):
i = 0 i = 0
while i < dlen: while i < dlen:
entropy = self.algorithm(data[i:i+self.block_size]) entropy = self.algorithm(data[i:i+block_size])
r = self.result(offset=(file_offset + i), r = self.result(offset=(file_offset + i),
file=fp, file=fp,
entropy=entropy, entropy=entropy,
description=("%f" % entropy), description=("%f" % entropy),
display=self.display_results) display=self.display_results)
i += self.block_size i += block_size
if self.do_plot: if self.do_plot:
self.plot_entropy(fp.name) self.plot_entropy(fp.name)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment