Performance enhancements for entropy analysis code

5faae704 · devttys0 · 61624fc0 · 5faae704 · 5faae704
Commit 5faae704 authored Nov 12, 2014 by devttys0
Hide whitespace changes
Inline Side-by-side

Showing with 23 additions and 17 deletions

magic.py src/binwalk/core/magic.py +5 -12

entropy.py src/binwalk/modules/entropy.py +18 -5

No files found.
--- a/src/binwalk/core/magic.py
+++ b/src/binwalk/core/magic.py
@@ -23,33 +23,26 @@ class SignatureTag(object):
        for (k,v) in binwalk.core.compat.iterator(kwargs):
            setattr(self, k, v)
-class SignatureResult(object):
+class SignatureResult(binwalk.core.module.Result):
    '''
    Container class for signature results.
    '''
    def __init__(self, **kwargs):
-        # These are set by signature keyword tags
+        # These are set by signature keyword tags.
+        # Keyword tags can also set any other object attributes,
+        # including those in binwalk.core.module.Result.
        self.jump = 0
        self.many = False
-        self.size = 0
-        self.name = None
-        self.offset = 0
        self.adjust = 0
        self.strlen = 0
        self.string = False
        self.invalid = False
-        self.extract = True
        # These are set by code internally
        self.id = 0
-        self.file = None
-        self.valid = True
-        self.display = True
-        self.description = ""
        # Kwargs overrides the defaults set above
-        for (k,v) in binwalk.core.compat.iterator(kwargs):
+        super(self.__class__, self).__init__(**kwargs)
-            setattr(self, k, v)
        self.valid = (not self.invalid)

--- a/src/binwalk/modules/entropy.py
+++ b/src/binwalk/modules/entropy.py
@@ -3,6 +3,7 @@
 import os
 import math
 import zlib
+import numpy as np
 import binwalk.core.common
 from binwalk.core.compat import *
 from binwalk.core.module import Module, Option, Kwarg
@@ -21,19 +22,21 @@ class Entropy(Module):
    COLORS = ['r', 'g', 'c', 'b', 'm']
    DEFAULT_BLOCK_SIZE = 1024
+    DEFAULT_DATA_POINTS = 2048
    TITLE = "Entropy Analysis"
    ORDER = 8
+    # TODO: Add --dpoints option to set the number of data points?
    CLI = [
            Option(short='E',
                   long='entropy',
                   kwargs={'enabled' : True},
                   description='Calculate file entropy'),
            Option(short='H',
-                   long='zlib',
+                   long='fast',
                   kwargs={'use_zlib' : True},
-                   description='Use zlib compression ratios instead of Shannon algorithm'),
+                   description='Use faster, but less detailed, entropy analysis'),
            Option(short='J',
                   long='save',
                   kwargs={'save_plot' : True},
@@ -93,7 +96,7 @@ class Entropy(Module):
            if self.config.block:
                self.block_size = self.config.block
            else:
-                self.block_size = self.DEFAULT_BLOCK_SIZE
+                self.block_size = None
    def run(self):
        for fp in iter(self.next_file, None):
@@ -119,6 +122,16 @@ class Entropy(Module):
        # Clear results from any previously analyzed files
        self.clear(results=True)
+        # If -K was not specified, calculate the block size to create DEFAULT_DATA_POINTS data points
+        if self.block_size is None:
+            block_size = fp.size / self.DEFAULT_DATA_POINTS
+            # Round up to the next multiple of DEFAULT_BLOCK_SIZE (1024)
+            block_size = int(block_size + ((self.DEFAULT_BLOCK_SIZE - block_size) % self.DEFAULT_BLOCK_SIZE))
+        else:
+            block_size = self.block_size
+        binwalk.core.common.debug("Entropy block size (%d data points): %d" % (self.DEFAULT_DATA_POINTS, block_size))
        while True:
            file_offset = fp.tell()
@@ -128,13 +141,13 @@ class Entropy(Module):
            i = 0
            while i < dlen:
-                entropy = self.algorithm(data[i:i+self.block_size])
+                entropy = self.algorithm(data[i:i+block_size])
                r = self.result(offset=(file_offset + i),
                                file=fp,
                                entropy=entropy,
                                description=("%f" % entropy),
                                display=self.display_results)
-                i += self.block_size
+                i += block_size
        if self.do_plot:
            self.plot_entropy(fp.name)