Fixed potential self-overlapping signatures

37bb4799 · devttys0 · 46ac01e1 · 37bb4799 · 37bb4799 · 37bb4799
Commit 37bb4799 authored Nov 17, 2014 by devttys0
10 changed files
--- a/src/binwalk/core/magic.py
+++ b/src/binwalk/core/magic.py
@@ -7,6 +7,7 @@ __all__ = ['Magic']
 import re
 import struct
 import datetime
+import binwalk.core.common
 import binwalk.core.compat

 class ParserException(Exception):
@@ -15,14 +16,6 @@ class ParserException(Exception):
    '''
    pass

-class SignatureTag(object):
-    '''
-    Conatiner class for each signature tag entry.
-    '''
-    def __init__(self, **kwargs):
-        for (k,v) in binwalk.core.compat.iterator(kwargs):
-            setattr(self, k, v)
-
 class SignatureResult(binwalk.core.module.Result):
    '''
    Container class for signature results.
@@ -38,6 +31,7 @@ class SignatureResult(binwalk.core.module.Result):
        self.string = False
        self.invalid = False
        self.once = False
+        self.overlap = False

        # These are set by code internally
        self.id = 0
@@ -63,7 +57,7 @@ class SignatureLine(object):

        Returns None.
        '''
-        self.tags = []
+        self.tags = {}
        self.text = line
        self.regex = False

@@ -255,7 +249,7 @@ class SignatureLine(object):
                    v = True

                # Create a new SignatureTag instance and append it to self.tags
-                self.tags.append(SignatureTag(name=n, value=v))
+                self.tags[n] = v

            # Remove all tags from the printable format string
            self.format = retag.sub('', self.format).strip()
@@ -298,33 +292,34 @@ class Signature(object):
        # Strings and single byte signatures are taken at face value;
        # multi-byte integer values are turned into regex strings based
        # on their data type size and endianess.
-        #
-        # Regex types are already compiled expressions.
        if line.type == 'regex':
+            # Regex types are already compiled expressions.
+            # Note that since re.finditer is used, unless the specified
+            # regex accounts for it, overlapping signatures will be ignored.
            return line.value
        if line.type == 'string':
-            restr = re.escape(line.value)
+            restr = line.value
        elif line.size == 1:
-            restr = re.escape(chr(line.value))
+            restr = chr(line.value)
        elif line.size == 2:
            if line.endianess == '<':
-                restr = re.escape(chr(line.value & 0xFF) + chr(line.value >> 8))
+                restr = chr(line.value & 0xFF) + chr(line.value >> 8)
            elif line.endianess == '>':
-                restr = re.escape(chr(line.value >> 8) + chr(line.value & 0xFF))
+                restr = chr(line.value >> 8) + chr(line.value & 0xFF)
        elif line.size == 4:
            if line.endianess == '<':
-                restr = re.escape(chr(line.value & 0xFF) +
+                restr =          (chr(line.value & 0xFF) +
                                  chr((line.value >> 8) & 0xFF) +
                                  chr((line.value >> 16) & 0xFF) +
                                  chr(line.value >> 24))
            elif line.endianess == '>':
-                restr = re.escape(chr(line.value >> 24) +
+                restr =          (chr(line.value >> 24) +
                                  chr((line.value >> 16) & 0xFF) +
                                  chr((line.value >> 8) & 0xFF) +
                                  chr(line.value & 0xFF))
        elif line.size == 8:
            if line.endianess == '<':
-                restr = re.escape(chr(line.value & 0xFF) +
+                restr =          (chr(line.value & 0xFF) +
                                  chr((line.value >> 8) & 0xFF) +
                                  chr((line.value >> 16) & 0xFF) +
                                  chr((line.value >> 24) & 0xFF) +
@@ -333,7 +328,7 @@ class Signature(object):
                                  chr((line.value >> 48) & 0xFF) +
                                  chr(line.value >> 56))
            elif line.endianess == '>':
-                restr = re.escape(chr(line.value >> 56) +
+                restr =          (chr(line.value >> 56) +
                                  chr((line.value >> 48) & 0xFF) +
                                  chr((line.value >> 40) & 0xFF) +
                                  chr((line.value >> 32) & 0xFF) +
@@ -342,7 +337,21 @@ class Signature(object):
                                  chr((line.value >> 8) & 0xFF) +
                                  chr(line.value & 0xFF))

-        return re.compile(restr)
+        # Since re.finditer is used on a per-signature basis, signatures should be crafted carefully
+        # to ensure that they aren't potentially self-overlapping (e.g., a signature of "ABCDAB" could
+        # be confused by the byte sequence "ABCDABCDAB"). The longer the signature, the less likely an
+        # unintentional overlap is, although files could still be maliciously crafted to cause false
+        # negative results.
+        #
+        # Thus, unless a signature has been explicitly marked as knowingly overlapping ('{overlap}'),
+        # spit out a warning about any self-overlapping signatures.
+        if not binwalk.core.compat.has_key(line.tags, 'overlap'):
+            for i in range(1, line.size):
+                if restr[i:] == restr[0:(line.size-i)]:
+                    binwalk.core.common.warning("Signature '%s' is a self-overlapping signature!" % line.text)
+                    break
+
+        return re.compile(re.escape(restr))

    def append(self, line):
        '''
@@ -377,6 +386,7 @@ class Magic(object):
        self.signatures = []
        # A set of signatures with the 'once' keyword that have already been displayed once
        self.display_once = set()
+        self.dirty = True

        self.show_invalid = invalid
        self.includes = [re.compile(x) for x in include]
@@ -557,7 +567,7 @@ class Magic(object):
                    if line.value is None:
                        # Check to see if this is a string whose size is known and has been specified on a previous
                        # signature line.
-                        if binwalk.core.compat.has_key(tags, 'strlen') and [x for x in line.tags if x.name == 'string']:
+                        if binwalk.core.compat.has_key(tags, 'strlen') and binwalk.core.compat.has_key(line.tags, 'string'):
                            dvalue = self.data[start:(start+tags['strlen'])]
                        # Else, just terminate the string at the first newline, carriage return, or NULL byte
                        else:
@@ -633,23 +643,23 @@ class Magic(object):

                    # Process tag keywords specified in the signature line. These have already been parsed out of the
                    # original format string so that they can be processed separately from the printed description string.
-                    for tag in line.tags:
+                    for (tag_name, tag_value) in binwalk.core.compat.iterator(line.tags):
                        # If the tag value is a string, try to format it
-                        if isinstance(tag.value, str):
+                        if isinstance(tag_value, str):
                            # Generate the tuple for the format string
                            dvalue_tuple = ()
-                            for x in self.fmtstr.finditer(tag.value):
+                            for x in self.fmtstr.finditer(tag_value):
                                dvalue_tuple += (dvalue,)

                            # Format the tag string
-                            tags[tag.name] = tag.value % dvalue_tuple
+                            tags[tag_name] = tag_value % dvalue_tuple
                        # Else, just use the raw tag value
                        else:
-                            tags[tag.name] = tag.value
+                            tags[tag_name] = tag_value

                        # Some tag values are intended to be integer values, so try to convert them as such
                        try:
-                            tags[tag.name] = int(tags[tag.name], 0)
+                            tags[tag_name] = int(tags[tag_name], 0)
                        except KeyboardInterrupt as e:
                            raise e
                        except Exception as e:
@@ -722,7 +732,6 @@ class Magic(object):
        results = []
        matched_offsets = set()

-        # It's expensive in Python to pass large strings around to various functions.
        # Since data can potentially be quite a large string, make it available to other
        # methods via a class attribute so that it doesn't need to be passed around to
        # different methods over and over again.
@@ -730,14 +739,14 @@ class Magic(object):

        # If dlen wasn't specified, search all of self.data
        if dlen is None:
-            dlen = len(self.data)
+            dlen = len(data)

-        # Loop through each loaded signature
        for signature in self.signatures:
            # Use regex to search the data block for potential signature matches (fast)
-            for match in signature.regex.finditer(self.data):
+            for match in signature.regex.finditer(data):
                # Take the offset of the start of the signature into account
                offset = match.start() - signature.offset
+
                # Signatures are ordered based on the length of their magic bytes (largest first).
                # If this offset has already been matched to a previous signature, ignore it unless
                # self.show_invalid has been specified. Also ignore obviously invalid offsets (<1)

--- a/src/binwalk/magic/archives
+++ b/src/binwalk/magic/archives
@@ -125,15 +125,24 @@
 0    string        \351,\001JAM    JAM archive

 # LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
-2    string        -lzs-        LHa 2.x? archive data [lzs] [NSRL|LHA2]
-2    string        -lh\40-      LHa 2.x? archive data [lh ] [NSRL|LHA2]
-2    string        -lhd-        LHa 2.x? archive data [lhd] [NSRL|LHA2]
-2    string        -lh2-        LHa 2.x? archive data [lh2] [NSRL|LHA2]
-2    string        -lh3-        LHa 2.x? archive data [lh3] [NSRL|LHA2]
-2    string        -lh4-        LHa (2.x) archive data [lh4] [NSRL|LHA2]
-2    string        -lh5-        LHa (2.x) archive data [lh5] [NSRL|LHA2]
-2    string        -lh6-        LHa (2.x) archive data [lh6] [NSRL|LHA2]
-2    string        -lh7-        LHa (2.x) archive data [lh7] [NSRL|LHA2]
+2    string        -lzs         LHa 2.x? archive data [lzs] [NSRL|LHA2]
+>6   string        !-           {invalid}
+2    string        -lh\40       LHa 2.x? archive data [lh ] [NSRL|LHA2]
+>6   string        !-           {invalid}
+2    string        -lhd         LHa 2.x? archive data [lhd] [NSRL|LHA2]
+>6   string        !-           {invalid}
+2    string        -lh2         LHa 2.x? archive data [lh2] [NSRL|LHA2]
+>6   string        !-           {invalid}
+2    string        -lh3         LHa 2.x? archive data [lh3] [NSRL|LHA2]
+>6   string        !-           {invalid}
+2    string        -lh4         LHa (2.x) archive data [lh4] [NSRL|LHA2]
+>6   string        !-           {invalid}
+2    string        -lh5         LHa (2.x) archive data [lh5] [NSRL|LHA2]
+>6   string        !-           {invalid}
+2    string        -lh6         LHa (2.x) archive data [lh6] [NSRL|LHA2]
+>6   string        !-           {invalid}
+2    string        -lh7         LHa (2.x) archive data [lh7] [NSRL|LHA2]
+>6   string        !-           {invalid}


 # cpio archives
@@ -290,7 +299,8 @@
 >56     leshort         1               \b, 1 registry entry
 >56     leshort         >1              \b, %u registry entries

-0       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
+0       string  \x00\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20        LBR archive data
+>12     string  !\x00\x00                                               {invalid}

 # Parity archive reconstruction file, the 'par' file format now used on Usenet.
 0       string          PAR\0   PARity archive data

--- a/src/binwalk/magic/bootloaders
+++ b/src/binwalk/magic/bootloaders
@@ -2,7 +2,8 @@
 #---------------------------Bootloaders--------------------------------

 # CFE bootloader
-0	string	CFE1CFE1	CFE boot loader
+0	string	CFE1	    CFE boot loader
+>4  string  !CFE1       {invalid}
 >40	string	CFE1CFE1	{invalid}

 # U-Boot boot loader

--- a/src/binwalk/magic/compressed
+++ b/src/binwalk/magic/compressed
@@ -2,7 +2,8 @@
 #------------------Compression Formats-----------------------------

 # AFX compressed files (Wolfram Kleff)
-2    string        -afx-        AFX compressed file data
+2    string        -afx         AFX compressed file data
+>6   string        !-           {invalid}

 # bzip2
 0    string BZh91AY&SY    bzip2 compressed data, block size = 900k

--- a/src/binwalk/magic/crypto
+++ b/src/binwalk/magic/crypto
 # Type: OpenSSL certificates/key files
 # From: Nicolas Collignon <tsointsoin@gmail.com>

-0       string  -----BEGIN\x20CERTIFICATE-----      PEM certificate
+0       string  -----BEGIN\x20CERTIFICATE           PEM certificate
+>22     string  !-----                              {invalid}
 0       string  -----BEGIN\x20CERTIFICATE\x20REQ    PEM certificate request
 0       string  -----BEGIN\x20RSA\x20PRIVATE        PEM RSA private key
 0       string  -----BEGIN\x20DSA\x20PRIVATE        PEM DSA private key

--- a/src/binwalk/magic/filesystems
+++ b/src/binwalk/magic/filesystems
@@ -573,7 +573,7 @@
 >8      belong      x               {jump:%d}

 # Wind River MemFS file system, found in some VxWorks devices
-0       string    owowowowowowowowowowowowowowow    Wind River management filesystem,
+0       string    owowowowowowowowowowowowowowow    Wind River management filesystem,{overlap}
 >30     string    !ow                               {invalid},
 >32     belong    1                                 compressed,
 >32     belong    2                                 plain text,

--- a/src/binwalk/magic/firmware
+++ b/src/binwalk/magic/firmware
@@ -163,7 +163,8 @@
 >23     byte            x               header checksum: 0x%X

 # PackImg tag, somtimes used as a delimiter between the kernel and rootfs in firmware images.
-0       string        --PaCkImGs--    PackImg section delimiter tag,
+0       string        --PaCkImGs      PackImg section delimiter tag,
+>10     string        !--             {invalid}
 # If the size in both big and little endian is greater than 512MB, consider this a false positive
 >16     ulelong       >0x20000000
 >>16    ubelong       >0x20000000    {invalid}
@@ -517,7 +518,8 @@
 >18         beshort+16 x                        data offset from start of block: %d

 # Obfuscated Arcadyan firmware
-0x68    belong  0x00D50800                      Obfuscated Arcadyan firmware,
+0x68    string  \x00\xD5\x08                    Obfuscated Arcadyan firmware,
+>0x6B   byte    !0                              {invalid}
 >0      ubelong x                               signature bytes: 0x%X,
 >0x70   string  !\x00\x00\x00\x00\x00\x00\x00   {invalid},
 >0x70   belong  0x00000000                      see https://github.com/devttys0/wrt120n/deobfuscator

--- a/src/binwalk/magic/misc
+++ b/src/binwalk/magic/misc
@@ -41,7 +41,8 @@
 # CodeGate 2011 http://nopsrus.blogspot.com/2013/05/codegate-ctf-2011-binary-100-points.html
 0    string    \x23\x40\x7e\x5e    Windows Script Encoded Data (screnc.exe)

-0   string     /home/              Unix home path string:
+0   string     /home               Unix home path string:
+>5  string     !/                  {invalid}
 >0  string     x                   "%s"

 0   string      neighbor           Neighborly text,

--- a/src/binwalk/magic/vxworks
+++ b/src/binwalk/magic/vxworks

 # Signatures to identify the start of a VxWorks symbol table
-8       string      \x00\x00\x05\x00\x00\x00\x00\x00    VxWorks symbol table, big endian,
+8       string      \x00\x00\x05\x00\x00\x00\x00\x00    VxWorks symbol table, big endian,{overlap}
 >4      belong      0                                   {invalid} 
 >4      ubelong     x                                   first entry: [type: function, code address: 0x%X,
 >0      belong      0                                   {invalid}
@@ -33,7 +33,7 @@
 >>152   belong      !0x700
 >>>152  belong      !0x900                              \b, {invalid}

-8       string      \x00\x00\x07\x00\x00\x00\x00\x00    VxWorks symbol table, big endian,
+8       string      \x00\x00\x07\x00\x00\x00\x00\x00    VxWorks symbol table, big endian,{overlap}
 >4      belong      0                                   {invalid}
 >4      ubelong     x                                   first entry: [type: initialized data, code address: 0x%X,
 >0      belong      0                                   {invalid}
@@ -66,7 +66,7 @@
 >>152   belong      !0x700
 >>>152  belong      !0x900                              \b, {invalid}

-8       string      \x00\x00\x09\x00\x00\x00\x00\x00    VxWorks symbol table, big endian,
+8       string      \x00\x00\x09\x00\x00\x00\x00\x00    VxWorks symbol table, big endian,{overlap}
 >4      belong      0                                   {invalid}
 >4      ubelong     x                                   first entry: [type: uninitialized data, code address: 0x%X,
 >0      belong      0                                   {invalid}
@@ -99,7 +99,7 @@
 >>152   belong      !0x700
 >>>152  belong      !0x900                              \b, {invalid}

-8       string      \x00\x05\x00\x00\x00\x00\x00\x00    VxWorks symbol table, little endian,
+8       string      \x00\x05\x00\x00\x00\x00\x00\x00    VxWorks symbol table, little endian,{overlap}
 >4      lelong      0                                   {invalid}
 >4      ulelong     x                                   first entry: [type: function, code address: 0x%X,
 >0      lelong      0                                   {invalid}
@@ -132,7 +132,7 @@
 >>152   lelong      !0x700
 >>>152  lelong      !0x900                              \b, {invalid}

-8       string      \x00\x07\x00\x00\x00\x00\x00\x00    VxWorks symbol table, little endian,
+8       string      \x00\x07\x00\x00\x00\x00\x00\x00    VxWorks symbol table, little endian,{overlap}
 >4      lelong      0                                   {invalid}
 >4      ulelong     x                                   first entry: [type: initialized data, code address: 0x%X,
 >0      lelong      0                                   {invalid}
@@ -165,7 +165,7 @@
 >>152   lelong      !0x700
 >>>152  lelong      !0x900                              \b, {invalid}

-8       string      \x00\x09\x00\x00\x00\x00\x00\x00    VxWorks symbol table, little endian,
+8       string      \x00\x09\x00\x00\x00\x00\x00\x00    VxWorks symbol table, little endian,{overlap}
 >4      lelong      0                                   {invalid}
 >4      ulelong     x                                   first entry: [type: uninitialized data, code address: 0x%X,
 >0      lelong      0                                   {invalid}

--- a/src/binwalk/modules/entropy.py
+++ b/src/binwalk/modules/entropy.py
@@ -3,7 +3,6 @@
 import os
 import math
 import zlib
-import numpy as np
 import binwalk.core.common
 from binwalk.core.compat import *
 from binwalk.core.module import Module, Option, Kwarg