Commit 0dfe9bbb by devttys0

Merge pull request #5 from moshekaplan/patch-1

Use sets for doing repeated lookups faster
parents 19cc0bc9 2d0c5cfb
...@@ -16,14 +16,14 @@ class FileStrings(object): ...@@ -16,14 +16,14 @@ class FileStrings(object):
MAX_SPECIAL_CHARS_RATIO = .4 MAX_SPECIAL_CHARS_RATIO = .4
MAX_ENTROPY = 0.9 MAX_ENTROPY = 0.9
LETTERS = [x for x in string.letters] LETTERS = set(string.letters)
NUMBERS = [x for x in string.digits] NUMBERS = set(string.digits)
PRINTABLE = [x for x in string.printable] PRINTABLE = set(string.printable)
WHITESPACE = [x for x in string.whitespace] WHITESPACE = set(string.whitespace)
PUNCTUATION = [x for x in string.punctuation] PUNCTUATION = set(string.punctuation)
NEWLINES = ['\r', '\n', '\x0b', '\x0c'] NEWLINES = set(['\r', '\n', '\x0b', '\x0c'])
VOWELS = ['A', 'E', 'I', 'O', 'U', 'a', 'e', 'i', 'o', 'u'] VOWELS = set(['A', 'E', 'I', 'O', 'U', 'a', 'e', 'i', 'o', 'u'])
NON_ALPHA_EXCEPTIONS = ['%', '.', '/', '-', '_'] NON_ALPHA_EXCEPTIONS = set(['%', '.', '/', '-', '_'])
BRACKETED = { BRACKETED = {
'[' : ']', '[' : ']',
'<' : '>', '<' : '>',
...@@ -156,8 +156,8 @@ class FileStrings(object): ...@@ -156,8 +156,8 @@ class FileStrings(object):
''' '''
Returns True if data has a vowel in it, otherwise returns False. Returns True if data has a vowel in it, otherwise returns False.
''' '''
for i in self.VOWELS: for vowel in self.VOWELS:
if i in data: if vowel in data:
return True return True
return False return False
...@@ -166,8 +166,8 @@ class FileStrings(object): ...@@ -166,8 +166,8 @@ class FileStrings(object):
Returns the number of english letters in data. Returns the number of english letters in data.
''' '''
c = 0 c = 0
for i in range(0, len(data)): for char in data:
if data[i] in self.LETTERS: if char in self.LETTERS:
c += 1 c += 1
return c return c
...@@ -185,19 +185,20 @@ class FileStrings(object): ...@@ -185,19 +185,20 @@ class FileStrings(object):
''' '''
Returns the number of non-english letters in data. Returns the number of non-english letters in data.
''' '''
c = 0 count = 0
dlen = len(data) dlen = len(data)
# No exceptions for very short strings # No exceptions for very short strings
if dlen <= self.SUSPECT_STRING_LENGTH: if dlen <= self.SUSPECT_STRING_LENGTH:
exceptions = [] exceptions = []
else: else:
exceptions = self.NON_ALPHA_EXCEPTIONS exceptions = set(self.NON_ALPHA_EXCEPTIONS)
for i in range(0, len(data)): non_alphanumeric = self.LETTERS | self.NUMBERS
if data[i] not in self.LETTERS and data[i] not in self.NUMBERS and data[i] not in exceptions: for char in data:
c += 1 if char not in non_alphanumeric and char not in exceptions:
return c count += 1
return count
def _too_many_special_chars(self, data): def _too_many_special_chars(self, data):
''' '''
...@@ -445,4 +446,3 @@ class Strings(object): ...@@ -445,4 +446,3 @@ class Strings(object):
del self.plugins del self.plugins
return results return results
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment