Updated C installer to always build libfuzzy

d51dd6b1 · devttys0 · 5839cd8d · d51dd6b1 · d51dd6b1 · d51dd6b1
Commit d51dd6b1 authored Dec 21, 2013 by devttys0
47 changed files
--- a/easy_install.sh
+++ b/easy_install.sh
@@ -198,11 +198,11 @@ then
 	pyqtgraph
 fi
-if [ "$(python -c 'import ctypes.util; print (ctypes.util.find_library("fuzzy"))')" == "None" ]
+#if [ "$(python -c 'import ctypes.util; print (ctypes.util.find_library("fuzzy"))')" == "None" ]
-then
+#then
-	echo "libfuzzy not installed; building from source..."
+#	echo "libfuzzy not installed; building from source..."
-	libfuzzy
+#	libfuzzy
-fi
+#fi
 # Get and build the firmware mod kit
 fmk

--- a/setup.py
+++ b/setup.py
@@ -2,15 +2,38 @@
 from __future__ import print_function
 import os
 import sys
-import shutil
 from distutils.core import setup
+from distutils.dir_util import remove_tree
 # Python2/3 compliance
 try:
 	raw_input
-except:
+except NameError:
 	raw_input = input
+def cleanup_build_directory():
+	# Requires to chdir into the src directory first
+	try:
+		remove_tree("build")
+	except KeyboardInterrupt as e:
+		raise e
+	except Exception:
+		pass
+def cleanup_module_directory():
+	# Installing doesn't remove old files that may have been deleted from the module.
+	if "install" in sys.argv:
+		try:
+			import binwalk
+			for path in binwalk.__path__:
+				try:
+					remove_tree(path + os.path.sep + "*")
+				except OSError as e:
+					pass
+		except ImportError:
+			pass
+# Change to the binwalk src directory
 def warning(lines, terminate=True, prompt=True):
 	WIDTH = 115
@@ -36,20 +59,7 @@ if "--yes" in sys.argv:
 else:
 	IGNORE_WARNINGS = False
-# Look for old installations of binwalk and remove them to prevent conflicts with the new API
+# cd into the src directory, no matter where setup.py was invoked from
-try:
-	import binwalk
-	for path in binwalk.__path__:
-		if not os.path.exists(os.path.join(path, "core")):
-			try:
-				print ("Cleaning up old installation...")
-				shutil.rmtree(path)
-			except:
-				pass
-except:
-	pass
-# Change to the binwalk src directory
 os.chdir(os.path.join(os.path.dirname(os.path.realpath(__file__)), "src"))
 print("checking pre-requisites")
@@ -57,14 +67,14 @@ try:
 	import magic
 	try:
 		magic.MAGIC_NO_CHECK_TEXT
-	except Exception as e:
+	except AttributeError as e:
 		msg = ["Pre-requisite failure: " + str(e),
 			"It looks like you have an old or incompatible magic module installed.",
 			"Please install the official python-magic module, or download and install it from source: ftp://ftp.astron.com/pub/file/"
 		]
 		warning(msg)
-except Exception as e:
+except ImportError as e:
 	msg = ["Pre-requisite failure:", str(e),
 		"Please install the python-magic module, or download and install it from source: ftp://ftp.astron.com/pub/file/",
 	]
@@ -73,7 +83,7 @@ except Exception as e:
 try:
 	import pyqtgraph
-except Exception as e:
+except ImportError as e:
 	msg = ["Pre-requisite check warning: " + str(e),
 		"To take advantage of this tool's graphing capabilities, please install the pyqtgraph module.",
 	]
@@ -94,23 +104,26 @@ if not os.path.exists(c_lib_makefile):
 status |= os.system("make")
 if status != 0:
-	msg = ["Build warning: failed to build compression libraries.",
+	msg = ["Build warning: failed to build C libraries.",
-		"Some plugins will not work without these libraries."
+		"Some features will not work without these libraries."
 	]
 	warning(msg, prompt=True)
 elif "install" in sys.argv:
 	if os.system("make install") != 0:
-			msg = ["Install warning: failed to install compression libraries.",
+		msg = ["Install warning: failed to install C libraries.",
-				"Some plugins will not work without these libraries."
+			   "Some features will not work without these libraries."
 		]
 		warning(msg, prompt=True)
+os.system("make distclean")
 os.chdir(working_directory)
+cleanup_build_directory()
+cleanup_module_directory()
 # Generate a new magic file from the files in the magic directory
-print("generating binwalk magic file")
+print("creating binwalk magic file")
 magic_files = os.listdir("magic")
 magic_files.sort()
 fd = open("binwalk/magic/binwalk", "wb")
@@ -136,3 +149,5 @@ setup(	name = "binwalk",
 	scripts = ["scripts/binwalk"],
 )
+cleanup_build_directory()
--- a/src/C/Makefile.in
+++ b/src/C/Makefile.in
@@ -10,18 +10,22 @@ export INSTALL_OPTIONS=@INSTALL_OPTIONS@
 all: clean
 	make -C miniz
 	make -C compress
+	make -C fuzzy
 install:
 	make -C miniz install
 	make -C compress install
+	make -C fuzzy install
 .PHONY: clean distclean
 clean:
 	make -C miniz clean
 	make -C compress clean
+	make -C fuzzy clean
 distclean:
 	make -C miniz distclean
 	make -C compress distclean
+	make -C fuzzy distclean
 	rm -rf *.cache config.* Makefile
--- a/src/C/configure
+++ b/src/C/configure
--- a/src/C/configure.ac
+++ b/src/C/configure.ac
--- a/src/C/fuzzy/AUTHORS
+++ b/src/C/fuzzy/AUTHORS
+ssdeep was written by Jesse Kornblum and Helmut Grohne.
--- a/src/C/fuzzy/COPYING
+++ b/src/C/fuzzy/COPYING
--- a/src/C/fuzzy/ChangeLog
+++ b/src/C/fuzzy/ChangeLog
+2013-07-16: Jesse Kornblum <research@jessekornblum.com>:
+	* fuzzy.c: Fix heap corruption bug #15.
+	* dig.c: Removed extra call to GetFileAttributes
+2013-07-09: Jesse Kornblum <research@jessekornblum.com>:
+	* dig.cpp, engine.cpp, helpers.cpp: Created separate directory traversal code
+	for Win32 systems.
+2013-06-01: Jesse Kornblum <research@jessekornblum.com>:
+	* fuzzy.c: Experimental thread-safe patch from Helmut Grohne.
+	* fuzzy.h: Experimental thread-safe patch from Helmut Grohne.
+2013-05-25: Jesse Kornblum <research@jessekornblum.com>:
+	* fuzzy.c: Fixed bug on string scoring.
+2013-03-12: Jesse Kornblum <research@jessekornblum.com>:
+	* Changelog: Spelled my own name correctly.
+	* fuzzy.c: Fixed memory leak, bug 3607641.
+2012-07-23: Jesse Kornblum <research@jessekornblum.com>:
+	* main.cpp, match.cpp: Renamed the match_pretty function for clarity.
+2012-07-17: Jesse Kornblum <research@jessekornblum.com>:
+	* match.cpp: Fixing bugs in matching, clustering modes
+	* main.cpp: Clarifying comments. Added sanity check for -c, -g
+2012-07-16: Jesse Kornblum <research@jessekornblum.com>:
+	* filedata.cpp: Added includes for *nix compilation
+2012-07-14 Jesse Kornblum <research@jessekornblum.com>:
+	* win.sh: Updated for C++ compiler warnings
+	* normal.sh: Updated for C++ compiler warnings
+	* fuzzy.c: Corrected logic error for when sigs don't have filenames
+	* ui.cpp: Added const to print_error's state variable
+	* match.cpp: Rewrote to use Filedata class
+	* filedata.cpp: Created Filedata class
+	* main.cpp: Adapted to use Filedata class
+2012-07-13 Jesse Kornblum <research@jessekornblum.com>:
+	* fuzzy.c: Fixed major bug regarding incorrect match scores for hashes with long filenames
+	* dig.cpp: Added warning message when not all data on stdin was hashed. Also increased stdin buffer to 512MB.
+	* configure.ac: Version bump to 2.9.
+	* main.cpp: Added flags for clustering mode
+	* ssdeep.h: Added flags for clustering mode
+	* match.cpp: Setting up for clustering mode
+2012-05-25 Jesse Kornblum <research@jessekornblum.com>:
+	* main.cpp: Updated command line argument processing
+	* ssdeep.1: Clarifications on description.
+2012-05-24 Jesse Kornblum <research@jessekornblum.com>:
+	* match.cpp: Convert to C++
+2012-04-24 Jesse Kornblum <research@jessekornblum.com>:
+	* ssdeep.1: Updating support for stdin, clarifying modes
+        * Makefile.am: LF to CR/LF change now done by zip program
+	* ssdeep.h: Adding Doxygen comments
+	* match.cpp: Fixed bug in Win32 filename construction
+	* fuzzy.h: Cleanup and commenting
+	* engine.cpp: Added const definitions
+	* Experimental conversion to C++
+	* Commented out all references to clustering
+	* main.cpp: Fixed some bugs in the command line argument processing
+2012-04-15: Jesse Kornblum <research@jessekornblum.com>:
+	* main.c: Fixed error handling in getopt processing
+2012-02-16 Jesse Kornblum <research@jessekornblum.com>:
+	* main.c: Bump copyright to 2012 in usage message. Add flag for clustering
+	* main.h: Added mode_cluster
+	* ui.c: Changed Win32 to use fputc instead of _tfprintf for filenames
+2012-01-12 Jesse Kornblum <research@jessekornblum.com>:
+	* ChangeLog: Clean up
+2011-10-17 Jesse Kornblum <research@jessekornblum.com>:
+	* configure.ac: Added AC_SYS_LARGEFILE to handle large files on 32-bit platforms. See bug 3416762.
+2011-09-30 Jesse Kornblum <research@jessekornblum.com>:
+	* ui.c: Redirected error messages to stderr instead of stdout
+	* main.c: Added warning message when the program does not process any file large enough to produce meaningful results.
+2011-09-27 Jesse Kornblum <research@jessekornblum.com>:
+	* main.c: Added support to process stdin.
+	* dig.c: Added process_stdin function
+	* engine.c: Generalized display functions in display_result.
+2011-08-19 Jesse Kornblum <research@jessekornblum.com>:
+	* edit_dist.c: Accepted patch from Brad Spengler to make
+	  thread safe.
+2010-07-15 Jesse Kornblum <research@jessekornblum.com>:
+	* Added quotes and quote escaping to filenames when
+	  displayed in CSV matching mode.
+	* Modified FILEFORMAT to reflect quotation marks in filenames.
+2010-06-24 Jesse Kornblum <research@jessekornblum.com>:
+	* Added parameter checking to my_basename.
+2010-05-05 Jesse Kornblum <research@jessekornblum.com>:
+	* Changed logic for reading files of known hashes to look for
+	  the header this version of the program writes instead of
+	  the v1 header. This was done for the DC3 branch of the code.
+	* Updated README documentation and published version 2.5.
+2010-03-20 Jesse Kornblum <research@jessekornblum.com>:
+	* Fixed define in fuzzy.h to only allow one compilation
+2010-03-19 Jesse Kornblum <research@jessekornblum.com>:
+	* Added 'extern "C"' definitions to fuzzy.h for C++ compatibility
+	* Added return values indicating errors to API functions.
+	* Modified sample program to use errors on return values
+	* Added Doxygen documentation for API function.
+	* Added inttypes.h include to fuzzy.h.
+	* Added error checking for NULL strings in API functions.
+	* Version bump to 2.5
+2010-02-24 Jesse Kornblum <research@jessekornblum.com>:
+	* Experimenting with mode to compare unknown hashes to known
+	  without comparing the unknown against each other.
+	* Version bump to 2.4
+2009-01-20 Jesse Kornblum <research@jessekornblum.com>:
+	* Added -a mode to display all matches regardless of score.
+2009-10-11 Jesse Kornblum <research@jessekornblum.com>:
+	* Fixed typo in usage page.
+2009-07-14 Jesse Kornblum <research@jessekornblum.com>:
+	* Fixed bug that prevented -x mode from working on Win32
+	* Added web pages to trunk. Updated quickstart guide with
+	  automatic installation options and -x mode.
+	* Version bump to 2.2 and updated man page, NEWS
+2009-07-11 Jesse Kornblum <research@jessekornblum.com>:
+	* Cleaned up -x mode to compare two (or more) files of
+	  signatures.
+	* Cleaned up some code comments and Remove Before Flight tags
+2009-04-18 Jesse Kornblum <research@jessekornblum.com>:
+	* Experimenting with -x mode to compare two (or more)
+	  files of signatures
+	* Added some parameter validation code
+2009-01-01 Jesse Kornblum <research@jessekornblum.com>:
+	* Added fuzzy_hash_filename function to hash a file given
+	  its filename. This avoids issues passing FILE * structures
+	  on Win32 systems in programs not using the fopen convention.
+	  See feature request 2142005.
+	* Reconfigured all files using latest autoconf tools
+2008-09-23 Jesse Kornblum <research@jessekornblum.com>:
+	* Reinstated the code to call match_pretty() at the end of
+	  main. This lets the -p and -d modes display output and
+	  fixes bug 2124423.
+	* Version bump to 2.1.
+2008-04-06 Jesse Kornblum <research@jessekornblum.com>:
+	* Updated man page to include -t and -c modes.
+2008-03-04 Jesse Kornblum <research@jessekornblum.com>:
+	* Made b64 variable static in fuzzy.c
+2008-02-29 Jesse Kornblum <research@jessekornblum.com>:
+	* Version bump to 2.0, reconfigured.
+2008-02-22 Jesse Kornblum <research@jessekornblum.com>:
+	* Changed reading of known hash files back to using
+	  unsigned char values. This also required updating
+	  the match_compare function to handle Unicode
+	  characters when displaying match results.
+	* Flipped files in Win32 zip file to have CR/LF
+2008-02-18 Jesse Kornblum <research@jessekornblum.com>:
+	* Added man pages to the EXTRA_DIST Makefile variable.
+	  Still need to reconfigure.
+	* Added more documentation to the Windows zip file along
+	  with the sample.c file
+	* Removed extraneous reconfiguration from Makefile.am
+2008-02-17 Jesse Kornblum <research@jessekornblum.com>:
+	* Updated build system to create Win32 DLL, documentation,
+	  and packages.
+	* Added check in print_error functions to see if the state
+	  is valid.
+	* Updated documentation, usage message.
+	* Added signature comparison example to sample program.
+2008-02-16 Jesse Kornblum <research@jessekornblum.com>:
+	* Removed block_size value from state variable.
+	  Nobody was using it.
+	* Amended API functions to support hashing either a buffer
+	  or an open file handle
+	* Moved ssdeep code into engine.c, moved fuzzy hashing
+	  code into fuzzy.c. This will help us create libfuzzy
+	  and fuzzy.dll.
+	* Lots of cleanup to fuzzy hashing code. This includes removing
+	  types like 'uchar' and replacing them with C99 types like
+	  unsigned char. Less work for mother means less work debugging.
+	* Moved definition of __progname to ssdeep.h. It's not being
+	  used by the fuzzy hashing library and caused problems on OS X.
+	* Brought over code to support Unicode from Miss Identify
+	* Version bump to 2.0 beta1
+	* Added sample program to demonstrate API features
+2008-02-15 Jesse Kornblum <research@jessekornblum.com>:
+	* Changing Win32 build to create DLL. All other
+	  versions should have library/header files installed
+2008-02-14 Jesse Kornblum <research@jessekornblum.com>:
+	* Moved to autotools structure
--- a/src/C/fuzzy/FILEFORMAT
+++ b/src/C/fuzzy/FILEFORMAT
+SSDEEP FILE FORMAT VERSION 1.1
+1. REVISION HISTORY
+14 Aug 2006 - Initial version (jk)
+15 Jul 2010 - Adding quotation marks to filenames
+2. FILE HEADER
+The first line of the file is a header, like this:
+ssdeep,1.1--blocksize:hash:hash,filename
+ssdeep - Identifies the file type
+1.1    - The version of the file format, NOT the version of the program
+--     - Separator
+The remainder of the line identifies the format of the file. 
+Note that for version 1.1 these values must be given EXACTLY as shown above
+3. FILE DATA
+Each line represents the hash of one file as listed in the header.
+Specifically, we have the blocksize used by the program, the hash
+for this blocksize, the hash for twice the blocksize, and the filename.
+Filenames are enclosed in quotation marks. A quotation mark inside a
+filename is escaped with a backslash. For example, the file ma"in.c
+will be listed as:
+"ma\"in.c"
--- a/src/C/fuzzy/INSTALL
+++ b/src/C/fuzzy/INSTALL
+Installation Instructions
+*************************
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
+2006 Free Software Foundation, Inc.
+This file is free documentation; the Free Software Foundation gives
+unlimited permission to copy, distribute and modify it.
+Basic Installation
+==================
+Briefly, the shell commands `./configure; make; make install' should
+configure, build, and install this package.  The following
+more-detailed instructions are generic; see the `README' file for
+instructions specific to this package.
+   The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation.  It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions.  Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+   It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring.  Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.
+   If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release.  If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+   The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'.  You need `configure.ac' if
+you want to change it or regenerate `configure' using a newer version
+of `autoconf'.
+The simplest way to compile this package is:
+  1. `cd' to the directory containing the package's source code and type
+     `./configure' to configure the package for your system.
+     Running `configure' might take a while.  While running, it prints
+     some messages telling which features it is checking for.
+  2. Type `make' to compile the package.
+  3. Optionally, type `make check' to run any self-tests that come with
+     the package.
+  4. Type `make install' to install the programs and any data files and
+     documentation.
+  5. You can remove the program binaries and object files from the
+     source code directory by typing `make clean'.  To also remove the
+     files that `configure' created (so you can compile the package for
+     a different kind of computer), type `make distclean'.  There is
+     also a `make maintainer-clean' target, but that is intended mainly
+     for the package's developers.  If you use it, you may have to get
+     all sorts of other programs in order to regenerate files that came
+     with the distribution.
+Compilers and Options
+=====================
+Some systems require unusual options for compilation or linking that the
+`configure' script does not know about.  Run `./configure --help' for
+details on some of the pertinent environment variables.
+   You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment.  Here
+is an example:
+     ./configure CC=c99 CFLAGS=-g LIBS=-lposix
+   *Note Defining Variables::, for more details.
+Compiling For Multiple Architectures
+====================================
+You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory.  To do this, you can use GNU `make'.  `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script.  `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+   With a non-GNU `make', it is safer to compile the package for one
+architecture at a time in the source code directory.  After you have
+installed the package for one architecture, use `make distclean' before
+reconfiguring for another architecture.
+Installation Names
+==================
+By default, `make install' installs the package's commands under
+`/usr/local/bin', include files under `/usr/local/include', etc.  You
+can specify an installation prefix other than `/usr/local' by giving
+`configure' the option `--prefix=PREFIX'.
+   You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files.  If you
+pass the option `--exec-prefix=PREFIX' to `configure', the package uses
+PREFIX as the prefix for installing programs and libraries.
+Documentation and other data files still use the regular prefix.
+   In addition, if you use an unusual directory layout you can give
+options like `--bindir=DIR' to specify different values for particular
+kinds of files.  Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+   If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+Optional Features
+=================
+Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System).  The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+   For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+Specifying the System Type
+==========================
+There may be some features `configure' cannot figure out automatically,
+but needs to determine by the type of machine the package will run on.
+Usually, assuming the package is built to be run on the _same_
+architectures, `configure' can figure that out, but if it prints a
+message saying it cannot guess the machine type, give it the
+`--build=TYPE' option.  TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+     CPU-COMPANY-SYSTEM
+where SYSTEM can have one of these forms:
+     OS KERNEL-OS
+   See the file `config.sub' for the possible values of each field.  If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+   If you are _building_ compiler tools for cross-compiling, you should
+use the option `--target=TYPE' to select the type of system they will
+produce code for.
+   If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+Sharing Defaults
+================
+If you want to set default values for `configure' scripts to share, you
+can create a site shell script called `config.site' that gives default
+values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists.  Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+Defining Variables
+==================
+Variables not defined in a site shell script can be set in the
+environment passed to `configure'.  However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost.  In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'.  For example:
+     ./configure CC=/usr/local2/bin/gcc
+causes the specified `gcc' to be used as the C compiler (unless it is
+overridden in the site shell script).
+Unfortunately, this technique does not work for `CONFIG_SHELL' due to
+an Autoconf bug.  Until the bug is fixed you can use this workaround:
+     CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
+`configure' Invocation
+======================
+`configure' recognizes the following options to control how it operates.
+`--help'
+`-h'
+     Print a summary of the options to `configure', and exit.
+`--version'
+`-V'
+     Print the version of Autoconf used to generate the `configure'
+     script, and exit.
+`--cache-file=FILE'
+     Enable the cache: use and save the results of the tests in FILE,
+     traditionally `config.cache'.  FILE defaults to `/dev/null' to
+     disable caching.
+`--config-cache'
+`-C'
+     Alias for `--cache-file=config.cache'.
+`--quiet'
+`--silent'
+`-q'
+     Do not print messages saying which checks are being made.  To
+     suppress all normal output, redirect it to `/dev/null' (any error
+     messages will still be shown).
+`--srcdir=DIR'
+     Look for the package's source code in directory DIR.  Usually
+     `configure' can determine that directory automatically.
+`configure' also accepts some other, not widely useful, options.  Run
+`configure --help' for more details.
--- a/src/C/fuzzy/Makefile.am
+++ b/src/C/fuzzy/Makefile.am
+bin_PROGRAMS=ssdeep
+ssdeep_LDADD=libfuzzy.la
+ssdeep_LDFLAGS=-static
+ACLOCAL_AMFLAGS = -I m4
+lib_LTLIBRARIES=libfuzzy.la
+libfuzzy_la_SOURCES=fuzzy.c edit_dist.c find-file-size.c
+libfuzzy_la_LDFLAGS=-no-undefined -version-info 2:0:0
+include_HEADERS=fuzzy.h
+man_MANS=ssdeep.1
+ssdeep_SOURCES = main.cpp match.cpp engine.cpp filedata.cpp   	\
+                 dig.cpp cycles.cpp helpers.cpp ui.cpp        	\
+                 main.h fuzzy.h tchar-local.h ssdeep.h filedata.h match.h
+dll: $(libfuzzy_la_SOURCES)
+	$(CC) $(CFLAGS) -shared -o fuzzy.dll $(libfuzzy_la_SOURCES) \
+        -Wl,--output-def,fuzzy.def,--out-implib,libfuzzy.a
+	$(STRIP) fuzzy.dll 
+CLEANFILES=fuzzy.dll fuzzy.def
+EXTRA_DIST=$(man_MANS) config.guess config.sub sample.c FILEFORMAT
+WINDOWSDOCS=README.TXT API.TXT FILEFORMAT.TXT NEWS.TXT
+README.TXT: ssdeep.1
+	man ./ssdeep.1 | col -bx > README.TXT
+API.TXT: README
+	cp README API.TXT
+FILEFORMAT.TXT: FILEFORMAT
+	cp FILEFORMAT FILEFORMAT.TXT
+NEWS.TXT: NEWS
+	cp NEWS NEWS.TXT
+win-docs: $(WINDOWSDOCS)
+#	flip -d $(WINDOWSDOCS)
+#	unix2dos $(WINDOWSDOCS)
+win-package: win-docs
+	rm -rf $(distdir).zip $(distdir)
+	make
+	make dll
+	$(STRIP) ssdeep.exe
+	mkdir $(distdir)
+	cp $(WINDOWSDOCS) ssdeep.exe fuzzy.dll fuzzy.def sample.c $(distdir)
+#	flip -d $(distdir)/{sample.c,fuzzy.def}
+#	unix2dos $(distdir)/{sample.c,fuzzy.def}
+	zip -lr9 $(distdir).zip $(distdir)
+	rm -rf $(distdir) $(WINDOWSDOCS)
+world: distclean
+	./configure --host=i386-mingw32
+	make win-package
+	make dist
+# Only generic routines go below this line
+# ------------------------------------------------------------------
+nice:
+	rm -f *~
+preflight:
+	@grep RBF $(DISTFILES)
--- a/src/C/fuzzy/Makefile.in
+++ b/src/C/fuzzy/Makefile.in
--- a/src/C/fuzzy/NEWS
+++ b/src/C/fuzzy/NEWS
+** Version 2.10 - 17 Jul 2013
+* New Features
+  - Fuzzy Hashing engine re-written to be thread safe.
+* Bug Fixes
+  - Able to handle long file paths on Win32.
+  - Fixed bug on comparing signatures with the same block size.
+  - Fixed crash on comparing short signatures.
+  - Fixed memory leak
+** Version 2.9 - 23 Jul 2012
+* New Features
+  - Added warning message for when some data on stdin is not hashed.
+  - Can now hash up to 512MB of data on stdin.
+  - Added clustering mode to group together matching files
+* Bug Fixes
+  - Fixed incorrect match scores for hashes with long filenames.
+** Version 2.8 - 25 May 2012
+* New Features
+  - Converted to C++
+* Bug Fixes
+  - Fixed filename display on Win32.
+  - Fixed support for large files on some platforms.
+  - Fixed errors in handling command line argument processing.
+** Version 2.7 - 30 Sep 2011
+* New Features
+  - Added the capability to process the first 100MB of data
+    from standard input.
+  - Added a warning message when the program does not process
+    any file large enough to produce a meaningful result.
+* Bug Fixes
+  - Standard errors are now sent to stderr, not stdout.
+** Version 2.6 - 28 Sep 2010
+* New Features
+  - Modified the output file format to allow for proper escaping of
+    filenames with quotation marks in them.
+* Bug Fixes
+  - Added quotation marks to filenames in CSV matching mode.
+** Version 2.5 - 6 May 2010
+* New Features
+  - Added API documentation
+  - Added return values indicating errors in API functions
+  - Added compatibility for compiling with C++
+* Bug Fixes
+  - Added parameter validation to API functions
+  - Fixed some cosmetic errors in error handling
+** Version 2.4 - 25 Feb 2010
+* New Features
+  - Added -k mode to compare unknown signatures against known signatures.
+** Version 2.3 - 10 Jan 2010
+* New Features
+  - Added -a mode to display all 'matches', regardless of score.
+** Version 2.2 - 22 Jul 2009
+* New Features
+  - Added capability to compare two or more files containing signatures
+    against one another.
+* Bug Fixes
+  - Changed default behavior to exit program on invalid command line flags
+** Version 2.1 - 1 Jan 2009
+* New Features
+  - Added fuzzy_hash_filename function to hash an entire file given
+    only its filename. Avoids issues on Win32 systems.
+* Bug Fixes
+  - Fixed -p mode to display output
+** Version 2.0 - 2 Apr 2008
+* New Features
+  - Created fuzzy hashing API/DLL
+  - Added support for filenames with Unicode characters on Win32
+  - Added threshold mode
+  - Added CSV mode
+* Bug Fixes
+  - Fixed extra characters appearing during verbose mode
+** Version 1.1 - 14 Aug 2006
+* New Features
+  - First public release
+  - Added verbose mode to display filenames as they're being hashed
+  - Added -d mode to make finding similar files in the same directory tree
+      both easier and faster. Removes the need for two command lines and
+      many extraneous lines of output.
+  - Added -p mode to improve -d mode. Prints bi-directional matches together
+    and omits self matches.
+  - Added LARGEFILE_SOURCE define to Linux version to allow processing
+    of large files. (You never know...)
+* Bug Fixes
+  - Fixed cosmetic errors in usage message. Updated man page.
+** Version 1.0 - 31 Mar 2006
+* New Features
+ - Released internally
+ - Added silent mode, -s. All error messages are suppressed.
+* Bug Fixes
+ - Fixed failure to close files after reading in engine.c
+ - Fixed routine to read headers of matching hashes on Windows.
+ - Fixed handling of symbolic links
+ - Fixed cosmetic bug to display error messages if file open fails
+     (e.g. Permission denied, etc)
+ - Removed quotation marks from the signatures but not the file names.
+     Filenames may contain spaces, but signatures may not. Two bytes
+     per line adds up when we start compiling large hash sets.
+ - Redirected all error messages to stderr instead of stdout
+ - Removed duplicate defines at the start of engine.c
+ - Replaced all references to u32 with C99 standard uint32_t
+ - Added error checking for memory allocation in main.c:main() and
+     engine.c:hash_file()
+ - Removed useless logical AND of 0xFFFFFFFF from rolling hash update
+** Version 0.1 - 4 Nov 2005
+* New Features
+ - Proof of concept
+ - This version supports recursion, relative and bare file names, and
+     can perform positive matching using a previous output.
--- a/src/C/fuzzy/README
+++ b/src/C/fuzzy/README
+**** FUZZY HASHING API ****
+This file documents the fuzzy hashing API. Information on how to use the
+fuzzy hashing program ssdeep can be found in the man page. On *nix
+systems you can view this file with:
+$ man ./ssdeep.1
+Windows users can get the ssdeep usage information from README.TXT.
+** Using the API in Your Own Programs **
+You can use the fuzzy hashing API in your own programs by doing 
+the following:
+1. Include the fuzzy hashing header
+#include <fuzzy.h>
+2. Call one of the functions:
+* Fuzzy hashing a buffer of text:
+int fuzzy_hash_buf(const unsigned char *buf,
+		   uint32_t      buf_len,
+	           char          *result);
+This function computes the fuzzy hash of the buffer 'buf' and stores the
+result in result. You MUST allocate result to hold FUZZY_MAX_RESULT
+characters before calling this function. The length of the buffer should
+be passed in via buf_len. It is the user's responsibility to append the
+filename, if any, to the output. The function returns zero on success,
+one on error.
+* Fuzzy hashing a file:
+There are in fact two ways to fuzzy hash a file. If you already 
+have an open file handle you can use:
+int fuzzy_hash_file(FILE *handle,
+	            char *result);
+This function computes the fuzzy hash of the file pointed to by handle
+and stores the result in result. You MUST allocate result to hold
+FUZZY_MAX_RESULT characters before calling this function. It is the 
+user's responsibility to append the filename to the output. 
+The function returns zero on success, one on error.
+The other function to hash a file takes a file name:
+int fuzzy_hash_filename(const char * filename,
+			char * result);
+Like the function above, this function stores the fuzzy hash result
+in the parameter result. You MUST allocate result to hold 
+FUZZY_MAX_RESULT characters before calling this function.
+* Compare two fuzzy hash signatures:
+int fuzzy_compare(const char *sig1, const char *sig2);
+This function returns a value from 0 to 100 indicating the match 
+score of the two signatures. A match score of zero indicates the
+signatures did not match.
+3. Compile
+To compile the program using gcc:
+   $ gcc -Wall -I/usr/local/include -L/usr/local/lib sample.c -lfuzzy
+Using mingw:
+   C:\> gcc -Wall -Ic:\path\to\includes sample.c fuzzy.dll
+Using Microsoft Visual C (MSVC):
+To paraphrase the MinGW documentation, 
+http://www.mingw.org/mingwfaq.shtml#faq-msvcdll:
+The Windows ssdeep package includes a Win32 DLL and a .def file. Although
+MSVC users can't use the DLL directly, they can easily create a .lib file
+using the Microsoft LIB tool:
+   C:\> lib /machine:i386 /def:fuzzy.def
+You can then compile your program using the resulting library:
+   C:\> cl sample.c fuzzy.lib
+** Sample Program **
+A sample program that uses the API is in sample.c. 
+** See Also ** 
+- Jesse D. Kornblum, "Identifying almost identical files using context 
+triggered piecewise hashing", Digital Investigation, 3(S):91-97, 
+September 2006, http://dx.doi.org/10.1016/j.diin.2006.06.015,
+The Proceedings of the 6th Annual Digital Forensic Research Workshop
\ No newline at end of file
--- a/src/C/fuzzy/TODO
+++ b/src/C/fuzzy/TODO
+- Update man page
+- Update web page, to include new man page
+- Write README
+- Find a way to estimate device sizes on Windows
+    Perhaps an IOCTL_DISK_GET_DRIVE_GEOMETRY_EX would work?
+- See if Windows Vista's symbolic links create problems for dig.c
--- a/src/C/fuzzy/aclocal.m4
+++ b/src/C/fuzzy/aclocal.m4
--- a/src/C/fuzzy/compile
+++ b/src/C/fuzzy/compile
+#! /bin/sh
+# Wrapper for compilers which do not understand '-c -o'.
+scriptversion=2012-10-14.11; # UTC
+# Copyright (C) 1999-2013 Free Software Foundation, Inc.
+# Written by Tom Tromey <tromey@cygnus.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+nl='
+'
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent tools from complaining about whitespace usage.
+IFS=" ""	$nl"
+file_conv=
+# func_file_conv build_file lazy
+# Convert a $build file to $host form and store it in $file
+# Currently only supports Windows hosts. If the determined conversion
+# type is listed in (the comma separated) LAZY, no conversion will
+# take place.
+func_file_conv ()
+{
+  # The (possibly converted) name is handed back in the global $file.
+  file=$1
+  case $file in
+    / | /[!/]*) # absolute file, and not a UNC file
+      if test -z "$file_conv"; then
+	# lazily determine how to convert abs files
+	case `uname -s` in
+	  MINGW*)
+	    file_conv=mingw
+	    ;;
+	  CYGWIN*)
+	    file_conv=cygwin
+	    ;;
+	  *)
+	    file_conv=wine
+	    ;;
+	esac
+      fi
+      # $2 is the comma separated LAZY list.  When it names the detected
+      # conversion type the first pattern matches and $file is left alone.
+      case $file_conv/,$2, in
+	*,$file_conv,*)
+	  ;;
+	mingw/*)
+	  # Echo the name through cmd, then strip the quotes and the padding
+	  # space from what comes back.
+	  file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
+	  ;;
+	cygwin/*)
+	  # Keep the unconverted name if cygpath fails.
+	  file=`cygpath -m "$file" || echo "$file"`
+	  ;;
+	wine/*)
+	  # Keep the unconverted name if winepath fails.
+	  file=`winepath -w "$file" || echo "$file"`
+	  ;;
+      esac
+      ;;
+  esac
+}
+# func_cl_dashL linkdir
+# Make cl look for libraries in LINKDIR
+func_cl_dashL ()
+{
+  # Convert LINKDIR to host form; the result comes back in $file.
+  func_file_conv "$1"
+  # Extend the ';'-separated search path; the first entry gets no
+  # leading separator.
+  lib_path="${lib_path:+$lib_path;}$file"
+  # Also hand the directory to the linker.
+  linker_opts="$linker_opts -LIBPATH:$file"
+}
+# func_cl_dashl library
+# Do a library search-path lookup for cl
+func_cl_dashl ()
+{
+  # The resolved library name is handed back in the global $lib.
+  lib=$1
+  found=no
+  save_IFS=$IFS
+  # Split the search path on ';' while the word list is expanded; the
+  # normal IFS is restored as soon as the loop body runs.
+  IFS=';'
+  for dir in $lib_path $LIB
+  do
+    IFS=$save_IFS
+    # $shared holds a command name (':' or 'false', set by
+    # func_cl_wrapper) and is executed as the first half of the test.
+    if $shared && test -f "$dir/$lib.dll.lib"; then
+      found=yes
+      lib=$dir/$lib.dll.lib
+      break
+    fi
+    if test -f "$dir/$lib.lib"; then
+      found=yes
+      lib=$dir/$lib.lib
+      break
+    fi
+    if test -f "$dir/lib$lib.a"; then
+      found=yes
+      lib=$dir/lib$lib.a
+      break
+    fi
+  done
+  # Restore IFS again in case the loop body never ran.
+  IFS=$save_IFS
+  if test "$found" != yes; then
+    # Nothing matched on the search path: fall back to plain NAME.lib.
+    lib=$lib.lib
+  fi
+}
+# func_cl_wrapper cl arg...
+# Adjust compile command to suit cl
+func_cl_wrapper ()
+{
+  # Assume a capable shell
+  lib_path=
+  shared=:
+  linker_opts=
+  # Rewrite the positional parameters in place: each pass looks at $1,
+  # appends its translation (if any) to the end of "$@" and finally drops
+  # the original from the front.  When the loop ends "$@" holds only the
+  # translated command line.
+  for arg
+  do
+    if test -n "$eat"; then
+      # The previous option already consumed this word as its operand.
+      eat=
+    else
+      case $1 in
+	-o)
+	  # configure might choose to run compile as 'compile cc -o foo foo.c'.
+	  eat=1
+	  case $2 in
+	    *.o | *.[oO][bB][jJ])
+	      # Object output becomes -Fo<file>.
+	      func_file_conv "$2"
+	      set x "$@" -Fo"$file"
+	      shift
+	      ;;
+	    *)
+	      # Any other output name becomes -Fe<file>.
+	      func_file_conv "$2"
+	      set x "$@" -Fe"$file"
+	      shift
+	      ;;
+	  esac
+	  ;;
+	-I)
+	  eat=1
+	  func_file_conv "$2" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-I*)
+	  func_file_conv "${1#-I}" mingw
+	  set x "$@" -I"$file"
+	  shift
+	  ;;
+	-l)
+	  eat=1
+	  func_cl_dashl "$2"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-l*)
+	  func_cl_dashl "${1#-l}"
+	  set x "$@" "$lib"
+	  shift
+	  ;;
+	-L)
+	  # Only recorded in $lib_path/$linker_opts; nothing is appended here.
+	  eat=1
+	  func_cl_dashL "$2"
+	  ;;
+	-L*)
+	  func_cl_dashL "${1#-L}"
+	  ;;
+	-static)
+	  # Makes func_cl_dashl skip the NAME.dll.lib candidates.
+	  shared=false
+	  ;;
+	-Wl,*)
+	  # Split the comma separated list and forward each flag to the linker.
+	  arg=${1#-Wl,}
+	  save_ifs="$IFS"; IFS=','
+	  for flag in $arg; do
+	    IFS="$save_ifs"
+	    linker_opts="$linker_opts $flag"
+	  done
+	  IFS="$save_ifs"
+	  ;;
+	-Xlinker)
+	  eat=1
+	  linker_opts="$linker_opts $2"
+	  ;;
+	-*)
+	  # Unrecognised options are passed through unchanged.
+	  set x "$@" "$1"
+	  shift
+	  ;;
+	*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
+	  # These suffixes are passed with a -Tp prefix.
+	  func_file_conv "$1"
+	  set x "$@" -Tp"$file"
+	  shift
+	  ;;
+	*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
+	  func_file_conv "$1" mingw
+	  set x "$@" "$file"
+	  shift
+	  ;;
+	*)
+	  set x "$@" "$1"
+	  shift
+	  ;;
+      esac
+    fi
+    # Drop the original word that was just processed.
+    shift
+  done
+  if test -n "$linker_opts"; then
+    # $linker_opts starts with a space, so this yields "-link <opts...>".
+    linker_opts="-link$linker_opts"
+  fi
+  exec "$@" $linker_opts
+  # Only reached if exec itself failed.
+  exit 1
+}
+# Flag shared by both argument loops: non-empty means "skip the next word".
+eat=
+# Dispatch on the first word: usage errors, help/version, or the cl wrapper.
+case $1 in
+  '')
+     echo "$0: No command.  Try '$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: compile [--help] [--version] PROGRAM [ARGS]
+Wrapper for compilers which do not understand '-c -o'.
+Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
+arguments, and rename the output as expected.
+If you are trying to build a whole package this is not the
+right script to run: please start by reading the file 'INSTALL'.
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit $?
+    ;;
+  -v | --v*)
+    echo "compile $scriptversion"
+    exit $?
+    ;;
+  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
+    func_cl_wrapper "$@"      # Doesn't return...
+    ;;
+esac
+# Generic path: remember the requested object file ($ofile) and the C
+# source ($cfile) while rebuilding "$@" without the '-o OBJECT' pair.
+ofile=
+cfile=
+for arg
+do
+  if test -n "$eat"; then
+    eat=
+  else
+    case $1 in
+      -o)
+	# configure might choose to run compile as 'compile cc -o foo foo.c'.
+	# So we strip '-o arg' only if arg is an object.
+	eat=1
+	case $2 in
+	  *.o | *.obj)
+	    ofile=$2
+	    ;;
+	  *)
+	    set x "$@" -o "$2"
+	    shift
+	    ;;
+	esac
+	;;
+      *.c)
+	cfile=$1
+	set x "$@" "$1"
+	shift
+	;;
+      *)
+	set x "$@" "$1"
+	shift
+	;;
+    esac
+  fi
+  # Drop the original word that was just processed.
+  shift
+done
+if test -z "$ofile" || test -z "$cfile"; then
+  # If no '-o' option was seen then we might have been invoked from a
+  # pattern rule where we don't need one.  That is ok -- this is a
+  # normal compilation that the losing compiler can handle.  If no
+  # '.c' file was seen then we are probably linking.  That is also
+  # ok.
+  exec "$@"
+fi
+# Name of file we expect compiler to create.
+# (directory part and any drive letter removed, '.c' replaced by '.o')
+cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
+# Create the lock directory.
+# Note: use '[/\\:.-]' here to ensure that we don't use the same name
+# that we are using for the .o file.  Also, base the name on the expected
+# object file name, since that is what matters with a parallel build.
+lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
+# Retry once a second until mkdir succeeds, i.e. until we own the lock.
+while true; do
+  if mkdir "$lockdir" >/dev/null 2>&1; then
+    break
+  fi
+  sleep 1
+done
+# FIXME: race condition here if user kills between mkdir and trap.
+trap "rmdir '$lockdir'; exit 1" 1 2 15
+# Run the compile.
+"$@"
+ret=$?
+# Move whatever the compiler produced (NAME.o, or NAME.obj via the "bj"
+# suffix) to the requested output name, unless it is already there.
+if test -f "$cofile"; then
+  test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
+elif test -f "${cofile}bj"; then
+  test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
+fi
+rmdir "$lockdir"
+# Report the compiler's own exit status.
+exit $ret
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
--- a/src/C/fuzzy/config.guess
+++ b/src/C/fuzzy/config.guess
--- a/src/C/fuzzy/config.h.in
+++ b/src/C/fuzzy/config.h.in
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+/* Define to 1 if you have the <dirent.h> header file. */
+#undef HAVE_DIRENT_H
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+/* Define to 1 if you have the <fcntl.h> header file. */
+#undef HAVE_FCNTL_H
+/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
+#undef HAVE_FSEEKO
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+/* Define to 1 if you have the <libgen.h> header file. */
+#undef HAVE_LIBGEN_H
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+/* Define to 1 if you have the <sys/disk.h> header file. */
+#undef HAVE_SYS_DISK_H
+/* Define to 1 if you have the <sys/ioctl.h> header file. */
+#undef HAVE_SYS_IOCTL_H
+/* Define to 1 if you have the <sys/mount.h> header file. */
+#undef HAVE_SYS_MOUNT_H
+/* Define to 1 if you have the <sys/param.h> header file. */
+#undef HAVE_SYS_PARAM_H
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+/* Define to 1 if you have the <wchar.h> header file. */
+#undef HAVE_WCHAR_H
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#undef LT_OBJDIR
+/* Name of package */
+#undef PACKAGE
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+/* Version number of package */
+#undef VERSION
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+#  undef WORDS_BIGENDIAN
+# endif
+#endif
+/* Enable large inode numbers on Mac OS X 10.5.  */
+#ifndef _DARWIN_USE_64_BIT_INODE
+# define _DARWIN_USE_64_BIT_INODE 1
+#endif
+/* Number of bits in a file offset, on hosts where this is settable. */
+#undef _FILE_OFFSET_BITS
+/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */
+#undef _LARGEFILE_SOURCE
+/* Define for large files, on AIX-style hosts. */
+#undef _LARGE_FILES
+/* Linux operating system functions */
+#undef __LINUX__
--- a/src/C/fuzzy/config.sub
+++ b/src/C/fuzzy/config.sub
--- a/src/C/fuzzy/configure
+++ b/src/C/fuzzy/configure
--- a/src/C/fuzzy/configure.ac
+++ b/src/C/fuzzy/configure.ac
+# Autoconf input for the ssdeep / libfuzzy build.
+AC_INIT([SSDEEP],[2.10],[research@jessekornblum.com])
+AM_INIT_AUTOMAKE
+AC_CONFIG_FILES([Makefile])
+# AC_CONFIG_HEADERS replaces the obsolete AM_CONFIG_HEADER macro.
+AC_CONFIG_HEADERS([config.h])
+AC_CANONICAL_HOST
+AC_PROG_CC
+AC_PROG_CXX
+AC_LIBTOOL_WIN32_DLL
+#AC_PROG_LIBTOOL
+AM_PROG_LIBTOOL
+AC_PROG_INSTALL
+AC_CONFIG_MACRO_DIR([m4])
+# Host specific settings: __LINUX__ is defined on Linux hosts, and the
+# MinGW build links libiberty and compiles with the UNICODE macros set.
+case $host in
+  *-*-*linux*-*) AC_DEFINE([__LINUX__],1,[Linux operating system functions]) ;;
+  *-*-mingw32)   LIBS="-liberty $LIBS" && CPPFLAGS="-DUNICODE -D_UNICODE $CPPFLAGS" ;;
+esac
+# Bring additional directories where things might be found into our
+# search path. I don't know why autoconf doesn't do this by default
+for spfx in /usr/local /opt/local /sw ; do
+    echo checking ${spfx}/include
+    if test -d "${spfx}/include"; then
+        CPPFLAGS="-I${spfx}/include $CPPFLAGS"
+        LDFLAGS="-L${spfx}/lib $LDFLAGS"
+    fi
+done
+AC_C_BIGENDIAN
+AC_SYS_LARGEFILE
+AC_CHECK_HEADERS([libgen.h])
+AC_CHECK_HEADERS([dirent.h])
+AC_CHECK_HEADERS([inttypes.h])
+AC_CHECK_HEADERS([fcntl.h sys/types.h sys/ioctl.h sys/param.h wchar.h unistd.h sys/stat.h sys/disk.h])
+# inttypes.h is mandatory: stop configuring if it is missing.
+AC_CHECK_HEADER([inttypes.h],[],[AC_MSG_ERROR([You must have inttypes.h or some other C99 equivalent])])
+# These includes are required on FreeBSD
+AC_CHECK_HEADERS([sys/mount.h],[],[],
+[#ifdef HAVE_SYS_TYPES_H
+ # include <sys/types.h>
+ #endif
+ #ifdef HAVE_SYS_PARAM_H
+ # include <sys/param.h>
+ #endif])
+AC_FUNC_FSEEKO
+AC_OUTPUT
--- a/src/C/fuzzy/cycles.cpp
+++ b/src/C/fuzzy/cycles.cpp
+/* MD5DEEP
+ *
+ * By Jesse Kornblum
+ *
+ * This is a work of the US Government. In accordance with 17 USC 105,
+ * copyright protection is not available for any work of the US Government.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+// $Id: cycles.cpp 184 2013-07-10 05:24:26Z jessekornblum $
+#include "ssdeep.h"
+// One entry of the singly linked list of directories being processed.
+typedef struct dir_table {
+  // Heap copy of the resolved directory name; released in done_processing_dir()
+  TCHAR *name;
+  // Next entry, or NULL at the end of the list
+  struct dir_table *next;
+} dir_table;
+// Head of the list; NULL while no directory is registered.
+dir_table *my_table = NULL;
+/* This function was used in the dark ages for debugging
+static void dump_table(void)
+{
+  struct dir_table *t = my_table;
+  while (t != NULL)
+  {
+    print_status (_TEXT("* %s"), t->name);
+    t = t->next;
+  }
+  print_status ("-- end of table --");
+}
+*/
+// Remove the directory fn from the table of directories in progress.
+// Returns TRUE when the entry was found and removed. Every other path
+// reports an internal error first and then returns FALSE.
+int done_processing_dir(TCHAR *fn)
+{
+  // Resolve fn the same way the stored names were resolved
+  TCHAR *resolved = (TCHAR *)malloc(sizeof(TCHAR) * SSDEEP_PATH_MAX);
+#ifdef _WIN32
+  _wfullpath(resolved,fn,SSDEEP_PATH_MAX);
+#else
+  realpath(fn,resolved);
+#endif
+  if (NULL == my_table)
+  {
+    internal_error("Table is NULL in done_processing_dir");
+    // This code never gets executed... 
+    free(resolved);
+    return FALSE;
+  }
+  // Walk the links instead of the nodes so that removing the head and
+  // removing an interior entry are the same operation.
+  for (dir_table **link = &my_table; *link != NULL; link = &(*link)->next)
+  {
+    dir_table *entry = *link;
+    if (0 == _tcsncmp(resolved,entry->name,SSDEEP_PATH_MAX))
+    {
+      *link = entry->next;
+      free(entry->name);
+      free(entry);
+      free(resolved);
+      return TRUE;
+    }
+  }
+  internal_error("%s: Directory %s not found in done_processing_dir",
+		 __progname, resolved);
+  // This code never gets executed... 
+  //  free (resolved);
+  return FALSE;
+}
+// Register the directory fn in the table of directories in progress.
+// The name is resolved with _wfullpath()/realpath() and a heap copy is
+// appended to the end of the list.
+// Returns TRUE when the entry was added. Returns FALSE when memory could
+// not be allocated, or (after reporting an internal error) when the
+// directory is already in the table.
+// NOTE(review): the result of realpath()/_wfullpath() is not checked; if
+// resolution fails the contents of the buffer are whatever the call left.
+int processing_dir(TCHAR *fn)
+{
+  dir_table *new_dir, *temp, *tail = NULL;
+  TCHAR *d_name = (TCHAR *)malloc(sizeof(TCHAR) * SSDEEP_PATH_MAX);
+  if (NULL == d_name)
+    return FALSE;
+#ifdef _WIN32
+  _wfullpath(d_name,fn,SSDEEP_PATH_MAX);
+#else
+  realpath(fn,d_name);
+#endif
+  // Visit every entry, including the last one, so that a duplicate of
+  // the tail (or of the only entry) is caught as well. Remember the tail
+  // for the append below.
+  for (temp = my_table; temp != NULL; temp = temp->next)
+  {
+    /* We should never be adding a directory that is already here */
+    if (!_tcsncmp(temp->name,d_name,SSDEEP_PATH_MAX))
+    {
+      internal_error("%s: Attempt to add existing %s in processing_dir",
+		     __progname, d_name);
+      // Does not execute
+      free(d_name);
+      return FALSE;
+    }
+    tail = temp;
+  }
+  new_dir = (dir_table*)malloc(sizeof(dir_table));
+  if (NULL == new_dir)
+  {
+    free(d_name);
+    return FALSE;
+  }
+  new_dir->name = _tcsdup(d_name);
+  new_dir->next = NULL;
+  free(d_name);
+  if (NULL == new_dir->name)
+  {
+    free(new_dir);
+    return FALSE;
+  }
+  // An empty table gets its first entry; otherwise append after the tail
+  if (NULL == tail)
+    my_table = new_dir;
+  else
+    tail->next = new_dir;
+  return TRUE;
+}
+// Report whether the directory fn is currently in the table.
+// Returns TRUE when the resolved name of fn matches a stored entry and
+// FALSE otherwise, including when the table is empty.
+int have_processed_dir(TCHAR *fn)
+{
+  // An empty table cannot contain anything; skip the allocation
+  if (NULL == my_table)
+    return FALSE;
+  TCHAR *resolved = (TCHAR *)malloc(sizeof(TCHAR) * SSDEEP_PATH_MAX);
+#ifdef _WIN32
+  _wfullpath(resolved,fn,SSDEEP_PATH_MAX);
+#else
+  realpath(fn,resolved);
+#endif
+  int found = FALSE;
+  for (dir_table *entry = my_table; entry != NULL; entry = entry->next)
+  {
+    if (0 == _tcsncmp(entry->name,resolved,SSDEEP_PATH_MAX))
+    {
+      found = TRUE;
+      break;
+    }
+  }
+  free(resolved);
+  return found;
+}
--- a/src/C/fuzzy/depcomp
+++ b/src/C/fuzzy/depcomp
--- a/src/C/fuzzy/dig.cpp
+++ b/src/C/fuzzy/dig.cpp
--- a/src/C/fuzzy/edit_dist.c
+++ b/src/C/fuzzy/edit_dist.c
+/*
+  This edit distance code is taken from trn3.6. A few minor
+  modifications have been made by Andrew Tridgell <tridge@samba.org>
+  for use in spamsum.
+*/
+/***************************************************************************/
+/* The authors make no claims as to the fitness or correctness of this software
+ * for any use whatsoever, and it is provided as is. Any use of this software
+ * is at the user's own risk. 
+ */
+#include <stdio.h>
+#include <stdlib.h>
+/* edit_dist -- returns the minimum edit distance between two strings
+	Program by:  Mark Maimone   CMU Computer Science   13 Nov 89
+	Last Modified:  28 Jan 90
+   If the input strings have length n and m, the algorithm runs in time
+   O(nm) and space O(min(m,n)).
+HISTORY
+   13 Nov 89 (mwm) Created edit_dist() and set_costs().
+   28 Jan 90 (mwm) Added view_costs().  Should verify that THRESHOLD
+   computations will work even when THRESHOLD is not a multiple of
+   sizeof(int).
+   17 May 93 (mwm) Improved performance when used with trn's newsgroup
+   processing; assume all costs are 1, and you can terminate when a
+   threshold is exceeded.
+*/
+#define MIN_DIST 100
+#define	TRN_SPEEDUP		/* Use a less-general version of the
+				   routine, one that's better for trn.
+				   All change costs are 1, and it's okay
+				   to terminate if the edit distance is
+				   known to exceed MIN_DIST */
+#define THRESHOLD 4000		/* worry about allocating more memory only
+				   when this # of bytes is exceeded */
+#define STRLENTHRESHOLD ((int) ((THRESHOLD / sizeof (int) - 3) / 2))
+#define SAFE_ASSIGN(x,y) (((x) != NULL) ? (*(x) = (y)) : (y))
+#define swap_int(x,y)  do { int _iswap = (x); (x) = (y); (y) = _iswap; } while (0)
+#define swap_char(x,y) do { const char *_cswap = (x); (x) = (y); (y) = _cswap; } while (0)
+/* min3 -- returns the smallest of three ints */
+static inline int min3(int x, int y, int z)
+{
+	int smallest = (x < y) ? x : y;
+	if (z < smallest)
+		smallest = z;
+	return smallest;
+}
+/* min2 -- returns the smaller of two ints */
+static inline int min2(int x, int y)
+{
+	if (y < x)
+		return y;
+	return x;
+}
+static int insert_cost = 1;
+static int delete_cost = 1;
+#ifndef TRN_SPEEDUP
+static int change_cost = 1;
+static int swap_cost   = 1;
+#endif
+/* edit_distn -- returns the edit distance between two strings, or -1 on
+   failure.
+	from, from_len	first string and its length
+	to, to_len	second string and its length
+   A NULL pointer or a zero length is treated as the empty string.  The
+   only failure is being unable to allocate the row buffer for inputs too
+   long for the on-stack storage.  With TRN_SPEEDUP defined the scan stops
+   as soon as every entry of a row exceeds MIN_DIST; the value returned is
+   then not the full distance, only known to exceed MIN_DIST. */
+int
+edit_distn(const char *from, int from_len, const char *to, int to_len)
+{
+#ifndef TRN_SPEEDUP
+    register int ins, del, ch;	  	/* local copies of edit costs */
+#endif
+    register int row, col, index;	/* dynamic programming counters */
+    register int radix;			/* radix for modular indexing */
+#ifdef TRN_SPEEDUP
+    register int low;
+#endif
+    int *buffer;			/* pointer to storage for one row
+					   of the d.p. array */
+    int store[THRESHOLD / sizeof (int)];
+					/* a small amount of static
+					   storage, to be used when the
+					   input strings are small enough */
+/* Handle trivial cases when one string is empty */
+    if (from == NULL || !from_len) {
+	if (to == NULL || !to_len)
+	    return 0;
+	return to_len * insert_cost;
+    }
+    if (to == NULL || !to_len)
+	return from_len * delete_cost;
+/* Initialize registers */
+#ifdef TRN_SPEEDUP
+#define ins 1
+#define del 1
+#define ch 3
+#define swap_cost 5
+#else
+    ins  = insert_cost;
+    del  = delete_cost;
+    ch   = change_cost;
+#endif
+/* Make   from   short enough to fit in the static storage, if it's at all
+   possible */
+    if (from_len > to_len && from_len > STRLENTHRESHOLD) {
+	swap_int(from_len, to_len);
+	swap_char(from, to);
+#ifndef TRN_SPEEDUP
+	swap_int(ins, del);
+#endif
+    } /* if from_len > to_len */
+/* The radix must be derived from the FINAL from_len: the index macros
+   below assume   radix == 2 * from_len + 3,   and the choice between the
+   static and the heap buffer is made on the same from_len.  Computing it
+   before the swap above would leave it too large for the buffer. */
+    radix = 2 * from_len + 3;
+/* Allocate the array storage (from the heap if necessary) */
+    if (from_len <= STRLENTHRESHOLD)
+	buffer = store;
+    else {
+	buffer = (int *) malloc(radix * sizeof (int));
+	if (buffer == NULL)
+	    return -1;
+    }
+/* Here's where the fun begins.  We will find the minimum edit distance
+   using dynamic programming.  We only need to store two rows of the matrix
+   at a time, since we always progress down the matrix.  For example,
+   given the strings "one" and "two", and insert, delete and change costs
+   equal to 1:
+	   _  o  n  e
+	_  0  1  2  3
+	t  1  1  2  3
+	w  2  2  2  3
+	o  3  2  3  3
+   The dynamic programming recursion is defined as follows:
+	ar(x,0) := x * insert_cost
+	ar(0,y) := y * delete_cost
+	ar(x,y) := min(a(x - 1, y - 1) + (from[x] == to[y] ? 0 : change),
+		       a(x - 1, y) + insert_cost,
+		       a(x, y - 1) + delete_cost,
+		       a(x - 2, y - 2) + (from[x] == to[y-1] &&
+					  from[x-1] == to[y] ? swap_cost :
+					  infinity))
+   Since this only looks at most two rows and three columns back, we need
+   only store the values for the two preceding rows.  In this
+   implementation, we do not explicitly store the zero column, so only 2 *
+   from_len + 2   words are needed.  However, in the implementation of the
+   swap_cost   check, the current matrix value is used as a buffer; we
+   can't overwrite the earlier value until the   swap_cost   check has
+   been performed.  So we use   2 * from_len + 3   elements in the buffer.
+*/
+#define ar(x,y,index) (((x) == 0) ? (y) * del : (((y) == 0) ? (x) * ins : \
+	buffer[mod(index)]))
+#define NW(x,y)	  ar(x, y, index + from_len + 2)
+#define N(x,y)	  ar(x, y, index + from_len + 3)
+#define W(x,y)	  ar(x, y, index + radix - 1)
+#define NNWW(x,y) ar(x, y, index + 1)
+#define mod(x) ((x) % radix)
+    index = 0;
+#ifdef DEBUG_EDITDIST
+    printf("      ");
+    for (col = 0; col < from_len; col++)
+	printf(" %c ", from[col]);
+    printf("\n   ");
+    for (col = 0; col <= from_len; col++)
+	printf("%2d ", col * del);
+#endif
+/* Row 0 is handled implicitly; its value at a given column is   col*del.
+   The loop below computes the values for Row 1.  At this point we know the
+   strings are nonempty.  We also don't need to consider swap costs in row
+   1.
+   COMMENT:  the indices   row and col   below point into the STRING, so
+   the corresponding MATRIX indices are   row+1 and col+1.
+*/
+    buffer[index++] = min2(ins + del, (from[0] == to[0] ? 0 : ch));
+#ifdef TRN_SPEEDUP
+    /* low tracks the smallest entry of the row being filled in */
+    low = buffer[mod(index + radix - 1)];
+#endif
+#ifdef DEBUG_EDITDIST
+    printf("\n %c %2d %2d ", to[0], ins, buffer[index - 1]);
+#endif
+    for (col = 1; col < from_len; col++) {
+	buffer[index] = min3(
+		col * del + ((from[col] == to[0]) ? 0 : ch),
+		(col + 1) * del + ins,
+		buffer[index - 1] + del);
+#ifdef TRN_SPEEDUP
+	if (buffer[index] < low)
+	    low = buffer[index];
+#endif
+	index++;
+#ifdef DEBUG_EDITDIST
+	printf("%2d ", buffer[index - 1]);
+#endif
+    } /* for col = 1 */
+#ifdef DEBUG_EDITDIST
+    printf("\n %c %2d ", to[1], 2 * ins);
+#endif
+/* Now handle the rest of the matrix */
+    for (row = 1; row < to_len; row++) {
+	for (col = 0; col < from_len; col++) {
+	    buffer[index] = min3(
+		    NW(row, col) + ((from[col] == to[row]) ? 0 : ch),
+		    N(row, col + 1) + ins,
+		    W(row + 1, col) + del);
+	    if (from[col] == to[row - 1] && col > 0 &&
+		    from[col - 1] == to[row])		    
+		buffer[index] = min2(buffer[index],
+			NNWW(row - 1, col - 1) + swap_cost);
+#ifdef DEBUG_EDITDIST
+	    printf("%2d ", buffer[index]);
+#endif
+#ifdef TRN_SPEEDUP
+	    /* col == 0 starts a new row, so the minimum is reset there */
+	    if (buffer[index] < low || col == 0)
+		low = buffer[index];
+#endif
+	    index = mod(index + 1);
+	} /* for col = 1 */
+#ifdef DEBUG_EDITDIST
+	if (row < to_len - 1)
+	    printf("\n %c %2d ", to[row+1], (row + 2) * ins);
+	else
+	    printf("\n");
+#endif
+#ifdef TRN_SPEEDUP
+	/* Every entry of this row is already above MIN_DIST: give up */
+	if (low > MIN_DIST)
+	    break;
+#endif
+    } /* for row = 1 */
+/* The answer is the entry written last (row is reused as scratch) */
+    row = buffer[mod(index + radix - 1)];
+    if (buffer != store)
+	free((char *) buffer);
+    return row;
+} /* edit_distn */
--- a/src/C/fuzzy/engine.cpp
+++ b/src/C/fuzzy/engine.cpp
+// $Id: engine.cpp 184 2013-07-10 05:24:26Z jessekornblum $ 
+#include "main.h"
+#include "ssdeep.h"
+#include "match.h"
+#define MAX_STATUS_MSG   78
+// Report the fuzzy hash `sum` computed for the file `fn`.
+// In the matching modes (mode_match_pretty, mode_match, mode_directory) a
+// Filedata object is built from the pair and either stored with
+// match_add() or compared with match_compare(). In every other mode the
+// hash and the quoted filename are printed, preceded once by the output
+// file header.
+// Returns false, or true only if the Filedata object could not be created
+// and fatal_error() returned to us.
+bool display_result(state *s, const TCHAR * fn, const char * sum)
+{
+  // Only spend the extra time to make a Filedata object if we need to
+  if (MODE(mode_match_pretty) or MODE(mode_match) or MODE(mode_directory))
+  {
+    Filedata * f = NULL;
+    try 
+    {
+      f = new Filedata(fn, sum);
+    } 
+    catch (const std::bad_alloc &)
+    {
+      fatal_error("%s: Unable to create Filedata object in engine.cpp:display_result()", __progname);
+    }
+    // Defensive: only reachable if fatal_error() comes back. Never hand a
+    // NULL object to the match routines.
+    if (NULL == f)
+      return true;
+    if (MODE(mode_match_pretty)) 
+    {
+      if (match_add(s,f))
+	print_error_unicode(s,fn,"Unable to add hash to set of known hashes");
+    }
+    else
+    {
+      // This block is for MODE(mode_match) or MODE(mode_directory)
+      match_compare(s,f);
+      // NOTE(review): when mode_directory is not set, f is neither added
+      // to the known set nor deleted here. Whether match_compare() keeps
+      // a reference to it is not visible from this file -- confirm before
+      // freeing.
+      if (MODE(mode_directory))
+	if (match_add(s,f))
+	  print_error_unicode(s,
+			      fn,
+			      "Unable to add hash to set of known hashes");
+    }
+  }
+  else
+  {
+    // No special options selected. Display the hash for this file
+    if (s->first_file_processed)
+    {
+      print_status("%s", OUTPUT_FILE_HEADER);
+      s->first_file_processed = false;
+    }
+    printf ("%s,\"", sum);
+    display_filename(stdout,fn,TRUE);
+    print_status("\"");
+  }
+  return false;
+}
+// Compute and report the fuzzy hash of the file named fn.
+// Opens the file, hashes it with fuzzy_hash_file(), passes the result to
+// display_result() and updates the bookkeeping flags in *s.
+// Returns FALSE on success. Returns TRUE, after printing an error for fn,
+// if the file cannot be opened, memory cannot be allocated, or the hash
+// cannot be computed.
+int hash_file(state *s, TCHAR *fn)
+{
+  size_t fn_length;
+  char *sum;
+  TCHAR *my_filename, *msg;
+  FILE *handle;
+#ifdef WIN32  
+  TCHAR expanded_fn[SSDEEP_PATH_MAX];
+  if (not expanded_path(fn)) {
+    _sntprintf(expanded_fn, 
+	       SSDEEP_PATH_MAX,
+	       _TEXT("\\\\?\\%s"), 
+	       fn);
+  } else {
+    _tcsncpy(expanded_fn, fn, SSDEEP_PATH_MAX);
+  }
+  // Neither call above terminates the buffer when the name fills it
+  expanded_fn[SSDEEP_PATH_MAX - 1] = 0;
+  handle = _tfopen(expanded_fn, _TEXT("rb"));
+# else
+  handle = fopen(fn, "rb");
+#endif
+  if (NULL == handle)
+  {
+    print_error_unicode(s,fn,"%s", strerror(errno));
+    return TRUE;
+  }
+  if ((sum = (char *)malloc(sizeof(char) * FUZZY_MAX_RESULT)) == NULL)
+  {
+    fclose(handle);
+    print_error_unicode(s,fn,"%s", strerror(errno));
+    return TRUE;
+  }
+  if ((msg = (TCHAR *)malloc(sizeof(TCHAR) * (MAX_STATUS_MSG + 2))) == NULL)
+  {
+    free(sum);
+    fclose(handle);
+    print_error_unicode(s,fn,"%s", strerror(errno));
+    return TRUE;
+  }
+  if (MODE(mode_verbose))
+  {
+    fn_length = _tcslen(fn);
+    my_filename = fn;
+    if (fn_length > MAX_STATUS_MSG)
+    {
+      // We have to make a duplicate of the string to call basename on it
+      // We need the original name for the output later on
+      my_filename = _tcsdup(fn);
+      if (NULL == my_filename)
+	my_filename = fn;   // Out of memory: show the full name instead
+      else
+	my_basename(my_filename);
+    }
+    _sntprintf(msg,
+	       MAX_STATUS_MSG-1,
+	       _TEXT("Hashing: %s%s"), 
+	       my_filename, 
+	       _TEXT(BLANK_LINE));
+    // The message may have been truncated without a terminator
+    msg[MAX_STATUS_MSG-1] = 0;
+    _ftprintf(stderr,_TEXT("%s\r"), msg);
+    if (my_filename != fn)
+      free(my_filename);
+  }
+  // fuzzy_hash_file() returns zero on success. On failure `sum` has not
+  // been filled in and must not be displayed.
+  if (fuzzy_hash_file(handle,sum))
+  {
+    print_error_unicode(s,fn,"Unable to compute fuzzy hash");
+    fclose(handle);
+    free(sum);
+    free(msg);
+    return TRUE;
+  }
+  prepare_filename(s,fn);
+  display_result(s,fn,sum);
+  if (find_file_size(handle) > SSDEEP_MIN_FILE_SIZE)
+    s->found_meaningful_file = true;
+  s->processed_file = true;
+  fclose(handle);
+  free(sum);
+  free(msg);
+  return FALSE;
+}
--- a/src/C/fuzzy/filedata.cpp
+++ b/src/C/fuzzy/filedata.cpp
+// SSDEEP
+// $Id: filedata.cpp 163 2012-07-17 19:59:54Z jessekornblum $
+// Copyright (C) 2012 Kyrus. See COPYING for details.
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include "filedata.h"
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+// Check that m_signature has the shape of a bare fuzzy hash.
+// Returns true only when the signature starts with an unsigned block
+// size, contains at least two colons, and has no comma after the second
+// colon.
+bool Filedata::valid(void) const
+{
+  // A valid fuzzy hash has the form
+  // [blocksize]:[sig1]:[sig2]
+  // with no filename at the end
+  // First find the block size
+  const char * sig = m_signature.c_str();
+  unsigned int block_size;
+  // sscanf returns the number of conversions performed: EOF for empty
+  // input but 0 when the text does not start with a number. Both mean
+  // there is no block size, so require exactly one conversion.
+  if (1 != sscanf(sig, "%u:", &block_size))
+    return false;
+  // Move past the blocksize
+  sig = strchr(sig,':');
+  if (!sig)
+    return false;
+  // Move past the first colon and Look for the second colon
+  ++sig;
+  sig = strchr(sig,':');
+  if (!sig)
+    return false;
+  // Finally, a valid signature does *not* have a filename at the end of it
+  sig = strchr(sig,',');
+  if (sig)
+    return false;
+  return true;
+}
+// Detach this object from its cluster: empty the cluster set and forget
+// the pointer. Does nothing when no cluster is attached.
+void Filedata::clear_cluster(void)
+{
+  if (m_cluster != NULL)
+  {
+    // Empty the set first; we don't want to call the destructors on the
+    // individual elements.
+    m_cluster->clear();
+    m_cluster = NULL;
+  }
+}
+// Build a Filedata from a filename and a bare signature.
+// match_file, when not NULL, names the file of known hashes the signature
+// came from. Throws std::bad_alloc when sig is not a valid signature.
+Filedata::Filedata(const TCHAR *fn, const char * sig, const char * match_file)
+{
+  // Validate before anything else is stored
+  m_signature = std::string(sig);
+  if (!valid())
+    throw std::bad_alloc();
+  m_cluster  = NULL;
+  m_filename = _tcsdup(fn);
+  m_has_match_file = (match_file != NULL);
+  if (m_has_match_file)
+    m_match_file = std::string(match_file);
+}
+// Build a Filedata from one line of a file of known hashes.
+// sig is either a bare signature or a signature followed by a comma and
+// a filename in quotation marks. match_file, when not NULL, names the
+// file the line came from.
+// Throws std::bad_alloc when the line is malformed or memory for the
+// filename cannot be allocated.
+Filedata::Filedata(const std::string sig, const char * match_file)
+{
+  // Set the easy stuff first
+  m_cluster = NULL;
+  if (NULL == match_file)
+    m_has_match_file = false;
+  else
+  {
+    m_has_match_file = true;
+    m_match_file = std::string(match_file);
+  }
+  // If we don't have a filename included with the sig, that's ok,
+  // but we should find out now.
+  // If there is a filename, it should be immediately after the
+  // first comma and enclosed in quotation marks.
+  size_t start, stop;
+  start = sig.find_first_of(",\"");
+  if (std::string::npos == start)
+  {
+    // There is no filename. Ok. We still have a valid Filedata.
+    m_signature = std::string(sig);
+    // We still have to check the validity of the signature. Do it before
+    // duplicating the placeholder name so nothing leaks when we throw.
+    if (not valid())
+      throw std::bad_alloc();
+    m_filename  = _tcsdup(_TEXT("[NO FILENAME]"));
+    return;
+  }
+  // There is a filename. Ok.
+  // Advance past the comma and quotation mark.
+  start += 2;
+  // Look for the second quotation mark, which should be at the end
+  // of the string. 
+  stop = sig.find_last_of('"');
+  if (stop != sig.size() - 1)
+    throw std::bad_alloc();
+  // The closing quotation mark must not come before the start of the
+  // name. Otherwise the line is malformed and substr() below could throw
+  // std::out_of_range, which callers do not expect.
+  if (stop < start)
+    throw std::bad_alloc();
+  // Strip off the final quotation mark and record the filename
+  std::string tmp = sig.substr(start,(stop - start));
+  // Strip off the filename from the signature. Remember that "start"
+  // now points to two characters ahead of the comma
+  m_signature = sig.substr(0,start-2);
+  // Unescape any quotation marks in the filename. The search restarts
+  // from the beginning after every replacement.
+  const std::string escaped_quote("\\\"");
+  size_t pos;
+  while ((pos = tmp.find(escaped_quote)) != std::string::npos)
+    tmp.replace(pos,2,std::string("\""));
+#ifndef _WIN32
+  m_filename = strdup(tmp.c_str());
+  if (NULL == m_filename)
+    throw std::bad_alloc();
+#else
+  char * tmp2 = strdup(tmp.c_str());
+  if (NULL == tmp2)
+    throw std::bad_alloc();
+  // On Win32 we have to do a kludgy cast from ordinary char 
+  // values to the TCHAR values we use internally. Because we may have
+  // reset the string length, get it again.
+  // The extra +1 is for the terminating NUL
+  size_t i, sz = strlen(tmp2);
+  m_filename = (TCHAR *)malloc(sizeof(TCHAR) * (sz + 1));
+  if (NULL == m_filename)
+  {
+    free(tmp2);
+    throw std::bad_alloc();
+  }
+  for (i = 0 ; i < sz ; i++)
+    m_filename[i] = (TCHAR)(tmp2[i]);
+  m_filename[i] = 0;
+  // The narrow copy was only needed for the conversion
+  free(tmp2);
+#endif
+}
+// Writes f to the stream o as: the signature, a comma, the value
+// returned by get_filename(), and a trailing comma.
+std::ostream& operator<<(std::ostream& o, const Filedata& f)
+{
+  o << f.get_signature();
+  o << ",";
+  o << f.get_filename();
+  o << ",";
+  return o;
+}
+// Two Filedata objects are equal when their signatures are equal and
+// they either both lack a match file or both name the same match file.
+// Filenames and clusters are not compared.
+bool operator==(const Filedata& a, const Filedata& b)
+{
+  if (not (a.get_signature() == b.get_signature()))
+    return false;
+  // One side has a match file and the other does not
+  if (a.has_match_file() != b.has_match_file())
+    return false;
+  // Neither side has a match file, so there is nothing left to compare
+  if (not a.has_match_file())
+    return true;
+  return (a.get_match_file() == b.get_match_file());
+}
--- a/src/C/fuzzy/filedata.h
+++ b/src/C/fuzzy/filedata.h
+#ifndef __FILEDATA_H
+#define __FILEDATA_H
+/// @file filedata.h
+// Copyright (C) 2012 Kyrus. See COPYING for details
+// $Id: filedata.h 160 2012-07-17 01:00:07Z jessekornblum $
+#include <set>
+#include <string>
+#include <iostream>
+#include "tchar-local.h"
+/// Contains a fuzzy hash and associated metadata for a file
+class Filedata
+{
+ public:
+  /// Creates an empty Filedata with no cluster, no filename and no
+  /// match file.
+  ///
+  /// Both pointer members are set to NULL so that has_cluster(),
+  /// get_cluster() and get_filename() never read an indeterminate value.
+  Filedata() : m_cluster(NULL), m_filename(NULL), m_has_match_file(false) {}
+  /// Creates a new Filedata object with the given filename and signature
+  ///
+  /// If sig is not valid, throws std::bad_alloc
+  Filedata(const TCHAR * fn, const char * sig, const char * match_file = NULL);
+  /// Creates a new Filedata object from a signature string which may
+  /// be followed by a comma and the filename in quotation marks
+  ///
+  /// If sig is not valid, throws std::bad_alloc
+  Filedata(const std::string sig, const char * match_file = NULL);
+  /// Returns the file's fuzzy hash without a filename. 
+  /// std::string("[blocksize]:[sig1]:[sig2]")
+  std::string get_signature(void) const { return m_signature; }
+  /// Returns the file's name
+  /// RBF - Should this be a std::wstring?
+  TCHAR * get_filename(void) const { return m_filename; }
+  /// Returns true if this file came from a file of known files on the disk
+  bool has_match_file(void) const { return m_has_match_file; }
+  /// Returns the name of the file on the disk from which this file came
+  /// RBF - Should this be a std::wstring?
+  std::string get_match_file(void) const { return m_match_file; }
+  /// Returns true if this file belongs to a cluster of similar files
+  bool has_cluster(void) const { return (m_cluster != NULL); }
+  /// Records c as the cluster this file belongs to. Only the pointer
+  /// is stored; the set is not copied.
+  void set_cluster(std::set<Filedata *> *c) { m_cluster = c; }
+  /// Returns the cluster this file belongs to, or NULL if there is none
+  std::set<Filedata* >* get_cluster(void) const { return m_cluster; }
+  /// Empties the cluster set, if any, and detaches this file from it
+  void clear_cluster(void);
+ private:
+  /// Cluster of similar files this file belongs to, or NULL
+  std::set<Filedata *> * m_cluster;
+  /// Original signature in the form [blocksize]:[sig1]:[sig2]
+  /// It may also contain the filename, but there is no guarantee of that
+  /// one way or the other.
+  std::string m_signature;
+  /// RBF - Should this be a std::wstring?
+  TCHAR * m_filename;
+  /// File of hashes where we got this known file from, if any
+  std::string m_match_file;
+  /// True if m_match_file has been set
+  bool m_has_match_file;
+  /// Returns true if the m_signature field contains a valid fuzzy hash
+  bool valid(void) const;
+};
+/// Display [blocksize]:[sig1]:[sig2],"filename"
+std::ostream& operator<<(std::ostream& o, const Filedata& f);
+/// RBF - We can use this IF AND ONLY IF get_filename() returns a std::wstring
+//bool operator==(const Filedata& a, const Filedata& b);
+#endif  // ifndef __FILEDATA_H
--- a/src/C/fuzzy/find-file-size.c
+++ b/src/C/fuzzy/find-file-size.c
+// Fuzzy Hashing by Jesse Kornblum
+// Copyright (C) 2012 Kyrus
+// Copyright (C) 2008 ManTech International Corporation
+//
+// $Id: find-file-size.c 144 2012-04-24 14:59:33Z jessekornblum $ 
+//
+#include "main.h"
+#ifndef _WIN32
+// Return the size, in bytes of an open file stream. On error, return 0 
+#if defined (__LINUX__)
+// Linux: returns the size, in bytes, of the file or device behind the
+// open stream f. Returns 0 if the size cannot be determined.
+off_t find_file_size(FILE *f)
+{
+  int fd = fileno(f);
+  struct stat sb;
+  if (fstat(fd,&sb))
+    return 0;
+  if (S_ISREG(sb.st_mode) || S_ISDIR(sb.st_mode))
+    return sb.st_size;
+#ifdef HAVE_SYS_IOCTL_H
+#ifdef HAVE_SYS_MOUNT_H
+  if (S_ISCHR(sb.st_mode) || S_ISBLK(sb.st_mode))
+  {
+#if defined(_IO) && defined(BLKGETSIZE)
+    // BLKGETSIZE stores an unsigned long through its argument, and the
+    // value is the device size in units of 512 bytes no matter what the
+    // logical sector size of the device is. So we hand it a variable of
+    // exactly that type and always scale by 512; scaling by the value
+    // from BLKSSZGET would overstate the size of 4K-sector devices.
+    unsigned long num_sectors = 0;
+    if (ioctl(fd, BLKGETSIZE, &num_sectors))
+    {
+      return 0;
+    }
+    return ((off_t)num_sectors * 512);
+#else
+    // If we can't run the ioctl call, we can't do anything here
+    return 0;
+#endif // ifdefined _IO and BLKGETSIZE
+  }
+#endif // #ifdef HAVE_SYS_MOUNT_H
+#endif // #ifdef HAVE_SYS_IOCTL_H
+  return 0;
+}
+#elif defined (__APPLE__)
+// Mac OS X: returns the number of bytes between the current position of
+// the open stream f and the end of the file or device behind it.
+// Returns 0 on error. The position of f is restored before returning.
+off_t find_file_size(FILE *f) {
+  struct stat info;
+  off_t total = 0;
+  off_t original = ftello(f);
+  int fd = fileno(f);
+  // I'd prefer not to use fstat as it will follow symbolic links. We don't
+  // follow symbolic links. That being said, all symbolic links *should*
+  // have been caught before we got here.
+  if (fstat(fd, &info))
+  {
+    return 0;
+  }
+#ifdef HAVE_SYS_IOCTL_H
+  // Block devices, like /dev/hda, don't return a normal filesize.
+  // If we are working with a block device, we have to ask the operating
+  // system to tell us the true size of the device.
+  //
+  // This isn't the recommended way to check for block devices,
+  // but using S_ISBLK(info.st_mode) wasn't working.
+  // NOTE(review): this is a bit mask test, so it is also true for any
+  // other file type whose mode shares a bit with S_IFBLK. For those the
+  // ioctl calls below are expected to fail and we return 0.
+  if (info.st_mode & S_IFBLK)
+  {
+    uint32_t blocksize = 0;
+    uint64_t blockcount = 0;
+    // Get the block size
+    if (ioctl(fd, DKIOCGETBLOCKSIZE,&blocksize) < 0)
+    {
+      return 0;
+    }
+    // Get the number of blocks. A failure here used to be ignored,
+    // which produced a size of zero minus the current offset.
+    if (ioctl(fd, DKIOCGETBLOCKCOUNT, &blockcount) < 0)
+    {
+      return 0;
+    }
+    total = blocksize * blockcount;
+    return (total - original);
+  }
+#endif     // ifdef HAVE_SYS_IOCTL_H
+  // Everything else: seek to the end to find the size, then go back.
+  // This part lives outside the #ifdef so that the function still
+  // compiles when HAVE_SYS_IOCTL_H is not defined.
+  if ((fseeko(f,0,SEEK_END)))
+    return 0;
+  total = ftello(f);
+  if ((fseeko(f,original,SEEK_SET)))
+    return 0;
+  return (total - original);
+}
+#else   // ifdef __APPLE__
+// This is code for general UNIX systems 
+// (e.g. NetBSD, FreeBSD, OpenBSD, etc) 
+// Returns the block-aligned offset halfway between a and b. Both
+// offsets are first reduced to whole block numbers (dividing by
+// blksize), the block numbers are averaged, and the result is scaled
+// back up to a byte offset.
+static off_t
+midpoint (off_t a, off_t b, long blksize)
+{
+  const off_t first_block  = a / blksize;
+  const off_t last_block   = b / blksize;
+  const off_t middle_block = first_block + (last_block - first_block) / 2;
+  return middle_block * blksize;
+}
+// Finds the size, in bytes, of the device open on fd by trying to read
+// blk_size bytes at various offsets. The probe offset is doubled after
+// every full read; after a failed read it is moved to the midpoint
+// between the end of the last full read and the failed offset.
+//
+// Returns 0 if blk_size is not positive or the probe buffer cannot be
+// allocated. The file offset of fd is reset to zero before returning.
+off_t find_dev_size(int fd, int blk_size)
+{
+  off_t curr = 0, amount = 0;
+  void *buf;
+  // A negative block size would turn into a huge malloc request
+  if (blk_size <= 0)
+    return 0;
+  buf = malloc(blk_size);
+  if (NULL == buf)
+    return 0;
+  for (;;) {
+    ssize_t nread;
+    lseek(fd, curr, SEEK_SET);
+    nread = read(fd, buf, blk_size);
+    if (nread >= blk_size)
+    {
+      // A whole block was read, so the device is at least this big.
+      // Try again twice as far out.
+      amount = curr + blk_size;
+      curr = amount * 2;
+      continue;
+    }
+    if (nread > 0)
+    {
+      // 0 < nread < blk_size: the device ends part way through the
+      // block that starts at curr. The size is measured from curr,
+      // which may be beyond the end of the last full read.
+      amount = curr + nread;
+      break;
+    }
+    // Nothing could be read at curr
+    if (curr == amount)
+      break;
+    curr = midpoint(amount, curr, blk_size);
+  }
+  free(buf);
+  lseek(fd, 0, SEEK_SET);
+  return amount;
+}
+// General UNIX: returns the size, in bytes, of the file or device
+// behind the open stream f, or 0 if it cannot be determined.
+off_t find_file_size(FILE *f) 
+{
+  struct stat sb;
+  int fd = fileno(f);
+  if (0 != fstat(fd,&sb))
+    return 0;
+  // Devices have to be probed; st_size is only used for files and
+  // directories
+  if (S_ISCHR(sb.st_mode) || S_ISBLK(sb.st_mode))
+    return find_dev_size(fd,sb.st_blksize);
+  if (S_ISREG(sb.st_mode) || S_ISDIR(sb.st_mode))
+    return sb.st_size;
+  return 0;
+}  
+#endif // ifdef __LINUX__
+#endif // ifndef _WIN32
+#if defined(_WIN32)
+// Win32: returns the offset of the end of the open stream f, found by
+// seeking to the end and back. Returns 0 if either seek fails.
+off_t find_file_size(FILE *f) 
+{
+  off_t original = ftello(f);
+  off_t total;
+  // Windows does not support running fstat on block devices,
+  // so there's no point in mucking about with them. 
+  if (0 != fseeko(f,0,SEEK_END))
+    return 0;
+  total = ftello(f);
+  // Put the stream back where we found it
+  if (0 != fseeko(f,original,SEEK_SET))
+    return 0;
+  return total;
+}
+#endif // ifdef _WIN32 
--- a/src/C/fuzzy/fuzzy.c
+++ b/src/C/fuzzy/fuzzy.c
--- a/src/C/fuzzy/fuzzy.h
+++ b/src/C/fuzzy/fuzzy.h
+/*
+ * Copyright (C) ManTech International Corporation 2010
+ * Copyright (C) Kyrus 2012
+ * Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
+ *
+ * $Id: fuzzy.h 180 2013-06-10 23:24:26Z jessekornblum $
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Earlier versions of this code can be found at:
+ *     http://ssdeep.sf.net/
+ */
+/// \mainpage
+/// This is the documentation for the fuzzy hashing API from ssdeep.
+///
+/// There is a complete function reference in fuzzy.h.
+///
+/// The most recent version of this documentation can be found
+/// at http://ssdeep.sourceforge.net/.
+///
+/// \copydoc fuzzy.h
+///
+/// \version 3.0
+///
+/// \author Jesse Kornblum, research@jessekornblum.com
+/// \author Helmut Grohne, helmut@subdivi.de
+/// \file fuzzy.h
+/// \brief
+/// These functions allow a programmer to compute the fuzzy hashes
+/// (also called the context-triggered piecewise hashes) of
+/// \link fuzzy_hash_buf() a buffer
+/// of text @endlink,
+/// \link fuzzy_hash_filename() the contents of a file on the disk @endlink,
+/// and
+/// @link fuzzy_hash_file() the contents of
+/// an open file handle @endlink .
+/// There is also a function to
+/// @link fuzzy_compare() compute the
+/// similarity between any two fuzzy signatures @endlink.
+// The include guard has to be the outermost construct. If extern "C"
+// were opened before the guard, a second inclusion from C++ would open
+// the linkage block again while the guard skipped its closing brace.
+#ifndef FUZZY_H
+#define FUZZY_H
+#include <stdint.h>
+#include <stdio.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * @brief fuzzy_digest flag indicating to eliminate sequences of more than
+ *        three identical characters
+ */
+#define FUZZY_FLAG_ELIMSEQ 0x1u
+/**
+ * @brief fuzzy_digest flag indicating not to truncate the second part to
+ *        SPAMSUM_LENGTH/2 characters.
+ */
+#define FUZZY_FLAG_NOTRUNC 0x2u
+struct fuzzy_state;
+/**
+ * @brief Construct a fuzzy_state object and return it.
+ *
+ * To use it call fuzzy_update and fuzzy_digest on it. It must be disposed
+ * with fuzzy_free.
+ * @return the constructed fuzzy_state or NULL on failure
+ */
+extern /*@only@*/ /*@null@*/ struct fuzzy_state *fuzzy_new(void);
+/**
+ * @brief Feed the data contained in the given buffer to the state.
+ *
+ * When an error occurs, the state is undefined. In that case it must not be
+ * passed to any function besides fuzzy_free.
+ * @param buffer The data to be hashed
+ * @param buffer_size The length of the given buffer
+ * @return zero on success, non-zero on error
+ */
+extern int fuzzy_update(struct fuzzy_state *state, 
+			const unsigned char *buffer,
+			size_t buffer_size);
+/**
+ * @brief Obtain the fuzzy hash from the state.
+ *
+ * This operation does not change the state at all. It reports the hash for the
+ * concatenation of the data previously fed using fuzzy_update. 
+ * @param result Where the fuzzy hash is stored. This variable
+ * must be allocated to hold at least FUZZY_MAX_RESULT bytes.
+ * @param flags is a bitwise or of FUZZY_FLAG_* macros. The absence of flags is
+ * represented by a zero.
+ * @return zero on success, non-zero on error
+ */
+extern int fuzzy_digest(const struct fuzzy_state *state,
+			/*@out@*/ char *result, 
+			unsigned int flags);
+/**
+ * @brief Dispose a fuzzy state.
+ */
+extern void fuzzy_free(/*@only@*/ struct fuzzy_state *state);
+/**
+ * @brief Compute the fuzzy hash of a buffer
+ *
+ * This computes the fuzzy hash of the first buf_len bytes of the buffer.
+ * It is the caller's responsibility to append the filename,
+ * if any, to result after computation. 
+ * @param buf The data to be fuzzy hashed
+ * @param buf_len The length of the data being hashed
+ * @param result Where the fuzzy hash of buf is stored. This variable
+ * must be allocated to hold at least FUZZY_MAX_RESULT bytes.
+ * @return Returns zero on success, non-zero on error.
+ */
+extern int fuzzy_hash_buf(const unsigned char *buf, 
+			  uint32_t buf_len,
+			  /*@out@*/ char *result);
+/**
+ * @brief Compute the fuzzy hash of a file using an open handle
+ *
+ * Computes the fuzzy hash of the contents of the open file, starting
+ * at the beginning of the file. When finished, the file pointer is
+ * returned to its original position. If an error occurs, the file 
+ * pointer's value is undefined.
+ * It is the caller's responsibility to append the filename
+ * to the result after computation.
+ * @param handle Open handle to the file to be hashed
+ * @param result Where the fuzzy hash of the file is stored. This 
+ * variable must be allocated to hold at least FUZZY_MAX_RESULT bytes.
+ * @return Returns zero on success, non-zero on error
+ */
+extern int fuzzy_hash_file(FILE *handle, /*@out@*/ char *result);
+/**
+ * @brief Compute the fuzzy hash of a stream using an open handle
+ *
+ * Computes the fuzzy hash of the contents of the open stream, starting at the
+ * current file position until reaching EOF. Unlike fuzzy_hash_file the stream
+ * is never seeked. If an error occurs, the result as well as the file position
+ * are undefined.
+ * It is the caller's responsibility to append the filename
+ * to the result after computation.
+ * @param handle Open handle to the stream to be hashed
+ * @param result Where the fuzzy hash of the file is stored. This 
+ * variable must be allocated to hold at least FUZZY_MAX_RESULT bytes.
+ * @return Returns zero on success, non-zero on error
+ */
+extern int fuzzy_hash_stream(FILE *handle, /*@out@*/ char *result);
+/**
+ * @brief Compute the fuzzy hash of a file
+ *
+ * Opens, reads, and hashes the contents of the file 'filename' 
+ * The result must be allocated to hold FUZZY_MAX_RESULT characters. 
+ * It is the caller's responsibility to append the filename
+ * to the result after computation. 
+ * @param filename The file to be hashed
+ * @param result Where the fuzzy hash of the file is stored. This 
+ * variable must be allocated to hold at least FUZZY_MAX_RESULT bytes.
+ * @return Returns zero on success, non-zero on error. 
+ */
+extern int fuzzy_hash_filename(const char *filename, /*@out@*/ char * result);
+/// Computes the match score between two fuzzy hash signatures.
+/// @return Returns a value from zero to 100 indicating the
+/// match score of the 
+/// two signatures. A match score of zero indicates the signatures
+/// did not match. When an error occurs, such as if one of the
+/// inputs is NULL, returns -1.
+extern int fuzzy_compare(const char *sig1, const char *sig2);
+/** Length of an individual fuzzy hash signature component. */
+#define SPAMSUM_LENGTH 64
+/** The longest possible length for a fuzzy hash signature
+ * (without the filename) */
+#define FUZZY_MAX_RESULT (2 * SPAMSUM_LENGTH + 20)
+#ifdef __cplusplus
+} 
+#endif
+#endif
--- a/src/C/fuzzy/helpers.cpp
+++ b/src/C/fuzzy/helpers.cpp
+// ssdeep
+// Copyright (C) 2012 Kyrus
+// Copyright (C) 2006 ManTech International Corporation
+//
+// $Id: helpers.cpp 184 2013-07-10 05:24:26Z jessekornblum $
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#include "ssdeep.h"
+// Prints a one-line hint to stderr telling the user to run the
+// program with -h.
+void try_msg(void)
+{
+  fprintf(stderr,
+          "Try `%s -h` for more information.%s",
+          __progname,
+          NEWLINE);
+}
+// Returns true if the first four characters of p are "\\?\"
+// (backslash, backslash, question mark, backslash).
+bool expanded_path(TCHAR *p)
+{
+  return (0 == _tcsncmp(p,_TEXT("\\\\?\\"),4));
+}
+// Exits with EXIT_FAILURE if s is NULL or if condition is nonzero.
+// When exiting because of condition, msg and the usage hint are
+// printed first unless silent mode is set. Returns normally otherwise.
+void sanity_check(state *s, int condition, const char *msg)
+{
+  if (NULL == s)
+    exit(EXIT_FAILURE);
+  if (!condition)
+    return;
+  if (0 == (s->mode & mode_silent))
+  {
+    print_status("%s: %s", __progname, msg);
+    try_msg();
+  }
+  exit (EXIT_FAILURE);
+}
+// The basename function kept misbehaving on OS X, so I rewrote it.
+// This function isn't perfect, nor is it designed to be. Because
+// we're guaranteed to be working with a filename here, there's no way
+// that s will end with a DIR_SEPARATOR (e.g. /foo/bar/). This function
+// will not work properly for a string that ends in a DIR_SEPARATOR.
+//
+// Shortens s, in place, to whatever follows its last DIR_SEPARATOR.
+// A string with no DIR_SEPARATOR is left alone.
+// Returns TRUE if s is NULL, FALSE otherwise.
+int my_basename(TCHAR *s)
+{
+  if (NULL == s)
+    return TRUE;
+  TCHAR * last_sep = _tcsrchr(s,DIR_SEPARATOR);
+  if (last_sep != NULL)
+  {
+    // The length of the string starting at the separator equals the
+    // number of characters after the separator plus the terminator,
+    // which is how much we move to the front of s.
+    const size_t count = _tcslen(last_sep);
+    _tmemmove(s,last_sep+1,count);
+  }
+  return FALSE;
+}
+// Truncates c, in place, just after its last DIR_SEPARATOR, so the
+// separator itself is kept. If c holds no DIR_SEPARATOR it becomes the
+// empty string. Returns TRUE if c is NULL, FALSE otherwise.
+int my_dirname(TCHAR *c)
+{
+  if (NULL == c)
+    return TRUE;
+  TCHAR *last_sep = _tcsrchr(c,DIR_SEPARATOR);
+  if (NULL == last_sep)
+    c[0] = 0;
+  else
+    last_sep[1] = 0;
+  return FALSE;
+}
+// In bare name mode, strips the path information from fn in place.
+// Does nothing in any other mode. An error is reported if the name
+// cannot be shortened.
+void prepare_filename(state *s, TCHAR *fn)
+{
+  if (0 == (s->mode & mode_barename))
+    return;
+  if (my_basename(fn))
+    print_error_unicode(s,fn,"Unable to shorten filename");
+}
+// Strips every trailing carriage return and line feed from s, in place.
+// Works on both DOS and *nix newlines.
+void chop_line_tchar(TCHAR *s)
+{
+  size_t len;
+  // len is checked before s[len-1] is touched, so we never look in
+  // front of the start of the string
+  for (len = _tcslen(s) ; len > 0 ; --len)
+  {
+    const TCHAR last = s[len-1];
+    if (last != _TEXT('\r') && last != _TEXT('\n'))
+      break;
+    s[len-1] = 0;
+  }
+}
+// Strips every trailing carriage return and line feed from s, in place.
+// Works on both DOS and *nix newlines. A NULL s is ignored.
+//
+// s is a plain char string, so it is compared against plain char
+// literals rather than the _TEXT() literals meant for TCHAR strings.
+void chop_line(char *s)
+{
+  if (NULL == s)
+    return;
+  size_t pos = strlen(s);
+  // pos > 0 is tested first, so s[pos-1] is never read when pos == 0
+  while (pos > 0 && ('\r' == s[pos-1] || '\n' == s[pos-1]))
+  {
+    --pos;
+    s[pos] = 0;
+  }
+}
+// Moves the characters of fn from index new_start onward down to index
+// start and terminates the string after them. Does nothing if start is
+// past the end of the string or if new_start is less than start.
+void shift_string_tchar(TCHAR *fn, unsigned int start, unsigned int new_start)
+{
+  const size_t sz = _tcslen(fn);
+  if (start > sz || new_start < start)
+    return;
+  unsigned int dst = start;
+  for (unsigned int src = new_start ; src < sz ; ++src)
+    fn[dst++] = fn[src];
+  fn[dst] = 0;
+}
+// Returns the index of the first comma in s at or after index start
+// that is not inside quotation marks. Every quotation mark seen from
+// start onward toggles the quoted state. Returns -1 if there is no
+// such comma.
+int find_next_comma_tchar(TCHAR *s, unsigned int start)
+{
+  const size_t size = _tcslen(s);
+  int in_quote = FALSE;
+  for (unsigned int pos = start ; pos < size ; ++pos)
+  {
+    if (_TEXT('"') == s[pos])
+      in_quote = !in_quote;
+    else if (_TEXT(',') == s[pos] && !in_quote)
+    {
+      // Although it's potentially unwise to cast an unsigned int back
+      // to an int, problems will only occur when the value is beyond
+      // the range of int. Because we're working with the index of a
+      // string that is probably less than 32,000 characters, we should
+      // be okay.
+      return (int)pos;
+    }
+  }
+  return -1;
+}
+// Hands the MM_INIT macro the format "%s\n" and a string written
+// entirely in hex escapes. Decoded as ASCII the string reads: "I do not
+// believe we will get Eddie Van Halen until we have a triumphant video."
+// NOTE(review): MM_INIT is defined elsewhere; what it does with these
+// arguments cannot be told from here.
+void mm_magic(void){MM_INIT("%s\n","\x49\x20\x64\x6f\x20\x6e\x6f\x74\x20\x62\x65\x6c\x69\x65\x76\x65\x20\x77\x65\x20\x77\x69\x6c\x6c\x20\x67\x65\x74\x20\x45\x64\x64\x69\x65\x20\x56\x61\x6e\x20\x48\x61\x6c\x65\x6e\x20\x75\x6e\x74\x69\x6c\x20\x77\x65\x20\x68\x61\x76\x65\x20\x61\x20\x74\x72\x69\x75\x6d\x70\x68\x61\x6e\x74\x20\x76\x69\x64\x65\x6f\x2e");}
+// Reduces s, in place, to the string after the nth comma in s. If that
+// string is quoted, the quotes are removed. If there is no valid
+// string to be found, returns TRUE. Otherwise, returns FALSE
+int find_comma_separated_string_tchar(TCHAR *s, unsigned int n)
+{
+  int start = 0, end;
+  unsigned int count = 0;
+  while (count < n)
+  {
+    if ((start = find_next_comma_tchar(s,start)) == -1)
+      return TRUE;
+    ++count;
+    // Advance the pointer past the current comma
+    ++start;
+  }
+  // It's okay if there is no next comma, it just means that this is
+  // the last comma separated value in the string
+  if ((end = find_next_comma_tchar(s,start)) == -1)
+    end = (int)_tcslen(s);
+  // Strip off the quotation marks, if necessary. We don't have to worry
+  // about uneven quotation marks (i.e quotes at the start but not the end
+  // as they are handled by the find_next_comma function.
+  if (s[start] == _TEXT('"'))
+    ++start;
+  // Only look at s[end - 1] when the field is not empty. For an empty
+  // string end is zero and s[end - 1] would be in front of the buffer.
+  if (end > start && s[end - 1] == _TEXT('"'))
+    end--;
+  s[end] = 0;
+  shift_string_tchar(s,0,start);
+  return FALSE;
+}
+// Shift the contents of a string so that the values after 'new_start'
+// will now begin at location 'start'. The string is terminated after
+// the moved characters.
+//
+// Does nothing if start is past the end of the string or if new_start
+// is less than start. If new_start is at or past the end of the string
+// there is nothing to move and the string is simply cut off at start.
+void shift_string(char *fn, size_t start, size_t new_start)
+{
+  // Measure the string once; its length does not change while the
+  // tail is being moved.
+  const size_t len = strlen(fn);
+  if (start > len || new_start < start)
+    return;
+  size_t tail = 0;
+  if (new_start < len)
+  {
+    tail = len - new_start;
+    // Source and destination overlap, so this has to be memmove
+    memmove(fn + start, fn + new_start, tail);
+  }
+  fn[start + tail] = 0;
+}
+// Returns the index of the first comma in s at or after index start
+// that is not inside quotation marks. Every quotation mark seen from
+// start onward toggles the quoted state. Returns -1 if there is no
+// such comma.
+int find_next_comma(char *s, unsigned int start)
+{
+  const size_t size = strlen(s);
+  int in_quote = FALSE;
+  for (unsigned int pos = start ; pos < size ; ++pos)
+  {
+    if ('"' == s[pos])
+      in_quote = !in_quote;
+    else if (',' == s[pos] && !in_quote)
+    {
+      // Although it's potentially unwise to cast an unsigned int back
+      // to an int, problems will only occur when the value is beyond
+      // the range of int. Because we're working with the index of a
+      // string that is probably less than 32,000 characters, we should
+      // be okay.
+      return (int)pos;
+    }
+  }
+  return -1;
+}
+/// Reduces s, in place, to the string after the nth comma in s. If that
+/// string is quoted, the quotes are removed. If there is no valid
+/// string to be found, returns TRUE. Otherwise, returns FALSE
+int find_comma_separated_string(char *s, unsigned int n)
+{
+  int start = 0, end;
+  unsigned int count = 0;
+  while (count < n)
+  {
+    if ((start = find_next_comma(s,start)) == -1)
+      return TRUE;
+    ++count;
+    // Advance the pointer past the current comma
+    ++start;
+  }
+  // It's okay if there is no next comma, it just means that this is
+  // the last comma separated value in the string
+  if ((end = find_next_comma(s,start)) == -1)
+    end = (int)strlen(s);
+  // Strip off the quotation marks, if necessary. We don't have to worry
+  // about uneven quotation marks (i.e quotes at the start but not the end
+  // as they are handled by the find_next_comma function.
+  if (s[start] == '"')
+    ++start;
+  // Only look at s[end - 1] when the field is not empty. For an empty
+  // string end is zero and s[end - 1] would be in front of the buffer.
+  if (end > start && s[end - 1] == '"')
+    end--;
+  s[end] = 0;
+  shift_string(s,0,start);
+  return FALSE;
+}
+// Replaces every backslash-quote pair in str with a bare quotation
+// mark, in place, by shifting the rest of the string down over the
+// backslash. Returns TRUE if str is NULL, FALSE otherwise.
+int remove_escaped_quotes(char * str)
+{
+  if (NULL == str)
+    return TRUE;
+  for (size_t pos = 0 ; str[pos] != 0 ; ++pos)
+  {
+    const bool escaped_quote = ('\\' == str[pos] && '"' == str[pos+1]);
+    // Dropping the backslash leaves the quotation mark at pos, and the
+    // loop increment then steps over it
+    if (escaped_quote)
+      shift_string(str,pos,pos+1);
+  }
+  return FALSE;
+}
--- a/src/C/fuzzy/install-sh
+++ b/src/C/fuzzy/install-sh
--- a/src/C/fuzzy/ltmain.sh
+++ b/src/C/fuzzy/ltmain.sh
--- a/src/C/fuzzy/main.cpp
+++ b/src/C/fuzzy/main.cpp
+// Fuzzy Hashing by Jesse Kornblum
+// Copyright (C) 2013 Facebook
+// Copyright (C) 2012 Kyrus
+// Copyright (C) 2010 ManTech International Corporation
+//
+// $Id: main.cpp 187 2013-07-10 06:56:14Z jessekornblum $
+//
+// This program is licensed under version 2 of the GNU Public License.
+// See the file COPYING for details. 
+#include "ssdeep.h"
+#include "match.h"
+#ifdef _WIN32 
+// This can't go in main.h or we get multiple definitions of it
+// Allows us to open standard input in binary mode by default 
+// See http://gnuwin32.sourceforge.net/compile.html for more 
+int _CRT_fmode = _O_BINARY;
+#endif
+// Resets the fields of s to their starting values.
+// Returns true on error (s is NULL) and false on success.
+static bool initialize_state(state *s)
+{
+  if (s != NULL)
+  {
+    s->threshold             = 0;
+    s->processed_file        = false;
+    s->found_meaningful_file = false;
+    s->first_file_processed  = true;
+    s->mode                  = mode_none;
+    return false;
+  }
+  return true;
+}
+// Displays the version banner, the copyright line, the usage synopsis
+// and a one-line description of each command line option, using one
+// print_status call per line.
+//
+// In order to fit on one Win32 screen this function should produce
+// no more than 22 lines of output.
+static void usage(void)
+{
+  print_status ("%s version %s by Jesse Kornblum", __progname, VERSION);
+  print_status ("Copyright (C) 2013 Facebook");
+  print_status ("");
+  print_status ("Usage: %s [-m file] [-k file] [-dpgvrsblcxa] [-t val] [-h|-V] [FILES]", 
+	  __progname);
+  print_status ("-m - Match FILES against known hashes in file");
+  print_status ("-k - Match signatures in FILES against signatures in file");
+  print_status ("-d - Directory mode, compare all files in a directory");
+  print_status ("-p - Pretty matching mode. Similar to -d but includes all matches");
+  print_status ("-g - Cluster matches together");
+  print_status ("-v - Verbose mode. Displays filename as its being processed");
+  print_status ("-r - Recursive mode");
+  print_status ("-s - Silent mode; all errors are supressed");
+  print_status ("-b - Uses only the bare name of files; all path information omitted");
+  print_status ("-l - Uses relative paths for filenames");
+  print_status ("-c - Prints output in CSV format");
+  print_status ("-x - Compare FILES as signature files");
+  print_status ("-a - Display all matches, regardless of score");
+  print_status ("-t - Only displays matches above the given threshold");
+  print_status ("-h - Display this help message");
+  print_status ("-V - Display version number and exit");
+}
+// Parses the command line options with getopt, records the selected
+// modes and the threshold in s, and loads any match files named with
+// -m or -k. Afterwards it rejects combinations of modes that cannot be
+// used together.
+//
+// -h and -V print their output and exit with EXIT_SUCCESS. An unknown
+// option prints the usage hint and exits with EXIT_FAILURE.
+static void process_cmd_line(state *s, int argc, char **argv)
+{
+  int i, match_files_loaded = FALSE;
+  while ((i=getopt(argc,argv,"gavhVpdsblcxt:rm:k:")) != -1) {
+    switch(i) {
+    case 'g':
+      s->mode |= mode_cluster;
+      break;
+    case 'a':
+      s->mode |= mode_display_all;
+      break;
+    case 'v':
+      if (MODE(mode_verbose))
+      {
+        print_error(s,"%s: Already at maximum verbosity", __progname);
+        print_error(s,
+                    "%s: Error message displayed to user correctly",
+                    __progname);
+      }
+      else
+        s->mode |= mode_verbose;
+      break;
+    case 'p':
+      s->mode |= mode_match_pretty;
+      break;
+    case 'd':
+      s->mode |= mode_directory;
+      break;
+    case 's':
+      s->mode |= mode_silent; break;
+    case 'b':
+      s->mode |= mode_barename; break;
+    case 'l':
+      s->mode |= mode_relative; break;
+    case 'c':
+      s->mode |= mode_csv; break;
+    case 'x':
+      s->mode |= mode_sigcompare; break;
+    case 'r':
+      s->mode |= mode_recursive; break;
+    case 't':
+      {
+        // Check the range on the full value. Narrowing to uint8_t
+        // first would wrap values such as 256 or -200 back into
+        // the legal range and let them through.
+        const long threshold = atol(optarg);
+        if (threshold < 0 || threshold > 100)
+          fatal_error("%s: Illegal threshold", __progname);
+        s->threshold = (uint8_t)threshold;
+        s->mode |= mode_threshold;
+      }
+      break;
+    case 'm':
+      if (MODE(mode_compare_unknown) || MODE(mode_sigcompare))
+        fatal_error("Positive matching cannot be combined with other matching modes");
+      s->mode |= mode_match;
+      if (not match_load(s,optarg))
+        match_files_loaded = TRUE;
+      break;
+    case 'k':
+      if (MODE(mode_match) || MODE(mode_sigcompare))
+        fatal_error("Signature matching cannot be combined with other matching modes");
+      s->mode |= mode_compare_unknown;
+      if (not match_load(s,optarg))
+        match_files_loaded = TRUE;
+      break;
+    case 'h':
+      usage();
+      exit (EXIT_SUCCESS);
+    case 'V':
+      print_status ("%s", VERSION);
+      exit (EXIT_SUCCESS);
+    default:
+      try_msg();
+      exit (EXIT_FAILURE);
+    }
+  }
+  // We don't include mode_sigcompare in this list as we haven't loaded
+  // the matching files yet. In that mode the matching files are in fact
+  // the command line arguments.
+  sanity_check(s,
+               ((MODE(mode_match) || MODE(mode_compare_unknown))
+                && not match_files_loaded),
+               "No matching files loaded");
+  sanity_check(s,
+               ((s->mode & mode_barename) && (s->mode & mode_relative)),
+               "Relative paths and bare names are mutually exclusive");
+  sanity_check(s,
+               ((s->mode & mode_match_pretty) && (s->mode & mode_directory)),
+               "Directory mode and pretty matching are mutually exclusive");
+  sanity_check(s,
+               MODE(mode_csv) and MODE(mode_cluster),
+               "CSV and clustering modes cannot be combined");
+  // -m, -p, and -d are incompatible with -k and -x
+  // The former treat FILES as raw files. The latter require them to be sigs
+  sanity_check(s,
+               ((MODE(mode_match) or MODE(mode_match_pretty) or MODE(mode_directory))
+                and
+                (MODE(mode_compare_unknown) or MODE(mode_sigcompare))),
+               "Incompatible matching modes");
+}
+#ifdef _WIN32
+// Fetches the wide character command line from Windows and stores the
+// resulting argument count and argument vector in s.
+// Returns TRUE if the command line could not be split into arguments,
+// in which case s is left untouched. Returns FALSE on success.
+static int prepare_windows_command_line(state *s)
+{
+  int argc = 0;
+  // CommandLineToArgvW returns NULL when it fails
+  TCHAR **argv = CommandLineToArgvW(GetCommandLineW(),&argc);
+  if (NULL == argv)
+    return TRUE;
+  s->argc = argc;
+  s->argv = argv;
+  return FALSE;
+}
+#endif
+// Returns nonzero if fn is an absolute path. On Win32 that means an
+// alphabetic character followed by a colon; everywhere else it means
+// the first character is DIR_SEPARATOR.
+static int is_absolute_path(TCHAR *fn)
+{
+  if (NULL == fn)
+    internal_error("Unknown error in is_absolute_path");
+#ifdef _WIN32
+  if (!isalpha(fn[0]))
+    return 0;
+  return (_TEXT(':') == fn[1]);
+# else
+  return (fn[0] == DIR_SEPARATOR);
+#endif
+}
+// Writes into fn the name under which the command line argument input
+// will be processed. fn is treated as a buffer of SSDEEP_PATH_MAX
+// characters.
+//
+// In relative mode, or when input is already absolute, input is copied
+// unchanged. Otherwise an absolute name is built: with _wfullpath on
+// Win32, by joining cwd and input elsewhere, or with realpath when cwd
+// is NULL. If the absolute name cannot be built, input is copied
+// unchanged so that fn always holds a terminated string.
+static void generate_filename(state *s, TCHAR *fn, TCHAR *cwd, TCHAR *input)
+{
+  if (NULL == fn || NULL == input)
+    internal_error("Error calling generate_filename");
+  bool resolved = false;
+  if (not ((s->mode & mode_relative) || is_absolute_path(input)))
+  {
+    // Windows systems don't have symbolic links, so we don't
+    // have to worry about carefully preserving the paths
+    // they follow. Just use the system command to resolve the paths
+#ifdef _WIN32
+    (void)cwd;
+    resolved = (NULL != _wfullpath(fn, input, SSDEEP_PATH_MAX));
+#else
+    if (NULL == cwd)
+      // If we can't get the current working directory, we're not
+      // going to be able to build the relative path to this file anyway.
+      // So we just call realpath and make the best of things
+      resolved = (NULL != realpath(input, fn));
+    else
+    {
+      snprintf(fn, SSDEEP_PATH_MAX, "%s%c%s", cwd, DIR_SEPARATOR, input);
+      resolved = true;
+    }
+#endif
+  }
+  if (not resolved)
+  {
+    _tcsncpy(fn, input, SSDEEP_PATH_MAX);
+    // The copy does not add a terminator when input fills the buffer
+    fn[SSDEEP_PATH_MAX - 1] = 0;
+  }
+}
+/// Program entry point.
+///
+/// Allocates and initializes the state, parses the command line and then
+/// either calls process_stdin (when no arguments remain after option
+/// parsing) or handles each remaining argument according to the active
+/// mode. Afterwards the known-file matches and the clusters are displayed
+/// when the corresponding modes are set.
+///
+/// @param argc Number of command line arguments
+/// @param argv Command line arguments
+///
+/// @return EXIT_SUCCESS at the end of the function. MD5DEEP_ALLOC returns
+/// EXIT_FAILURE from here when an allocation fails.
+int main(int argc, char **argv)
+{
+  // NOTE(review): status is assigned below but never read; the value
+  // returned at the end of main is always EXIT_SUCCESS.
+  int count, status, goal = argc;
+  state *s;
+  TCHAR *fn, *cwd;
+#ifndef __GLIBC__
+  //  __progname  = basename(argv[0]);
+#endif
+  s = new state;
+  if (initialize_state(s))
+    fatal_error("%s: Unable to initialize state variable", __progname);
+  process_cmd_line(s,argc,argv);
+  // On Windows the argument vector kept in the state comes from
+  // prepare_windows_command_line; elsewhere it is the one passed to main.
+#ifdef _WIN32
+  if (prepare_windows_command_line(s))
+    fatal_error("%s: Unable to process command line arguments", __progname);
+#else
+  s->argc = argc;
+  s->argv = argv;
+#endif
+  // Anything left on the command line at this point is a file
+  // or directory we're supposed to process. If there's nothing
+  // specified, we should tackle standard input.
+  if (optind == argc) {
+    status = process_stdin(s);
+  }
+  else {
+    // Both buffers hold SSDEEP_PATH_MAX characters and start zero-filled
+    MD5DEEP_ALLOC(TCHAR, fn, SSDEEP_PATH_MAX);
+    MD5DEEP_ALLOC(TCHAR, cwd, SSDEEP_PATH_MAX);
+    cwd = _tgetcwd(cwd, SSDEEP_PATH_MAX);
+    if (NULL == cwd)
+      fatal_error("%s: %s", __progname, strerror(errno));
+    count = optind;
+    // The signature comparison mode needs to use the command line
+    // arguments and argument count. We don't do wildcard expansion
+    // on it on Win32 (i.e. where it matters). The setting of 'goal'
+    // to the original argc occurred at the start of main(), so we just
+    // need to update it if we're *not* in signature compare mode.
+    if (not (s->mode & mode_sigcompare)) {
+      goal = s->argc;
+    }
+    while (count < goal)
+    {
+      // The two signature-file modes read from the argv passed to main;
+      // the hashing branch reads from the vector stored in the state.
+      if (MODE(mode_sigcompare))
+	match_load(s,argv[count]);
+      else if (MODE(mode_compare_unknown))
+	match_compare_unknown(s,argv[count]);
+      else {
+	generate_filename(s, fn, cwd, s->argv[count]);
+#ifdef _WIN32
+	status = process_win32(s, fn);
+#else
+	status = process_normal(s, fn);
+#endif
+      }
+      ++count;
+    }
+    // If we processed files, but didn't find anything large enough
+    // to be meaningful, we should display a warning message to the user.
+    // This happens mostly when people are testing very small files
+    // e.g. $ echo "hello world" > foo && ssdeep foo
+    if ((not s->found_meaningful_file) and s->processed_file)
+    {
+      print_error(s,"%s: Did not process files large enough to produce meaningful results", __progname);
+    }
+  }
+  // If the user has requested us to compare signature files, use
+  // our existing code to pretty-print directory matching to do the
+  // work for us.
+  if (MODE(mode_sigcompare))
+    s->mode |= mode_match_pretty;
+  if (MODE(mode_match_pretty) or MODE(mode_sigcompare) or MODE(mode_cluster))
+    find_matches_in_known(s);
+  if (MODE(mode_cluster))
+    display_clusters(s);
+  return (EXIT_SUCCESS);
+}
--- a/src/C/fuzzy/main.h
+++ b/src/C/fuzzy/main.h
+// ssdeep
+// Copyright (C) 2012 Kyrus
+//
+// $Id: main.h 144 2012-04-24 14:59:33Z jessekornblum $
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#ifndef __MAIN_H
+#define __MAIN_H
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <inttypes.h>
+#ifdef HAVE_DIRENT_H
+# include <dirent.h>
+#endif
+#ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# ifdef HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_PARAM_H
+# include <sys/param.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+# include <sys/ioctl.h>
+#endif
+#ifdef HAVE_SYS_MOUNT_H
+# include <sys/mount.h>
+#endif 
+#ifdef HAVE_SYS_DISK_H
+# include <sys/disk.h>
+#endif
+#ifdef HAVE_LIBGEN_H
+# include <libgen.h>
+#endif
+// This allows us to open standard input in binary mode by default 
+// See http://gnuwin32.sourceforge.net/compile.html for more.
+// Technically it isn't needed in ssdeep as we don't process standard
+// input. But it was part of Jesse's template, so in it goes!
+#ifdef HAVE_FCNTL_H
+# include <fcntl.h>
+#endif
+#ifndef HAVE_FSEEKO
+# define fseeko fseek
+# define ftello ftell
+#endif
+#define FALSE  0
+#define TRUE   1
+#ifndef MIN
+#define MIN(a,b) ((a)<(b)?(a):(b))
+#endif
+#ifndef MAX
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#endif
+#endif   // #ifndef __MAIN_H
--- a/src/C/fuzzy/match.cpp
+++ b/src/C/fuzzy/match.cpp
+// ssdeep
+// (C) Copyright 2012 Kyrus
+//
+// $Id: match.cpp 164 2012-07-23 16:12:36Z jessekornblum $
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+//You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#include "match.h"
+// The longest line we should encounter when reading files of known hashes 
+#define MAX_STR_LEN  2048
+#define MIN_SUBSTR_LEN 7
+// ------------------------------------------------------------------
+// SIGNATURE FILE FUNCTIONS
+// ------------------------------------------------------------------
+/// Open a file of known hashes and determine if it's valid
+///
+/// On success the open handle, a copy of the filename and the current
+/// line number (1, the header line) are stored in the state. On any
+/// failure the handle is closed again and reset to NULL so that the
+/// state never refers to a closed file.
+///
+/// @param s State variable
+/// @param fn filename to open
+///
+/// @return Returns false on success, true on error
+bool sig_file_open(state *s, const char * fn)
+{
+  if (NULL == s or NULL == fn)
+    return true;
+  s->known_handle = fopen(fn,"rb");
+  if (NULL == s->known_handle)
+  {
+    if ( ! (MODE(mode_silent)) )
+      perror(fn);
+    return true;
+  }
+  // The first line of the file should contain a valid ssdeep header.
+  char buffer[MAX_STR_LEN];
+  if (NULL == fgets(buffer,MAX_STR_LEN,s->known_handle))
+  {
+    if ( ! (MODE(mode_silent)) )
+      perror(fn);
+    fclose(s->known_handle);
+    s->known_handle = NULL;
+    return true;
+  }
+  chop_line(buffer);
+  if (strncmp(buffer,SSDEEPV1_0_HEADER,MAX_STR_LEN) and
+      strncmp(buffer,SSDEEPV1_1_HEADER,MAX_STR_LEN))
+  {
+    if ( ! (MODE(mode_silent)) )
+      print_error(s,"%s: Invalid file header.", fn);
+    fclose(s->known_handle);
+    s->known_handle = NULL;
+    return true;
+  }
+  // Keep our own copy of the name; it is released by sig_file_close
+  s->known_fn = strdup(fn);
+  if (NULL == s->known_fn)
+  {
+    if ( ! (MODE(mode_silent)) )
+      perror(fn);
+    fclose(s->known_handle);
+    s->known_handle = NULL;
+    return true;
+  }
+  // We've now read the first line
+  s->line_number = 1;
+  return false;
+}
+/// @brief Read the next entry in a file of known hashes and convert
+/// it to a Filedata
+///
+/// @param s State variable
+/// @param f Structure where to store the data we read. Set to NULL
+/// whenever no entry is returned.
+///
+/// @return Returns true if there is no entry to read or on error.
+/// Otherwise, false.
+bool sig_file_next(state *s, Filedata ** f)
+{
+  if (NULL == s or NULL == f or NULL == s->known_handle)
+    return true;
+  // Never leave the caller holding an indeterminate pointer
+  *f = NULL;
+  char buffer[MAX_STR_LEN];
+  memset(buffer,0,MAX_STR_LEN);
+  if (NULL == fgets(buffer,MAX_STR_LEN,s->known_handle))
+    return true;
+  s->line_number++;
+  chop_line(buffer);
+  try
+  {
+    *f = new Filedata(std::string(buffer),s->known_fn);
+  }
+  // Catch by reference: catching by value copies the exception object
+  catch (const std::bad_alloc &)
+  {
+    // This can happen on a badly formatted line, or a blank one.
+    // We don't display errors on blank lines.
+    if (strlen(buffer) > 0)
+      print_error(s,
+                  "%s: Bad hash in line %llu",
+                  s->known_fn,
+                  s->line_number);
+    return true;
+  }
+  return false;
+}
+/// @brief Release the resources acquired by sig_file_open
+///
+/// Frees the stored filename and closes the handle of the signature
+/// file. Both fields are reset to NULL so that a repeated call cannot
+/// free or close them twice.
+///
+/// @param s State variable
+///
+/// @return Returns false on success, true if s is NULL, no file is
+/// open, or the file could not be closed.
+bool sig_file_close(state *s)
+{
+  if (NULL == s)
+    return true;
+  free(s->known_fn);
+  s->known_fn = NULL;
+  // Nothing to close when no file is open. (The original test was
+  // inverted: it bailed out on an open handle and closed a NULL one.)
+  if (NULL == s->known_handle)
+    return true;
+  FILE * handle = s->known_handle;
+  s->known_handle = NULL;
+  if (fclose(handle))
+    return true;
+  return false;
+}
+/// @brief Report whether reading the signature file should stop
+///
+/// @param s State variable
+///
+/// @return Returns true at end of file, after a read error, or when no
+/// file is open. Otherwise, false.
+bool sig_file_end(state *s)
+{
+  if (NULL == s or NULL == s->known_handle)
+    return true;
+  // A read error never sets the end-of-file indicator, so callers which
+  // loop "until end" would otherwise spin forever on a failing stream.
+  return (feof(s->known_handle) or ferror(s->known_handle));
+}
+// ------------------------------------------------------------------
+// MATCHING FUNCTIONS
+// ------------------------------------------------------------------
+/// Display the results of clustering operations
+///
+/// For every cluster stored in the state this prints a header line with
+/// the cluster size, then the filename of each member on its own line,
+/// then an empty status line.
+///
+/// @param s State variable. Nothing is printed when it is NULL.
+void display_clusters(const state *s)
+{
+  if (NULL == s)
+    return;
+  std::set<std::set<Filedata *> *>::const_iterator it;
+  for (it = s->all_clusters.begin(); it != s->all_clusters.end() ; ++it)
+  {
+    // size() yields a size_t, which does not match the %u conversion on
+    // platforms where size_t is wider than unsigned int
+    print_status("** Cluster size %u",
+                 static_cast<unsigned int>((*it)->size()));
+    std::set<Filedata *>::const_iterator cit;
+    for (cit = (*it)->begin() ; cit != (*it)->end() ; ++cit)
+    {
+      display_filename(stdout,(*cit)->get_filename(),FALSE);
+      print_status("");
+    }
+    print_status("");
+  }
+}
+/// Put src into the cluster that dest already belongs to.
+///
+/// @param dest File whose cluster receives the new member
+/// @param src File which joins that cluster
+void cluster_add(Filedata * dest, Filedata * src)
+{
+  std::set<Filedata *> * target = dest->get_cluster();
+  target->insert(src);
+  src->set_cluster(dest->get_cluster());
+}
+/// Merge the clusters of a and b into one.
+///
+/// The members of the smaller cluster are inserted into the larger one
+/// (fewer items to move); on a tie b's cluster is kept. The absorbed
+/// cluster is removed from the state's set of clusters.
+///
+/// @param s State variable holding the set of all clusters
+/// @param a First file, must already be in a cluster
+/// @param b Second file, must already be in a cluster
+void cluster_join(state *s, Filedata * a, Filedata * b)
+{
+  // Files which already share a cluster need no work
+  if (a->get_cluster() == b->get_cluster())
+    return;
+  const bool a_is_larger =
+    a->get_cluster()->size() > b->get_cluster()->size();
+  Filedata * const keep   = a_is_larger ? a : b;
+  Filedata * const absorb = a_is_larger ? b : a;
+  // Move every member of the absorbed cluster into the surviving one
+  keep->get_cluster()->insert(absorb->get_cluster()->begin(),
+                              absorb->get_cluster()->end());
+  // Forget the absorbed cluster
+  s->all_clusters.erase(absorb->get_cluster());
+  // clear_cluster sets the cluster to NULL. Do not access the absorbed
+  // cluster after this call!
+  // NOTE(review): only 'absorb' itself is re-pointed below; confirm how
+  // the other members of the absorbed cluster get their new cluster.
+  absorb->clear_cluster();
+  absorb->set_cluster(keep->get_cluster());
+}
+/// Record that the files a and b belong to the same cluster.
+///
+/// Depending on which of the two files already has a cluster this joins
+/// two clusters, extends the existing one, or creates a new cluster and
+/// registers it in the state.
+///
+/// @param s State variable holding the set of all clusters
+/// @param a First matching file
+/// @param b Second matching file
+void handle_clustering(state *s, Filedata *a, Filedata *b)
+{
+  const bool a_clustered = a->has_cluster();
+  const bool b_clustered = b->has_cluster();
+  // Both files are clustered: combine the existing clusters
+  if (a_clustered and b_clustered)
+  {
+    cluster_join(s,a,b);
+    return;
+  }
+  // Exactly one file is clustered: the other one joins it
+  if (a_clustered)
+  {
+    cluster_add(a,b);
+    return;
+  }
+  if (b_clustered)
+  {
+    cluster_add(b,a);
+    return;
+  }
+  // Neither file is clustered yet: start a new cluster with both
+  std::set<Filedata *> * fresh = new std::set<Filedata *>();
+  fresh->insert(a);
+  fresh->insert(b);
+  s->all_clusters.insert(fresh);
+  a->set_cluster(fresh);
+  b->set_cluster(fresh);
+}
+/// Print one side of a plain match line: the match file followed by a
+/// colon (only when the file has one), then the filename.
+static void print_match_side(Filedata *f)
+{
+  // The match file names may be empty. If so, we don't print them
+  // or the colon which separates them from the filename
+  if (f->has_match_file())
+    printf ("%s:", f->get_match_file().c_str());
+  display_filename(stdout,f->get_filename(),FALSE);
+}
+/// Act on a match between the files a and b.
+///
+/// In CSV mode a quoted "a","b",score line is printed. In cluster mode
+/// nothing is printed and the pair is handed to handle_clustering.
+/// Otherwise a line of the form "a matches b (score)" is printed.
+///
+/// @param s State variable (the mode selects the output format)
+/// @param a File being compared
+/// @param b Known file it matched
+/// @param score Match score of the two files
+void handle_match(state *s, Filedata *a, Filedata *b, int score)
+{
+  if (s->mode & mode_csv)
+  {
+    printf("\"");
+    display_filename(stdout,a->get_filename(),TRUE);
+    printf("\",\"");
+    display_filename(stdout,b->get_filename(),TRUE);
+    print_status("\",%u", score);
+    return;
+  }
+  if (s->mode & mode_cluster)
+  {
+    handle_clustering(s,a,b);
+    return;
+  }
+  print_match_side(a);
+  printf (" matches ");
+  print_match_side(b);
+  print_status(" (%u)", score);
+}
+/// Compare the file f against every file in the set of knowns and hand
+/// each sufficiently good match to handle_match.
+///
+/// @param s State variable
+/// @param f Filedata structure for the file
+///
+/// @return Returns false if there are no matches, true if at least one match
+bool match_compare(state *s, Filedata * f)
+{
+  if (NULL == s)
+    fatal_error("%s: Null state passed into match_compare", __progname);
+  bool found = false;
+  const size_t fn_len = _tcslen(f->get_filename());
+  std::vector<Filedata* >::const_iterator known;
+  for (known = s->all_files.begin() ; known != s->all_files.end() ; ++known)
+  {
+    Filedata * candidate = *known;
+    // When in pretty mode, we still want to avoid printing
+    // A matches A (100).
+    if (s->mode & mode_match_pretty)
+    {
+      const size_t cmp_len =
+        std::max(fn_len,_tcslen(candidate->get_filename()));
+      const bool same_name =
+        (0 == _tcsncmp(f->get_filename(),candidate->get_filename(),cmp_len));
+      if (same_name and (f->get_signature() == candidate->get_signature()))
+      {
+        // Unless these results come from different matching files (such
+        // as what happens in sigcompare mode). That being said, we have
+        // to be careful to avoid NULL values such as when working in
+        // normal pretty print mode.
+        if (not(f->has_match_file()) or
+            f->get_match_file() == candidate->get_match_file())
+          continue;
+      }
+    }
+    const int score = fuzzy_compare(f->get_signature().c_str(),
+                                    candidate->get_signature().c_str());
+    if (-1 == score)
+    {
+      print_error(s, "%s: Bad hashes in comparison", __progname);
+      continue;
+    }
+    if (score > s->threshold or MODE(mode_display_all))
+    {
+      handle_match(s,f,candidate,score);
+      found = true;
+    }
+  }
+  return found;
+}
+/// Find and display all matches in the set of known hashes.
+///
+/// Every known file is compared against the whole set with
+/// match_compare.
+///
+/// @param s State variable
+///
+/// @return Returns true if s is NULL, false otherwise
+bool find_matches_in_known(state *s)
+{
+  if (NULL == s)
+    return true;
+  // Walk the vector which contains all of the known files
+  std::vector<Filedata *>::const_iterator known;
+  for (known = s->all_files.begin() ; known != s->all_files.end() ; ++known)
+  {
+    const bool matched = match_compare(s,*known);
+    if (not matched)
+      continue;
+    // In pretty mode and sigcompare mode we need to display a blank
+    // line after each file. In clustering mode we don't display anything
+    // right now.
+    if (not(MODE(mode_cluster)))
+      print_status("");
+  }
+  return false;
+}
+/// @brief Add a single new hash to the set of known hashes
+///
+/// @param s State variable
+/// @param f File to append to the list of known files
+///
+/// @return Returns false on success, true on error (s or f is NULL)
+bool match_add(state *s, Filedata * f)
+{
+  // A NULL entry would be dereferenced later by match_compare
+  if (NULL == s or NULL == f)
+    return true;
+  s->all_files.push_back(f);
+  return false;
+}
+/// @brief Load a file of known hashes
+///
+/// Every entry which can be read from the file is added to the set of
+/// known hashes with match_add.
+///
+/// @param s State variable
+/// @param fn Name of the signature file
+///
+/// @return Returns true if an argument is NULL or the file cannot be
+/// opened, false otherwise
+bool match_load(state *s, const char *fn)
+{
+  if (NULL == s or NULL == fn)
+    return true;
+  if (sig_file_open(s,fn))
+    return true;
+  for (;;)
+  {
+    Filedata * entry;
+    const bool read_failed = sig_file_next(s,&entry);
+    if (not read_failed and match_add(s,entry))
+    {
+      // One bad hash doesn't mean this load was a failure.
+      // We don't change the return status because match_add failed.
+      print_error(s,"%s: unable to insert hash", fn);
+      break;
+    }
+    if (sig_file_end(s))
+      break;
+  }
+  sig_file_close(s);
+  return false;
+}
+/// Load the known hashes from the file fn and compare them to the
+/// set of known hashes
+///
+/// @param s State variable
+/// @param fn Name of the signature file
+///
+/// @return Returns true if an argument is NULL or the file cannot be
+/// opened, false otherwise
+bool match_compare_unknown(state *s, const char * fn)
+{
+  if (NULL == s or NULL == fn)
+    return true;
+  if (sig_file_open(s,fn))
+    return true;
+  for (;;)
+  {
+    Filedata * entry;
+    const bool read_failed = sig_file_next(s,&entry);
+    if (not read_failed)
+      match_compare(s,entry);
+    if (sig_file_end(s))
+      break;
+  }
+  sig_file_close(s);
+  return false;
+}
--- a/src/C/fuzzy/match.h
+++ b/src/C/fuzzy/match.h
+#ifndef __MATCH_H
+#define __MATCH_H
+// SSDEEP
+// $Id$
+// Copyright (C) 2012 Kyrus.
+#include "ssdeep.h"
+#include "filedata.h"
+// *********************************************************************
+// Matching functions
+// *********************************************************************
+/// @brief Match the file f against the set of knowns
+///
+/// @return Returns false if there are no matches, true if at least one match
+/// @param s State variable
+/// @param f Filedata structure for the file.
+bool match_compare(state *s, Filedata * f);
+/// @brief Load a file of known hashes
+///
+/// @return Returns false on success, true on error
+bool match_load(state *s, const char *fn);
+/// @brief Add a single new hash to the set of known hashes
+///
+/// @return Returns false on success, true on error
+bool match_add(state *s, Filedata * f);
+/// Find and display all matches in the set of known hashes
+bool find_matches_in_known(state *s);
+/// Load the known hashes from the file fn and compare them to the
+/// set of known hashes
+bool match_compare_unknown(state *s, const char * fn);
+/// Display the results of clustering operations
+void display_clusters(const state *s);
+#endif   // ifndef __MATCH_H
--- a/src/C/fuzzy/missing
+++ b/src/C/fuzzy/missing
+#! /bin/sh
+# Common wrapper for a few potentially missing GNU programs.
+scriptversion=2012-06-26.16; # UTC
+# Copyright (C) 1996-2013 Free Software Foundation, Inc.
+# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+if test $# -eq 0; then
+  echo 1>&2 "Try '$0 --help' for more information"
+  exit 1
+fi
+case $1 in
+  --is-lightweight)
+    # Used by our autoconf macros to check whether the available missing
+    # script is modern enough.
+    exit 0
+    ;;
+  --run)
+    # Back-compat with the calling convention used by older automake.
+    shift
+    ;;
+  -h|--h|--he|--hel|--help)
+    echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
+to PROGRAM being missing or too old.
+Options:
+  -h, --help      display this help and exit
+  -v, --version   output version information and exit
+Supported PROGRAM values:
+  aclocal   autoconf  autoheader   autom4te  automake  makeinfo
+  bison     yacc      flex         lex       help2man
+Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
+'g' are ignored when checking the name.
+Send bug reports to <bug-automake@gnu.org>."
+    exit $?
+    ;;
+  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+    echo "missing $scriptversion (GNU Automake)"
+    exit $?
+    ;;
+  -*)
+    echo 1>&2 "$0: unknown '$1' option"
+    echo 1>&2 "Try '$0 --help' for more information"
+    exit 1
+    ;;
+esac
+# Run the given program, remember its exit status.
+"$@"; st=$?
+# If it succeeded, we are done.
+test $st -eq 0 && exit 0
+# Also exit now if it failed (or wasn't found), and '--version' was
+# passed; such an option is passed most likely to detect whether the
+# program is present and works.
+case $2 in --version|--help) exit $st;; esac
+# Exit code 63 means version mismatch.  This often happens when the user
+# tries to use an ancient version of a tool on a file that requires a
+# minimum version.
+if test $st -eq 63; then
+  msg="probably too old"
+elif test $st -eq 127; then
+  # Program was missing.
+  msg="missing on your system"
+else
+  # Program was found and executed, but failed.  Give up.
+  exit $st
+fi
+perl_URL=http://www.perl.org/
+flex_URL=http://flex.sourceforge.net/
+gnu_software_URL=http://www.gnu.org/software
+# program_details PROGRAM
+# Print which GNU package provides PROGRAM and what that package needs
+# in order to run. Nothing is printed for programs other than the
+# Automake and Autoconf tools listed below.
+program_details ()
+{
+  case $1 in
+    aclocal|automake)
+      printf '%s\n' \
+        "The '$1' program is part of the GNU Automake package:" \
+        "<$gnu_software_URL/automake>" \
+        "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" \
+        "<$gnu_software_URL/autoconf>" \
+        "<$gnu_software_URL/m4/>" \
+        "<$perl_URL>"
+      ;;
+    autoconf|autom4te|autoheader)
+      printf '%s\n' \
+        "The '$1' program is part of the GNU Autoconf package:" \
+        "<$gnu_software_URL/autoconf/>" \
+        "It also requires GNU m4 and Perl in order to run:" \
+        "<$gnu_software_URL/m4/>" \
+        "<$perl_URL>"
+      ;;
+  esac
+}
+give_advice ()
+{
+  # Normalize program name to check for.
+  normalized_program=`echo "$1" | sed '
+    s/^gnu-//; t
+    s/^gnu//; t
+    s/^g//; t'`
+  printf '%s\n' "'$1' is $msg."
+  configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
+  case $normalized_program in
+    autoconf*)
+      echo "You should only need it if you modified 'configure.ac',"
+      echo "or m4 files included by it."
+      program_details 'autoconf'
+      ;;
+    autoheader*)
+      echo "You should only need it if you modified 'acconfig.h' or"
+      echo "$configure_deps."
+      program_details 'autoheader'
+      ;;
+    automake*)
+      echo "You should only need it if you modified 'Makefile.am' or"
+      echo "$configure_deps."
+      program_details 'automake'
+      ;;
+    aclocal*)
+      echo "You should only need it if you modified 'acinclude.m4' or"
+      echo "$configure_deps."
+      program_details 'aclocal'
+      ;;
+   autom4te*)
+      echo "You might have modified some maintainer files that require"
+      echo "the 'automa4te' program to be rebuilt."
+      program_details 'autom4te'
+      ;;
+    bison*|yacc*)
+      echo "You should only need it if you modified a '.y' file."
+      echo "You may want to install the GNU Bison package:"
+      echo "<$gnu_software_URL/bison/>"
+      ;;
+    lex*|flex*)
+      echo "You should only need it if you modified a '.l' file."
+      echo "You may want to install the Fast Lexical Analyzer package:"
+      echo "<$flex_URL>"
+      ;;
+    help2man*)
+      echo "You should only need it if you modified a dependency" \
+           "of a man page."
+      echo "You may want to install the GNU Help2man package:"
+      echo "<$gnu_software_URL/help2man/>"
+    ;;
+    makeinfo*)
+      echo "You should only need it if you modified a '.texi' file, or"
+      echo "any other file indirectly affecting the aspect of the manual."
+      echo "You might want to install the Texinfo package:"
+      echo "<$gnu_software_URL/texinfo/>"
+      echo "The spurious makeinfo call might also be the consequence of"
+      echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
+      echo "want to install GNU make:"
+      echo "<$gnu_software_URL/make/>"
+      ;;
+    *)
+      echo "You might have modified some files without having the proper"
+      echo "tools for further handling them.  Check the 'README' file, it"
+      echo "often tells you about the needed prerequisites for installing"
+      echo "this package.  You may also peek at any GNU archive site, in"
+      echo "case some other package contains this missing '$1' program."
+      ;;
+  esac
+}
+give_advice "$1" | sed -e '1s/^/WARNING: /' \
+                       -e '2,$s/^/         /' >&2
+# Propagate the correct exit status (expected to be 127 for a program
+# not found, 63 for a program that failed due to version mismatch).
+exit $st
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-time-zone: "UTC"
+# time-stamp-end: "; # UTC"
+# End:
--- a/src/C/fuzzy/sample.c
+++ b/src/C/fuzzy/sample.c
+/* Fuzzy Hashing by Jesse Kornblum
+   Copyright (C) 2010 ManTech International Corporation
+   This program demonstrates some of the capabilities of 
+   the fuzzy hashing library.
+   To compile the program using gcc:
+   $ gcc -Wall -I/usr/local/include -L/usr/local/lib sample.c -lfuzzy
+   Using mingw:
+   C:\> gcc -Wall -Ic:\path\to\includes sample.c fuzzy.dll
+   Using Microsoft Visual C:
+   C:\> lib /machine:i386 /def:fuzzy.def
+   C:\> cl sample.c fuzzy.lib
+   See the README that came with this file for more details on using
+   the library on Windows systems with Microsoft Visual C. 
+   The functions generate_random and write_data are generic routines to make
+   random data for hashing. The real magic happens in the main() function.
+   THIS SOFTWARE IS NOT DESIGNED OR INTENDED FOR USE OR RESALE AS ON-LINE
+   CONTROL EQUIPMENT IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE
+   PERFORMANCE, SUCH AS IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT
+   NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL, DIRECT LIFE
+   SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH THE FAILURE OF THE
+   SOFTWARE COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SEVERE
+   PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH RISK ACTIVITIES").  THE AUTHOR
+   SPECIFICALLY DISCLAIMS ANY EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR
+   HIGH RISK ACTIVITIES.   */
+// $Id: sample.c 97 2010-03-19 15:10:06Z jessekornblum $
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <fuzzy.h>
+#define FILENAME "foo.dat" 
+#define SIZE 0x50000
+// Fill buf with sz bytes taken from rand() and set the last byte to
+// zero. Nothing is written when buf is NULL or sz is zero.
+void generate_random(unsigned char *buf, uint32_t sz)
+{
+  uint32_t i;
+  // With sz == 0 the index (sz-1) used below would wrap around and
+  // write far outside of the buffer
+  if (NULL == buf || 0 == sz)
+    return;
+  for (i = 0 ; i < sz ; ++i)
+    buf[i] = (unsigned char)(rand() % 255);
+  buf[(sz-1)] = 0;
+}
+// Write the sz bytes at buf to the file fn, replacing any previous
+// content. Returns 0 on success and 1 if an argument is NULL, the file
+// cannot be opened, or the data cannot be written completely.
+int write_data(const unsigned char *buf,
+               const uint32_t sz,
+               const char *fn)
+{
+  FILE * handle;
+  int failed;
+  if (NULL == buf || NULL == fn)
+    return 1;
+  printf ("Writing to %s\n", fn);
+  handle = fopen(fn,"wb");
+  if (NULL == handle)
+    return 1;
+  // Count single bytes so that a short write is detected and an empty
+  // buffer is not mistaken for an error
+  failed = (fwrite(buf,1,sz,handle) != sz);
+  // Buffered data may only reach the disk on close, so check that too
+  if (fclose(handle))
+    failed = 1;
+  return failed;
+}
+// Demonstrates the library: hashes a buffer of random data, hashes the
+// same data from a file, modifies a few bytes of the buffer and then
+// compares the signature of the file with that of the modified buffer.
+// Returns EXIT_SUCCESS when the demonstration ran to the end and
+// EXIT_FAILURE if memory or the data file could not be set up.
+int main(int argc, char **argv)
+{
+  unsigned char * buf;
+  char * result, * result2;
+  FILE *handle;
+  int status, i;
+  int exit_code = EXIT_FAILURE;
+  (void)argc;
+  srand(1);
+  buf     = (unsigned char *)malloc(SIZE);
+  result  = (char *)malloc(FUZZY_MAX_RESULT);
+  result2 = (char *)malloc(FUZZY_MAX_RESULT);
+  if (NULL == result || NULL == buf || NULL == result2)
+  {
+    fprintf (stderr,"%s: Out of memory\n", argv[0]);
+    goto cleanup;
+  }
+  // Start with empty signatures so that a failed hash can never leave
+  // uninitialized memory for fuzzy_compare to read
+  result[0]  = '\0';
+  result2[0] = '\0';
+  generate_random(buf,SIZE);
+  if (write_data(buf,SIZE,FILENAME))
+    goto cleanup;
+  printf ("Hashing buffer\n");
+  status = fuzzy_hash_buf(buf,SIZE,result);
+  if (status)
+    printf ("Error during buf hash\n");
+  else
+    printf ("%s\n", result);
+  handle = fopen(FILENAME,"rb");
+  if (NULL == handle)
+  {
+    perror(FILENAME);
+    goto cleanup;
+  }
+  printf ("Hashing file\n");
+  status = fuzzy_hash_file(handle,result);
+  if (status)
+    printf ("Error during file hash\n");
+  else
+    printf ("%s\n", result);
+  fclose(handle);
+  printf ("Modifying buffer and comparing to file\n");
+  for (i = 0x100 ; i < 0x110 ; ++i)
+    buf[i] = 37;
+  status = fuzzy_hash_buf(buf,SIZE,result2);
+  if (status)
+    printf ("Error during buffer hash\n");
+  else
+    printf ("%s\n", result2);
+  i = fuzzy_compare(result,result2);
+  if (-1 == i)
+    printf ("An error occured during matching\n");
+  else
+  {
+    if (i != 0)
+      printf ("MATCH: score = %d\n", i);
+    else
+      printf ("did not match\n");
+  }
+  exit_code = EXIT_SUCCESS;
+cleanup:
+  // free(NULL) is a no-op, so this is safe after a failed allocation
+  free(buf);
+  free(result);
+  free(result2);
+  return exit_code;
+}
--- a/src/C/fuzzy/ssdeep.1
+++ b/src/C/fuzzy/ssdeep.1
+.TH SSDEEP "1" "Version 2.10 \- 17 Jul 2013" "Facebook" "Facebook"
+.SH NAME
+ssdeep - Computes context triggered piecewise hashes (fuzzy hashes)
+.SH SYNOPSIS
+.B ssdeep [-m <file>] [-k <file>] [-vdprgsblcxa] [-t val] [FILES]
+.br
+.B ssdeep [-V|h]
+.SH DESCRIPTION
+.PP
+Computes a signature based on context triggered piecewise hashes
+for each input file, also called a fuzzy hash.
+If requested, the program matches those signatures against
+a file of known signatures and reports any possible matches.
+It can also examine one or more files of signatures and find any
+matches in those files.
+Output is written to standard out and errors to standard error.
+The program only accepts the first 100MB of data presented
+via standard input.
+.TP
+\fB\-m <file>\fR
+Loads the specified file of known hashes to be used for matching. This file must
+be a previous output of the program. The program
+then hashes each entry in FILES and compares these signatures to the known signatures.
+Any matches which score above the threshold are displayed.
+This flag may be used multiple times to load more known signatures.
+This flag may not be used with the \-k or \-x flags.
+.TP
+\fB\-k <file>\fR
+Load the specified file of known hashes to be used for matching. This file must
+be a previous output of the program. The program
+then treats each entry in FILES as a set of known hashes as well. The hashes in these
+FILES are compared to the known hashes from this file. Matches which score
+above the threshold are displayed. Both the file specified here and the
+input FILES should contain fuzzy hashes.
+This flag may be used multiple times to load more known signatures.
+This flag may not be used with the \-m, \-d, or \-p flags.
+.TP
+\fB\-v\fR
+Verbose mode. The name of each file is printed to standard error
+as it is being hashed.
+.TP
+\fB\-d\fR
+Computes a signature for each entry in the FILES and compares it to the set
+of known signatures. Matches which score above the threshold are displayed. The
+computed signature is then added to the set of known signatures.
+This flag may not be used with the \-k or \-x flags.
+.TP
+\fB\-p\fR
+Works like the \-d flag, but displays all matches for each file. That is,
+for two files A and B whose match score is above the threshold, displays
+"A matches B" and "B matches A".
+This flag may not be used with the \-k or \-x flags.
+.TP
+\fB\-r\fR
+Enables recursive mode. All subdirectories are traversed.
+Please note that recursive mode cannot be used to examine all
+files of a given file extension. For example, invoking the program with
+\fB\-r *.txt\fR will examine all files in directories that end in .txt.
+If you want to process all files in a directory tree with the .txt suffix,
+try using the \fBfind(1)\fR command.
+.TP
+\fB\-g\fR
+Similar files are grouped together into clusters. This can be handy
+for finding more similar files. That is, if you are searching for file
+A, which matches B, anything which matches B will also be included in
+the cluster.
+.TP
+\fB\-s\fR
+Silent mode. All error messages are suppressed.
+.TP
+\fB\-b\fR
+Enables bare mode. Strips any leading directory information from
+displayed filenames.
+This flag may not be used in conjunction with the \fB\-l\fR flag.
+.TP
+\fB\-l\fR
+Enables relative file paths. Instead of printing the absolute path for
+each file, displays the relative file path as indicated on the command
+line. This flag may not be used in conjunction with the \fB\-b\fR flag.
+.TP
+\fB\-c\fR
+Enables comma separated output mode. In any of the matching modes
+\-d, \-p, or \-m,
+displays the results as input file, known file, matching score.
+.TP
+\fB\-x\fR
+Signature file matching.
+Each entry in FILES must contain signatures generated by a previous output
+of the program. Each signature is loaded and compared against the set of
+known hashes. Match scores above the threshold are displayed. Each signature
+is then added to the set of knowns.
+This flag may not be used with the \-m, \-d, or \-p flags.
+.TP
+\fB\-a\fR
+Displays all matches in any of the matching modes, regardless of score.
+Using the \-a flag displays all results, even if the match score is zero.
+.TP
+\fB\-t <val>\fR
+In any of the matching modes, only display matches when match
+score is greater than the given value. The default threshold value is zero.
+.TP
+\fB\-h\fR
+Show a help screen and exit.
+.TP
+\fB\-V\fR
+Show the version number and exit.
+.SH RETURN VALUE
+Returns 0 on success, 1 if there is a problem.
+Read errors, permission denied, and encountering directories while
+not in recursive mode are still considered successes. Problems are
+things like being unable to load the matching file, specifying
+both bare and relative paths, etc.
+.SH AUTHOR
+ssdeep was written by Jesse Kornblum of Facebook,
+.br
+research@jessekornblum.com
+.PP
+.SH COPYRIGHT
+This program is Copyright (C) 2013 Facebook and is licensed under the terms
+of the General Public License. See the file COPYING for details.
+.SH SEE ALSO
+This program is based on SpamSum by Dr. Andrew Tridgell.
+.br
+http://www.samba.org/ftp/unpacked/junkcode/spamsum/
--- a/src/C/fuzzy/ssdeep.h
+++ b/src/C/fuzzy/ssdeep.h
+#ifndef __SSDEEP_H
+#define __SSDEEP_H
+// Fuzzy Hashing by Jesse Kornblum
+// Copyright (C) 2013 Facebook
+// Copyright (C) 2012 Kyrus
+// Copyright (C) 2008 ManTech International Corporation
+//
+// $Id: ssdeep.h 190 2013-07-11 00:40:22Z jessekornblum $
+//
+#include "main.h"
+#include <string>
+#include <map>
+#include <set>
+#include <vector>
+#include "fuzzy.h"
+#include "tchar-local.h"
+#include "filedata.h"
+// This is a kludge, but it works.
+#define __progname "ssdeep"
+#define SSDEEPV1_0_HEADER        "ssdeep,1.0--blocksize:hash:hash,filename"
+#define SSDEEPV1_1_HEADER        "ssdeep,1.1--blocksize:hash:hash,filename"
+#define OUTPUT_FILE_HEADER     SSDEEPV1_1_HEADER
+// We print a warning for files smaller than this size
+#define SSDEEP_MIN_FILE_SIZE   4096
+// The default 'PATH_MAX' on Windows is about 255 bytes. We can expand
+// this limit to 32,767 characters by prepending filenames with "\\?\"
+#define SSDEEP_PATH_MAX 32767
+// Allocate a zero-filled array of SIZE elements of TYPE and store the
+// pointer in VAR. On allocation failure the macro executes
+// 'return EXIT_FAILURE' in the *calling* function, so it may only be
+// used as a standalone statement inside a function whose return type
+// accepts EXIT_FAILURE. It expands to more than one statement; do not
+// use it as the unbraced body of an if/else or loop.
+//
+// calloc() zeroes the memory and fails cleanly when SIZE * sizeof(TYPE)
+// would overflow. SIZE and VAR are parenthesized so that arguments such
+// as 'n + 1' are sized correctly.
+#define MD5DEEP_ALLOC(TYPE,VAR,SIZE)     \
+(VAR) = (TYPE *)calloc((SIZE), sizeof(TYPE));  \
+if (NULL == (VAR))  \
+   return EXIT_FAILURE;
+// These are the types of files we can encounter while hashing
+#define file_regular    0
+#define file_directory  1
+#define file_door       2
+#define file_block      3
+#define file_character  4
+#define file_pipe       5
+#define file_socket     6
+#define file_symlink    7
+#define file_unknown  254
+typedef struct _filedata_t  // One file's fuzzy-hash record. NOTE(review): `state` stores `Filedata *` (filedata.h), not this type - confirm whether this struct is still used
+{
+  uint64_t id;
+  /// Original signature in the form [blocksize]:[sig1]:[sig2]
+  std::string signature;
+  uint64_t blocksize;  // presumably the [blocksize] field of `signature` - TODO confirm
+  /// Holds signature equal to blocksize
+  std::string s1;
+  /// Holds signature equal to blocksize * 2
+  std::string s2;
+  TCHAR * filename;  // TCHAR is plain char on non-Windows builds (see tchar-local.h)
+  /// File of hashes where we got this known file from.
+  std::string match_file;
+  /// Cluster which contains this file
+  std::set<_filedata_t> * cluster;
+} filedata_t;
+typedef struct {  // Program state; passed as `state *s` to the functions declared in this header
+  uint64_t  mode;  // Bitmask of the mode_* flags; tested with MODE(A), which reads s->mode
+  bool       first_file_processed;
+  // Known hashes
+  std::vector<Filedata *> all_files;
+  // Known clusters
+  std::set< std::set<Filedata *> * > all_clusters;
+  /// Display files who score above the threshold
+  uint8_t   threshold;
+  bool       found_meaningful_file;
+  bool       processed_file;
+  int       argc;  // presumably the program's command-line argument count - TODO confirm
+  TCHAR     **argv;  // presumably the matching argument vector - TODO confirm
+  /// Current line number in file of known hashes
+  uint64_t line_number;
+  /// File handle to file of known hashes
+  FILE     * known_handle;
+  /// Filename of known hashes
+  char     * known_fn;
+} state;
+#define MM_INIT  printf
+// Things required when cross compiling for Microsoft Windows
+#ifdef _WIN32
+// We create macros for the Windows equivalent UNIX functions.
+// No worries about lstat to stat; Windows doesn't have symbolic links
+#define lstat(A,B)      stat(A,B)
+#define realpath(A,B)   _fullpath(B,A,PATH_MAX)
+#define snprintf        _snprintf
+char *basename(char *a);
+extern char *optarg;
+extern int optind;
+int getopt(int argc, char *const argv[], const char *optstring);
+#define NEWLINE        "\r\n"
+#define DIR_SEPARATOR  '\\'
+#else   // ifdef _WIN32
+// For all other operating systems
+#define NEWLINE       "\n"
+#define DIR_SEPARATOR '/'
+#endif  // ifdef _WIN32/else
+// Because the modes are stored in a uint64_t variable, they must
+// be less than or equal to 1<<63. The literals below are plain ints,
+// so any flag added above bit 30 must use a 64-bit literal instead.
+//
+// Every shifted value is parenthesized so that it expands correctly
+// next to operators that bind tighter than '<<' or '&' (for example
+// ~mode_match, mode_match + 1, or mode_match == x).
+#define mode_none            0
+#define mode_recursive       1
+#define mode_match        (1<<1)
+#define mode_barename     (1<<2)
+#define mode_relative     (1<<3)
+#define mode_silent       (1<<4)
+#define mode_directory    (1<<5)
+#define mode_match_pretty (1<<6)
+#define mode_verbose      (1<<7)
+#define mode_csv          (1<<8)
+#define mode_threshold    (1<<9)
+#define mode_sigcompare   (1<<10)
+#define mode_display_all  (1<<11)
+#define mode_compare_unknown (1<<12)
+#define mode_cluster      (1<<13)
+#define mode_recursive_cluster (1<<14)
+// Non-zero when any flag in A is set. Expects a variable named 's' that
+// points to the program state to be in scope at the point of use. The
+// argument is parenthesized so MODE(a | b) tests both flags.
+#define MODE(A)   (s->mode & (A))
+#define BLANK_LINE   \
+"                                                                               "
+// *********************************************************************
+// Checking for cycles
+// *********************************************************************
+int done_processing_dir(TCHAR *fn);
+int processing_dir(TCHAR *fn);
+int have_processed_dir(TCHAR *fn);
+bool process_win32(state *s, TCHAR *fn);
+int process_normal(state *s, TCHAR *fn);
+int process_stdin(state *s);
+// *********************************************************************
+// Fuzzy Hashing Engine
+// *********************************************************************
+int hash_file(state *s, TCHAR *fn);
+bool display_result(state *s, const TCHAR * fn, const char * sum);
+// *********************************************************************
+// Helper functions
+// *********************************************************************
+void try_msg(void);
+bool expanded_path(TCHAR *p);
+void sanity_check(state *s, int condition, const char *msg);
+// The basename function kept misbehaving on OS X, so I rewrote it.
+// This function isn't perfect, nor is it designed to be. Because
+// we're guaranteed to be working with a filename here, there's no way
+// that s will end with a DIR_SEPARATOR (e.g. /foo/bar/). This function
+// will not work properly for a string that ends in a DIR_SEPARATOR
+int my_basename(TCHAR *s);
+int my_dirname(TCHAR *s);
+// Remove the newlines, if any, from the string. Works with both
+// \r and \r\n style newlines
+void chop_line_tchar(TCHAR *s);
+void chop_line(char *s);
+int find_comma_separated_string_tchar(TCHAR *s, unsigned int n);
+void shift_string_tchar(TCHAR *fn, unsigned int start, unsigned int new_start);
+int find_comma_separated_string(char *s, unsigned int n);
+void shift_string(char *fn, size_t start, size_t new_start);
+int remove_escaped_quotes(char * str);
+void prepare_filename(state *s, TCHAR *fn);
+// Returns the size of the given file, in bytes.
+#ifdef __cplusplus
+extern "C" {
+#endif
+off_t find_file_size(FILE *h);
+#ifdef __cplusplus
+}
+#endif
+// *********************************************************************
+// User Interface Functions
+// *********************************************************************
+void print_status(const char *fmt, ...);
+void print_error(const state *s, const char *fmt, ...);
+void print_error_unicode(state *s, const TCHAR *fn, const char *fmt, ...);
+void internal_error(const char *fmt, ... );
+void fatal_error(const char *fmt, ... );
+void display_filename(FILE *out, const TCHAR *fn, int escape_quotes);
+#endif  // #ifndef __SSDEEP_H
--- a/src/C/fuzzy/tchar-local.h
+++ b/src/C/fuzzy/tchar-local.h
+/* $Id: tchar-local.h 61 2008-02-22 23:18:59Z jessekornblum $ */
+#ifndef __TCHAR_LOCAL_H
+#define __TCHAR_LOCAL_H
+/* Unicode support */
+#ifdef _WIN32
+// This says that we require Windows NT 4.0 to run
+#define _WIN32_WINNT 0x0400
+# include <windows.h>
+# include <wchar.h>
+# include <tchar.h>
+/* The PRINTF_S character is used in situations where we have a string
+   with one TCHAR and one char argument. It's impossible to use the
+   _TEXT macro because we don't know which will be which. */
+#define  PRINTF_S   "S"
+#define _tmemmove      wmemmove
+/* The Win32 API does not have lstat, just stat. As such, we don't have to
+   worry about the difference between the two. */
+#define _lstat         _tstat
+#define _sstat         _tstat
+#define _tstat_t       struct _stat
+#else  // ifdef _WIN32
+#define  PRINTF_S   "s"
+/* The next few paragraphs are similar to tchar.h when UNICODE
+   is not defined. They define all of the _t* functions to use
+   the standard char * functions. This works just fine on Linux and OS X */
+#define  TCHAR      char
+#define  _TDIR      DIR
+#define  _TEXT(A)   A
+#define  _sntprintf snprintf
+#define  _tprintf   printf
+#define  _ftprintf  fprintf
+#define  _lstat     lstat
+#define  _sstat     stat
+#define  _tstat_t   struct stat
+#define  _tgetcwd   getcwd
+#define  _tfopen    fopen
+#define  _fgetts    fgets
+#define  _topendir  opendir
+#define  _treaddir  readdir
+#define  _tdirent   dirent
+#define  _tclosedir closedir
+#define  _tcsncpy   strncpy
+#define  _tcslen    strlen
+#define  _tcsnicmp  strncasecmp
+#define  _tcsncmp   strncmp
+#define  _tcsrchr   strrchr
+#define  _tmemmove  memmove
+#define  _tcsdup    strdup
+#define  _tcsstr    strstr
+#endif
+#endif //   __TCHAR_LOCAL_H
--- a/src/C/fuzzy/ui.cpp
+++ b/src/C/fuzzy/ui.cpp
--- a/src/C/ssdeep-2.10.tar.gz
+++ b/src/C/ssdeep-2.10.tar.gz
--- a/src/binwalk/magic/binarch
+++ b/src/binwalk/magic/binarch