Commit 558352d0 by James Cropcho

Merge pull request #54 from todvora/master

Data processing refactored, possibility to not persist results in mongodb
parents 4ee4004a e97eb70e
...@@ -30,35 +30,42 @@ Wrapper can be created with this command: ...@@ -30,35 +30,42 @@ Wrapper can be created with this command:
Variety wrapper = new Variety("test", "users"); Variety wrapper = new Variety("test", "users");
``` ```
Where the first parameter is analyzed database name and second analyzed collection name. Wrapper is written following where the first parameter is analyzed database name and second analyzed collection name. Wrapper is written following
[builder pattern](https://en.wikipedia.org/wiki/Builder_pattern): [builder pattern](https://en.wikipedia.org/wiki/Builder_pattern):
``` ```
VarietyAnalysis analysis = new Variety("test", "users") ResultsValidator analysis = new Variety("test", "users")
.withMaxDepth(10) .withMaxDepth(10)
.withSort("{name:-1}") .withSort("{name:-1}")
.withLimit(5) .withLimit(5)
.runAnalysis(); .runDatabaseAnalysis();
``` ```
```VarietyAnalysis``` is the actual analysis result. Main purpose is to easy verify results: ```ResultsValidator``` is the actual analysis result. Main purpose is to easy verify results:
``` ```
verifyResult(String key, double totalOccurrences, double percentContaining, String... types) validate(String key, long totalOccurrences, double percentContaining, String... types)
``` ```
If the result does not match expectations, AssertionError is thrown (standard JUnit behavior). If the result does not match expectations, AssertionError is thrown (standard JUnit behavior). There are two possibilities,
how to obtain results. Variety can store results in collection in MongoDB, or output results as a valid JSON to standard
output. These two ways have their own representations in the wrapper:
- runDatabaseAnalysis
- runJsonAnalysis
Both of them preset important options for Variety (quiet, persistResults, outputFormat) to comply with validator.
## Tests lifecycle ## Tests lifecycle
- Initialization, prepare data. Every test has method annotated with `@Before`. - Initialization, prepare data. Every test has method annotated with `@Before`.
- Variety analysis, run variety.js against prepared data and verify results. See `Variety.java`, method `runAnalysis()` and methods annotated with `@Test`. - Variety analysis, run variety.js against prepared data and verify results. See `Variety.java`, method `runDatabaseAnalysis()` and methods annotated with `@Test`.
- Resources cleanup, see method annotated with `@After`. - Resources cleanup, see method annotated with `@After`.
## Used databases and collections ## Used databases and collections
Tests use two databases, `test` and `varietyResults`. In DB `test`, there will be created collection `users`. Tests use two databases, `test` and `varietyResults`. In DB `test`, there will be created collection `users`.
Collection is later analyzed by variety and results stored in DB `varietyResults`, collection `usersKeys`. Collection is later analyzed by variety and results stored in DB `varietyResults`, collection `usersKeys`.
Cleanup method should remove both test and analysis data. Cleanup method should remove both test and analysis data. In case of JSON validator, there is no results db/collection created.
## Contribute ## Contribute
You can extend current test cases or create new JUnit test. All tests under `test/src/test/` are automatically included into run. You can extend current test cases or create new JUnit test. All tests under `test/src/test/` are automatically included into run.
\ No newline at end of file
package com.github.variety; package com.github.variety;
import com.github.variety.validator.DbResultsValidator;
import com.github.variety.validator.JsonResultsValidator;
import com.github.variety.validator.ResultsValidator;
import com.mongodb.DB; import com.mongodb.DB;
import com.mongodb.DBCollection; import com.mongodb.DBCollection;
import com.mongodb.MongoClient; import com.mongodb.MongoClient;
...@@ -33,6 +36,7 @@ public class Variety { ...@@ -33,6 +36,7 @@ public class Variety {
public static final String PARAM_MAXDEPTH = "maxDepth"; public static final String PARAM_MAXDEPTH = "maxDepth";
public static final String PARAM_LIMIT = "limit"; public static final String PARAM_LIMIT = "limit";
public static final String PARAM_OUTPUT_FORMAT = "outputFormat"; public static final String PARAM_OUTPUT_FORMAT = "outputFormat";
public static final String PARAM_PERSIST_RESULTS = "persistResults";
private final String inputDatabase; private final String inputDatabase;
private final String inputCollection; private final String inputCollection;
...@@ -44,8 +48,7 @@ public class Variety { ...@@ -44,8 +48,7 @@ public class Variety {
private String sort; private String sort;
private String outputFormat; private String outputFormat;
private boolean quiet; private boolean quiet;
private boolean persistResults;
private boolean isStdoutForwarded = true;
/** /**
* Create variety wrapper with defined connection do analysed database and collection * Create variety wrapper with defined connection do analysed database and collection
...@@ -105,6 +108,10 @@ public class Variety { ...@@ -105,6 +108,10 @@ public class Variety {
return this; return this;
} }
/**
* Variety wrapper for {@code format} option.
* @param format valid values are either 'json' or 'ascii'
*/
public Variety withFormat(final String format) { public Variety withFormat(final String format) {
this.outputFormat = format; this.outputFormat = format;
return this; return this;
...@@ -114,31 +121,25 @@ public class Variety { ...@@ -114,31 +121,25 @@ public class Variety {
* Wrapper for command line option '--quiet', that is passed to mongo shell. Variety is able to read this option * Wrapper for command line option '--quiet', that is passed to mongo shell. Variety is able to read this option
* and mute its logs with metadata. * and mute its logs with metadata.
*/ */
public Variety withQuiet(boolean quiet) { public Variety withQuiet(final boolean quiet) {
this.quiet = quiet; this.quiet = quiet;
return this; return this;
} }
/** /**
* Enable analysis output stdout of script to stdout of java process. * Variety wrapper for {@code persistResults} option
* Deprecated because it should only be used for debugging of test, not real/production tests itself. If you
* need to read stdout of variety, it can be accessed through {@link VarietyAnalysis#getStdOut()}
*/ */
@Deprecated() public Variety withPersistResults(final boolean persistResults) {
public Variety withStdoutForwarded(final boolean isStdoutForwarded) { this.persistResults = persistResults;
this.isStdoutForwarded = isStdoutForwarded;
return this; return this;
} }
/** /**
* Executes mongo shell with configured variety options and variety.js script in path. * Executes mongo shell with configured variety options and variety.js script in path.
* @return Results of analysis including stdout of variety.js and verifier of collected keys * @return Stdout of variety.js
* @throws IOException
* @throws InterruptedException
*/ */
public VarietyAnalysis runAnalysis() throws IOException, InterruptedException { private String runAnalysis() throws IOException, InterruptedException {
final List<String> commands = new ArrayList<>();
List<String> commands = new ArrayList<>();
commands.add("mongo"); commands.add("mongo");
commands.add(this.inputDatabase); commands.add(this.inputDatabase);
if(quiet) { if(quiet) {
...@@ -156,10 +157,19 @@ public class Variety { ...@@ -156,10 +157,19 @@ public class Variety {
if(returnCode != 0) { if(returnCode != 0) {
throw new RuntimeException("Failed to execute variety.js with arguments: " + Arrays.toString(cmdarray) + ".\n" + stdOut); throw new RuntimeException("Failed to execute variety.js with arguments: " + Arrays.toString(cmdarray) + ".\n" + stdOut);
} else if(isStdoutForwarded) {
System.out.println(stdOut);
} }
return new VarietyAnalysis(mongoClient, inputCollection, stdOut); System.out.println(stdOut);
return stdOut;
}
/**
 * Runs the analysis with the JSON output format and the quiet flag switched on, then wraps
 * the captured standard output in a {@link JsonResultsValidator}.
 * Note that the format and quiet settings of this wrapper are overwritten by this call.
 *
 * @return validator working on the JSON printed by variety.js
 * @throws IOException propagated from the underlying analysis run
 * @throws InterruptedException propagated from the underlying analysis run
 */
public ResultsValidator runJsonAnalysis() throws IOException, InterruptedException {
    final Variety configured = withFormat(FORMAT_JSON).withQuiet(true);
    final String jsonOutput = configured.runAnalysis();
    return new JsonResultsValidator(jsonOutput);
}
public ResultsValidator runDatabaseAnalysis() throws IOException, InterruptedException {
final String stdOut = withFormat(FORMAT_ASCII).withPersistResults(true).runAnalysis();
return new DbResultsValidator(mongoClient, inputCollection, stdOut);
} }
/** /**
...@@ -189,6 +199,9 @@ public class Variety { ...@@ -189,6 +199,9 @@ public class Variety {
args.add(PARAM_OUTPUT_FORMAT + " = '" + outputFormat + "'"); args.add(PARAM_OUTPUT_FORMAT + " = '" + outputFormat + "'");
} }
if(persistResults) {
args.add(PARAM_PERSIST_RESULTS + " = " + persistResults);
}
return args.toString(); return args.toString();
} }
......
package com.github.variety; package com.github.variety.validator;
import com.github.variety.Variety;
import com.mongodb.*; import com.mongodb.*;
import org.junit.Assert; import org.junit.Assert;
import java.util.Arrays; import java.util.Arrays;
/** public class DbResultsValidator implements ResultsValidator {
* Results of variety.js run in mongo shell. Contains stdout of shell and access to results collection. For convenience there
* is defined method verifyResult, that checks correct types and occurrences of desired key.
*/
public class VarietyAnalysis {
private final MongoClient mongoClient; private final MongoClient mongoClient;
private final String sourceCollectionName; private final String sourceCollectionName;
private final String stdOut; private final String stdOut;
/** public DbResultsValidator(final MongoClient mongoClient, final String sourceCollectionName, final String stdOut) {
* @param mongoClient connection to MongoDB
* @param sourceCollectionName name of original source collection. Used to access results in variety database
* @param stdOut output of analysis execution - output of variety.js script
*/
public VarietyAnalysis(final MongoClient mongoClient, final String sourceCollectionName, final String stdOut) {
this.mongoClient = mongoClient; this.mongoClient = mongoClient;
this.sourceCollectionName = sourceCollectionName; this.sourceCollectionName = sourceCollectionName;
this.stdOut = stdOut; this.stdOut = stdOut;
} }
/**
 * Checks one key of the analysis results; delegates to the internal verifier of this class.
 *
 * @param key               key that has to be present in the results
 * @param totalOccurrences  expected absolute count of the key
 * @param percentContaining expected relative percentage of the key
 * @param types             expected data types of the key
 */
@Override
public void validate(final String key, final long totalOccurrences, final double percentContaining, final String... types) {
    this.verifyResult(key, totalOccurrences, percentContaining, types);
}
/**
 * @return number of entries currently present in the results collection
 */
@Override
public long getResultsCount() {
    final DBCollection results = getResultsCollection();
    return results.count();
}
/**
 * @return the analysis output string this validator was constructed with
 */
@Override
public String getStdOut() {
    return stdOut;
}
/** /**
* Verifier for collected results in variety analysis * Verifier for collected results in variety analysis
* @param key Results should contain entry with this key * @param key Results should contain entry with this key
...@@ -34,45 +40,35 @@ public class VarietyAnalysis { ...@@ -34,45 +40,35 @@ public class VarietyAnalysis {
* @param percentContaining Results should contain entry with this relative percentage * @param percentContaining Results should contain entry with this relative percentage
* @param types Expected data types of this entry (Based on MongoDB type names) * @param types Expected data types of this entry (Based on MongoDB type names)
*/ */
public void verifyResult(final String key, final double totalOccurrences, final double percentContaining, final String... types) { private void verifyResult(final String key, final long totalOccurrences, final double percentContaining, final String... types) {
final DBCursor cursor = getResultsCollection().find(new BasicDBObject("_id.key", key)); final DBCursor cursor = getResultsCollection().find(new BasicDBObject("_id.key", key));
Assert.assertEquals("Entry with key '" + key + "' not found in variety results", 1, cursor.size()); Assert.assertEquals("Entry with key '" + key + "' not found in variety results", 1, cursor.size());
final DBObject result = cursor.next(); final DBObject result = cursor.next();
verifyKeyTypes(key, result, types); verifyKeyTypes(key, result, types);
Assert.assertEquals("Failed to verify total occurrences of key " + key, totalOccurrences, result.get("totalOccurrences")); Assert.assertEquals("Failed to verify total occurrences of key " + key, totalOccurrences, ((Double)result.get("totalOccurrences")).longValue());
Assert.assertEquals("Failed to verify percents of key " + key, percentContaining, result.get("percentContaining")); Assert.assertEquals("Failed to verify percents of key " + key, percentContaining, result.get("percentContaining"));
cursor.close(); cursor.close();
} }
private void verifyKeyTypes(final String key, final DBObject result, final String[] expectedTypes) { private void verifyKeyTypes(final String key, final DBObject result, final String[] expectedTypes) {
final BasicDBList types = (BasicDBList)((DBObject) result.get("value")).get("types"); final BasicDBList types = (BasicDBList)((DBObject) result.get("value")).get("types");
Assert.assertEquals( Assert.assertEquals(
"Incorrect count of expected(" + Arrays.toString(expectedTypes) + ") and real types(" + Arrays.toString(types.toArray()) "Incorrect count of expected(" + Arrays.toString(expectedTypes) + ") and real types(" + Arrays.toString(types.toArray())
+ ") of key: " + key, expectedTypes.length, types.size()); + ") of key: " + key, expectedTypes.length, types.size());
for (final String expected : expectedTypes) { for (final String expected : expectedTypes) {
if (!types.contains(expected)) { if (!types.contains(expected)) {
Assert.fail("Type '" + expected + "' not found in real types(" + Arrays.toString(expectedTypes) + ") of key: " + key); Assert.fail("Type '" + expected + "' not found in real types(" + Arrays.toString(expectedTypes) + ") of key: " + key);
} }
} }
}
/**
* @return Direct access to variety results collection of this analysis
*/
public DBCollection getResultsCollection() {
return mongoClient.getDB(Variety.VARIETY_RESULTS_DBNAME).getCollection(getResultsCollectionName());
} }
/** private DBCollection getResultsCollection() {
* @return Standard output of mongo client with variety.js analysis script executed. return mongoClient.getDB(Variety.VARIETY_RESULTS_DBNAME).getCollection(getResultsCollectionName());
*/
public String getStdOut() {
return stdOut;
} }
/** /**
......
package com.github.variety.validator;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.util.JSON;
import org.junit.Assert;
import java.util.*;
/**
 * Validator of variety results that were printed as JSON to the standard output of the
 * analysis run. The output is parsed once in the constructor and all validations work on
 * the parsed entries.
 */
public class JsonResultsValidator implements ResultsValidator {

    /** Tolerance used when comparing expected and parsed percentages. TODO: precision? */
    private static final double PERCENT_DELTA = 1e-15;

    private final List<VarietyEntry> entries;
    private final String stdOut;

    /**
     * @param stdOut output of the analysis; it has to be a JSON array of result entries
     */
    public JsonResultsValidator(final String stdOut) {
        this.entries = parse(stdOut);
        this.stdOut = stdOut;
    }

    /**
     * Converts the JSON array of results into a list of entries. Static, so no instance
     * method is invoked on a not yet fully constructed object.
     */
    private static List<VarietyEntry> parse(final String stdOut) {
        final BasicDBList parsed = (BasicDBList) JSON.parse(stdOut);
        final List<VarietyEntry> entries = new ArrayList<>();
        for (final Object o : parsed) {
            final BasicDBObject obj = (BasicDBObject) o;
            final String key = ((BasicDBObject) obj.get("_id")).getString("key");
            final long totalOccurrences = obj.getLong("totalOccurrences");
            final double percentContaining = obj.getDouble("percentContaining");
            final BasicDBList typesList = (BasicDBList) ((BasicDBObject) obj.get("value")).get("types");
            final Set<String> types = new HashSet<>(Arrays.asList(typesList.toArray(new String[typesList.size()])));
            entries.add(new VarietyEntry(key, totalOccurrences, percentContaining, types));
        }
        return entries;
    }

    /**
     * Verifies that the parsed results contain an entry for the given key with the expected
     * types, total occurrences and percentage.
     *
     * @param key               key that has to be present in the results
     * @param totalOccurrences  expected absolute count of the key
     * @param percentContaining expected relative percentage of the key
     * @param types             expected data types of the key, compared as a set
     * @throws AssertionError if the entry is missing or any of the values differs
     */
    @Override
    public void validate(final String key, final long totalOccurrences, final double percentContaining, final String... types) {
        final VarietyEntry varietyEntry = entries.stream()
                .filter(entry -> Objects.equals(entry.getKey(), key))
                .findFirst()
                .orElseThrow(() -> new AssertionError("Entry with key '" + key + "' not found in variety results"));
        Assert.assertEquals("Failed to verify types of key " + key, new HashSet<>(Arrays.asList(types)), varietyEntry.getTypes());
        Assert.assertEquals("Failed to verify total occurrences of key " + key, totalOccurrences, varietyEntry.getTotalOccurrences());
        Assert.assertEquals("Failed to verify percents of key " + key, percentContaining, varietyEntry.getPercentContaining(), PERCENT_DELTA);
    }

    /**
     * @return number of entries parsed from the JSON output
     */
    @Override
    public long getResultsCount() {
        return entries.size();
    }

    /**
     * @return the raw analysis output this validator was constructed with
     */
    @Override
    public String getStdOut() {
        return stdOut;
    }

    /**
     * Immutable holder of one parsed result entry. Declared static because it never needs
     * access to the enclosing validator instance.
     */
    private static final class VarietyEntry {
        private final String key;
        private final long totalOccurrences;
        private final double percentContaining;
        private final Set<String> types;

        private VarietyEntry(final String key, final long totalOccurrences, final double percentContaining, final Set<String> types) {
            this.key = key;
            this.totalOccurrences = totalOccurrences;
            this.percentContaining = percentContaining;
            this.types = types;
        }

        private String getKey() {
            return key;
        }

        private long getTotalOccurrences() {
            return totalOccurrences;
        }

        private double getPercentContaining() {
            return percentContaining;
        }

        private Set<String> getTypes() {
            return types;
        }
    }
}
package com.github.variety.validator;
/**
 * Common contract of validators that check the results of a variety analysis run,
 * regardless of where the results come from.
 */
public interface ResultsValidator {
/**
 * Verifies that the results contain an entry for the given key with the expected values.
 * The implementations in this package report a mismatch through JUnit assertions.
 *
 * @param key               key that has to be present in the results
 * @param totalOccurrences  expected absolute count of the key
 * @param percentContaining expected relative percentage of the key
 * @param types             expected data types of the key
 */
void validate(String key, long totalOccurrences, double percentContaining, String... types);
/**
 * @return number of result entries available to this validator
 */
long getResultsCount();
/**
 * @return the analysis output string the validator was created with
 */
String getStdOut();
}
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
...@@ -25,14 +25,28 @@ public class BasicAnalysisTest { ...@@ -25,14 +25,28 @@ public class BasicAnalysisTest {
variety.getSourceCollection().drop(); variety.getSourceCollection().drop();
} }
/**
* Validate correct results read from DB
*/
@Test @Test
public void verifyBasicResults() throws Exception { public void verifyBasicResultsDb() throws Exception {
final VarietyAnalysis analysis = variety.runAnalysis(); validate(variety.runDatabaseAnalysis());
analysis.verifyResult("_id", 5, 100, "ObjectId"); }
analysis.verifyResult("name", 5, 100, "String");
analysis.verifyResult("bio", 3, 60, "String"); /**
analysis.verifyResult("pets", 2, 40, "String", "Array"); * Validate correct results read from JSON standard output
analysis.verifyResult("someBinData", 1, 20, "BinData-old"); */
analysis.verifyResult("someWeirdLegacyKey", 1, 20, "String"); @Test
public void verifyBasicResultsJson() throws Exception {
validate(variety.runJsonAnalysis());
}
private void validate(final ResultsValidator analysis) {
analysis.validate("_id", 5, 100, "ObjectId");
analysis.validate("name", 5, 100, "String");
analysis.validate("bio", 3, 60, "String");
analysis.validate("pets", 2, 40, "String", "Array");
analysis.validate("someBinData", 1, 20, "BinData-old");
analysis.validate("someWeirdLegacyKey", 1, 20, "String");
} }
} }
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import com.mongodb.BasicDBObject; import com.mongodb.BasicDBObject;
import org.bson.types.Binary; import org.bson.types.Binary;
import org.junit.After; import org.junit.After;
...@@ -48,23 +48,23 @@ public class DatatypeRecognitionTest { ...@@ -48,23 +48,23 @@ public class DatatypeRecognitionTest {
@Test @Test
public void testDatatypeRecognition() throws Exception { public void testDatatypeRecognition() throws Exception {
final VarietyAnalysis analysis = variety.runAnalysis(); final ResultsValidator analysis = variety.runDatabaseAnalysis();
Assert.assertEquals(14, analysis.getResultsCollection().count()); Assert.assertEquals(14, analysis.getResultsCount());
analysis.verifyResult("_id", 1, 100, "ObjectId"); analysis.validate("_id", 1, 100, "ObjectId");
analysis.verifyResult("key_string", 1, 100, "String"); analysis.validate("key_string", 1, 100, "String");
analysis.verifyResult("key_boolean", 1, 100, "Boolean"); analysis.validate("key_boolean", 1, 100, "Boolean");
analysis.verifyResult("key_number", 1, 100, "Number"); analysis.validate("key_number", 1, 100, "Number");
analysis.verifyResult("key_date", 1, 100, "Date"); analysis.validate("key_date", 1, 100, "Date");
analysis.verifyResult("key_binData-generic", 1, 100, "BinData-generic"); analysis.validate("key_binData-generic", 1, 100, "BinData-generic");
analysis.verifyResult("key_binData-function", 1, 100, "BinData-function"); analysis.validate("key_binData-function", 1, 100, "BinData-function");
analysis.verifyResult("key_binData-old", 1, 100, "BinData-old"); analysis.validate("key_binData-old", 1, 100, "BinData-old");
analysis.verifyResult("key_binData-UUID", 1, 100, "BinData-UUID"); analysis.validate("key_binData-UUID", 1, 100, "BinData-UUID");
analysis.verifyResult("key_binData-MD5", 1, 100, "BinData-MD5"); analysis.validate("key_binData-MD5", 1, 100, "BinData-MD5");
analysis.verifyResult("key_binData-user", 1, 100, "BinData-user"); analysis.validate("key_binData-user", 1, 100, "BinData-user");
analysis.verifyResult("key_array", 1, 100, "Array"); analysis.validate("key_array", 1, 100, "Array");
analysis.verifyResult("key_object", 1, 100, "Object"); analysis.validate("key_object", 1, 100, "Object");
analysis.verifyResult("key_null", 1, 100, "null"); // TODO: why has 'null' first letter lowercase, unlike all other types? analysis.validate("key_null", 1, 100, "null"); // TODO: why has 'null' first letter lowercase, unlike all other types?
} }
} }
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
...@@ -28,10 +28,10 @@ public class LimitResultsAnalysisTest { ...@@ -28,10 +28,10 @@ public class LimitResultsAnalysisTest {
@Test @Test
public void verifyLimitedResults() throws Exception { public void verifyLimitedResults() throws Exception {
final VarietyAnalysis analysis = variety.withLimit(1).runAnalysis(); final ResultsValidator analysis = variety.withLimit(1).runDatabaseAnalysis();
analysis.verifyResult("_id", 5, 100, "ObjectId"); analysis.validate("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String"); analysis.validate("name", 5, 100, "String");
analysis.verifyResult("someBinData", 1, 20, "BinData-old"); analysis.validate("someBinData", 1, 20, "BinData-old");
} }
} }
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.mongodb.util.JSON; import com.mongodb.util.JSON;
import org.junit.After; import org.junit.After;
...@@ -29,32 +29,32 @@ public class MaxDepthAnalysisTest { ...@@ -29,32 +29,32 @@ public class MaxDepthAnalysisTest {
@Test @Test
public void testUnlimitedAnalysis() throws Exception { public void testUnlimitedAnalysis() throws Exception {
final VarietyAnalysis analysis = variety.runAnalysis(); final ResultsValidator analysis = variety.runDatabaseAnalysis();
Assert.assertEquals("Variety results have not correct count of entries", 8, analysis.getResultsCollection().count()); // 8 results, including '_id' and 'name' Assert.assertEquals("Variety results have not correct count of entries", 8, analysis.getResultsCount()); // 8 results, including '_id' and 'name'
analysis.verifyResult("_id", 1, EXPECTED_PERCENTS, "ObjectId"); analysis.validate("_id", 1, EXPECTED_PERCENTS, "ObjectId");
analysis.verifyResult("name", 1, EXPECTED_PERCENTS, "String"); analysis.validate("name", 1, EXPECTED_PERCENTS, "String");
analysis.verifyResult("someNestedObject", 1, EXPECTED_PERCENTS, "Object"); analysis.validate("someNestedObject", 1, EXPECTED_PERCENTS, "Object");
analysis.verifyResult("someNestedObject.a", 1, EXPECTED_PERCENTS, "Object"); analysis.validate("someNestedObject.a", 1, EXPECTED_PERCENTS, "Object");
analysis.verifyResult("someNestedObject.a.b", 1, EXPECTED_PERCENTS, "Object"); analysis.validate("someNestedObject.a.b", 1, EXPECTED_PERCENTS, "Object");
analysis.verifyResult("someNestedObject.a.b.c", 1, EXPECTED_PERCENTS, "Object"); analysis.validate("someNestedObject.a.b.c", 1, EXPECTED_PERCENTS, "Object");
analysis.verifyResult("someNestedObject.a.b.c.d", 1, EXPECTED_PERCENTS, "Object"); analysis.validate("someNestedObject.a.b.c.d", 1, EXPECTED_PERCENTS, "Object");
analysis.verifyResult("someNestedObject.a.b.c.d.e", 1, EXPECTED_PERCENTS, "Number"); analysis.validate("someNestedObject.a.b.c.d.e", 1, EXPECTED_PERCENTS, "Number");
} }
@Test @Test
public void testLimitedDepthAnalysis() throws Exception { public void testLimitedDepthAnalysis() throws Exception {
final VarietyAnalysis analysis = variety.withMaxDepth(3).runAnalysis(); final ResultsValidator analysis = variety.withMaxDepth(3).runDatabaseAnalysis();
Assert.assertEquals("Variety results have not correct count of entries", 5, analysis.getResultsCollection().count()); // 5 results, including '_id' and 'name' Assert.assertEquals("Variety results have not correct count of entries", 5, analysis.getResultsCount()); // 5 results, including '_id' and 'name'
analysis.verifyResult("_id", 1, EXPECTED_PERCENTS, "ObjectId"); analysis.validate("_id", 1, EXPECTED_PERCENTS, "ObjectId");
analysis.verifyResult("name", 1, EXPECTED_PERCENTS, "String"); analysis.validate("name", 1, EXPECTED_PERCENTS, "String");
analysis.verifyResult("someNestedObject", 1, EXPECTED_PERCENTS, "Object"); analysis.validate("someNestedObject", 1, EXPECTED_PERCENTS, "Object");
analysis.verifyResult("someNestedObject.a", 1, EXPECTED_PERCENTS, "Object"); analysis.validate("someNestedObject.a", 1, EXPECTED_PERCENTS, "Object");
analysis.verifyResult("someNestedObject.a.b", 1, EXPECTED_PERCENTS, "Object"); analysis.validate("someNestedObject.a.b", 1, EXPECTED_PERCENTS, "Object");
} }
......
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import com.mongodb.BasicDBList;
import com.mongodb.util.JSON;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
...@@ -30,21 +28,18 @@ public class OutputFormatTest { ...@@ -30,21 +28,18 @@ public class OutputFormatTest {
@Test @Test
public void verifyJsonEntries() throws Exception { public void verifyJsonEntries() throws Exception {
final VarietyAnalysis analysis = variety final ResultsValidator analysis = variety
.withQuiet(true) // do not output any other metadata, only results .withQuiet(true) // do not output any other metadata, only results
.withFormat(Variety.FORMAT_JSON) .withFormat(Variety.FORMAT_JSON)
.runAnalysis(); .runJsonAnalysis();
// Verify, that output is parse-able json by transforming stdout to json
final BasicDBList parsed = (BasicDBList) JSON.parse(analysis.getStdOut());
// there should be seven different json results // there should be seven different json results
Assert.assertEquals(7, parsed.size()); Assert.assertEquals(7, analysis.getResultsCount());
} }
@Test @Test
public void verifyAsciiTableOutput() throws Exception { public void verifyAsciiTableOutput() throws Exception {
final VarietyAnalysis analysis = variety.withFormat(Variety.FORMAT_ASCII).runAnalysis(); final ResultsValidator analysis = variety.withFormat(Variety.FORMAT_ASCII).runDatabaseAnalysis();
// filter only lines starting with character '|' // filter only lines starting with character '|'
final String actual = Stream.of(analysis.getStdOut().split("\n")) final String actual = Stream.of(analysis.getStdOut().split("\n"))
......
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
...@@ -35,7 +35,7 @@ public class ParametersParsingTest { ...@@ -35,7 +35,7 @@ public class ParametersParsingTest {
*/ */
@Test @Test
public void verifyDefaultResultsStdout() throws Exception { public void verifyDefaultResultsStdout() throws Exception {
final VarietyAnalysis analysis = variety.runAnalysis(); final ResultsValidator analysis = variety.runDatabaseAnalysis();
final Map<String, String> params = getParamsMap(analysis.getStdOut()); final Map<String, String> params = getParamsMap(analysis.getStdOut());
...@@ -50,12 +50,12 @@ public class ParametersParsingTest { ...@@ -50,12 +50,12 @@ public class ParametersParsingTest {
*/ */
@Test @Test
public void verifyRestrictedResultsStdout() throws Exception { public void verifyRestrictedResultsStdout() throws Exception {
final VarietyAnalysis analysis = variety final ResultsValidator analysis = variety
.withQuery("{name:'Harry'}") .withQuery("{name:'Harry'}")
.withSort("{name:1}") .withSort("{name:1}")
.withMaxDepth(5) .withMaxDepth(5)
.withLimit(2) .withLimit(2)
.runAnalysis(); .runDatabaseAnalysis();
final Map<String, String> params = getParamsMap(analysis.getStdOut()); final Map<String, String> params = getParamsMap(analysis.getStdOut());
...@@ -72,7 +72,7 @@ public class ParametersParsingTest { ...@@ -72,7 +72,7 @@ public class ParametersParsingTest {
public void testUnknownCollectionResponse() throws Exception { public void testUnknownCollectionResponse() throws Exception {
this.variety = new Variety("test", "--unknown--"); this.variety = new Variety("test", "--unknown--");
try { try {
variety.runAnalysis(); variety.runDatabaseAnalysis();
Assert.fail("It should throw exception"); Assert.fail("It should throw exception");
} catch (final RuntimeException e) { } catch (final RuntimeException e) {
Assert.assertTrue(e.getMessage().contains("does not exist or is empty")); Assert.assertTrue(e.getMessage().contains("does not exist or is empty"));
...@@ -81,23 +81,30 @@ public class ParametersParsingTest { ...@@ -81,23 +81,30 @@ public class ParametersParsingTest {
@Test @Test
public void testDefaultOutputFormatParam() throws Exception { public void testDefaultOutputFormatParam() throws Exception {
final VarietyAnalysis analysis = variety.runAnalysis(); // format option not provided final ResultsValidator analysis = variety.runDatabaseAnalysis(); // format option not provided
final Map<String, String> params = getParamsMap(analysis.getStdOut()); final Map<String, String> params = getParamsMap(analysis.getStdOut());
Assert.assertEquals("ascii", params.get(Variety.PARAM_OUTPUT_FORMAT)); Assert.assertEquals("ascii", params.get(Variety.PARAM_OUTPUT_FORMAT));
} }
@Test @Test
public void testAsciiOutputFormatParam() throws Exception { public void testAsciiOutputFormatParam() throws Exception {
final VarietyAnalysis analysis = variety.withFormat(Variety.FORMAT_ASCII).runAnalysis(); final ResultsValidator analysis = variety.withFormat(Variety.FORMAT_ASCII).runDatabaseAnalysis();
final Map<String, String> params = getParamsMap(analysis.getStdOut()); final Map<String, String> params = getParamsMap(analysis.getStdOut());
Assert.assertEquals("ascii", params.get(Variety.PARAM_OUTPUT_FORMAT)); Assert.assertEquals("ascii", params.get(Variety.PARAM_OUTPUT_FORMAT));
} }
@Test @Test
public void testJsonOutputFormatParam() throws Exception { public void testPersistResultsParam() throws Exception {
final VarietyAnalysis analysis = variety.withFormat(Variety.FORMAT_JSON).runAnalysis(); final ResultsValidator analysis = variety.runDatabaseAnalysis();
final Map<String, String> params = getParamsMap(analysis.getStdOut()); final Map<String, String> params = getParamsMap(analysis.getStdOut());
Assert.assertEquals("json", params.get(Variety.PARAM_OUTPUT_FORMAT)); Assert.assertEquals("true", params.get(Variety.PARAM_PERSIST_RESULTS));
}
@Test
public void testJsonOutputFormatParam() throws Exception {
final ResultsValidator analysis = variety.withFormat(Variety.FORMAT_JSON).runJsonAnalysis();
// verify that the result is clean, parsable JSON with 7 entries found
Assert.assertEquals(7, analysis.getResultsCount());
} }
/** /**
......
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
...@@ -24,13 +24,13 @@ public class QueryLimitedAnalysisTest { ...@@ -24,13 +24,13 @@ public class QueryLimitedAnalysisTest {
@Test @Test
public void testQueryLimitedAnalysis() throws Exception { public void testQueryLimitedAnalysis() throws Exception {
final VarietyAnalysis analysis = variety.withQuery("{someBinData:{$exists: true}}").runAnalysis(); final ResultsValidator analysis = variety.withQuery("{someBinData:{$exists: true}}").runDatabaseAnalysis();
Assert.assertEquals(3, analysis.getResultsCollection().count()); Assert.assertEquals(3, analysis.getResultsCount());
// we analyzed only the keys of objects defined by query. But total counts and percents are computed from the complete collection // we analyzed only the keys of objects defined by query. But total counts and percents are computed from the complete collection
analysis.verifyResult("_id", 5, 100, "ObjectId"); analysis.validate("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String"); analysis.validate("name", 5, 100, "String");
analysis.verifyResult("someBinData", 1, 20, "BinData-old"); analysis.validate("someBinData", 1, 20, "BinData-old");
} }
} }
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
...@@ -33,7 +33,7 @@ public class QuietOptionTest { ...@@ -33,7 +33,7 @@ public class QuietOptionTest {
*/ */
@Test @Test
public void testQuietLogs() throws Exception { public void testQuietLogs() throws Exception {
final VarietyAnalysis varietyAnalysis = variety.withQuiet(true).runAnalysis(); final ResultsValidator varietyAnalysis = variety.withQuiet(true).runDatabaseAnalysis();
Assert.assertEquals(SampleData.EXPECTED_DATA_ASCII_TABLE, varietyAnalysis.getStdOut()); Assert.assertEquals(SampleData.EXPECTED_DATA_ASCII_TABLE, varietyAnalysis.getStdOut());
} }
} }
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
...@@ -30,13 +30,13 @@ public class SortedAnalysisTest { ...@@ -30,13 +30,13 @@ public class SortedAnalysisTest {
@Test @Test
public void testSortedAnalysis() throws Exception { public void testSortedAnalysis() throws Exception {
// Sort without limit or other query should not modify results itself. Analysis is done on the same data, only in another order. // Sort without limit or other query should not modify results itself. Analysis is done on the same data, only in another order.
final VarietyAnalysis analysis = variety.withSort("{name:-1}").runAnalysis(); final ResultsValidator analysis = variety.withSort("{name:-1}").runDatabaseAnalysis();
analysis.verifyResult("_id", 5, 100, "ObjectId"); analysis.validate("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String"); analysis.validate("name", 5, 100, "String");
analysis.verifyResult("bio", 3, 60, "String"); analysis.validate("bio", 3, 60, "String");
analysis.verifyResult("pets", 2, 40, "String", "Array"); analysis.validate("pets", 2, 40, "String", "Array");
analysis.verifyResult("someBinData", 1, 20, "BinData-old"); analysis.validate("someBinData", 1, 20, "BinData-old");
analysis.verifyResult("someWeirdLegacyKey", 1, 20, "String"); analysis.validate("someWeirdLegacyKey", 1, 20, "String");
} }
...@@ -45,15 +45,15 @@ public class SortedAnalysisTest { ...@@ -45,15 +45,15 @@ public class SortedAnalysisTest {
// when sorting default SampleData by name desc, first entry becomes Tom. He is the only one with key 'someWeirdLegacyKey' // when sorting default SampleData by name desc, first entry becomes Tom. He is the only one with key 'someWeirdLegacyKey'
// Together with applying limit 1, Tom is the only result in analysis. That gives us chance to assume keys and verify // Together with applying limit 1, Tom is the only result in analysis. That gives us chance to assume keys and verify
// that ordering is correct. // that ordering is correct.
final VarietyAnalysis analysis = variety.withSort("{name:-1}").withLimit(1).runAnalysis(); final ResultsValidator analysis = variety.withSort("{name:-1}").withLimit(1).runDatabaseAnalysis();
Assert.assertEquals(5, analysis.getResultsCollection().count()); Assert.assertEquals(5, analysis.getResultsCount());
analysis.verifyResult("_id", 5, 100, "ObjectId"); analysis.validate("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String"); analysis.validate("name", 5, 100, "String");
analysis.verifyResult("bio", 3, 60, "String"); analysis.validate("bio", 3, 60, "String");
analysis.verifyResult("pets", 2, 40, "Array"); analysis.validate("pets", 2, 40, "Array");
analysis.verifyResult("someWeirdLegacyKey", 1, 20, "String"); analysis.validate("someWeirdLegacyKey", 1, 20, "String");
} }
} }
package com.github.variety.test; package com.github.variety.test;
import com.github.variety.Variety; import com.github.variety.Variety;
import com.github.variety.VarietyAnalysis; import com.github.variety.validator.ResultsValidator;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.mongodb.util.JSON; import com.mongodb.util.JSON;
import org.junit.After; import org.junit.After;
...@@ -32,17 +32,17 @@ public class UnnamedObjectsAnalysisTest { ...@@ -32,17 +32,17 @@ public class UnnamedObjectsAnalysisTest {
@Test @Test
public void testUnnamedObjects() throws Exception { public void testUnnamedObjects() throws Exception {
final VarietyAnalysis analysis = variety.runAnalysis(); final ResultsValidator analysis = variety.runDatabaseAnalysis();
Assert.assertEquals(6, analysis.getResultsCollection().count()); Assert.assertEquals(6, analysis.getResultsCount());
analysis.verifyResult("_id", 2, 100, "ObjectId"); analysis.validate("_id", 2, 100, "ObjectId");
analysis.verifyResult("title", 2, 100, "String"); analysis.validate("title", 2, 100, "String");
analysis.verifyResult("comments", 2, 100, "Array"); analysis.validate("comments", 2, 100, "Array");
// unnamed objects are prefixed with .XX key // unnamed objects are prefixed with .XX key
analysis.verifyResult("comments.XX.author", 2, 100, "String"); analysis.validate("comments.XX.author", 2, 100, "String");
analysis.verifyResult("comments.XX.body", 2, 100, "String"); analysis.validate("comments.XX.body", 2, 100, "String");
analysis.verifyResult("comments.XX.visible", 1, 50, "Boolean"); analysis.validate("comments.XX.visible", 1, 50, "Boolean");
} }
} }
...@@ -17,8 +17,8 @@ import java.util.regex.Pattern; ...@@ -17,8 +17,8 @@ import java.util.regex.Pattern;
*/ */
public class VersionInfoTest { public class VersionInfoTest {
public static final Pattern VARIETYJS_PATTERN = Pattern.compile("\\w+\\('(.+), released (.+)'\\).*"); private static final Pattern VARIETYJS_PATTERN = Pattern.compile("\\w+\\('(.+), released (.+)'\\).*");
public static final Pattern CHANGELOG_PATTERN = Pattern.compile("\\((.+)\\)(.+):(.*)"); private static final Pattern CHANGELOG_PATTERN = Pattern.compile("\\((.+)\\)(.+):(.*)");
private List<String> varietyLines; private List<String> varietyLines;
private List<String> changelogLines; private List<String> changelogLines;
...@@ -41,24 +41,24 @@ public class VersionInfoTest { ...@@ -41,24 +41,24 @@ public class VersionInfoTest {
getChangelogDate(changelogLines), getVarietyDate(varietyLines)); getChangelogDate(changelogLines), getVarietyDate(varietyLines));
} }
private String getVarietyVersion(List<String> variety) { private String getVarietyVersion(final List<String> variety) {
return getVarietyPatternGroup(variety, 1); return getVarietyPatternGroup(variety, 1);
} }
private String getVarietyDate(List<String> variety) { private String getVarietyDate(final List<String> variety) {
return getVarietyPatternGroup(variety, 2); return getVarietyPatternGroup(variety, 2);
} }
private String getChangelogDate(List<String> changelog) { private String getChangelogDate(final List<String> changelog) {
return getChangelogPatternGroup(changelog, 1); return getChangelogPatternGroup(changelog, 1);
} }
private String getChangelogVersion(List<String> changelog) { private String getChangelogVersion(final List<String> changelog) {
return getChangelogPatternGroup(changelog, 2); return getChangelogPatternGroup(changelog, 2);
} }
private String getVarietyPatternGroup(final List<String> variety, final int group) { private String getVarietyPatternGroup(final List<String> variety, final int group) {
for (String line : variety) { for (final String line : variety) {
final Matcher matcher = VARIETYJS_PATTERN.matcher(line); final Matcher matcher = VARIETYJS_PATTERN.matcher(line);
if (matcher.matches()) { if (matcher.matches()) {
return matcher.group(group); return matcher.group(group);
...@@ -75,7 +75,7 @@ public class VersionInfoTest { ...@@ -75,7 +75,7 @@ public class VersionInfoTest {
return matcher.group(group).trim(); return matcher.group(group).trim();
} }
private Path getFile(String filename) { private Path getFile(final String filename) {
// on Linux it could be, for example, /{path_to_project}/variety/test/target/test-classes
final String testClassesPath = this.getClass().getResource("/").getFile(); final String testClassesPath = this.getClass().getResource("/").getFile();
......
...@@ -72,6 +72,8 @@ log('Using sort of ' + tojson(sort)); ...@@ -72,6 +72,8 @@ log('Using sort of ' + tojson(sort));
if (typeof outputFormat === 'undefined') { var outputFormat = 'ascii'; } if (typeof outputFormat === 'undefined') { var outputFormat = 'ascii'; }
log('Using outputFormat of ' + outputFormat); log('Using outputFormat of ' + outputFormat);
if (typeof persistResults === 'undefined') { var persistResults = false; }
log('Using persistResults of ' + persistResults);
varietyTypeOf = function(thing) { varietyTypeOf = function(thing) {
if (typeof thing === 'undefined') { throw 'varietyTypeOf() requires an argument'; } if (typeof thing === 'undefined') { throw 'varietyTypeOf() requires an argument'; }
...@@ -173,7 +175,7 @@ db[collection].find(query).sort(sort).limit(limit).forEach(function(obj) { ...@@ -173,7 +175,7 @@ db[collection].find(query).sort(sort).limit(limit).forEach(function(obj) {
}); });
var varietyResults = {}; var varietyResults = [];
//now convert the interimResults into the proper format //now convert the interimResults into the proper format
for(var key in interimResults){ for(var key in interimResults){
var entry = interimResults[key]; var entry = interimResults[key];
...@@ -182,55 +184,59 @@ for(var key in interimResults){ ...@@ -182,55 +184,59 @@ for(var key in interimResults){
newEntry['value'] = {'types':Object.keys(entry['types'])}; newEntry['value'] = {'types':Object.keys(entry['types'])};
newEntry['totalOccurrences'] = entry['totalOccurrences']; newEntry['totalOccurrences'] = entry['totalOccurrences'];
newEntry['percentContaining'] = entry['totalOccurrences']*100/limit; newEntry['percentContaining'] = entry['totalOccurrences']*100/limit;
varietyResults[key] = newEntry; varietyResults.push(newEntry);
}
var resultsDB = db.getMongo().getDB('varietyResults');
var resultsCollectionName = collection + 'Keys';
// replace results collection
log('creating results collection: '+resultsCollectionName);
resultsDB[resultsCollectionName].drop();
for(var result in varietyResults) {
resultsDB[resultsCollectionName].insert(varietyResults[result]);
} }
var numDocuments = db[collection].count(); var numDocuments = db[collection].count();
log('removing leaf arrays in results collection, and getting percentages'); // We throw away keys which end in an array index, since they are not useful
resultsDB[resultsCollectionName].find({}).forEach(function(key) { // for our analysis. (We still keep the key of their parent array, though.) -JC
var keyName = key._id.key; var filter = function(item) {
return !item._id.key.match(/\.XX$/);
// We throw away keys which end in an array index, since they are not useful };
// for our analysis. (We still keep the key of their parent array, though.) -JC
if(keyName.match(/\.XX$/)) {
resultsDB[resultsCollectionName].remove({ '_id' : key._id});
return;
}
var map = function(item) {
var keyName = item._id.key;
if(keyName.match(/\.XX/)) { if(keyName.match(/\.XX/)) {
// exists query checks for embedded values for an array // exists query checks for embedded values for an array
// ie. match {arr:[{x:1}]} with {'arr.x':{$exists:true}} // ie. match {arr:[{x:1}]} with {'arr.x':{$exists:true}}
// just need to pull out .XX in this case // just need to pull out .XX in this case
keyName = keyName.replace(/.XX/g,''); keyName = keyName.replace(/.XX/g,'');
} }
// we don't need to set it if limit isn't being used. (it's set above.) // we don't need to set it if limit isn't being used. (it's set above.)
if(limit < numDocuments) { if(limit < numDocuments) {
var existsQuery = {}; var existsQuery = {};
existsQuery[keyName] = {$exists: true}; existsQuery[keyName] = {$exists: true};
key.totalOccurrences = db[collection].count(existsQuery); item.totalOccurrences = db[collection].count(existsQuery);
} }
key.percentContaining = (key.totalOccurrences / numDocuments) * 100.0; item.percentContaining = (item.totalOccurrences / numDocuments) * 100.0;
resultsDB[resultsCollectionName].save(key); return item;
}); };
// sort desc by totalOccurrences or by key asc if occurrences equal
var comparator = function(a, b) {
var countsDiff = b.totalOccurrences - a.totalOccurrences;
return countsDiff !== 0 ? countsDiff : a._id.key.localeCompare(b._id.key);
};
var sortedKeys = resultsDB[resultsCollectionName].find({}).sort({totalOccurrences: -1, '_id.key': 1}); // occurrences count & alphabetical order log('removing leaf arrays in results collection, and getting percentages');
varietyResults = varietyResults.filter(filter).map(map).sort(comparator);
if(persistResults) {
var resultsDB = db.getMongo().getDB('varietyResults');
var resultsCollectionName = collection + 'Keys';
// replace results collection
log('creating results collection: '+resultsCollectionName);
resultsDB[resultsCollectionName].drop();
resultsDB[resultsCollectionName].insert(varietyResults);
}
if(outputFormat === 'json') { if(outputFormat === 'json') {
printjson(sortedKeys.toArray()); // valid formatted json output, compressed variant is printjsononeline() printjson(varietyResults); // valid formatted json output, compressed variant is printjsononeline()
} else { // output nice ascii table with results } else { // output nice ascii table with results
var table = [['key', 'types', 'occurrences', 'percents'], ['', '', '', '']]; // header + delimiter rows var table = [['key', 'types', 'occurrences', 'percents'], ['', '', '', '']]; // header + delimiter rows
sortedKeys.forEach(function(key) { varietyResults.forEach(function(key) {
table.push([key._id.key, key.value.types.toString(), key.totalOccurrences.toString(), key.percentContaining.toString()]); table.push([key._id.key, key.value.types.toString(), key.totalOccurrences.toString(), key.percentContaining.toString()]);
}); });
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment