Commit 95d2e343 by Wes Freeman

Merge pull request #45 from todvora/master

Fixed maxDepth inconsistency, fixed total counts and percents
parents 97a34e7f e1aafb33
...@@ -32,7 +32,6 @@ public class LimitResultsAnalysisTest { ...@@ -32,7 +32,6 @@ public class LimitResultsAnalysisTest {
analysis.verifyResult("_id", 5, 100, "ObjectId"); analysis.verifyResult("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String"); analysis.verifyResult("name", 5, 100, "String");
// TODO: there is only one document with 'someBinData'. Why variety returns 5/100% instead of 1/20% ? analysis.verifyResult("someBinData", 1, 20, "BinData-old");
// FIXME: analysis.verifyResult("someBinData", 1, 20, "BinData-old");
} }
} }
...@@ -11,7 +11,7 @@ import org.junit.Test; ...@@ -11,7 +11,7 @@ import org.junit.Test;
public class MaxDepthAnalysisTest { public class MaxDepthAnalysisTest {
private static final double EXPECTED_PERCENTS = 100; //TODO: why documentation mentions be 16.66666666666666, when there is only one document at all ? private static final double EXPECTED_PERCENTS = 100;
private Variety variety; private Variety variety;
@Before @Before
...@@ -47,8 +47,7 @@ public class MaxDepthAnalysisTest { ...@@ -47,8 +47,7 @@ public class MaxDepthAnalysisTest {
public void testLimitedDepthAnalysis() throws Exception { public void testLimitedDepthAnalysis() throws Exception {
final VarietyAnalysis analysis = variety.withMaxDepth(3).runAnalysis(); final VarietyAnalysis analysis = variety.withMaxDepth(3).runAnalysis();
// TODO: depth 3 means 'someNestedObject.a.b' or 'someNestedObject.a.b.c'? Documentation describes the first variant, variety counts also second. Assert.assertEquals("Variety results have not correct count of entries", 5, analysis.getResultsCollection().count()); // 5 results, including '_id' and 'name'
// FIXME: Assert.assertEquals("Variety results have not correct count of entries", 5, analysis.getResultsCollection().count()); // 5 results, including '_id' and 'name'
analysis.verifyResult("_id", 1, EXPECTED_PERCENTS, "ObjectId"); analysis.verifyResult("_id", 1, EXPECTED_PERCENTS, "ObjectId");
analysis.verifyResult("name", 1, EXPECTED_PERCENTS, "String"); analysis.verifyResult("name", 1, EXPECTED_PERCENTS, "String");
......
...@@ -27,9 +27,9 @@ public class QueryLimitedAnalysisTest { ...@@ -27,9 +27,9 @@ public class QueryLimitedAnalysisTest {
final VarietyAnalysis analysis = variety.withQuery("{someBinData:{$exists: true}}").runAnalysis(); final VarietyAnalysis analysis = variety.withQuery("{someBinData:{$exists: true}}").runAnalysis();
Assert.assertEquals(3, analysis.getResultsCollection().count()); Assert.assertEquals(3, analysis.getResultsCollection().count());
// TODO: are those percentContaining numbers correct? Should percents be limited to all data or query data? // we analyzed only the keys of objects defined by query. But total counts and percents are computed from the complete collection
analysis.verifyResult("_id", 1, 20, "ObjectId"); analysis.verifyResult("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 1, 20, "String"); analysis.verifyResult("name", 5, 100, "String");
analysis.verifyResult("someBinData", 1, 20, "BinData-old"); analysis.verifyResult("someBinData", 1, 20, "BinData-old");
} }
......
...@@ -49,14 +49,11 @@ public class SortedAnalysisTest { ...@@ -49,14 +49,11 @@ public class SortedAnalysisTest {
Assert.assertEquals(5, analysis.getResultsCollection().count()); Assert.assertEquals(5, analysis.getResultsCollection().count());
// TODO: are those percentContaining numbers correct? Should percents be limited to all data or query data?
// Why total counts are always 5, when 'someWeirdLegacyKey' has only one object?
// Keys and types are correct, total count and percents seems not right.
analysis.verifyResult("_id", 5, 100, "ObjectId"); analysis.verifyResult("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String"); analysis.verifyResult("name", 5, 100, "String");
analysis.verifyResult("bio", 5, 100, "String"); analysis.verifyResult("bio", 3, 60, "String");
analysis.verifyResult("pets", 5, 100, "Array"); analysis.verifyResult("pets", 2, 40, "Array");
analysis.verifyResult("someWeirdLegacyKey", 5, 100, "String"); analysis.verifyResult("someWeirdLegacyKey", 1, 20, "String");
} }
} }
...@@ -5,6 +5,7 @@ import com.github.variety.VarietyAnalysis; ...@@ -5,6 +5,7 @@ import com.github.variety.VarietyAnalysis;
import com.mongodb.DBObject; import com.mongodb.DBObject;
import com.mongodb.util.JSON; import com.mongodb.util.JSON;
import org.junit.After; import org.junit.After;
import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
...@@ -33,20 +34,15 @@ public class UnnamedObjectsAnalysisTest { ...@@ -33,20 +34,15 @@ public class UnnamedObjectsAnalysisTest {
public void testUnnamedObjects() throws Exception { public void testUnnamedObjects() throws Exception {
final VarietyAnalysis analysis = variety.runAnalysis(); final VarietyAnalysis analysis = variety.runAnalysis();
Assert.assertEquals(6, analysis.getResultsCollection().count());
analysis.verifyResult("_id", 2, 100, "ObjectId"); analysis.verifyResult("_id", 2, 100, "ObjectId");
analysis.verifyResult("title", 2, 100, "String"); analysis.verifyResult("title", 2, 100, "String");
analysis.verifyResult("comments", 2, 100, "Array"); analysis.verifyResult("comments", 2, 100, "Array");
// unnamed objects are prefixed with .XX key
// TODO: current version of variety is not able to handle unnamed inside objects. Earlier they were marked with XX. key prefix. analysis.verifyResult("comments.XX.author", 2, 100, "String");
// Now the unnamed object are skipped and not analysed at all. Example of earlier version results can be seen analysis.verifyResult("comments.XX.body", 2, 100, "String");
// in issue https://github.com/variety/variety/issues/29 analysis.verifyResult("comments.XX.visible", 1, 50, "Boolean");
// There should be 6 different keys: _id, title, comments and three from anonymous objects: comments.XX.author, comments.XX.body, comments.XX.visible
// FIXME: Assert.assertEquals(6, analysis.getResultsCollection().count());
// FIXME: analysis.verifyResult("comments.XX.author", 2, 100, "String");
// FIXME: analysis.verifyResult("comments.XX.body", 2, 100, "String");
// FIXME: analysis.verifyResult("comments.XX.visible", 1, 50, "Boolean");
} }
} }
...@@ -128,7 +128,7 @@ function serializeDoc(doc, maxDepth){ ...@@ -128,7 +128,7 @@ function serializeDoc(doc, maxDepth){
//if(typeof value != 'object') //if(typeof value != 'object')
result[parentKey+key] = value; result[parentKey+key] = value;
//it's an object, recurse...only if we haven't reached max depth //it's an object, recurse...only if we haven't reached max depth
if(isHash(value) && (maxDepth > 0)) { if(isHash(value) && (maxDepth > 1)) {
serialize(value, parentKey+key+'.',maxDepth-1); serialize(value, parentKey+key+'.',maxDepth-1);
} }
} }
...@@ -209,7 +209,7 @@ resultsDB[resultsCollectionName].find({}).forEach(function(key) { ...@@ -209,7 +209,7 @@ resultsDB[resultsCollectionName].find({}).forEach(function(key) {
if(limit < numDocuments) { if(limit < numDocuments) {
var existsQuery = {}; var existsQuery = {};
existsQuery[keyName] = {$exists: true}; existsQuery[keyName] = {$exists: true};
key.totalOccurrences = db[collection].find(query).count(existsQuery); key.totalOccurrences = db[collection].count(existsQuery);
} }
key.percentContaining = (key.totalOccurrences / numDocuments) * 100.0; key.percentContaining = (key.totalOccurrences / numDocuments) * 100.0;
resultsDB[resultsCollectionName].save(key); resultsDB[resultsCollectionName].save(key);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment