Commit 95d2e343 by Wes Freeman

Merge pull request #45 from todvora/master

Fixed maxDepth inconsistency, fixed total counts and percents
parents 97a34e7f e1aafb33
......@@ -32,7 +32,6 @@ public class LimitResultsAnalysisTest {
analysis.verifyResult("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String");
// TODO: there is only one document with 'someBinData'. Why does variety return 5/100% instead of 1/20%?
// FIXME: analysis.verifyResult("someBinData", 1, 20, "BinData-old");
analysis.verifyResult("someBinData", 1, 20, "BinData-old");
}
}
......@@ -11,7 +11,7 @@ import org.junit.Test;
public class MaxDepthAnalysisTest {
private static final double EXPECTED_PERCENTS = 100; //TODO: why does the documentation mention 16.66666666666666 when there is only one document at all?
private static final double EXPECTED_PERCENTS = 100;
private Variety variety;
@Before
......@@ -47,8 +47,7 @@ public class MaxDepthAnalysisTest {
public void testLimitedDepthAnalysis() throws Exception {
final VarietyAnalysis analysis = variety.withMaxDepth(3).runAnalysis();
// TODO: does depth 3 mean 'someNestedObject.a.b' or 'someNestedObject.a.b.c'? The documentation describes the first variant, but variety also counts the second.
// FIXME: Assert.assertEquals("Variety results have not correct count of entries", 5, analysis.getResultsCollection().count()); // 5 results, including '_id' and 'name'
Assert.assertEquals("Variety results have not correct count of entries", 5, analysis.getResultsCollection().count()); // 5 results, including '_id' and 'name'
analysis.verifyResult("_id", 1, EXPECTED_PERCENTS, "ObjectId");
analysis.verifyResult("name", 1, EXPECTED_PERCENTS, "String");
......
......@@ -27,9 +27,9 @@ public class QueryLimitedAnalysisTest {
final VarietyAnalysis analysis = variety.withQuery("{someBinData:{$exists: true}}").runAnalysis();
Assert.assertEquals(3, analysis.getResultsCollection().count());
// TODO: are those percentContaining numbers correct? Should percents be limited to all data or query data?
analysis.verifyResult("_id", 1, 20, "ObjectId");
analysis.verifyResult("name", 1, 20, "String");
// We analyzed only the keys of objects selected by the query, but total counts and percents are computed from the complete collection.
analysis.verifyResult("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String");
analysis.verifyResult("someBinData", 1, 20, "BinData-old");
}
......
......@@ -49,14 +49,11 @@ public class SortedAnalysisTest {
Assert.assertEquals(5, analysis.getResultsCollection().count());
// TODO: are those percentContaining numbers correct? Should percents be limited to all data or query data?
// Why are total counts always 5, when 'someWeirdLegacyKey' has only one object?
// Keys and types are correct; total counts and percents do not seem right.
analysis.verifyResult("_id", 5, 100, "ObjectId");
analysis.verifyResult("name", 5, 100, "String");
analysis.verifyResult("bio", 5, 100, "String");
analysis.verifyResult("pets", 5, 100, "Array");
analysis.verifyResult("someWeirdLegacyKey", 5, 100, "String");
analysis.verifyResult("bio", 3, 60, "String");
analysis.verifyResult("pets", 2, 40, "Array");
analysis.verifyResult("someWeirdLegacyKey", 1, 20, "String");
}
}
......@@ -5,6 +5,7 @@ import com.github.variety.VarietyAnalysis;
import com.mongodb.DBObject;
import com.mongodb.util.JSON;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
......@@ -33,20 +34,15 @@ public class UnnamedObjectsAnalysisTest {
public void testUnnamedObjects() throws Exception {
final VarietyAnalysis analysis = variety.runAnalysis();
Assert.assertEquals(6, analysis.getResultsCollection().count());
analysis.verifyResult("_id", 2, 100, "ObjectId");
analysis.verifyResult("title", 2, 100, "String");
analysis.verifyResult("comments", 2, 100, "Array");
// TODO: the current version of variety is not able to handle unnamed inner objects. Earlier they were marked with the XX. key prefix.
// Now the unnamed objects are skipped and not analysed at all. An example of the earlier version's results can be seen
// in issue https://github.com/variety/variety/issues/29
// There should be 6 different keys: _id, title, comments and three from anonymous objects: comments.XX.author, comments.XX.body, comments.XX.visible
// FIXME: Assert.assertEquals(6, analysis.getResultsCollection().count());
// FIXME: analysis.verifyResult("comments.XX.author", 2, 100, "String");
// FIXME: analysis.verifyResult("comments.XX.body", 2, 100, "String");
// FIXME: analysis.verifyResult("comments.XX.visible", 1, 50, "Boolean");
// unnamed objects are prefixed with .XX key
analysis.verifyResult("comments.XX.author", 2, 100, "String");
analysis.verifyResult("comments.XX.body", 2, 100, "String");
analysis.verifyResult("comments.XX.visible", 1, 50, "Boolean");
}
}
......@@ -128,7 +128,7 @@ function serializeDoc(doc, maxDepth){
//if(typeof value != 'object')
result[parentKey+key] = value;
//it's an object, recurse...only if we haven't reached max depth
if(isHash(value) && (maxDepth > 0)) {
if(isHash(value) && (maxDepth > 1)) {
serialize(value, parentKey+key+'.',maxDepth-1);
}
}
......@@ -209,7 +209,7 @@ resultsDB[resultsCollectionName].find({}).forEach(function(key) {
if(limit < numDocuments) {
var existsQuery = {};
existsQuery[keyName] = {$exists: true};
key.totalOccurrences = db[collection].find(query).count(existsQuery);
key.totalOccurrences = db[collection].count(existsQuery);
}
key.percentContaining = (key.totalOccurrences / numDocuments) * 100.0;
resultsDB[resultsCollectionName].save(key);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment