Commit 5c9b8bb4 by James Cropcho

Merge pull request #9 from wfreeman/master

Adding maxDepth option. And other stuff, see below.
parents a8e6ed88 bb282a5e
...@@ -57,6 +57,35 @@ We are only examining the last document here ("limit = 1"). It belongs to Genevi ...@@ -57,6 +57,35 @@ We are only examining the last document here ("limit = 1"). It belongs to Genevi
But how can totalOccurrences still reach 4? "limit" specifies how many documents to search for keys. Then, the tool calculates totalOccurrences and percentContaining from _all_ the collection's documents, even those outside the "limit". This tradeoff is meant to give the most bang for our buck, when using "limit" and learning about a collection. But how can totalOccurrences still reach 4? "limit" specifies how many documents to search for keys. Then, the tool calculates totalOccurrences and percentContaining from _all_ the collection's documents, even those outside the "limit". This tradeoff is meant to give the most bang for our buck, when using "limit" and learning about a collection.
### Analyze Documents to a Maximum Depth
Perhaps you have a potentially very deep nested object structure, and you don't want to see more than a few levels deep in the analysis.
You can apply a "maxDepth" constraint, which limits how many levels deep Variety will recurse into nested objects when looking for keys.
db.users.insert({name:"Walter", someNestedObject:{a:{b:{c:{d:{e:1}}}}}});
By default (a maxDepth of 99), Variety will traverse all the way to the bottom of that structure:
$ mongo test --eval "var collection = 'users'" variety.js
...
{ "_id" : { "key" : "someNestedObject" }, "value" : { "types" : [ "Object" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
{ "_id" : { "key" : "someNestedObject.a" }, "value" : { "types" : [ "Object" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
{ "_id" : { "key" : "someNestedObject.a.b" }, "value" : { "types" : [ "Object" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
{ "_id" : { "key" : "someNestedObject.a.b.c" }, "value" : { "types" : [ "Object" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
{ "_id" : { "key" : "someNestedObject.a.b.c.d" }, "value" : { "types" : [ "Object" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
{ "_id" : { "key" : "someNestedObject.a.b.c.d.e" }, "value" : { "types" : [ "Number" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
$ mongo test --eval "var collection = 'users', maxDepth = 3" variety.js
...
{ "_id" : { "key" : "someNestedObject" }, "value" : { "types" : [ "Object" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
{ "_id" : { "key" : "someNestedObject.a" }, "value" : { "types" : [ "Object" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
{ "_id" : { "key" : "someNestedObject.a.b" }, "value" : { "types" : [ "Object" ] }, "totalOccurrences" : 1, "percentContaining" : 16.66666666666666 }
As you can see, with maxDepth = 3 Variety only traversed three levels deep.
##### "But my dad told me MongoDB is a schemaless database!" ##### ##### "But my dad told me MongoDB is a schemaless database!" #####
First of all, your father is a great guy. Moving on... First of all, your father is a great guy. Moving on...
......
...@@ -12,13 +12,45 @@ Released by Maypop Inc, © 2012, under the MIT License. */ ...@@ -12,13 +12,45 @@ Released by Maypop Inc, © 2012, under the MIT License. */
print("Variety: A MongoDB Schema Analyzer") print("Variety: A MongoDB Schema Analyzer")
print("Version 1.0.1, released 25 May 2012") print("Version 1.0.1, released 25 May 2012")
// Validate the current database before running the analysis.
//   dbs        - names of databases that contain at least one collection
//   emptyDbs   - names of databases that contain no collections
//   collNames  - comma-separated collection names of the current database,
//                used in the error messages further down
// Throws a string (the file's existing error style) when the current
// database is empty or is not among the databases with collections.
var dbs = [];
var emptyDbs = [];
db.adminCommand('listDatabases').databases.forEach(function(d) {
  // Ask for the collection list once per database: a second call doubles the
  // round-trips and could disagree with the first if collections change
  // between the two checks.
  var collectionCount = db.getSisterDB(d.name).getCollectionNames().length;
  if (collectionCount > 0) {
    dbs.push(d.name);
  } else {
    emptyDbs.push(d.name);
  }
});

// Check "empty" first so an existing-but-empty database gets the more
// specific message rather than "does not exist".
if (emptyDbs.indexOf(db.getName()) !== -1) {
  throw "The database specified ("+ db +") is empty.\n"+
        "Possible database options are: " + dbs.join(", ") + ".";
}

if (dbs.indexOf(db.getName()) === -1) {
  throw "The database specified ("+ db +") does not exist.\n"+
        "Possible database options are: " + dbs.join(", ") + ".";
}

var collNames = db.getCollectionNames().join(", ");
if (typeof collection === "undefined") { if (typeof collection === "undefined") {
throw "You have to supply a 'collection' variable, à la --eval 'var collection = \"animals\"'. Please see https://github.com/JamesCropcho/variety for details."; throw "You have to supply a 'collection' variable, à la --eval 'var collection = \"animals\"'.\n"+
"Possible collection options for database specified: " + collNames + ".\n"+
"Please see https://github.com/JamesCropcho/variety for details.";
}
if (db[collection].count() == 0) {
throw "The collection specified (" + collection + ") in the database specified ("+ db +") does not exist or is empty.\n"+
"Possible collection options for database specified: " + collNames + ".";
} }
if (typeof limit === "undefined") { var limit = db[collection].count(); } if (typeof limit === "undefined") { var limit = db[collection].count(); }
print("Using limit of " + limit); print("Using limit of " + limit);
// Fall back to a depth limit of 99 when the caller did not supply maxDepth
// (e.g. via --eval), then report the value that will be used.
var maxDepth = (typeof maxDepth === "undefined") ? 99 : maxDepth;
print("Using maxDepth of " + maxDepth);
varietyCanHaveChildren = function (v) { varietyCanHaveChildren = function (v) {
var isArray = v && var isArray = v &&
typeof v === 'object' && typeof v === 'object' &&
...@@ -32,7 +64,7 @@ varietyCanHaveChildren = function (v) { ...@@ -32,7 +64,7 @@ varietyCanHaveChildren = function (v) {
} }
db.system.js.save( { _id : "varietyCanHaveChildren", value : varietyCanHaveChildren } ); db.system.js.save( { _id : "varietyCanHaveChildren", value : varietyCanHaveChildren } );
varietyMapRecursive = function(parentKey, keys) { varietyMapRecursive = function(parentKey, keys, level) {
for (var key in keys) { for (var key in keys) {
var value = keys[key]; var value = keys[key];
...@@ -40,8 +72,8 @@ varietyMapRecursive = function(parentKey, keys) { ...@@ -40,8 +72,8 @@ varietyMapRecursive = function(parentKey, keys) {
emit({key : key}, {type: varietyTypeOf(value)}); emit({key : key}, {type: varietyTypeOf(value)});
if (varietyCanHaveChildren(value)) { if (level < maxDepth - 1 && varietyCanHaveChildren(value)) {
varietyMapRecursive(key, value); varietyMapRecursive(key, value, level + 1);
} }
} }
} }
...@@ -69,7 +101,14 @@ varietyTypeOf = function(thing) { ...@@ -69,7 +101,14 @@ varietyTypeOf = function(thing) {
return "ObjectId"; return "ObjectId";
} }
else if (thing instanceof BinData) { else if (thing instanceof BinData) {
return "BinData"; var binDataTypes = {};
binDataTypes[0x00] = "generic";
binDataTypes[0x01] = "function";
binDataTypes[0x02] = "old";
binDataTypes[0x03] = "UUID";
binDataTypes[0x05] = "MD5";
binDataTypes[0x80] = "user";
return "BinData-" + binDataTypes[thing.subtype()];
} }
else { else {
return "Object"; return "Object";
...@@ -90,8 +129,8 @@ map = function() { ...@@ -90,8 +129,8 @@ map = function() {
emit({key : key}, {type: varietyTypeOf(value)}); emit({key : key}, {type: varietyTypeOf(value)});
if (varietyCanHaveChildren(value)) { if (varietyCanHaveChildren(value) && maxDepth > 1) {
varietyMapRecursive(key, value); varietyMapRecursive(key, value, 1);
} }
} }
} }
...@@ -117,7 +156,7 @@ db[collection].mapReduce(map, reduce, { ...@@ -117,7 +156,7 @@ db[collection].mapReduce(map, reduce, {
db : "varietyResults"}, db : "varietyResults"},
limit : limit, limit : limit,
sort : {_id: -1}, sort : {_id: -1},
scope : { limit : limit }}); scope : { limit : limit, maxDepth:maxDepth }});
var resultsDB = db.getMongo().getDB("varietyResults"); var resultsDB = db.getMongo().getDB("varietyResults");
...@@ -133,16 +172,17 @@ resultsDB[resultsCollectionName].find({}).forEach(function(key) { ...@@ -133,16 +172,17 @@ resultsDB[resultsCollectionName].find({}).forEach(function(key) {
return; return;
} }
if(!(keyName.match(/\.XX/) && !keyName.match(/\.XX$/))) { if(keyName.match(/\.XX/)) {
// i.e. "Unless the key's value is an array which contains arrays" -JC // exists query checks for embedded values for an array
// ...we do not support totalOccurrences for these keys because it is // ie. match {arr:[{x:1}]} with {"arr.x":{$exists:true}}
// a bit too tricky for a 'version 1'. Perhaps we'll support in the future. -JC // just need to pull out .XX in this case
var existsQuery = {}; keyName = keyName.replace(/.XX/g,"");
existsQuery[keyName] = {$exists: true};
key.totalOccurrences = db[collection].count(existsQuery);
key.percentContaining = (key.totalOccurrences / numDocuments) * 100;
} }
var existsQuery = {};
existsQuery[keyName] = {$exists: true};
key.totalOccurrences = db[collection].count(existsQuery);
key.percentContaining = (key.totalOccurrences / numDocuments) * 100;
resultsDB[resultsCollectionName].save(key); resultsDB[resultsCollectionName].save(key);
}); });
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment