Commit a3e9f980 by Wes Freeman

Merge pull request #24 from rugbyhead/master

Ability to pass a query to filter documents before analysis
parents 3b74a2d3 8f6f3c44
...@@ -86,6 +86,14 @@ The default will traverse all the way to the bottom of that structure: ...@@ -86,6 +86,14 @@ The default will traverse all the way to the bottom of that structure:
As you can see, variety only traversed three levels deep. As you can see, variety only traversed three levels deep.
### Analyze a subset of Documents ###
Perhaps you have a large collection, or you only care about some subset of the documents.
One can apply a "query" constraint, which takes a standard Mongo query object, to filter the set of documents before analysis.
$ mongo test --eval "var collection = 'users', query = {'caredAbout':true}" variety.js
##### "But my dad told me MongoDB is a schemaless database!" ##### ##### "But my dad told me MongoDB is a schemaless database!" #####
First of all, your father is a great guy. Moving on... First of all, your father is a great guy. Moving on...
......
...@@ -51,7 +51,10 @@ if (db[collection].count() == 0) { ...@@ -51,7 +51,10 @@ if (db[collection].count() == 0) {
"Possible collection options for database specified: " + collNames + "."; "Possible collection options for database specified: " + collNames + ".";
} }
if (typeof limit === "undefined") { var limit = db[collection].count(); } if (typeof query === "undefined") { var query = {}; }
print("Using query of " + query.toSource());
if (typeof limit === "undefined") { var limit = db[collection].find(query).count(); }
print("Using limit of " + limit); print("Using limit of " + limit);
if (typeof maxDepth === "undefined") { var maxDepth = 99; } if (typeof maxDepth === "undefined") { var maxDepth = 99; }
...@@ -182,7 +185,7 @@ var addVarietyResults = function(result) { ...@@ -182,7 +185,7 @@ var addVarietyResults = function(result) {
} }
// main cursor // main cursor
db[collection].find().sort({_id: -1}).limit(limit).forEach(function(obj) { db[collection].find(query).sort({_id: -1}).limit(limit).forEach(function(obj) {
var recordResult = {}; var recordResult = {};
for (var key in obj) { for (var key in obj) {
if(obj.hasOwnProperty(key)) { if(obj.hasOwnProperty(key)) {
...@@ -229,7 +232,7 @@ resultsDB[resultsCollectionName].find({}).forEach(function(key) { ...@@ -229,7 +232,7 @@ resultsDB[resultsCollectionName].find({}).forEach(function(key) {
if(limit < numDocuments) { if(limit < numDocuments) {
var existsQuery = {}; var existsQuery = {};
existsQuery[keyName] = {$exists: true}; existsQuery[keyName] = {$exists: true};
key.totalOccurrences = db[collection].count(existsQuery); key.totalOccurrences = db[collection].find(query).count(existsQuery);
} }
key.percentContaining = (key.totalOccurrences / numDocuments) * 100.0; key.percentContaining = (key.totalOccurrences / numDocuments) * 100.0;
resultsDB[resultsCollectionName].save(key); resultsDB[resultsCollectionName].save(key);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment