Commit 18ec4c82 by Tomas Dvorak

Merge pull request #104 from objectrocket/master

Enumerate type counts for multiple types; enable secondary reads
parents 29d5e3ae 2313e1c2
......@@ -7,7 +7,7 @@ This lightweight tool helps you get a sense of your application's schema, as wel
_“I happen to slowly be falling in love with Variety! It is actually one of the most useful tools to get a sense for a messy/unknown data set, and I have put it in a few of our exercises at Zipfian Academy.”_
Jon Dinu
Jon Dinu
_Co-founder of [Zipfian Academy](http://www.zipfianacademy.com/)_
***
......@@ -28,23 +28,23 @@ So, let's see what we've got here:
$ mongo test --eval "var collection = 'users'" variety.js
+------------------------------------------------------------+
| key | types | occurrences | percents |
| ------------------ | ------------ | ----------- | -------- |
| _id | ObjectId | 5 | 100.0 |
| name | String | 5 | 100.0 |
| bio | String | 3 | 60.0 |
| birthday | String | 2 | 40.0 |
| pets | Array,String | 2 | 40.0 |
| someBinData | BinData-old | 1 | 20.0 |
| someWeirdLegacyKey | String | 1 | 20.0 |
+------------------------------------------------------------+
+------------------------------------------------------------------+
| key | types | occurrences | percents |
| ------------------ | ------------ | ----------- | -------- |
| _id | ObjectId | 5 | 100.0 |
| name | String | 5 | 100.0 |
| bio | String | 3 | 60.0 |
| birthday | String | 2 | 40.0 |
| pets | Array(1),String(1) | 2 | 40.0 |
| someBinData | BinData-old | 1 | 20.0 |
| someWeirdLegacyKey | String | 1 | 20.0 |
+------------------------------------------------------------------+
_("test" is the database containing the collection we are analyzing.)_
Hmm. Looks like everybody has a "name" and "_id". Most, but not all have a "bio".
Interestingly, it looks like "pets" can be either an array or a string. Will this cause any problems in the application, I wonder?
Interestingly, it looks like "pets" can be either an array or a string, and the per-type counts show one document of each. Will this cause any problems in the application, I wonder?
Seems like the first document created has a weird legacy key—those damn fools who built the prototype didn't clean up after themselves. If there were a thousand such early documents, I might cross-reference the codebase to confirm they are no longer used, and then delete them all. That way they'll not confuse any future developers.
......@@ -148,6 +148,12 @@ Variety can also read that option and mute unnecessary output. This is useful in
$ mongo test --quiet --eval "var collection = 'users', sort = { updated_at : -1 }" variety.js
#### Secondary Reads ####
Analyzing a large collection on a busy replica set primary could take a lot longer than if you read from a secondary. To read from a secondary, we have to tell MongoDB it's okay to perform secondary reads
by setting the ```slaveOk``` property to ```true```:
$ mongo secondary.replicaset.member:31337/somedb --eval "var collection = 'users', slaveOk = true" variety.js
### Save Results in MongoDB For Future Use ###
By default, Variety prints results only to standard output and does not store them in MongoDB itself. If you want to persist them automatically in the database for later use, you can set the parameter ```persistResults```.
Variety then stores the result documents in the database ```varietyResults```, and the collection name is derived from the source collection's name.
......
......@@ -23,6 +23,12 @@ log('Version 1.5.0, released 14 May 2015');
var dbs = [];
var emptyDbs = [];
// Secondary reads are opt-in: only when the caller defined `slaveOk` (e.g. via
// --eval) and set it to exactly `true` do we allow reads on this connection
// to be served by a replica set secondary.
if (typeof slaveOk !== 'undefined' && slaveOk === true) {
  db.getMongo().setSlaveOk();
}
var knownDatabases = db.adminCommand('listDatabases').databases;
if(typeof knownDatabases !== 'undefined') { // not authorized user receives error response (json) without databases key
knownDatabases.forEach(function(d){
......@@ -214,23 +220,33 @@ var mergeDocument = function(docResult, interimResults) {
for (var key in docResult) {
if(key in interimResults) {
var existing = interimResults[key];
for(var type in docResult[key]) {
existing.types[type] = true;
if (type in existing.types) {
existing.types[type] = existing.types[type] + 1;
} else {
existing.types[type] = 1;
}
}
existing.totalOccurrences = existing.totalOccurrences + 1;
} else {
interimResults[key] = {'types':docResult[key],'totalOccurrences':1};
var types = {};
for (var newType in docResult[key]) {
types[newType] = 1;
}
interimResults[key] = {'types': types,'totalOccurrences':1};
}
}
};
var convertResults = function(interimResults, documentsCount) {
var getKeys = function(obj) {
var keys = [];
var keys = {};
for(var key in obj) {
keys.push(key);
keys[key] = obj[key];
}
return keys.sort();
return keys;
//return keys.sort();
};
var varietyResults = [];
//now convert the interimResults into the proper format
......@@ -301,7 +317,18 @@ var createAsciiTable = function(results) {
var maxDigits = varietyResults.map(function(value){return significantDigits(value.percentContaining);}).reduce(function(acc,val){return acc>val?acc:val;});
var rows = results.map(function(row) {
return [row._id.key, row.value.types, row.totalOccurrences, row.percentContaining.toFixed(maxDigits)];
var types = [];
var typeKeys = Object.keys(row.value.types);
if (typeKeys.length > 1) {
for (var type in row.value.types) {
var typestring = type + ' (' + row.value.types[type] + ')';
types.push(typestring);
}
} else {
types = typeKeys;
}
return [row._id.key, types, row.totalOccurrences, row.percentContaining.toFixed(maxDigits)];
});
var table = [headers, headers.map(function(){return '';})].concat(rows);
var colMaxWidth = function(arr, index) {return Math.max.apply(null, arr.map(function(row){return row[index].toString().length;}));};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment