Commit 634e1d16 by predictive

secondary reads, enumerate multiple types

parent 29d5e3ae
......@@ -28,23 +28,23 @@ So, let's see what we've got here:
$ mongo test --eval "var collection = 'users'" variety.js
+------------------------------------------------------------+
+------------------------------------------------------------------+
| key | types | occurrences | percents |
| ------------------ | ------------ | ----------- | -------- |
| _id | ObjectId | 5 | 100.0 |
| name | String | 5 | 100.0 |
| bio | String | 3 | 60.0 |
| birthday | String | 2 | 40.0 |
| pets | Array,String | 2 | 40.0 |
| pets | Array(4),String(1) | 5 | 40.0 |
| someBinData | BinData-old | 1 | 20.0 |
| someWeirdLegacyKey | String | 1 | 20.0 |
+------------------------------------------------------------+
+------------------------------------------------------------------+
_("test" is the database containing the collection we are analyzing.)_
Hmm. Looks like everybody has a "name" and "_id". Most, but not all have a "bio".
Interestingly, it looks like "pets" can be either an array or a string. Will this cause any problems in the application, I wonder?
Interestingly, it looks like "pets" can be either an array or a string, but there are more arrays than strings. Will this cause any problems in the application, I wonder?
Seems like the first document created has a weird legacy key—those damn fools who built the prototype didn't clean up after themselves. If there were a thousand such early documents, I might cross-reference the codebase to confirm they are no longer used, and then delete them all. That way they'll not confuse any future developers.
......@@ -148,6 +148,12 @@ Variety can also read that option and mute unnecessary output. This is useful in
$ mongo test --quiet --eval "var collection = 'users', sort = { updated_at : -1 }" variety.js
#### Secondary Reads ####
Analyzing a large collection on a busy replica set primary could take a lot longer than reading from a secondary. To read from a secondary instead, we have to tell MongoDB it's okay to perform secondary reads
by setting the ```slaveOk``` property to ```true```:
$ mongo test --quiet --eval "var collection = 'users', slaveOk = true" variety.js
### Save Results in MongoDB For Future Use ###
By default, Variety prints results only to standard output and does not store them in MongoDB itself. If you want to persist them automatically in the database for later use, you can set the parameter ```persistResults```.
Variety then stores the result documents in the database ```varietyResults```, and the collection name is derived from the source collection's name.
......
......@@ -23,6 +23,12 @@ log('Version 1.5.0, released 14 May 2015');
var dbs = [];
var emptyDbs = [];
// Secondary reads are opt-in: enable them on the connection only when the
// caller explicitly supplied `slaveOk = true` (e.g. through --eval).
// The typeof check keeps an undeclared `slaveOk` from raising a ReferenceError.
if (typeof slaveOk !== 'undefined' && slaveOk === true) {
  db.getMongo().setSlaveOk();
}
var knownDatabases = db.adminCommand('listDatabases').databases;
if(typeof knownDatabases !== 'undefined') { // not authorized user receives error response (json) without databases key
knownDatabases.forEach(function(d){
......@@ -214,23 +220,33 @@ var mergeDocument = function(docResult, interimResults) {
for (var key in docResult) {
if(key in interimResults) {
var existing = interimResults[key];
for(var type in docResult[key]) {
existing.types[type] = true;
if (type in existing.types) {
existing.types[type] = existing.types[type] + 1;
} else {
existing.types[type] = 1;
}
}
existing.totalOccurrences = existing.totalOccurrences + 1;
} else {
interimResults[key] = {'types':docResult[key],'totalOccurrences':1};
var types = {};
for (var newType in docResult[key]) {
types[newType] = 1;
}
interimResults[key] = {'types': types,'totalOccurrences':1};
}
}
};
var convertResults = function(interimResults, documentsCount) {
/**
 * Returns a shallow copy of `obj`: a new plain object carrying every
 * enumerable property of `obj` with the same value.
 *
 * Note that, despite the name, the result is a key -> value map and not an
 * array of key names, so it is not sorted.
 *
 * @param {Object} obj - object whose enumerable properties are copied.
 * @returns {Object} a new object with the same key/value pairs as `obj`.
 */
var getKeys = function(obj) {
  var keys = {};
  // for...in is kept (rather than Object.keys) so that the set of copied
  // properties matches what the loop has always visited.
  for (var key in obj) {
    keys[key] = obj[key];
  }
  return keys;
};
var varietyResults = [];
//now convert the interimResults into the proper format
......@@ -301,12 +317,24 @@ var createAsciiTable = function(results) {
var maxDigits = varietyResults.map(function(value){return significantDigits(value.percentContaining);}).reduce(function(acc,val){return acc>val?acc:val;});
var rows = results.map(function(row) {
return [row._id.key, row.value.types, row.totalOccurrences, row.percentContaining.toFixed(maxDigits)];
var types = [];
var typeKeys = Object.keys(row.value.types);
if (typeKeys.length > 1) {
for (var type in row.value.types) {
var typestring = type + ' (' + row.value.types[type] + ')';
types.push(typestring);
}
} else {
types = typeKeys;
}
return [row._id.key, types, row.totalOccurrences, row.percentContaining.toFixed(maxDigits)];
});
var table = [headers, headers.map(function(){return '';})].concat(rows);
var colMaxWidth = function(arr, index) {return Math.max.apply(null, arr.map(function(row){return row[index].toString().length;}));};
var pad = function(width, string, symbol) { return width <= string.length ? string : pad(width, isNaN(string) ? string + symbol : symbol + string, symbol); };
table = table.map(function(row, ri){
//return '| ' + row.map(function(cell, i) {return pad(colMaxWidth(table, i), cell.toString(), ri === 1 ? '-' : ' ');}).join(' | ') + ' |';
return '| ' + row.map(function(cell, i) {return pad(colMaxWidth(table, i), cell.toString(), ri === 1 ? '-' : ' ');}).join(' | ') + ' |';
});
var border = '+' + pad(table[0].length - 2, '', '-') + '+';
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment