Commit 7dc3c73c by Tomas Dvorak

Merge branch 'TimLudwinski-master'

parents ec8a9b77 b70f3e90
...@@ -153,6 +153,29 @@ Sometimes you want to see the keys and types come in as it happens. Maybe you h ...@@ -153,6 +153,29 @@ Sometimes you want to see the keys and types come in as it happens. Maybe you h
$ mongo test --eval "var collection = 'users', sort = { updated_at : -1 }, logKeysContinuously = true" variety.js $ mongo test --eval "var collection = 'users', sort = { updated_at : -1 }, logKeysContinuously = true" variety.js
#### Exclude Subkeys ####
Sometimes you inherit a database full of junk. Maybe the previous developer stored data in the document keys themselves, which causes Variety to run out of memory. After you've run Variety with the `logKeysContinuously` option to figure out which subkeys may be a problem, you can use the `excludeSubkeys` option to run Variety without descending into those subkeys.
db.users.insert({name:"Walter", someNestedObject:{a:{b:{c:{d:{e:1}}}}}, otherNestedObject:{a:{b:{c:{d:{e:1}}}}}});
$ mongo test --eval "var collection = 'users', sort = { updated_at : -1 }, excludeSubkeys = [ 'someNestedObject.a.b' ]" variety.js
+-----------------------------------------------------------------+
| key | types | occurrences | percents |
| --------------------------- | -------- | ----------- | -------- |
| _id | ObjectId | 1 | 100.0 |
| name | String | 1 | 100.0 |
| someNestedObject | Object | 1 | 100.0 |
| someNestedObject.a | Object | 1 | 100.0 |
| someNestedObject.a.b | Object | 1 | 100.0 |
| otherNestedObject | Object | 1 | 100.0 |
| otherNestedObject.a | Object | 1 | 100.0 |
| otherNestedObject.a.b | Object | 1 | 100.0 |
| otherNestedObject.a.b.c | Object | 1 | 100.0 |
| otherNestedObject.a.b.c.d | Object | 1 | 100.0 |
| otherNestedObject.a.b.c.d.e | Number | 1 | 100.0 |
+-----------------------------------------------------------------+
#### Secondary Reads #### #### Secondary Reads ####
Analyzing a large collection on a busy replica set primary could take a lot longer than if you read from a secondary. To do so, we have to tell MongoDB it's okay to perform secondary reads Analyzing a large collection on a busy replica set primary could take a lot longer than if you read from a secondary. To do so, we have to tell MongoDB it's okay to perform secondary reads
by setting the ```slaveOk``` property to ```true```: by setting the ```slaveOk``` property to ```true```:
...@@ -177,6 +200,11 @@ To persist to an alternate MongoDB database, you may specify the following param ...@@ -177,6 +200,11 @@ To persist to an alternate MongoDB database, you may specify the following param
$ mongo test --quiet --eval "var collection = 'users', persistResults=true, resultsDatabase='db.example.com/variety'" variety.js $ mongo test --quiet --eval "var collection = 'users', persistResults=true, resultsDatabase='db.example.com/variety'" variety.js
``` ```
### Reserved Keys ###
Variety expects keys to be well formed, i.e. not to contain any '.' characters (mongo 2.4 allows dots in certain cases). Variety also uses the pseudo key 'XX' and keys corresponding to the regex 'XX\d+XX.*' internally to represent array elements. If these conflict with keys in your database, you can change the string XX in these patterns to whatever you like using the `arrayEscape` parameter.
$ mongo test --quiet --eval "var collection = 'users', arrayEscape = 'YY'" variety.js
### Command Line Interface ### Command Line Interface
Variety itself is command line friendly, as shown on examples above. Variety itself is command line friendly, as shown on examples above.
But if you are a NPM and Node.js user, you could prefer the But if you are a NPM and Node.js user, you could prefer the
......
...@@ -83,6 +83,12 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */ ...@@ -83,6 +83,12 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */
read('resultsUser', null); read('resultsUser', null);
read('resultsPass', null); read('resultsPass', null);
read('logKeysContinuously', false); read('logKeysContinuously', false);
read('excludeSubkeys', []);
read('arrayEscape', 'XX');
//Translate excludeSubkeys to set like object... using an object for compatibility...
config.excludeSubkeys = config.excludeSubkeys.reduce(function (result, item) { result[item+'.'] = true; return result; }, {});
return config; return config;
}; };
...@@ -167,7 +173,7 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */ ...@@ -167,7 +173,7 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */
//flattens object keys to 1D. i.e. {'key1':1,{'key2':{'key3':2}}} becomes {'key1':1,'key2.key3':2} //flattens object keys to 1D. i.e. {'key1':1,{'key2':{'key3':2}}} becomes {'key1':1,'key2.key3':2}
//we assume no '.' characters in the keys, which is an OK assumption for MongoDB //we assume no '.' characters in the keys, which is an OK assumption for MongoDB
var serializeDoc = function(doc, maxDepth) { var serializeDoc = function(doc, maxDepth, excludeSubkeys) {
var result = {}; var result = {};
//determining if an object is a Hash vs Array vs something else is hard //determining if an object is a Hash vs Array vs something else is hard
...@@ -180,20 +186,24 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */ ...@@ -180,20 +186,24 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */
v instanceof BinData; v instanceof BinData;
return !specialObject && (isArray || isObject); return !specialObject && (isArray || isObject);
} }
function serialize(document, parentKey, maxDepth){ var arrayRegex = new RegExp('\\.' + config.arrayEscape + '\\d+' + config.arrayEscape + '\\.', 'g');
for(var key in document){
function serialize(document, parentKey, maxDepth) {
if(Object.prototype.hasOwnProperty.call(excludeSubkeys, parentKey.replace(arrayRegex, '.')))
return;
for(var key in document) {
//skip over inherited properties such as string, length, etch //skip over inherited properties such as string, length, etch
if(!document.hasOwnProperty(key)) { if(!document.hasOwnProperty(key)) {
continue; continue;
} }
var value = document[key]; var value = document[key];
//objects are skipped here and recursed into later if(Array.isArray(document))
//if(typeof value != 'object') key = config.arrayEscape + key + config.arrayEscape; //translate unnamed object key from {_parent_name_}.{_index_} to {_parent_name_}.arrayEscape{_index_}arrayEscape.
result[parentKey+key] = value; result[parentKey+key] = value;
//it's an object, recurse...only if we haven't reached max depth //it's an object, recurse...only if we haven't reached max depth
if(isHash(value) && maxDepth > 1) { if(isHash(value) && maxDepth > 1) {
serialize(value, parentKey+key+'.',maxDepth-1); serialize(value, parentKey+key+'.', maxDepth-1);
} }
} }
} }
...@@ -204,10 +214,10 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */ ...@@ -204,10 +214,10 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */
// convert document to key-value map, where value is always an array with types as plain strings // convert document to key-value map, where value is always an array with types as plain strings
var analyseDocument = function(document) { var analyseDocument = function(document) {
var result = {}; var result = {};
var arrayRegex = new RegExp('\\.' + config.arrayEscape + '\\d+' + config.arrayEscape, 'g');
for (var key in document) { for (var key in document) {
var value = document[key]; var value = document[key];
//translate unnamed object key from {_parent_name_}.{_index_} to {_parent_name_}.XX key = key.replace(arrayRegex, '.' + config.arrayEscape);
key = key.replace(/\.\d+/g,'.XX');
if(typeof result[key] === 'undefined') { if(typeof result[key] === 'undefined') {
result[key] = {}; result[key] = {};
} }
...@@ -271,15 +281,16 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */ ...@@ -271,15 +281,16 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */
// Merge the keys and types of current object into accumulator object // Merge the keys and types of current object into accumulator object
var reduceDocuments = function(accumulator, object) { var reduceDocuments = function(accumulator, object) {
var docResult = analyseDocument(serializeDoc(object, config.maxDepth)); var docResult = analyseDocument(serializeDoc(object, config.maxDepth, config.excludeSubkeys));
mergeDocument(docResult, accumulator); mergeDocument(docResult, accumulator);
return accumulator; return accumulator;
}; };
// We throw away keys which end in an array index, since they are not useful // We throw away keys which end in an array index, since they are not useful
// for our analysis. (We still keep the key of their parent array, though.) -JC // for our analysis. (We still keep the key of their parent array, though.) -JC
var arrayRegex = new RegExp('\\.' + config.arrayEscape + '$', 'g');
var filter = function(item) { var filter = function(item) {
return !item._id.key.match(/\.XX$/); return !item._id.key.match(arrayRegex);
}; };
// sort desc by totalOccurrences or by key asc if occurrences equal // sort desc by totalOccurrences or by key asc if occurrences equal
...@@ -347,7 +358,7 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */ ...@@ -347,7 +358,7 @@ Released by Maypop Inc, © 2012-2016, under the MIT License. */
types = typeKeys; types = typeKeys;
} }
return [row._id.key, types, row.totalOccurrences, row.percentContaining.toFixed(maxDigits)]; return [row._id.key, types, row.totalOccurrences, row.percentContaining.toFixed(Math.min(maxDigits, 20))];
}); });
var table = [headers, headers.map(function(){return '';})].concat(rows); var table = [headers, headers.map(function(){return '';})].concat(rows);
var colMaxWidth = function(arr, index) {return Math.max.apply(null, arr.map(function(row){return row[index].toString().length;}));}; var colMaxWidth = function(arr, index) {return Math.max.apply(null, arr.map(function(row){return row[index].toString().length;}));};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment