Commit 512f0fdc by James Cropcho

well, that's pretty inefficient, but I have a better idea...

parent 7b6fc8c5
// Input validation for the script's shell-supplied variables:
//   - `collection` must be defined, otherwise the script throws.
//   - `limit` defaults to the document count of db[collection] when it is not defined.
// NOTE(review): this row looks like a side-by-side diff rendering, so `use popplet;` and the
// `if` presumably belong to different revisions of the script — confirm against the repo.
use popplet; if (typeof collection == "undefined") {
// NOTE(review): the example in the message sets `limit`, not `collection` — confirm intended.
throw "You have to supply a 'collection' variable, a la \"--eval 'var limit = 10'\"";
}
if (typeof limit == "undefined") { limit = db[collection].count(); }
// Report the limit that will be used below (mapReduce `limit` option and finalize's divisor).
print("Using limit of " + limit);
// NOTE(review): these rows appear to come from a side-by-side diff view. Presumably the
// `canHaveChildren` header and the parenthesised `isArray` expression are the previous
// revision, while `alreadyEmitted` is the newer revision's array of {key, type} entries
// (schemaAnalyzerMapRecursive pushes onto it and checks it before emitting) — confirm.
canHaveChildren = function (v) { var alreadyEmitted = new Array(); // global
// Array test: a truthy object whose `length` is a number and is not enumerable.
var isArray = ( v &&
typeof v === 'object' &&
typeof v.length === 'number' &&
!(v.propertyIsEnumerable('length')));
/**
 * Reports whether an entry with the same `key` and `type` as `toBeEmitted`
 * is already contained in `alreadyEmitted`.
 *
 * The previous implementation used `for (key in alreadyEmitted)`, which walks the
 * array's index names ("0", "1", ...) rather than its elements, so the comparison
 * never matched and every entry was treated as new. It also leaked `key` as an
 * implicit global. This version compares against the stored elements themselves.
 *
 * @param {{key: string, type: string}} toBeEmitted - Candidate entry about to be emitted.
 * @param {Array<{key: string, type: string}>} alreadyEmitted - Entries emitted so far.
 * @returns {boolean} true when a matching entry exists, false otherwise
 *     (including when `alreadyEmitted` is empty or not supplied).
 */
var schemaAnalyzerKeyIsPresent = function(toBeEmitted, alreadyEmitted) {
  // A missing list simply means nothing has been emitted yet.
  if (!alreadyEmitted) {
    return false;
  }
  // Plain indexed loop: this function is stored in db.system.js and run by the
  // database's JavaScript engine, so it sticks to ES5-era constructs.
  for (var i = 0; i < alreadyEmitted.length; i++) {
    var emitted = alreadyEmitted[i];
    if (emitted &&
        emitted["key"] === toBeEmitted["key"] &&
        emitted["type"] === toBeEmitted["type"]) {
      return true;
    }
  }
  return false;
};
// Save the duplicate-check helper into db.system.js under _id "schemaAnalyzerKeyIsPresent".
db.system.js.save( { _id : "schemaAnalyzerKeyIsPresent", value : schemaAnalyzerKeyIsPresent } );
// schemaAnalyzerCanHaveChildren(v): used by the mappers to decide whether to descend into v.
// The result is `isArray || isObject`; because isObject is true for every value whose typeof
// is 'object', this covers arrays, plain objects and also null (typeof null === 'object').
schemaAnalyzerCanHaveChildren = function (v) {
// Array test: a truthy object whose `length` is a number and is not enumerable.
var isArray = v &&
typeof v === 'object' &&
typeof v.length === 'number' &&
!(v.propertyIsEnumerable('length'));
// NOTE(review): the statements on the next rows are duplicated on each row, presumably the
// old and new diff columns rendered side by side — confirm against the repo.
var isObject = typeof v === 'object'; var isObject = typeof v === 'object';
return isArray || isObject; return isArray || isObject;
} }
// Save the helper into db.system.js (both the "canHaveChildren" and the
// "schemaAnalyzerCanHaveChildren" _id appear on this row).
db.system.js.save( { _id : "canHaveChildren", value : canHaveChildren } ); db.system.js.save( { _id : "schemaAnalyzerCanHaveChildren", value : schemaAnalyzerCanHaveChildren } );
// Recursive half of the map step. For every property of `keys` it builds a dotted path from
// `parentKey`, emits a {key, type} pair with {occurrences: 1}, and recurses into values that
// the can-have-children helper accepts.
// NOTE(review): each row seems to hold two revisions side by side (`mapRecursive` and
// `schemaAnalyzerMapRecursive`) — presumably old and new diff columns; confirm.
// NOTE(review): in the schemaAnalyzer* variant `alreadyEmitted` is neither a parameter nor a
// local, so it resolves to whatever `alreadyEmitted` is visible where the function runs, and
// `toBeEmitted` is assigned without `var`.
mapRecursive = function(parentKey, keys){ schemaAnalyzerMapRecursive = function(parentKey, keys) {
for (var key in keys) { for (var key in keys) {
var value = keys[key]; var value = keys[key];
// Digits are replaced with XX (first statement: any digit run in the property name; second
// statement: digit runs that follow a dot in the full dotted path).
key = key.replace(/\d+/g,'XX'); key = (parentKey + "." + key).replace(/\.\d+/g,'.XX');
emit({"key": parentKey + "." + key, "typee": typeof value}, {occurrences: 1}); toBeEmitted = {key: key, type: typeof value};
// Emit only when no entry with the same key and type was recorded in alreadyEmitted.
if(!schemaAnalyzerKeyIsPresent(toBeEmitted,alreadyEmitted)) {
emit(toBeEmitted, {occurrences: 1});
alreadyEmitted.push(toBeEmitted);
}
if (canHaveChildren(value)) { if (schemaAnalyzerCanHaveChildren(value)) {
mapRecursive(parentKey + "." + key, value); schemaAnalyzerMapRecursive(key, value);
} }
} }
} }
// Save the recursive mapper into db.system.js.
db.system.js.save({_id: "mapRecursive", value: mapRecursive}); db.system.js.save({_id: "schemaAnalyzerMapRecursive", value: schemaAnalyzerMapRecursive});
// Map function passed to mapReduce. `this` is the current document; each top-level property
// is emitted as a {key, type} pair with {occurrences: 1}, and values accepted by the
// can-have-children helper are handed to the recursive mapper.
// NOTE(review): rows hold two revisions side by side, presumably old and new diff columns.
map = function() { map = function() {
var keys = this; var keys = this;
// NOTE(review): `var` makes this array local to map(), so the free `alreadyEmitted` read
// inside schemaAnalyzerMapRecursive is a different variable — confirm this is intended.
var alreadyEmitted = new Array(); // reset global variable
for (var key in keys) { for (var key in keys) {
var value = keys[key]; var value = keys[key];
key = key.replace(/\d+/g,'XX'); key = key.replace(/\.\d+/g,'.XX');
emit({"key": key, "typee": typeof value}, {occurrences: 1}); toBeEmitted = {key : key, type : typeof value};
// Emit only when no entry with the same key and type was recorded for this document.
if(!schemaAnalyzerKeyIsPresent(toBeEmitted,alreadyEmitted)) {
emit(toBeEmitted, {occurrences: 1});
alreadyEmitted.push(toBeEmitted);
}
if (canHaveChildren(value)) { if (schemaAnalyzerCanHaveChildren(value)) {
mapRecursive(key, value); schemaAnalyzerMapRecursive(key, value);
} }
} }
} }
...@@ -52,16 +79,25 @@ reduce = function(key, values){ ...@@ -52,16 +79,25 @@ reduce = function(key, values){
} }
// Finalize step: adds a `percentage` field to the reduced value, computed as
// occurrences / divisor * 100, and returns the value. The divisor is the constant 1000 in the
// first statement on the row and `limit` in the second (presumably old vs. new diff column).
finalize = function(key, value) { finalize = function(key, value) {
value.percentage = value.occurrences / 1000 * 100.0; value.percentage = value.occurrences / limit * 100.0;
return value; return value;
} }
// Run the map-reduce job and show its output.
// NOTE(review): rows down to `db.usersKeyNames.find()` hold two revisions side by side
// (presumably old and new diff columns): one works on the hard-coded `users` collection with
// output in `usersKeyNames`; the other works on db[collection], writes to
// <collection>KeyNames in the "schemaAnalyzerResults" database, and passes `limit` both as
// the mapReduce limit and through `scope`.
db.usersKeyNames.drop(); var resultsCollectionName = collection + "KeyNames";
db.users.mapReduce(map, reduce, { finalize: finalize,
out: 'usersKeyNames', db[collection].mapReduce(map, reduce, { finalize: finalize,
verbose: true, out: {
limit: 1000, replace : resultsCollectionName,
sort: {_id: -1}}); db : "schemaAnalyzerResults"},
db.usersKeyNames.find() limit : limit,
sort : {_id: -1},
scope : { limit : limit }});
// Read the results back from the "schemaAnalyzerResults" database, sorted by
// value.percentage in descending order, and print each result document via tojson().
var resultsDB = db.getMongo().getDB("schemaAnalyzerResults");
var keyNames = resultsDB[resultsCollectionName].find().sort({ "value.percentage": -1});
keyNames.forEach(function(keyName) {
print(tojson(keyName, '', true));
});
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment