Commit eed2fedf by Wes Freeman

Merge pull request #37 from JacobGH111/master

PR for Jacob's variety
parents 8ac865d4 1927a515
...@@ -63,17 +63,7 @@ print("Using maxDepth of " + maxDepth); ...@@ -63,17 +63,7 @@ print("Using maxDepth of " + maxDepth);
if (typeof sort === "undefined") { var sort = {_id: -1}; } if (typeof sort === "undefined") { var sort = {_id: -1}; }
print("Using sort of " + tojson(sort)); print("Using sort of " + tojson(sort));
/**
 * Reports whether a value can contain nested keys worth descending into.
 *
 * Plain objects and arrays qualify. Primitives, null/undefined, and the
 * leaf-like object types Date, ObjectId and BinData do not.
 * ObjectId and BinData are not defined in this file; they are expected to be
 * globals supplied by the host shell.
 *
 * @param {*} v - value to inspect.
 * @returns {boolean} true when v is a non-null object that is not one of the
 *   special leaf types.
 */
var varietyCanHaveChildren = function (v) {
  // typeof null is 'object', so null has to be excluded explicitly.
  if (v === null || typeof v !== 'object') {
    return false;
  }
  // Arrays are typeof 'object' as well, so no separate array probe is needed.
  var specialObject = v instanceof Date ||
                      v instanceof ObjectId ||
                      v instanceof BinData;
  return !specialObject;
};
varietyTypeOf = function(thing) { varietyTypeOf = function(thing) {
if (typeof thing === "undefined") { throw "varietyTypeOf() requires an argument"; } if (typeof thing === "undefined") { throw "varietyTypeOf() requires an argument"; }
...@@ -112,96 +102,76 @@ varietyTypeOf = function(thing) { ...@@ -112,96 +102,76 @@ varietyTypeOf = function(thing) {
} }
}; };
var addTypeToArray = function(arr, value) { //flattens object keys to 1D. i.e. {'key1':1,{'key2':{'key3':2}}} becomes {'key1':1,'key2.key3':2}
var t = varietyTypeOf(value); //we assume no '.' characters in the keys, which is an OK assumption for MongoDB
var found = false; function serializeDoc(doc, maxDepth){
for(var i=0; i< arr.length; i++) { var result = {};
if(arr[i] === t) {
found = true;
break;
}
}
if(!found) {
arr.push(t);
}
};
var addRecordResult = function(key, value, result) { //determining if an object is a Hash vs Array vs something else is hard
cur = result[key]; function isHash(v) {
if(!cur) { var isArray = Array.isArray(v);
result[key] = {"_id":{"key":key},"value": {"type": varietyTypeOf(value)}, totalOccurrences:1}; var isObject = typeof v === 'object';
} else { var specialObject = v instanceof Date ||
var type = varietyTypeOf(value); v instanceof ObjectId ||
if(cur.value.type !== type) { v instanceof BinData;
cur.value.types = [cur.value.type]; return !specialObject && !isArray && isObject;
delete cur.value.type; };
addTypeToArray(cur.value.types, type);
} else if(!cur.value.type) { function serialize(document, parentKey, maxDepth){
addTypeToArray(cur.value.types, type); for(var key in document){
} //skip over inherited properties such as string, length, etch
result[key] = cur; if(!(document.hasOwnProperty(key)))
} continue
}; var value = document[key];
//objects are skipped here and recursed into later
var mapRecursive = function(parentKey, obj, level, result) { //if(typeof value != "object")
for (var key in obj) { result[parentKey+key] = value;
if(obj.hasOwnProperty(key)) { //it's an object, recurse...only if we haven't reached max depth
var value = obj[key]; if(isHash(value) && (maxDepth > 0)){
key = (parentKey + "." + key).replace(/\.\d+/g,'.XX'); serialize(value, parentKey+key+".",maxDepth-1);
addRecordResult(key, value, result); }
if (level < maxDepth - 1 && varietyCanHaveChildren(value)) { }
mapRecursive(key, value, level + 1, result); }
} serialize(doc, "", maxDepth)
} return result
} }
};
// store results here (no map reduce limit!)
var varietyResults = {};

/**
 * Folds one document's per-key records into the running varietyResults tally.
 *
 * Each entry of `result` is expected to have the shape built by
 * addRecordResult: {_id: {key}, value: {type} or {types: [...]},
 * totalOccurrences}. The first record seen for a key is adopted as-is.
 * Later records add any type not yet recorded and bump totalOccurrences by one.
 * A key keeps a single `value.type` until a second distinct type shows up,
 * at which point it switches to a `value.types` array.
 *
 * @param {Object} result - map of key name to that document's record.
 */
var addVarietyResults = function(result) {
  for (var key in result) {
    if (!result.hasOwnProperty(key)) {
      continue;
    }
    var incoming = result[key];
    var existing = varietyResults[key];
    if (!existing) {
      varietyResults[key] = incoming;
      continue;
    }
    // The type info lives under `.value`; normalise both sides to arrays so
    // single-type and multi-type records merge the same way.
    var known = existing.value.types || [existing.value.type];
    var seen = incoming.value.types || [incoming.value.type];
    for (var i = 0; i < seen.length; i++) {
      if (known.indexOf(seen[i]) === -1) {
        known.push(seen[i]);
      }
    }
    if (known.length > 1) {
      existing.value.types = known;
      delete existing.value.type;
    }
    existing.totalOccurrences++;
  }
};
var interimResults = {} //hold results here until converted to final format
// main cursor // main cursor
db[collection].find(query).sort(sort).limit(limit).forEach(function(obj) { db[collection].find(query).sort(sort).limit(limit).forEach(function(obj) {
var recordResult = {}; //printjson(obj)
for (var key in obj) { flattened = serializeDoc(obj, maxDepth);
if(obj.hasOwnProperty(key)) { //printjson(flattened)
var value = obj[key]; for (key in flattened){
addRecordResult(key, value, recordResult); var value = flattened[key];
if (maxDepth > 1 && varietyCanHaveChildren(value)) { var valueType = varietyTypeOf(value);
mapRecursive(key, value, 1, recordResult); if(!(key in interimResults)){ //if it's a new key we haven't seen yet
} //for the moment, store 'types' as a dictionary. An easy way to prevent duplicates
var newEntry = {'types':{},'totalOccurrences':1};
newEntry['types'][valueType] = true;
interimResults[key] = newEntry;
}
else{ //we've seen this key before
interimResults[key]['types'][valueType] = true;
interimResults[key]['totalOccurrences']++;
} }
} }
addVarietyResults(recordResult);
}); });
// Reshape the interim tallies into the final per-key result documents.
// The 'types' dictionary collected per key is flattened to a list of its keys,
// and percentContaining is the occurrence count scaled by 100 over `limit`.
var varietyResults = {};
for(key in interimResults){
  var tally = interimResults[key];
  varietyResults[key] = {
    '_id': {'key': key},
    'value': {'types': Object.keys(tally['types'])},
    'totalOccurrences': tally['totalOccurrences'],
    'percentContaining': tally['totalOccurrences']*100/limit
  };
}
var resultsDB = db.getMongo().getDB("varietyResults"); var resultsDB = db.getMongo().getDB("varietyResults");
var resultsCollectionName = collection + "Keys"; var resultsCollectionName = collection + "Keys";
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment