Commit a24d38c8 by Tomas Dvorak

Code refactoring: processing pipe instead of forEach cycles

parent dd9b069a
...@@ -195,57 +195,50 @@ var analyseDocument = function(document) { ...@@ -195,57 +195,50 @@ var analyseDocument = function(document) {
return result; return result;
}; };
// Per-key aggregates collected while scanning; reshaped into the output format later.
var interimResults = {};
// Counts every document the cursor below hands to the callback.
var numDocuments = 0;
// main cursor: visit each matching document (sorted, limited) and fold its keys in
db[collection].find($query).sort($sort).limit($limit).forEach(function(obj) {
  var docResult = analyseDocument(serializeDoc(obj, $maxDepth));
  for (var key in docResult) {
    var seenTypes = docResult[key];
    // first sighting of this key: start a fresh aggregate
    if (!(key in interimResults)) {
      interimResults[key] = {'types': seenTypes, 'totalOccurrences': 1};
      continue;
    }
    // key already known: merge the types and bump the occurrence counter
    var previous = interimResults[key];
    interimResults[key] = {
      'types': mergeArrays(seenTypes, previous.types),
      'totalOccurrences': previous.totalOccurrences + 1
    };
  }
  numDocuments++;
});
// Turn the per-key aggregates in interimResults into a list of result records:
// {_id: {key}, value: {types}, totalOccurrences, percentContaining}.
// percentContaining is computed here against $limit.
var varietyResults = [];
for(var key in interimResults){
  var entry = interimResults[key];
  var newEntry = {
    '_id': {'key': key},
    'value': {'types': entry.types},
    'totalOccurrences': entry.totalOccurrences,
    'percentContaining': entry.totalOccurrences*100/$limit
  };
  varietyResults.push(newEntry);
}
// We throw away keys which end in an array index, since they are not useful // We throw away keys which end in an array index, since they are not useful
// for our analysis. (We still keep the key of their parent array, though.) -JC // for our analysis. (We still keep the key of their parent array, though.) -JC
// Predicate passed to Array.prototype.filter on the result records:
// yields true when item._id.key does NOT end in ".XX", false otherwise.
// NOTE(review): ".XX" is presumably the placeholder used for array indexes in
// compound keys — confirm against serializeDoc.
// NOTE(review): every line below appears twice because it is unchanged context
// in a side-by-side diff view.
var filter = function(item) { var filter = function(item) {
return !item._id.key.match(/\.XX$/); return !item._id.key.match(/\.XX$/);
}; };
// Post-processing step applied to every result record (mutates and returns it).
//  - When $limit is smaller than numDocuments, totalOccurrences is overwritten
//    with the number of documents matching $query in the collection.
//  - percentContaining is then (re)computed as totalOccurrences relative to
//    numDocuments, expressed in percent.
var map = function(item) {
  var limitBelowDocCount = $limit < numDocuments;
  if (limitBelowDocCount) {
    item.totalOccurrences = db[collection].count($query);
  }
  var share = item.totalOccurrences / numDocuments;
  item.percentContaining = share * 100.0;
  return item;
};
// sort desc by totalOccurrences or by key asc if occurrences equal // sort desc by totalOccurrences or by key asc if occurrences equal
// Comparator for Array.prototype.sort over result records.
// a, b: records exposing totalOccurrences and _id.key.
// Returns b.totalOccurrences - a.totalOccurrences when the counts differ (so the
// record with more occurrences sorts first); on a tie, falls back to
// a._id.key.localeCompare(b._id.key).
// NOTE(review): every line below appears twice because it is unchanged context
// in a side-by-side diff view.
var comparator = function(a, b) { var comparator = function(a, b) {
var countsDiff = b.totalOccurrences - a.totalOccurrences; var countsDiff = b.totalOccurrences - a.totalOccurrences;
return countsDiff !== 0 ? countsDiff : a._id.key.localeCompare(b._id.key); return countsDiff !== 0 ? countsDiff : a._id.key.localeCompare(b._id.key);
}; };
// reduceDocuments: callback for Array.prototype.reduce that folds one document's
// analysis result into the accumulator.
//   accumulator - array of {key, types, totalOccurrences} entries, one per distinct key
//   docResult   - object mapping each key found in one document to its types
//   index, array - supplied by reduce, not used here
// A key already present in the accumulator gets its types merged through
// mergeArrays and its totalOccurrences incremented by one; an unseen key is
// appended with totalOccurrences 1. The accumulator is mutated in place and returned.
// NOTE(review): the leading log(...) call on the next line and the leading
// "varietyResults = ..." assignment on the line after it look like the removed
// (left-hand) side of the side-by-side diff fused onto the added lines — confirm
// before treating them as part of this function.
log('removing leaf arrays in results collection, and getting percentages'); var reduceDocuments = function(accumulator, docResult, index, array) {
varietyResults = varietyResults.filter(filter).map(map).sort(comparator); var duplicityCheck = function(item){return item.key === key;};
// `key` is declared with var, so it is function-scoped and duplicityCheck sees
// whichever key the loop is currently visiting.
for (var key in docResult) {
// scan the accumulator for an entry that already carries this key
var known = accumulator.filter(duplicityCheck);
if(known.length > 0) {
// key seen before: update the existing entry in place
var existing = known[0];
existing.types = mergeArrays(docResult[key], existing.types);
existing.totalOccurrences = existing.totalOccurrences + 1;
} else {
// first occurrence of this key
accumulator.push({'key':key, 'types':docResult[key], 'totalOccurrences':1});
}
}
return accumulator;
};
// Converts one accumulated entry ({key, types, totalOccurrences}) into the
// output record {_id: {key}, value: {types}, totalOccurrences, percentContaining}.
// percentContaining is totalOccurrences expressed as a percentage of $limit.
// Returns a new object; the entry itself is not modified.
var computePercentages = function(entry){
  var occurrences = entry.totalOccurrences;
  var record = {};
  record._id = {'key': entry.key};
  record.value = {'types': entry.types};
  record.totalOccurrences = occurrences;
  record.percentContaining = occurrences*100/$limit;
  return record;
};
// the main processing pipe, one named stage per step:
// read -> flatten -> analyse -> merge -> format -> prune -> order
var varietyResults = (function() {
  // read data from the mongodb
  var cursor = db[collection].find($query).sort($sort).limit($limit);
  // flatten structure, create compound keys
  var flattened = cursor.map(function(rawDoc) {
    return serializeDoc(rawDoc, $maxDepth);
  });
  // analyse keys and types of document, filtering duplicities
  var analysed = flattened.map(analyseDocument);
  // merge all keys and types
  var merged = analysed.reduce(reduceDocuments, []);
  // add percentages, reformat results to expected structure
  var formatted = merged.map(computePercentages);
  // throw away keys which end in an array index
  var pruned = formatted.filter(filter);
  // sort by occurrences and alphabet
  return pruned.sort(comparator);
}());
if($persistResults) { if($persistResults) {
var resultsDB = db.getMongo().getDB('varietyResults'); var resultsDB = db.getMongo().getDB('varietyResults');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment