Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
variety
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
fact-gitdep
variety
Commits
384ecdf0
Commit
384ecdf0
authored
Feb 28, 2015
by
James Cropcho
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #75 from todvora/master
Performance fix
parents
7ceeed3e
88d00d28
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
63 additions
and
38 deletions
+63
-38
variety.js
variety.js
+63
-38
No files found.
variety.js
View file @
384ecdf0
...
...
@@ -175,26 +175,65 @@ var serializeDoc = function(doc, maxDepth) {
return
result
;
};
// Merge two arrays of values into one sorted array without duplicates.
//   a - first array; when undefined it is treated as an empty array
//   b - array (or single value) appended to `a` via Array.prototype.concat
// Returns a new array; neither argument is modified.
var mergeArrays = function(a, b) {
  // Work on a local instead of reassigning the parameter.
  var base = (typeof a === 'undefined') ? [] : a;
  // `return` and its expression must start on the same line, otherwise
  // automatic semicolon insertion turns this into `return;`.
  return base.concat(b) // merge two arrays into one, including duplications
    .filter(function(item, pos, self) { return self.indexOf(item) === pos; }) // keep first occurrence only
    .sort(); // default sort: ascending by string comparison
};
// Convert a (flattened) document to a key -> type-set map. Each value is an
// object whose property names are the type names reported by varietyTypeOf
// for that key, e.g. { 'tags.XX': { String: true, Number: true } }.
//   document - object whose enumerable keys are analysed
// Returns a new object; `document` is not modified.
var analyseDocument = function(document) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var result = {};
  for (var key in document) {
    var value = document[key];
    //translate unnamed object key from {_parent_name_}.{_index_} to {_parent_name_}.XX
    var normalizedKey = key.replace(/\.\d+/g, '.XX');
    // Own-property check: a plain `typeof result[k] === 'undefined'` test is
    // fooled by inherited members such as `constructor` or `toString`.
    if (!hasOwn.call(result, normalizedKey)) {
      result[normalizedKey] = {};
    }
    var type = varietyTypeOf(value);
    result[normalizedKey][type] = true;
  }
  return result;
};
// Fold the analysis of one document into the running totals.
//   docResult      - map of key -> { typeName: true } for a single document
//   interimResults - map of key -> { types, totalOccurrences }; updated in place
// Returns nothing. For a key seen for the first time, the `types` object of
// docResult is stored directly (not copied).
var mergeDocument = function(docResult, interimResults) {
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var key in docResult) {
    // Own-property check: `key in interimResults` would also match inherited
    // members (e.g. a field named `constructor`) and then fail on `.types`.
    if (hasOwn.call(interimResults, key)) {
      var existing = interimResults[key];
      for (var type in docResult[key]) {
        existing.types[type] = true;
      }
      existing.totalOccurrences = existing.totalOccurrences + 1;
    } else {
      interimResults[key] = {'types': docResult[key], 'totalOccurrences': 1};
    }
  }
};
// Convert the accumulated key -> { types, totalOccurrences } map into the
// array of result entries:
//   { _id: { key }, value: { types: [sorted type names] },
//     totalOccurrences, percentContaining }
// percentContaining is totalOccurrences * 100 / $limit ($limit is read from
// the enclosing script scope).
var convertResults = function(interimResults) {
  // Collect the enumerable property names of obj, sorted ascending.
  var getKeys = function(obj) {
    var keys = [];
    for (var key in obj) {
      keys.push(key);
    }
    // Keep the expression on the `return` line; a line break here would make
    // the function return undefined.
    return keys.sort();
  };
  var varietyResults = [];
  //now convert the interimResults into the proper format
  for (var key in interimResults) {
    var entry = interimResults[key];
    varietyResults.push({
      '_id': {'key': key},
      'value': {'types': getKeys(entry.types)},
      'totalOccurrences': entry['totalOccurrences'],
      'percentContaining': entry['totalOccurrences'] * 100 / $limit
    });
  }
  return varietyResults;
};
// Merge the keys and types of the current object into the accumulator object.
//   accumulator - running key -> { types, totalOccurrences } map, updated in place
//   object      - the next document from the cursor
// Returns the same accumulator so it can be used as a reduce callback.
var reduceDocuments = function(accumulator, object) {
  var flattened = serializeDoc(object, $maxDepth);
  var docResult = analyseDocument(flattened);
  mergeDocument(docResult, accumulator);
  // The value must be on the `return` line; otherwise the reducer yields
  // undefined and the next iteration has no accumulator.
  return accumulator;
};
// We throw away keys which end in an array index, since they are not useful
// for our analysis. (We still keep the key of their parent array, though.) -JC
var
filter
=
function
(
item
)
{
...
...
@@ -207,38 +246,24 @@ var comparator = function(a, b) {
return
countsDiff
!==
0
?
countsDiff
:
a
.
_id
.
key
.
localeCompare
(
b
.
_id
.
key
);
};
var
reduceDocuments
=
function
(
accumulator
,
docResult
,
index
,
array
)
{
var
duplicityCheck
=
function
(
item
){
return
item
.
key
===
key
;};
for
(
var
key
in
docResult
)
{
var
known
=
accumulator
.
filter
(
duplicityCheck
);
if
(
known
.
length
>
0
)
{
var
existing
=
known
[
0
];
existing
.
types
=
mergeArrays
(
docResult
[
key
],
existing
.
types
);
existing
.
totalOccurrences
=
existing
.
totalOccurrences
+
1
;
}
else
{
accumulator
.
push
({
'key'
:
key
,
'types'
:
docResult
[
key
],
'totalOccurrences'
:
1
});
}
}
return
accumulator
;
// Extend the standard MongoDB shell cursor (DBQuery) with a reduce method.
// It walks the cursor with forEach and combines the results one by one.
//   callback     - function(accumulated, obj) returning the new accumulated value
//   initialValue - value passed as `accumulated` for the first object
// Returns the last value returned by callback, or initialValue if forEach
// never invokes the inner function.
DBQuery.prototype.reduce = function(callback, initialValue) {
  var result = initialValue;
  this.forEach(function(obj) {
    result = callback(result, obj);
  });
  // Keep `result` on the same line as `return` so it is actually returned.
  return result;
};
// Reformat one accumulated entry ({ key, types, totalOccurrences }) into the
// result structure and add percentContaining, computed as
// totalOccurrences * 100 / $limit ($limit comes from the enclosing script scope).
var computePercentages = function(entry) {
  // The opening brace must follow `return` on the same line; on a new line it
  // would be parsed as a block instead of an object literal.
  return {
    '_id': {'key': entry.key},
    'value': {'types': entry.types},
    'totalOccurrences': entry.totalOccurrences,
    'percentContaining': entry.totalOccurrences * 100 / $limit
  };
};
// Read the selected documents and fold them into a single key -> stats map,
// starting from an empty object.
var interimResults = db[collection]
  .find($query)
  .sort($sort)
  .limit($limit)
  .reduce(reduceDocuments, {});
// the main processing pipe
// NOTE(review): `varietyResults` is declared again right after this statement
// from convertResults(interimResults); this map/reduce chain looks like the
// superseded implementation - confirm which of the two should remain.
var varietyResults = db[collection].find($query).sort($sort).limit($limit) // read data from the mongodb
  // The callback's value must sit on the `return` line, otherwise every
  // mapped element becomes undefined.
  .map(function(obj) { return serializeDoc(obj, $maxDepth); }) // flatten structure, create compound keys
  .map(analyseDocument) // analyse keys and types of document, filtering duplicates
  .reduce(reduceDocuments, []) // merge all keys and types
  .map(computePercentages) // add percentages, reformat results to expected structure
  .filter(filter) // throw away keys which end in an array index
  .sort(comparator); // sort by occurrences and alphabet
// Turn the accumulated map into result entries, apply the `filter` predicate
// and order the remaining entries with `comparator`.
var varietyResults = convertResults(interimResults)
  .filter(filter)
  .sort(comparator);
if
(
$persistResults
)
{
var
resultsDB
=
db
.
getMongo
().
getDB
(
'varietyResults'
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment