Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
variety
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
fact-gitdep
variety
Commits
9d190be6
Commit
9d190be6
authored
Nov 19, 2014
by
Tomas Dvorak
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Data processing refactored, possibility to not persist results in mongodb
parent
4ee4004a
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
29 deletions
+35
-29
variety.js
variety.js
+35
-29
No files found.
variety.js
View file @
9d190be6
...
...
@@ -72,6 +72,8 @@ log('Using sort of ' + tojson(sort));
if
(
typeof
outputFormat
===
'undefined'
)
{
var
outputFormat
=
'ascii'
;
}
log
(
'Using outputFormat of '
+
outputFormat
);
if
(
typeof
persistResults
===
'undefined'
)
{
var
persistResults
=
true
;
}
log
(
'Using persistResults of '
+
persistResults
);
varietyTypeOf
=
function
(
thing
)
{
if
(
typeof
thing
===
'undefined'
)
{
throw
'varietyTypeOf() requires an argument'
;
}
...
...
@@ -173,7 +175,7 @@ db[collection].find(query).sort(sort).limit(limit).forEach(function(obj) {
});
var
varietyResults
=
{}
;
var
varietyResults
=
[]
;
//now convert the interimResults into the proper format
for
(
var
key
in
interimResults
){
var
entry
=
interimResults
[
key
];
...
...
@@ -182,32 +184,19 @@ for(var key in interimResults){
newEntry
[
'value'
]
=
{
'types'
:
Object
.
keys
(
entry
[
'types'
])};
newEntry
[
'totalOccurrences'
]
=
entry
[
'totalOccurrences'
];
newEntry
[
'percentContaining'
]
=
entry
[
'totalOccurrences'
]
*
100
/
limit
;
varietyResults
[
key
]
=
newEntry
;
}
var
resultsDB
=
db
.
getMongo
().
getDB
(
'varietyResults'
);
var
resultsCollectionName
=
collection
+
'Keys'
;
// replace results collection
log
(
'creating results collection: '
+
resultsCollectionName
);
resultsDB
[
resultsCollectionName
].
drop
();
for
(
var
result
in
varietyResults
)
{
resultsDB
[
resultsCollectionName
].
insert
(
varietyResults
[
result
]);
varietyResults
.
push
(
newEntry
);
}
var
numDocuments
=
db
[
collection
].
count
();
log
(
'removing leaf arrays in results collection, and getting percentages'
);
resultsDB
[
resultsCollectionName
].
find
({}).
forEach
(
function
(
key
)
{
var
keyName
=
key
.
_id
.
key
;
// We throw away keys which end in an array index, since they are not useful
// for our analysis. (We still keep the key of their parent array, though.) -JC
if
(
keyName
.
match
(
/
\.
XX$/
))
{
resultsDB
[
resultsCollectionName
].
remove
({
'_id'
:
key
.
_id
});
return
;
}
// We throw away keys which end in an array index, since they are not useful
// for our analysis. (We still keep the key of their parent array, though.) -JC
var
filter
=
function
(
item
)
{
return
!
item
.
_id
.
key
.
match
(
/
\.
XX$/
);
};
var
map
=
function
(
item
)
{
var
keyName
=
item
.
_id
.
key
;
if
(
keyName
.
match
(
/
\.
XX/
))
{
// exists query checks for embedded values for an array
// ie. match {arr:[{x:1}]} with {'arr.x':{$exists:true}}
...
...
@@ -218,19 +207,36 @@ resultsDB[resultsCollectionName].find({}).forEach(function(key) {
if
(
limit
<
numDocuments
)
{
var
existsQuery
=
{};
existsQuery
[
keyName
]
=
{
$exists
:
true
};
key
.
totalOccurrences
=
db
[
collection
].
count
(
existsQuery
);
item
.
totalOccurrences
=
db
[
collection
].
count
(
existsQuery
);
}
key
.
percentContaining
=
(
key
.
totalOccurrences
/
numDocuments
)
*
100.0
;
resultsDB
[
resultsCollectionName
].
save
(
key
);
});
item
.
percentContaining
=
(
item
.
totalOccurrences
/
numDocuments
)
*
100.0
;
return
item
;
};
// sort desc by totalOccurrences or by key asc if occurrences equal
var
comparator
=
function
(
a
,
b
)
{
var
countsDiff
=
b
.
totalOccurrences
-
a
.
totalOccurrences
;
return
countsDiff
!==
0
?
countsDiff
:
a
.
_id
.
key
.
localeCompare
(
b
.
_id
.
key
);
};
var
sortedKeys
=
resultsDB
[
resultsCollectionName
].
find
({}).
sort
({
totalOccurrences
:
-
1
,
'_id.key'
:
1
});
// occurrences count & alphabetical order
log
(
'removing leaf arrays in results collection, and getting percentages'
);
varietyResults
=
varietyResults
.
filter
(
filter
).
map
(
map
).
sort
(
comparator
);
if
(
persistResults
)
{
var
resultsDB
=
db
.
getMongo
().
getDB
(
'varietyResults'
);
var
resultsCollectionName
=
collection
+
'Keys'
;
// replace results collection
log
(
'creating results collection: '
+
resultsCollectionName
);
resultsDB
[
resultsCollectionName
].
drop
();
resultsDB
[
resultsCollectionName
].
insert
(
varietyResults
);
}
if
(
outputFormat
===
'json'
)
{
printjson
(
sortedKeys
.
toArray
()
);
// valid formatted json output, compressed variant is printjsononeline()
printjson
(
varietyResults
);
// valid formatted json output, compressed variant is printjsononeline()
}
else
{
// output nice ascii table with results
var
table
=
[[
'key'
,
'types'
,
'occurrences'
,
'percents'
],
[
''
,
''
,
''
,
''
]];
// header + delimiter rows
sortedKey
s
.
forEach
(
function
(
key
)
{
varietyResult
s
.
forEach
(
function
(
key
)
{
table
.
push
([
key
.
_id
.
key
,
key
.
value
.
types
.
toString
(),
key
.
totalOccurrences
.
toString
(),
key
.
percentContaining
.
toString
()]);
});
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment