Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
V
variety
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
fact-gitdep
variety
Commits
9d190be6
Commit
9d190be6
authored
Nov 19, 2014
by
Tomas Dvorak
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Data processing refactored, possibility to not persist results in mongodb
parent
4ee4004a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
38 additions
and
32 deletions
+38
-32
variety.js
variety.js
+38
-32
No files found.
variety.js
View file @
9d190be6
...
@@ -72,6 +72,8 @@ log('Using sort of ' + tojson(sort));
...
@@ -72,6 +72,8 @@ log('Using sort of ' + tojson(sort));
if
(
typeof
outputFormat
===
'undefined'
)
{
var
outputFormat
=
'ascii'
;
}
if
(
typeof
outputFormat
===
'undefined'
)
{
var
outputFormat
=
'ascii'
;
}
log
(
'Using outputFormat of '
+
outputFormat
);
log
(
'Using outputFormat of '
+
outputFormat
);
if
(
typeof
persistResults
===
'undefined'
)
{
var
persistResults
=
true
;
}
log
(
'Using persistResults of '
+
persistResults
);
varietyTypeOf
=
function
(
thing
)
{
varietyTypeOf
=
function
(
thing
)
{
if
(
typeof
thing
===
'undefined'
)
{
throw
'varietyTypeOf() requires an argument'
;
}
if
(
typeof
thing
===
'undefined'
)
{
throw
'varietyTypeOf() requires an argument'
;
}
...
@@ -173,7 +175,7 @@ db[collection].find(query).sort(sort).limit(limit).forEach(function(obj) {
...
@@ -173,7 +175,7 @@ db[collection].find(query).sort(sort).limit(limit).forEach(function(obj) {
});
});
var
varietyResults
=
{}
;
var
varietyResults
=
[]
;
//now convert the interimResults into the proper format
//now convert the interimResults into the proper format
for
(
var
key
in
interimResults
){
for
(
var
key
in
interimResults
){
var
entry
=
interimResults
[
key
];
var
entry
=
interimResults
[
key
];
...
@@ -182,55 +184,59 @@ for(var key in interimResults){
...
@@ -182,55 +184,59 @@ for(var key in interimResults){
newEntry
[
'value'
]
=
{
'types'
:
Object
.
keys
(
entry
[
'types'
])};
newEntry
[
'value'
]
=
{
'types'
:
Object
.
keys
(
entry
[
'types'
])};
newEntry
[
'totalOccurrences'
]
=
entry
[
'totalOccurrences'
];
newEntry
[
'totalOccurrences'
]
=
entry
[
'totalOccurrences'
];
newEntry
[
'percentContaining'
]
=
entry
[
'totalOccurrences'
]
*
100
/
limit
;
newEntry
[
'percentContaining'
]
=
entry
[
'totalOccurrences'
]
*
100
/
limit
;
varietyResults
[
key
]
=
newEntry
;
varietyResults
.
push
(
newEntry
);
}
var
resultsDB
=
db
.
getMongo
().
getDB
(
'varietyResults'
);
var
resultsCollectionName
=
collection
+
'Keys'
;
// replace results collection
log
(
'creating results collection: '
+
resultsCollectionName
);
resultsDB
[
resultsCollectionName
].
drop
();
for
(
var
result
in
varietyResults
)
{
resultsDB
[
resultsCollectionName
].
insert
(
varietyResults
[
result
]);
}
}
var
numDocuments
=
db
[
collection
].
count
();
var
numDocuments
=
db
[
collection
].
count
();
log
(
'removing leaf arrays in results collection, and getting percentages'
);
// We throw away keys which end in an array index, since they are not useful
resultsDB
[
resultsCollectionName
].
find
({}).
forEach
(
function
(
key
)
{
// for our analysis. (We still keep the key of their parent array, though.) -JC
var
keyName
=
key
.
_id
.
key
;
var
filter
=
function
(
item
)
{
return
!
item
.
_id
.
key
.
match
(
/
\.
XX$/
);
// We throw away keys which end in an array index, since they are not useful
};
// for our analysis. (We still keep the key of their parent array, though.) -JC
if
(
keyName
.
match
(
/
\.
XX$/
))
{
resultsDB
[
resultsCollectionName
].
remove
({
'_id'
:
key
.
_id
});
return
;
}
var
map
=
function
(
item
)
{
var
keyName
=
item
.
_id
.
key
;
if
(
keyName
.
match
(
/
\.
XX/
))
{
if
(
keyName
.
match
(
/
\.
XX/
))
{
// exists query checks for embedded values for an array
// exists query checks for embedded values for an array
// ie. match {arr:[{x:1}]} with {'arr.x':{$exists:true}}
// ie. match {arr:[{x:1}]} with {'arr.x':{$exists:true}}
// just need to pull out .XX in this case
// just need to pull out .XX in this case
keyName
=
keyName
.
replace
(
/.XX/g
,
''
);
keyName
=
keyName
.
replace
(
/.XX/g
,
''
);
}
}
// we don't need to set it if limit isn't being used. (it's set above.)
// we don't need to set it if limit isn't being used. (it's set above.)
if
(
limit
<
numDocuments
)
{
if
(
limit
<
numDocuments
)
{
var
existsQuery
=
{};
var
existsQuery
=
{};
existsQuery
[
keyName
]
=
{
$exists
:
true
};
existsQuery
[
keyName
]
=
{
$exists
:
true
};
key
.
totalOccurrences
=
db
[
collection
].
count
(
existsQuery
);
item
.
totalOccurrences
=
db
[
collection
].
count
(
existsQuery
);
}
}
key
.
percentContaining
=
(
key
.
totalOccurrences
/
numDocuments
)
*
100.0
;
item
.
percentContaining
=
(
item
.
totalOccurrences
/
numDocuments
)
*
100.0
;
resultsDB
[
resultsCollectionName
].
save
(
key
);
return
item
;
});
};
// sort desc by totalOccurrences or by key asc if occurrences equal
var
comparator
=
function
(
a
,
b
)
{
var
countsDiff
=
b
.
totalOccurrences
-
a
.
totalOccurrences
;
return
countsDiff
!==
0
?
countsDiff
:
a
.
_id
.
key
.
localeCompare
(
b
.
_id
.
key
);
};
var
sortedKeys
=
resultsDB
[
resultsCollectionName
].
find
({}).
sort
({
totalOccurrences
:
-
1
,
'_id.key'
:
1
});
// occurrences count & alphabetical order
log
(
'removing leaf arrays in results collection, and getting percentages'
);
varietyResults
=
varietyResults
.
filter
(
filter
).
map
(
map
).
sort
(
comparator
);
if
(
persistResults
)
{
var
resultsDB
=
db
.
getMongo
().
getDB
(
'varietyResults'
);
var
resultsCollectionName
=
collection
+
'Keys'
;
// replace results collection
log
(
'creating results collection: '
+
resultsCollectionName
);
resultsDB
[
resultsCollectionName
].
drop
();
resultsDB
[
resultsCollectionName
].
insert
(
varietyResults
);
}
if
(
outputFormat
===
'json'
)
{
if
(
outputFormat
===
'json'
)
{
printjson
(
sortedKeys
.
toArray
()
);
// valid formatted json output, compressed variant is printjsononeline()
printjson
(
varietyResults
);
// valid formatted json output, compressed variant is printjsononeline()
}
else
{
// output nice ascii table with results
}
else
{
// output nice ascii table with results
var
table
=
[[
'key'
,
'types'
,
'occurrences'
,
'percents'
],
[
''
,
''
,
''
,
''
]];
// header + delimiter rows
var
table
=
[[
'key'
,
'types'
,
'occurrences'
,
'percents'
],
[
''
,
''
,
''
,
''
]];
// header + delimiter rows
sortedKey
s
.
forEach
(
function
(
key
)
{
varietyResult
s
.
forEach
(
function
(
key
)
{
table
.
push
([
key
.
_id
.
key
,
key
.
value
.
types
.
toString
(),
key
.
totalOccurrences
.
toString
(),
key
.
percentContaining
.
toString
()]);
table
.
push
([
key
.
_id
.
key
,
key
.
value
.
types
.
toString
(),
key
.
totalOccurrences
.
toString
(),
key
.
percentContaining
.
toString
()]);
});
});
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment