/**
* Client side filtering using crossfilter
* Due to a limitation of crossfilter with arrays (or data that has no natural ordering), this will not work as expected:
* * dimension: `function (d) {return [d.x, d.y, d.z]}`
* * group: `function (d) {return [d.x / 10 , d.y / 10, d.z / 10]}`
*
* Therefore, we perform the grouping already in the dimension itself, and join the array into a string.
* Strings have a natural ordering and thus can be used as dimension value.
* * dimension: `function (d) -> "d.x/10|d.y/10|d.z/10"`
* * group: `function (d) {return d;}`
*
* @module driver/client
*/
var moment = require('moment-timezone');
var utildx = require('../util/crossfilter');
var misval = require('../util/misval');
// Property names under which partition values are stored on an output item,
// looked up by the position of the sub-key within a group key
// (first sub-key -> 'a', second -> 'b', ...).
var grpIdxToName = {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e'};
// Property names under which aggregated values are stored on an output item,
// looked up by the rank of the aggregate (rank 1 -> 'aa', rank 2 -> 'bb', ...).
var aggRankToName = {1: 'aa', 2: 'bb', 3: 'cc', 4: 'dd', 5: 'ee'};
/**
 * setMinMax determines the extent (smallest and largest value) of a facet and stores it
 * on the facet as `minvalAsText` / `maxvalAsText` and `rawMinval` / `rawMaxval`.
 *
 * Values equal to `misval` are skipped. Datetime facets are ordered with `isBefore`,
 * all other facets with the `<` and `>` operators.
 * When no valid value is found, the text fields are set to the empty string and the
 * raw fields to `misval`.
 * The text fields are only written for continuous, datetime, and duration facets.
 *
 * @param {Dataset} dataset
 * @param {Facet} facet
 */
function setMinMax (dataset, facet) {
  // values are compared just before a transformation (base value), but the
  // unprocessed (raw) value is what is remembered alongside the extreme
  var baseOf = utildx.baseValueFn(facet);
  var rawOf = utildx.rawValueFn(facet);

  // strict ordering tests between a candidate and the current extreme
  var isBelow;
  var isAbove;
  if (facet.isDatetime) {
    isBelow = function (candidate, current) { return candidate.isBefore(current); };
    isAbove = function (candidate, current) { return current.isBefore(candidate); };
  } else {
    isBelow = function (candidate, current) { return candidate < current; };
    isAbove = function (candidate, current) { return candidate > current; };
  }

  var lowest = {base: misval, raw: misval};
  var highest = {base: misval, raw: misval};

  dataset.data.forEach(function (row) {
    var raw = rawOf(row);
    var base = baseOf(row);
    if (base === misval) {
      return;
    }
    // the first valid value always becomes both the minimum and the maximum
    if (lowest.base === misval || isBelow(base, lowest.base)) {
      lowest.base = base;
      lowest.raw = raw;
    }
    if (highest.base === misval || isAbove(base, highest.base)) {
      highest.base = base;
      highest.raw = raw;
    }
  });

  // write one extreme to the facet under the given property names
  function publish (extreme, textKey, rawKey) {
    if (extreme.base === misval) {
      facet[textKey] = '';
      facet[rawKey] = misval;
      return;
    }
    if (facet.isContinuous) {
      facet[textKey] = extreme.base.toString();
    } else if (facet.isDatetime || facet.isDuration) {
      facet[textKey] = extreme.base.toISOString();
    }
    facet[rawKey] = extreme.raw;
  }

  publish(lowest, 'minvalAsText', 'rawMinval');
  publish(highest, 'maxvalAsText', 'rawMaxval');
}
/**
 * setCategories finds all values on an ordinal (categorial) axis, counts how often
 * each one occurs, and rebuilds the `categorialTransform` of the facet from them.
 *
 * Array-valued data contributes one count per array element.
 * At most 75 distinct categories are collected: once the limit is reached, values that
 * were not seen before are ignored, while known categories keep being counted.
 * Each category results in one rule `{expression, count, group}`, where the group is
 * initially identical to the expression.
 *
 * @param {Dataset} dataset
 * @param {Facet} facet
 */
function setCategories (dataset, facet) {
  var MAX_CATEGORIES = 75;

  // we need the value just before a transformation, so baseValueFn
  var valFn = utildx.baseValueFn(facet);

  // Use a prototype-less object as dictionary, so category names such as
  // "hasOwnProperty", "constructor" or "__proto__" are ordinary keys and cannot
  // collide with (or overwrite) anything inherited from Object.prototype.
  var counts = Object.create(null);
  var nCategories = 0;

  function tally (val) {
    if (val in counts) {
      counts[val]++;
    } else if (nCategories < MAX_CATEGORIES) {
      counts[val] = 1;
      nCategories++;
    }
  }

  dataset.data.forEach(function (d) {
    var vals = valFn(d);
    if (vals instanceof Array) {
      vals.forEach(function (val) {
        tally(val);
      });
    } else {
      tally(vals);
    }
  });

  facet.categorialTransform.reset();
  Object.keys(counts).forEach(function (key) {
    // TODO: missing data should be mapped to a misval from misvalAsText
    facet.categorialTransform.rules.add({expression: key, count: counts[key], group: key});
  });
}
/**
 * Calculate the 1st up to the 99th percentile, and initialize the `facet.continuousTransform`
 * to an approximate percentile mapping.
 * Use the recommended method from [NIST](http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm)
 * See also the discussion on [Wikipedia](https://en.wikipedia.org/wiki/Percentile)
 *
 * The transform receives 103 control points `{x, fx}`: the minimum twice (fx 0),
 * percentiles 1..99 (fx equal to the percentile), and the maximum twice (fx 100).
 * Following the NIST method, percentiles whose rank falls before the first or after
 * the last sorted value are set to the minimum or maximum respectively.
 *
 * Missing values are ignored. The calculation works on a private copy of the values;
 * `dataset.data` itself is neither reordered nor shortened.
 * When the facet has no valid values at all, the transform is only reset.
 *
 * @param {Dataset} dataset
 * @param {Facet} facet
 */
function setPercentiles (dataset, facet) {
  // we need the value just before a transformation, so baseValueFn
  var basevalueFn = utildx.baseValueFn(facet);
  var tf = facet.continuousTransform;

  // extract the valid values once; sorting them does not touch the dataset
  var values = [];
  dataset.data.forEach(function (d) {
    var v = basevalueFn(d);
    if (v !== misval) {
      values.push(v);
    }
  });
  values.sort(function (a, b) {
    if (a < b) {
      return -1;
    }
    if (a > b) {
      return 1;
    }
    return 0;
  });

  // start clean
  tf.reset();

  var count = values.length;
  if (count === 0) {
    // nothing to base a percentile mapping on
    return;
  }
  var lowest = values[0];
  var highest = values[count - 1];

  // add minimum value as control points p0 and p1
  tf.cps.add({x: lowest, fx: 0});
  tf.cps.add({x: lowest, fx: 0});

  var p, x, i, value;
  for (p = 1; p < 100; p++) {
    x = (p * 0.01) * (count + 1) - 1; // indexing starts at zero, not at one
    if (x <= 0) {
      // rank before the first value
      value = lowest;
    } else if (x >= count - 1) {
      // rank at or beyond the last value
      value = highest;
    } else {
      // linear interpolation between the two neighbouring values
      i = Math.trunc(x);
      value = (1 - x + i) * values[i] + (x - i) * values[i + 1];
    }
    tf.cps.add({x: value, fx: p});
  }

  // add maximum value as p101 and p102
  tf.cps.add({x: highest, fx: 100});
  tf.cps.add({x: highest, fx: 100});

  tf.type = 'percentiles';
}
/**
 * Autoconfigure a dataset:
 * 1. take the first 10 elements of the data as a sample
 * 2. create facets for the properties found in the sample (nested objects are
 *    traversed, with path components joined by '##'; arrays become a single
 *    facet with a '[]' accessor suffix, plus a facet for their '.length')
 * 3. guess the facet type from the sampled values, and put the start of the
 *    list of sampled values in the facet description
 * 4. set categories (categorial facets) or range (continuous and datetime facets)
 *
 * Triggers a 'syncFacets' event on the dataset when done.
 *
 * @param {Dataset} dataset
 */
function scan (dataset) {
  // Is there already a facet reading from this path, either as scalar or as array?
  function facetExists (facets, path) {
    var arrayPath = path + '[]';
    var found = false;
    facets.forEach(function (candidate) {
      var accessor = candidate.accessor;
      if (accessor === path || accessor === arrayPath) {
        found = true;
      }
    });
    return found;
  }

  // Duration strings look like "P10Y"
  function looksLikeDuration (value) {
    return (moment.duration(value).asMilliseconds() !== 0) &&
      (typeof value === 'string') &&
      (value[0].toLowerCase() === 'p');
  }

  // Let every sampled value vote for a type; the type with most votes wins.
  // On a tie, the type listed first in `votes` wins.
  function guessType (values) {
    var votes = {
      continuous: 0,
      text: 0,
      datetime: 0,
      duration: 0,
      categorial: 0
    };
    values.forEach(function (value) {
      if (moment(value, moment.ISO_8601).isValid()) {
        // "2016-08-17 17:25:00+01"
        votes.datetime += 1;
      } else if (looksLikeDuration(value)) {
        votes.duration += 1;
      } else if (value == +value) { // eslint-disable-line eqeqeq
        // "10" or 10
        votes.continuous += 1;
      } else {
        // "hello world"
        votes.categorial += 1;
      }
    });

    var names = Object.keys(votes);
    var winner;
    var best = -1;
    for (var k = 0; k < names.length; k++) {
      if (votes[names[k]] > best) {
        winner = names[k];
        best = votes[winner];
      }
    }
    return winner;
  }

  // Create and configure a facet for the path, unless one exists already
  function tryFacet (facets, data, path) {
    if (facetExists(facets, path)) {
      return;
    }

    var facet = facets.add({
      name: path,
      accessor: path,
      type: 'text'
    });

    // collect the distinct values of this facet over the sample
    var baseValueFn = utildx.baseValueFn(facet);
    var seen = [];
    var sawArray = false;
    data.forEach(function (d) {
      var sampled = baseValueFn(d);
      var items;
      if (sampled instanceof Array) {
        sawArray = true;
        items = sampled;
      } else {
        items = [sampled];
      }
      items.forEach(function (item) {
        // missing values are listed using the first missing-value label of the facet
        var missingLabel = facet.misval[0];
        var entry = (item === misval) ? missingLabel : item;
        if (seen.indexOf(entry) === -1) {
          seen.push(entry);
        }
      });
    });

    // reconfigure the facet using what was learned from the sample
    facet.accessor = facet.accessor + (sawArray ? '[]' : '');
    facet.type = guessType(seen);
    facet.description = seen.join(', ').match('^.{0,40}') + '...';
    facet.isActive = true;
  }

  // Walk over all own properties of the tree, depth first
  function recurse (facets, data, path, tree) {
    Object.getOwnPropertyNames(tree).forEach(function (name) {
      var subpath = path ? (path + '##' + name) : name;
      var node = tree[name];
      var isList = node instanceof Array;

      if (!isList && node instanceof Object) {
        // descend into plain objects
        recurse(facets, data, subpath, node);
        return;
      }

      // strings and numbers become a facet; an array becomes a single facet
      // (ie. a labelset) instead of one facet per element
      tryFacet(facets, data, subpath);
      if (isList) {
        // also add the array length as facet
        tryFacet(facets, data, subpath + '.length');
      }
    });
  }

  // Add facets
  var sample = dataset.data.slice(0, 10);
  sample.forEach(function (row) {
    recurse(dataset.facets, sample, '', row);
  });

  // Set range or categories
  dataset.facets.forEach(function (facet) {
    if (facet.isCategorial) {
      setCategories(dataset, facet);
      return;
    }
    if (facet.isContinuous || facet.isDatetime) {
      setMinMax(dataset, facet);
    }
  });

  dataset.trigger('syncFacets');
}
/**
 * Initialize the data filter, and construct the getData callback function on the filter.
 *
 * A crossfilter dimension is created (with the array flag set) whose values are
 * strings of the form "group1|group2|...", one component per partition in order
 * of partition rank. When a partition yields an array of groups for a data point,
 * the point gets one key per combination; when any partition yields an empty array
 * the point gets no keys at all.
 * The dimension is stored as `filter.dimension`.
 *
 * The crossfilter group keeps, per aggregate, the running `count`, `sum` and
 * `sumsquares` of the non-missing values; without aggregates only a `count`.
 *
 * `filter.getData()` rebuilds `filter.data` as a list of items holding:
 * the partition values ('a', 'b', ...), the reduced aggregates ('aa', 'bb', ...
 * by aggregate rank), and an overall `count`.
 *
 * @param {Dataview} dataview
 * @param {Filter} filter
 */
function initDataFilter (dataview, filter) {
  // use the partitions as groups, ordered by rank
  var groupFns = [];
  filter.partitions.forEach(function (partition) {
    var facet = dataview.facets.get(partition.facetName, 'name');
    var valueFn = utildx.valueFn(facet);
    var groupFn = utildx.groupFn(partition);
    groupFns[partition.rank - 1] = function (d) {
      return groupFn(valueFn(d));
    };
  });

  // and then create keys from the group values
  var groupsKeys = function (d) {
    var keys = [];
    // An explicit flag, not `keys.length === 0`: an earlier partition may have
    // produced no groups, in which case the data point must stay without keys
    // instead of restarting the key at a later partition.
    var isFirst = true;
    groupFns.forEach(function (groupFn) {
      var result = groupFn(d);
      var parts = (result instanceof Array) ? result : [result];
      if (isFirst) {
        isFirst = false;
        keys = parts;
        return;
      }
      var combined = [];
      keys.forEach(function (prefix) {
        parts.forEach(function (part) {
          combined.push(prefix + '|' + part);
        });
      });
      keys = combined;
    });
    return keys;
  };

  // set up the facet valueFns to aggregate over
  // and the reduction functions for them
  var aggregateFns = [];
  var aggregateRanks = [];
  var reduceFns = [];
  filter.aggregates.forEach(function (aggregate) {
    var facet = dataview.facets.get(aggregate.facetName, 'name');
    aggregateRanks.push(aggregate.rank);
    aggregateFns.push(utildx.valueFn(facet));
    reduceFns.push(utildx.reduceFn(aggregate));
  });

  // Build the reducer for adding (sign +1) or removing (sign -1) a data point
  function makeReducer (sign) {
    return function (p, d) {
      if (aggregateFns.length === 0) {
        // no aggregates: only keep track of the number of data points
        p[0] = p[0] ? p[0] : {count: 0};
        p[0].count += sign;
      }
      aggregateFns.forEach(function (aggregateFn, i) {
        var val = aggregateFn(d);
        if (val !== misval) {
          val = parseFloat(val);
          p[i] = p[i] || {count: 0, sum: 0, sumsquares: 0};
          p[i].count += sign;
          p[i].sum += sign * val;
          p[i].sumsquares += sign * val * val;
        }
      });
      return p;
    };
  }

  // setup the crossfilter dimensions and groups
  filter.dimension = dataview.crossfilter.dimension(function (d) {
    return groupsKeys(d);
  }, true);
  var crossfilterGroup = filter.dimension.group(function (d) { return d; });
  crossfilterGroup.reduce(
    makeReducer(1), // add
    makeReducer(-1), // subtract
    function () { // initialize
      return [];
    }
  );

  filter.getData = function () {
    filter.data = [];

    // Get data from crossfilter
    var groups = crossfilterGroup.all();

    // { key: "group1|group2|...",
    //   value: [ {count: agg1, sum: agg1}
    //            {count: agg2, sum: agg2}
    //            {count: agg3, sum: agg3}
    //                  ... ]}
    groups.forEach(function (group) {
      var item = {};

      // turn the string back into individual group values
      var subkeys;
      if (typeof group.key === 'string') {
        subkeys = group.key.split('|');
      } else {
        // shortcut for numeric non-partitioned case
        subkeys = [group.key];
      }

      // add partitioning data to the item
      subkeys.forEach(function (subkey, i) {
        item[grpIdxToName[i]] = subkey;
      });

      // add aggregated data to the item
      reduceFns.forEach(function (reduceFn, i) {
        var name = aggRankToName[aggregateRanks[i]];
        item[name] = reduceFn(group.value[i]);
      });

      // add an overall count, taken from the first aggregate
      // NOTE(review): the original comment states that filtering removes missing
      // data points, making this equal to the count of any aggregate -- confirm
      item.count = group.value[0] ? group.value[0].count : 0;
      filter.data.push(item);
    });
  };
}
/**
 * The opposite of initDataFilter: clears the filter on the crossfilter dimension,
 * disposes the dimension, and removes the `dimension` and `getData` properties
 * from the filter. Does nothing for a filter without a dimension.
 *
 * @param {Dataview} dataview
 * @param {Filter} filter
 */
function releaseDataFilter (dataview, filter) {
  var dimension = filter.dimension;
  if (!dimension) {
    return;
  }

  dimension.filterAll();
  dimension.dispose();

  delete filter.dimension;
  delete filter.getData;
}
/**
 * Apply the current filter function of the filter to its crossfilter dimension.
 * Does nothing for a filter without a dimension.
 *
 * @param {Filter} filter
 */
function updateDataFilter (filter) {
  var dimension = filter.dimension;
  if (!dimension) {
    return;
  }
  dimension.filterFunction(filter.filterFunction());
}
/**
 * Get data for every initialized filter, triggering a 'newData' event on each of them.
 * Afterwards, update `dataTotal` and `dataSelected` on the dataview, and trigger
 * a 'newMetaData' event on it.
 *
 * Returns a Promise that resolves to the dataview; all updates have been done by the
 * time this function returns.
 *
 * @param {Dataview} dataview
 * @returns {Promise}
 */
function getData (dataview) {
  function refresh (filter) {
    if (!filter.isInitialized) {
      return;
    }
    filter.getData();
    filter.trigger('newData');
  }

  dataview.filters.forEach(function (filter) {
    refresh(filter);
  });

  // update counts
  var total = dataview.crossfilter.size();
  dataview.dataTotal = total;
  var selected = dataview.countGroup.value();
  dataview.dataSelected = selected;
  dataview.trigger('newMetaData');

  return Promise.resolve(dataview);
}
// Public interface of this driver; `driverType` identifies it as the 'client' driver.
module.exports = {
driverType: 'client',
scan: scan,
setMinMax: setMinMax,
setCategories: setCategories,
setPercentiles: setPercentiles,
initDataFilter: initDataFilter,
releaseDataFilter: releaseDataFilter,
updateDataFilter: updateDataFilter,
getData: getData
};