...
Code Block |
---|
|
/**
 * @brief Transition step: fold the current row value into the running state
 *
 * args[0] is the current transition state and args[1] is the value of the
 * current row (read as a double). The mean and the variance are updated
 * incrementally so that no large intermediate sum is accumulated.
 *
 * @return The updated transition state
 */
AnyType
avg_var_transition::run(AnyType& args) {
    // get current state value
    AvgVarTransitionState<MutableArrayHandle<double> > state = args[0];
    // get current row value
    double x = args[1].getAs<double>();
    // deviation of the new value from the mean *before* the update
    double d = (x - state.avg);
    // online update mean
    state.avg += d / static_cast<double>(state.numRows + 1);
    // deviation of the new value from the mean *after* the update
    double new_d = (x - state.avg);
    // weight of the previously accumulated variance
    double a = static_cast<double>(state.numRows) / static_cast<double>(state.numRows + 1);
    // online update variance
    state.var = state.var * a + d * new_d / static_cast<double>(state.numRows + 1);
    state.numRows ++;
    return state;
} |
- There are two arguments for
avg_var_transition
, as specified in avg_var.sql_in
. The first one is an array of SQL double type, corresponding to the current mean, variance, and number of rows traversed, and the second one is a double representing the current tuple value.
- We will describe
class AvgVarTransitionState
later. Basically it takes args[0]
, a SQL double array, passes the data to the appropriate C++ types and stores them in the state
instance.
- Both the mean and the variance are updated in an online manner to avoid accumulating a large intermediate sum.
Merge function
Code Block |
---|
|
/**
 * @brief Merge step: combine two transition states into one
 *
 * args[0] is the state that receives the result; args[1] is the state
 * that is folded into it.
 */
AnyType
avg_var_merge_states::run(AnyType& args) {
    AvgVarTransitionState<MutableArrayHandle<double> > merged = args[0];
    AvgVarTransitionState<ArrayHandle<double> > incoming = args[1];

    // The combination itself is implemented by operator+= of
    // AvgVarTransitionState.
    merged += incoming;

    return merged;
} |
- Again: the arguments contained in
AnyType& args
are defined in avg_var.sql_in
.
- The details are hidden in the method of class
AvgVarTransitionState
which overloads the operator +=.
Final function
Code Block |
---|
|
/**
 * @brief Final step: turn the transition state into the aggregate result
 *
 * Returns Null when no rows were seen, otherwise the state itself.
 */
AnyType
avg_var_final::run(AnyType& args) {
    AvgVarTransitionState<MutableArrayHandle<double> > finalState = args[0];

    // An aggregate over an empty data set yields Null; this mirrors how
    // PostgreSQL treats sum or avg on empty inputs.
    if (finalState.numRows == 0) {
        return Null();
    }

    return finalState;
} |
- Class
AvgVarTransitionState
overloads the AnyType()
operator such that we can directly return state, an instance of AvgVarTransitionState
, while the function is expected to return an AnyType
.
Bridging class
Below is the method that overloads the operator +=
for the bridging class AvgVarTransitionState
:
- There are two arguments for
avg_var_transition
, as specified in avg_var.sql_in
. The first one is an array of SQL double type, corresponding to the current mean, variance, and number of rows traversed, and the second one is a double representing the current tuple value.
- We will describe
class AvgVarTransitionState
later. Basically it takes args[0]
, a SQL double array, passes the data to the appropriate C++ types and stores them in the state
instance.
- We compute the average and variance in an on-line manner by overloading the operator
+=
in the class AvgVarTransitionState.
Merge function
Code Block |
---|
|
/**
 * @brief Merge step: combine two transition states into one
 *
 * args[0] is the state that receives the result; args[1] is the state
 * that is folded into it.
 */
AnyType
avg_var_merge_states::run(AnyType& args) {
    AvgVarTransitionState<MutableArrayHandle<double> > target = args[0];
    AvgVarTransitionState<ArrayHandle<double> > other = args[1];

    // operator+= of AvgVarTransitionState performs the merge.
    target += other;

    return target;
} |
- Again: the arguments contained in
AnyType& args
are defined in avg_var.sql_in
.
- The details are hidden in the method of class
AvgVarTransitionState
which overloads the operator +=.
Final function
Code Block |
---|
|
/**
 * @brief Final step: produce the aggregate result from the last state
 *
 * Yields Null if the state has seen zero rows, otherwise the state.
 */
AnyType
avg_var_final::run(AnyType& args) {
    AvgVarTransitionState<MutableArrayHandle<double> > result = args[0];

    // No input rows: return Null, which is the usual behavior of aggregate
    // functions on empty data sets (e.g. PostgreSQL's sum or avg).
    if (result.numRows == 0) {
        return Null();
    }

    return result;
} |
- Class
AvgVarTransitionState
overloads the AnyType()
operator such that we can directly return state, an instance of AvgVarTransitionState
, while the function is expected to return an AnyType
.
Bridging class
Below are the methods that overload the operator +=
for the bridging class AvgVarTransitionState
:
Code Block |
---|
|
/**
 * @brief Update state with a new data point
 *
 * Updates avg and var incrementally from the new value. numRows is read
 * but not modified here, so the caller is responsible for incrementing it
 * after this call.
 *
 * This overload is intentionally not a template: a template parameter that
 * appears in no function argument cannot be deduced, which would make
 * `state += x` impossible to call.
 *
 * @param x The new data point
 * @return Reference to this state, to allow chaining
 */
AvgVarTransitionState &operator+=(const double x){
    const double diff = (x - avg);
    const double normalizer = static_cast<double>(numRows + 1);
    // online update mean
    avg += diff / normalizer;
    // online update variance; new_diff uses the already-updated mean
    const double new_diff = (x - avg);
    const double a = static_cast<double>(numRows) / normalizer;
    var = (var * a) + (diff * new_diff) / normalizer;
    return *this;
}
|
Code Block |
---|
|
/**
 * @brief Merge with another State object
 *
 * We update mean and variance in an online fashion
 * to avoid intermediate large sum.
 *
 * @param inOtherState The state to fold into this one
 * @return Reference to this state
 * @throws std::logic_error if the two states have different storage sizes
 */
template <class OtherHandle>
AvgVarTransitionState &operator+=(
    const AvgVarTransitionState<OtherHandle> &inOtherState) {
    if (mStorage.size() != inOtherState.mStorage.size())
        throw std::logic_error("Internal error: Incompatible transition "
                               "states");
    // Keep the full 64-bit width: a 16-bit count would wrap for states that
    // have seen more than 65535 rows.
    const uint64_t numRows_ = static_cast<uint64_t>(inOtherState.numRows);
    // Nothing to merge. Returning early also avoids 0/0 (NaN) below when
    // both states are empty.
    if (numRows_ == 0)
        return *this;
    const double avg_ = inOtherState.avg;
    const double var_ = inOtherState.var;
    const double totalNumRows = static_cast<double>(numRows + numRows_);
    // p and p_ are the fractions of rows contributed by each state
    const double p = static_cast<double>(numRows) / totalNumRows;
    const double p_ = static_cast<double>(numRows_) / totalNumRows;
    const double totalAvg = avg * p + avg_ * p_;
    // offsets of each state's mean from the combined mean
    const double a = avg - totalAvg;
    const double a_ = avg_ - totalAvg;
    numRows += numRows_;
    var = p * var + p_ * var_ + p * a * a + p_ * a_ * a_;
    avg = totalAvg;
    return *this;
} |
...