THIS IS A TEST INSTANCE. ALL YOUR CHANGES WILL BE LOST!!!!
This guide explains all of the elements needed to successfully develop and plug in a new MADlib® module.
...
Code Block | ||
---|---|---|
| ||
/**
 * @brief Update state with a new data point
 *
 * Folds one observation into the running mean and (population) variance
 * using an incremental update, so no sum over all rows is ever stored.
 *
 * @param x The new observation.
 * @return Reference to this state, so the operator can be chained.
 *
 * @note This operator reads numRows but does not modify it; presumably the
 *       caller increments numRows after the update -- verify against the
 *       transition function.
 * @note NOTE(review): OtherHandle is not used in the signature and therefore
 *       cannot be deduced from a plain `state += x` call. Consider dropping
 *       the template header once all callers have been checked.
 */
template <class OtherHandle>
AvgVarTransitionState &operator+=(const double x){
    // Distance of the new point from the mean *before* the update.
    double diff = (x - avg);
    double normalizer = static_cast<double>(numRows + 1);

    // online update mean
    this->avg += diff / normalizer;

    // online update variance; new_diff uses the mean *after* the update
    double new_diff = (x - avg);
    double a = static_cast<double>(numRows) / normalizer;
    this->var = (var * a) + (diff * new_diff) / normalizer;

    // The declared return type is a reference; flowing off the end of a
    // non-void function is undefined behaviour.
    return *this;
}
/**
 * @brief Merge with another State object
 *
 * We update mean and variance in an online fashion
 * to avoid intermediate large sum: both statistics are combined as
 * row-count-weighted averages, and the variance additionally picks up the
 * squared distance of each partial mean from the combined mean.
 *
 * @param inOtherState State to fold into this one. It is not modified.
 * @return Reference to this state after the merge.
 * @throws std::logic_error if the two states' storage sizes differ.
 */
template <class OtherHandle>
AvgVarTransitionState &operator+=(
    const AvgVarTransitionState<OtherHandle> &inOtherState) {
    if (mStorage.size() != inOtherState.mStorage.size())
        throw std::logic_error("Internal error: Incompatible transition "
                               "states");

    uint64_t numRows_ = static_cast<uint64_t>(inOtherState.numRows);
    // An empty state contributes nothing. Returning early also avoids the
    // 0/0 division below when *both* states are empty, which would turn
    // avg and var into NaN and poison every later merge.
    if (numRows_ == 0)
        return *this;

    double avg_ = inOtherState.avg;
    double var_ = inOtherState.var;
    double totalNumRows = static_cast<double>(numRows + numRows_);

    // Weights of each side in the combined state (p + p_ == 1).
    double p = static_cast<double>(numRows) / totalNumRows;
    double p_ = static_cast<double>(numRows_) / totalNumRows;
    double totalAvg = avg * p + avg_ * p_;

    // Offsets of each partial mean from the combined mean.
    double a = avg - totalAvg;
    double a_ = avg_ - totalAvg;

    numRows += numRows_;
    var = p * var + p_ * var_ + p * a * a + p_ * a_ * a_;
    avg = totalAvg;
    return *this;
} |
...
Code Block | ||
---|---|---|
| ||
-- Compute the average, variance and row count of second_attack in one pass.
SELECT madlib.avg_var(second_attack)
FROM patients;
-- ************ --
--    Result    --
-- ************ --
-- +-------------------+
-- | avg_var           |
-- |-------------------|
-- | [0.5, 0.25, 20.0] |
-- +-------------------+
-- (average, variance, count) -- |
...
Anchor | ||||
---|---|---|---|---|
|
...
The example below demonstrates the usage of madlib.logregr_simple_train
on the patients
table we used earlier. The trained classification model is stored in the table called logreg_mdl
and can be viewed using a standard SQL query.
Code Block | ||
---|---|---|
| ||
-- Train a logistic regression model on the patients table.
-- Each argument sits on its own line so that its trailing "--" comment
-- cannot swallow the arguments that follow it.
SELECT madlib.logregr_simple_train(
    'patients',                            -- source table
    'logreg_mdl',                          -- output table
    'second_attack',                       -- labels
    'ARRAY[1, treatment, trait_anxiety]'); -- features
-- Inspect the trained model.
SELECT * FROM logreg_mdl;
-- ************ --
--    Result    --
-- ************ --
-- +--------------------------------------------------+------------------+
-- | coef                                             | log_likelihood   |
-- |--------------------------------------------------+------------------|
-- | [-6.27176619714, -0.84168872422, 0.116267554551] | -9.42379         |
-- +--------------------------------------------------+------------------+ |
...