/* ----------------------------------------------------------------------- *//**
 *
 * @file linear.cpp
 *
 * @brief Linear-regression functions
 *
 * Implements the transition, merge and final steps of three aggregates:
 * plain linear regression, the robust (Huber-White) variance estimate, and
 * the Breusch–Pagan heteroskedasticity test. Every step is a `run` method
 * that receives its arguments through an AnyType and returns an AnyType.
 *
 *//* ----------------------------------------------------------------------- */

// NOTE(review): in this copy of the file the first #include names no header,
// and the getAs() / static_cast calls below carry no explicit template
// argument. These look like they were stripped somewhere along the way --
// confirm against the repository before building.
#include

#include "LinearRegression_proto.hpp"
#include "LinearRegression_impl.hpp"
#include "linear.hpp"

namespace madlib {

namespace modules {

namespace regress {

// -----------------------------------------------------------------------
// Linear regression
// -----------------------------------------------------------------------

// State types used by the three linear-regression steps below.
// NOTE(review): both typedefs name the same type here; presumably they are
// meant to differ in a (missing) template argument -- TODO confirm.
typedef LinearRegressionAccumulator LinRegrState;
typedef LinearRegressionAccumulator MutableLinRegrState;

/**
 * @brief Transition step: fold one (x, y) observation into the state
 *
 * @param args args[0] is the current state, args[1] is read as the scalar y
 *     and args[2] as the vector x.
 * @return The storage of the updated state. args[0] is returned unchanged
 *     when args[1] or args[2] is NULL, or when reading args[2] raises
 *     ArrayWithNullException.
 */
AnyType
linregr_transition::run(AnyType& args) {
    MutableLinRegrState state = args[0].getAs();
    // Rows with a NULL y or a NULL x leave the state untouched.
    if (args[1].isNull() || args[2].isNull()) { return args[0]; }
    double y = args[1].getAs();
    // x is declared outside the try block so that it is still in scope when
    // the tuple is built below.
    MappedColumnVector x;
    try {
        MappedColumnVector xx = args[2].getAs();
        // Point x at the memory handle and size of the vector just read.
        x.rebind(xx.memoryHandle(), xx.size());
    } catch (const ArrayWithNullException &e) {
        // Reading args[2] failed with ArrayWithNullException: skip this row.
        return args[0];
    }

    state << MutableLinRegrState::tuple_type(x, y);
    return state.storage();
}

/**
 * @brief Merge step: combine two transition states
 *
 * @param args args[0] is the left state (modified), args[1] the right state.
 * @return The storage of the left state after the right state has been
 *     streamed into it.
 */
AnyType
linregr_merge_states::run(AnyType& args) {
    MutableLinRegrState stateLeft = args[0].getAs();
    LinRegrState stateRight = args[1].getAs();

    // Unlike the robust and hetero merge steps below, there is no explicit
    // shortcut for an empty state here; both cases go through operator<<.
    stateLeft << stateRight;
    return stateLeft.storage();
}

/**
 * @brief Final step: turn the accumulated state into the result tuple
 *
 * @param args args[0] is the accumulated state.
 * @return Null() when no rows were accumulated; otherwise a tuple holding, in
 *     this order: coef, r2, stdErr, tStats, pValues (or Null()),
 *     sqrt(conditionNo), numRows and vcov.
 */
AnyType
linregr_final::run(AnyType& args) {
    LinRegrState state = args[0].getAs();

    // If we haven't seen any data, just return Null. This is the standard
    // behavior of aggregate functions on empty data sets (compare, e.g.,
    // how PostgreSQL handles sum or avg on empty inputs)
    if (state.numRows == 0)
        return Null();

    AnyType tuple;
    LinearRegression result(state);
    // The order of the << operands fixes the order of the output fields.
    tuple << result.coef << result.r2 << result.stdErr << result.tStats
        // p-values are emitted only when there are more rows than the width
        // of x; otherwise that field is Null().
        << (state.numRows > state.widthOfX
            ? result.pValues
            : Null())
        // Note that the square root of conditionNo is reported, not the
        // stored value itself.
        << sqrt(result.conditionNo) << static_cast(state.numRows)
        << result.vcov;
    return tuple;
}

// -----------------------------------------------------------------------

// -----------------------------------------------------------------------
// Robust linear regression variance estimate using the Huber-White estimator
// -----------------------------------------------------------------------

// State types used by the three robust-variance steps below.
// NOTE(review): both typedefs name the same type here; presumably they are
// meant to differ in a (missing) template argument -- TODO confirm.
typedef RobustLinearRegressionAccumulator RobustLinRegrState;
typedef RobustLinearRegressionAccumulator MutableRobustLinRegrState;

/**
 * @brief Transition step: fold one (x, y, coef) observation into the state
 *
 * @param args args[0] is the current state, args[1] is read as the scalar y,
 *     args[2] as the vector x and args[3] as the vector coef.
 * @return The storage of the updated state. args[0] is returned unchanged
 *     when args[1] or args[2] is NULL, or when reading args[2] raises
 *     ArrayWithNullException.
 */
AnyType
robust_linregr_transition::run(AnyType& args) {
    MutableRobustLinRegrState state = args[0].getAs();
    // Rows with a NULL y or a NULL x leave the state untouched.
    if (args[1].isNull() || args[2].isNull()) { return args[0]; }
    double y = args[1].getAs();
    // x is declared outside the try block so that it is still in scope when
    // the tuple is built below.
    MappedColumnVector x;
    try {
        MappedColumnVector xx = args[2].getAs();
        // Point x at the memory handle and size of the vector just read.
        x.rebind(xx.memoryHandle(), xx.size());
    } catch (const ArrayWithNullException &e) {
        // Reading args[2] failed with ArrayWithNullException: skip this row.
        return args[0];
    }
    // args[3] is read without a preceding isNull() check, and outside the
    // try block above.
    MappedColumnVector coef = args[3].getAs();

    state << RobustLinRegrState::tuple_type(x, y, coef);
    return state.storage();
}

/**
 * @brief Merge step: combine two transition states
 *
 * @param args args[0] is the left state (modified), args[1] the right state.
 * @return The storage of the non-empty state if the other one has seen no
 *     rows; otherwise the storage of the left state after the right state
 *     has been streamed into it.
 */
AnyType
robust_linregr_merge_states::run(AnyType& args) {
    MutableRobustLinRegrState stateLeft = args[0].getAs();
    RobustLinRegrState stateRight = args[1].getAs();

    // We first handle the trivial case where this function is called with one
    // of the states being the initial state
    if (stateLeft.numRows == 0) {
        return stateRight.storage();
    } else if (stateRight.numRows == 0) {
        return stateLeft.storage();
    }

    stateLeft << stateRight;
    return stateLeft.storage();
}

/**
 * @brief Final step: turn the accumulated state into the result tuple
 *
 * @param args args[0] is the accumulated state.
 * @return Null() when no rows were accumulated; otherwise a tuple holding, in
 *     this order: coef, stdErr, tStats and pValues (or Null()).
 */
AnyType
robust_linregr_final::run(AnyType& args) {
    RobustLinRegrState state = args[0].getAs();

    // If we haven't seen any data, just return Null. This is the standard
    // behavior of aggregate functions on empty data sets (compare, e.g.,
    // how PostgreSQL handles sum or avg on empty inputs)
    if (state.numRows == 0)
        return Null();

    AnyType tuple;
    RobustLinearRegression result(state);
    // The order of the << operands fixes the order of the output fields.
    tuple << result.coef << result.stdErr << result.tStats
        // p-values are emitted only when there are more rows than the width
        // of x; otherwise that field is Null().
        << (state.numRows > state.widthOfX
            ? result.pValues
            : Null());
    return tuple;
}

// -----------------------------------------------------------------------

// -----------------------------------------------------------------------
// Breusch–Pagan test for heteroskedasticity.
// This is the first step of the test and does not include correction for the
// standard errors if the data is heteroskedastic.
// -----------------------------------------------------------------------

// State types used by the three heteroskedasticity-test steps below.
// NOTE(review): both typedefs name the same type here; presumably they are
// meant to differ in a (missing) template argument -- TODO confirm.
typedef HeteroLinearRegressionAccumulator HeteroLinRegrState;
typedef HeteroLinearRegressionAccumulator MutableHeteroLinRegrState;

/**
 * @brief Transition step: fold one (x, y, coef) observation into the state
 *
 * @param args args[0] is the current state, args[1] is read as the scalar y,
 *     args[2] as the vector x and args[3] as the vector coef.
 * @return The storage of the updated state. args[0] is returned unchanged
 *     when args[1] or args[2] is NULL, or when reading args[2] raises
 *     ArrayWithNullException.
 */
AnyType
hetero_linregr_transition::run(AnyType& args) {
    MutableHeteroLinRegrState state = args[0].getAs();
    // Rows with a NULL y or a NULL x leave the state untouched.
    if (args[1].isNull() || args[2].isNull()) { return args[0]; }
    double y = args[1].getAs();
    // x is declared outside the try block so that it is still in scope when
    // the tuple is built below.
    MappedColumnVector x;
    try {
        MappedColumnVector xx = args[2].getAs();
        // Point x at the memory handle and size of the vector just read.
        x.rebind(xx.memoryHandle(), xx.size());
    } catch (const ArrayWithNullException &e) {
        // Reading args[2] failed with ArrayWithNullException: skip this row.
        return args[0];
    }
    // args[3] is read without a preceding isNull() check, and outside the
    // try block above.
    MappedColumnVector coef = args[3].getAs();

    // This accumulator's tuple type is named hetero_tuple_type, not
    // tuple_type as in the two sections above.
    state << MutableHeteroLinRegrState::hetero_tuple_type(x, y, coef);
    return state.storage();
}

/**
 * @brief Merge step: combine two transition states
 *
 * @param args args[0] is the left state (modified), args[1] the right state.
 * @return The storage of the non-empty state if the other one has seen no
 *     rows; otherwise the storage of the left state after the right state
 *     has been streamed into it.
 */
AnyType
hetero_linregr_merge_states::run(AnyType& args) {
    MutableHeteroLinRegrState stateLeft = args[0].getAs();
    HeteroLinRegrState stateRight = args[1].getAs();

    // We first handle the trivial case where this function is called with one
    // of the states being the initial state
    if (stateLeft.numRows == 0) {
        return stateRight.storage();
    } else if (stateRight.numRows == 0) {
        return stateLeft.storage();
    }

    stateLeft << stateRight;
    return stateLeft.storage();
}

/**
 * @brief Final step: turn the accumulated state into the result tuple
 *
 * @param args args[0] is the accumulated state.
 * @return Null() when no rows were accumulated; otherwise a tuple holding
 *     test_statistic followed by pValue.
 */
AnyType
hetero_linregr_final::run(AnyType& args) {
    HeteroLinRegrState state = args[0].getAs();

    // If we haven't seen any data, just return Null. This is the standard
    // behavior of aggregate functions on empty data sets (compare, e.g.,
    // how PostgreSQL handles sum or avg on empty inputs)
    if (state.numRows == 0)
        return Null();

    AnyType tuple;
    HeteroLinearRegression result(state);
    tuple << result.test_statistic << result.pValue;
    return tuple;
}

// -----------------------------------------------------------------------

} // namespace regress

} // namespace modules

} // namespace madlib