/* ----------------------------------------------------------------------- *//** * * @file avgerage.cpp * * @brief Compute the average of vectors * *//* ----------------------------------------------------------------------- */ #include #include #include "average.hpp" namespace madlib { // Use Eigen using namespace dbal::eigen_integration; namespace modules { namespace linalg { /** * @brief Transition state for computing average of vectors * * Note: We assume that the DOUBLE PRECISION array is initialized by the * database with length 3, and all elemenets are 0. Handle::operator[] will * perform bounds checking. */ template class AvgVectorState { template friend class AvgVectorState; public: AvgVectorState(const AnyType &inArray) : mStorage(inArray.getAs()) { rebind(static_cast(mStorage[1])); } inline operator AnyType() const { return mStorage; } inline void initialize(const Allocator &inAllocator, uint32_t inNumDimensions) { mStorage = inAllocator.allocateArray(arraySize(inNumDimensions)); rebind(inNumDimensions); numDimensions = inNumDimensions; } template AvgVectorState &operator+=( const AvgVectorState &inOtherState) { numRows += inOtherState.numRows; sumOfVectors += inOtherState.sumOfVectors; return *this; } private: static inline size_t arraySize(uint32_t inNumDimensions) { return static_cast(2 + inNumDimensions); } /** * @brief Rebind to a new storage array * * @param inNumDimensions The number of dimensions * * Array layout (iteration refers to one aggregate-function call): * Inter-iteration components (updated in final function): * - 0: numRows (number of rows already processed in this iteration) * - 1: numDimensions (dimension of space that points are from) * - 2: sumOfPoints (vector with \c numDimensions rows) */ void rebind(uint32_t inNumDimensions) { numRows.rebind(&mStorage[0]); numDimensions.rebind(&mStorage[1]); sumOfVectors.rebind(&mStorage[2], inNumDimensions); madlib_assert(mStorage.size() >= arraySize(inNumDimensions), std::runtime_error("Out-of-bounds array access detected.")); } Handle mStorage; public: typename HandleTraits::ReferenceToUInt64 numRows; typename HandleTraits::ReferenceToUInt32 numDimensions; typename HandleTraits::ColumnVectorTransparentHandleMap sumOfVectors; }; AnyType avg_vector_transition::run(AnyType& args) { AvgVectorState > state = args[0]; if (args[1].isNull()) { return args[0]; } MappedColumnVector x; try { MappedColumnVector xx = args[1].getAs(); x.rebind(xx.memoryHandle(), xx.size()); } catch (const ArrayWithNullException &e) { return args[0]; } if (state.numRows == 0) state.initialize(*this, static_cast(x.size())); else if (x.size() != state.sumOfVectors.size() || state.numDimensions != static_cast(state.sumOfVectors.size())) throw std::invalid_argument("Invalid arguments: Dimensions of points " "not consistent."); ++state.numRows; state.sumOfVectors += x; return state; } AnyType avg_vector_merge::run(AnyType& args) { AvgVectorState > stateLeft = args[0]; AvgVectorState > stateRight = args[1]; // We first handle the trivial case where this function is called with one // of the states being the initial state if (stateLeft.numRows == 0) return stateRight; else if (stateRight.numRows == 0) return stateLeft; if (stateLeft.numDimensions != stateRight.numDimensions) { throw std::invalid_argument("Invalid arguments: Dimensions of points " "not consistent."); } stateLeft += stateRight; return stateLeft; } AnyType avg_vector_final::run(AnyType& args) { AvgVectorState > state = args[0]; MutableNativeColumnVector avgVector(allocateArray( state.sumOfVectors.size())); avgVector = state.sumOfVectors / static_cast(state.numRows); return avgVector; } AnyType normalized_avg_vector_transition::run(AnyType& args) { AvgVectorState > state = args[0]; if (args[1].isNull()) { return args[0]; } MappedColumnVector x; try { MappedColumnVector xx = args[1].getAs(); x.rebind(xx.memoryHandle(), xx.size()); } catch (const ArrayWithNullException &e) { return args[0]; } if (state.numRows == 0) state.initialize(*this, static_cast(x.size())); else if (x.size() != state.sumOfVectors.size() || state.numDimensions != static_cast(state.sumOfVectors.size())) throw std::invalid_argument("Invalid arguments: Dimensions of points " "not consistent."); ++state.numRows; state.sumOfVectors += x.normalized(); return state; } AnyType normalized_avg_vector_final::run(AnyType& args) { AvgVectorState > state = args[0]; MutableNativeColumnVector avgVector(allocateArray( state.sumOfVectors.size())); avgVector = (state.sumOfVectors / static_cast(state.numRows)).normalized(); return avgVector; } } // namespace linalg } // namespace modules } // namespace regress