/**
   Functions that compute array average and standard deviation
 */

// NOTE(review): the template arguments in this file (e.g. the Handle types
// passed to ScalesState, the getAs<...>() targets and the allocateArray<...>
// parameters) were reconstructed from the usual MADlib conventions because
// they were not visible in the reviewed text -- confirm against the
// abstraction-layer headers before merging.

#include "dbconnector/dbconnector.hpp"
#include "modules/shared/HandleTraits.hpp"
#include "utils_regularization.hpp"

#include <cfloat>
#include <cmath>
#include <stdexcept>
#include <stdint.h>

namespace madlib {
namespace modules {
namespace convex {

using namespace madlib::dbal::eigen_integration;

/**
 * @brief Aggregate state used to accumulate per-column mean and standard
 *        deviation.
 *
 * The state is a view onto one flat array of doubles laid out as
 *   [0]                  dimension
 *   [1]                  numRows
 *   [2, 2 + dim)         mean
 *   [2 + dim, 2 + 2*dim) std
 * While rows are being accumulated, `mean` holds the running sum of the
 * values and `std` holds the running sum of their squares; the final
 * functions turn those sums into the actual mean / standard deviation.
 *
 * @tparam Handle Array handle type that provides the backing storage.
 */
template <class Handle>
class ScalesState
{
    template <class OtherHandle> friend class ScalesState;

  public:
    /** Wrap the array held by @p inArray and bind all members onto it. */
    ScalesState (const AnyType& inArray):
        mStorage(inArray.getAs<Handle>())
    {
        rebind();
    }

    /** Hand the backing array back to the database layer. */
    inline operator AnyType () const
    {
        return mStorage;
    }

    /**
     * @brief Allocate backing storage for @p inDimension columns.
     *
     * `dimension` is written before rebind() is called because rebind()
     * reads it to compute where `mean` and `std` start.
     */
    inline void allocate (const Allocator& inAllocator,
                          uint32_t inDimension)
    {
        mStorage = inAllocator.allocateArray<double, dbal::AggregateContext,
                                             dbal::DoZero,
                                             dbal::ThrowBadAlloc>(
            arraySize(inDimension));

        dimension.rebind(&mStorage[0]);
        dimension = inDimension;

        rebind();
    }

    /**
     * @brief Element-wise copy of another state's storage into this one.
     *
     * Copies exactly mStorage.size() elements; no size check is made on
     * @p inOtherState.
     */
    template <class OtherHandle>
    ScalesState& operator= (const ScalesState<OtherHandle>& inOtherState)
    {
        for (size_t i = 0; i < mStorage.size(); i++)
            mStorage[i] = inOtherState.mStorage[i];
        return *this;
    }

    /** Number of array elements needed for a state of @p inDimension. */
    static inline uint32_t arraySize (const uint32_t inDimension)
    {
        return 2 + 2 * inDimension;
    }

  protected:
    /** Point every member at its slot of the backing array. */
    void rebind ()
    {
        dimension.rebind(&mStorage[0]);
        numRows.rebind(&mStorage[1]);
        mean.rebind(&mStorage[2], dimension);
        std.rebind(&mStorage[2 + dimension], dimension);
    }

    Handle mStorage;

  public:
    typename HandleTraits<Handle>::ReferenceToUInt32 dimension;
    typename HandleTraits<Handle>::ReferenceToUInt64 numRows;
    typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap mean;
    typename HandleTraits<Handle>::ColumnVectorTransparentHandleMap std;
};

// ------------------------------------------------------------------------

/**
 * @brief Whether @p number is zero up to machine precision.
 *
 * Uses an absolute tolerance of DBL_EPSILON. The previous test,
 * fabs(number) <= DBL_EPSILON * fabs(number), could only succeed for an
 * exact 0.0, so it never provided any tolerance.
 *
 * @return true if |number| <= DBL_EPSILON; false otherwise (including NaN).
 */
bool close_to_zero(double number) {
    return std::fabs(number) <= DBL_EPSILON;
}

namespace {

/**
 * @brief Standard deviation from E[x^2] and E[x].
 *
 * E[x^2] - E[x]^2 can come out slightly negative through rounding when a
 * column is (nearly) constant; taking sqrt of that would yield NaN, so the
 * variance is clamped at zero. A NaN variance is deliberately not clamped
 * and still propagates as NaN.
 */
inline double stddev_from_moments(double meanOfSquares, double mean) {
    const double variance = meanOfSquares - mean * mean;
    return variance < 0.0 ? 0.0 : std::sqrt(variance);
}

} // anonymous namespace

// ------------------------------------------------------------------------

/**
 * @brief Transition step: add one row to the running sums.
 *
 * args[0] is the state, args[1] the row as an array, args[2] the dimension.
 * A row containing NULL values is skipped and the state returned unchanged.
 *
 * @throws std::invalid_argument if the dimension is negative or the row has
 *         fewer elements than the state's dimension.
 */
AnyType
utils_var_scales_transition::run (AnyType& args)
{
    ScalesState<MutableArrayHandle<double> > state = args[0];
    MappedColumnVector x(NULL);
    try {
        new (&x) MappedColumnVector(args[1].getAs<MappedColumnVector>());
    } catch(const ArrayWithNullException &e) {
        // warning("Input array contains NULL values, skipping it for mean and std computations");
        return state;
    }

    if (state.numRows == 0)
    {
        int dimension = args[2].getAs<int>();
        if (dimension < 0)
            throw std::invalid_argument(
                "Dimension must be non-negative.");

        state.allocate(*this, static_cast<uint32_t>(dimension));
        state.dimension = dimension;
        state.numRows = 0;
        state.mean.setZero();
        state.std.setZero();
    }

    // Guard the element accesses below against a row that is too short.
    const uint32_t dim = state.dimension;
    if (static_cast<uint64_t>(x.size()) < dim)
        throw std::invalid_argument(
            "Input array has fewer elements than the given dimension.");

    for (uint32_t i = 0; i < dim; i++)
    {
        state.mean(i) += x(i);
        state.std(i) += x(i) * x(i);
    }

    state.numRows++;

    return state;
}

// ------------------------------------------------------------------------

/**
 * @brief Merge step: combine two partial states by adding their sums.
 *
 * If either state has seen no rows, the other one is returned as is.
 */
AnyType
utils_var_scales_merge::run (AnyType& args)
{
    ScalesState<MutableArrayHandle<double> > state1 = args[0];
    ScalesState<ArrayHandle<double> > state2 = args[1];

    if (state1.numRows == 0)
        return state2;
    else if (state2.numRows == 0)
        return state1;

    state1.mean += state2.mean;
    state1.std += state2.std;
    state1.numRows += state2.numRows;

    return state1;
}

// ------------------------------------------------------------------------

/**
 * @brief Final step: turn the accumulated sums into mean and standard
 *        deviation (population form, i.e. divided by numRows).
 *
 * @return Null() if no rows were accumulated, otherwise the state.
 */
AnyType
utils_var_scales_final::run (AnyType& args)
{
    ScalesState<MutableArrayHandle<double> > state = args[0];

    if (state.numRows == 0) return Null();

    const double rowCount = static_cast<double>(state.numRows);
    state.mean /= rowCount;
    state.std /= rowCount;
    for (uint32_t i = 0; i < state.dimension; i++)
        state.std(i) = stddev_from_moments(state.std(i), state.mean(i));

    return state;
}

// ------------------------------------------------------------------------

/*
  This final function is used when we want to have a standard deviation of
  1.0 instead of 0.0 for any dimension. This is most likely to be used by
  neural networks, since in other modules such as elastic_net, a std value
  of 0.0 results in setting the corresponding coefficient value to 0.0 as
  well. But in neural networks, we will end up using a std value of 1.0
  instead.
*/
/**
 * @brief Final step that never reports a zero standard deviation.
 *
 * With exactly one accumulated row every column gets mean 0.0 and std 1.0.
 * Otherwise a std that is close to zero is replaced by 1.0.
 *
 * @return Null() if no rows were accumulated, otherwise the state.
 */
AnyType
utils_var_scales_non_zero_std_final::run (AnyType& args)
{
    ScalesState<MutableArrayHandle<double> > state = args[0];

    if (state.numRows == 0) return Null();

    const bool singleRow = (state.numRows == 1);
    const double rowCount = static_cast<double>(state.numRows);
    state.mean /= rowCount;
    state.std /= rowCount;
    for (uint32_t i = 0; i < state.dimension; i++) {
        if (singleRow) {
            state.mean(i) = 0.0;
            state.std(i) = 1.0;
        } else {
            state.std(i) = stddev_from_moments(state.std(i), state.mean(i));
            if (close_to_zero(state.std(i)))
                state.std(i) = 1.0;
        }
    }

    return state;
}

// ------------------------------------------------------------------------

/**
 * @brief Unpack a finished state into a (mean, std) tuple.
 */
AnyType
__utils_var_scales_result::run (AnyType& args)
{
    ScalesState<ArrayHandle<double> > state = args[0];
    AnyType tuple;

    tuple << state.mean << state.std;

    return tuple;
}

// ------------------------------------------------------------------------

/**
 * @brief Normalize one row: x(i) = (x(i) - mean(i)) / std(i).
 *
 * args[0] is the row, args[1] the means, args[2] the standard deviations.
 * Where std(i) is close to zero the value is only centered, not scaled.
 *
 * @return Null() if the row contains NULL values, otherwise the normalized
 *         row.
 * @throws std::invalid_argument if mean or std has fewer elements than the
 *         row.
 */
AnyType
utils_normalize_data::run (AnyType& args)
{
    MutableMappedColumnVector x(NULL);
    try {
        new (&x) MutableMappedColumnVector(
            args[0].getAs<MutableMappedColumnVector>());
    } catch(const ArrayWithNullException &e) {
        // warning("Input array contains NULL values, skipping this array normalization");
        return Null();
    }

    MappedColumnVector mean = args[1].getAs<MappedColumnVector>();
    MappedColumnVector stddev = args[2].getAs<MappedColumnVector>();

    // Guard the element accesses below against short mean/std arrays.
    if (mean.size() < x.size() || stddev.size() < x.size())
        throw std::invalid_argument(
            "Mean and std arrays must be at least as long as the input array.");

    for (int i = 0; i < x.size(); i++)
    {
        const double centered = x(i) - mean(i);
        if (close_to_zero(stddev(i)))
            x(i) = centered;
        else
            x(i) = centered / stddev(i);
    }
    return x;
}

}
}
}