/* ----------------------------------------------------------------------- *//** * @file matrix_ops.cpp * * @date May 8, 2013 *//* ----------------------------------------------------------------------- */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "matrix_ops.hpp" namespace madlib { namespace modules { namespace linalg { using madlib::dbconnector::postgres::madlib_construct_array; using madlib::dbconnector::postgres::madlib_construct_md_array; // Use Eigen using namespace dbal::eigen_integration; AnyType matrix_densify_sfunc::run(AnyType & args) { int32_t col_dim = args[1].getAs(); int32_t col = args[2].getAs(); double val = args[3].getAs(); if(col_dim < 1){ std::stringstream errorMsg; errorMsg << "invalid argument - col (" << col << ") should be positive"; throw std::invalid_argument(errorMsg.str()); } if(col < 1 || col > col_dim){ std::stringstream errorMsg; errorMsg << "invalid argument - col (" << col << ") should be in the range of [1, " << col_dim << "]"; throw std::invalid_argument(errorMsg.str()); } MutableArrayHandle state(NULL); if (args[0].isNull()){ state = madlib_construct_array( NULL, col_dim, FLOAT8OID, sizeof(double), true, 'd'); }else{ state = args[0].getAs >(); } // we use col - 1 since // database expects col in [1, col_dim]; c++ expects col in [0, col_dim - 1] *(state.ptr() + col - 1) = val; return state; } AnyType matrix_mem_sum_sfunc::run(AnyType & args) { ArrayHandle m = args[1].getAs >(); if (m.dims() !=2){ throw std::invalid_argument( "invalid argument - 2-d array expected"); } int row_m = static_cast(m.sizeOfDim(0)); int col_m = static_cast(m.sizeOfDim(1)); // FIXME: construct_array functions circumvent the abstraction layer. These // should be replaced with appropriate Allocator:: calls. MutableArrayHandle state(NULL); if (args[0].isNull()){ int dims[2] = {row_m, col_m}; int lbs[2] = {1, 1}; state = madlib_construct_md_array( NULL, NULL, 2, dims, lbs, FLOAT8OID, sizeof(double), true, 'd'); }else{ state = args[0].getAs >(); } for(int i = 0; i < row_m; i++){ for(int j = 0; j < col_m; j++){ *(state.ptr() + i * col_m + j) += *(m.ptr() + i * col_m + j); } } return state; } AnyType matrix_blockize_sfunc::run(AnyType & args) { if(args[1].isNull()) return args[0]; int32_t row_id = args[1].getAs(); ArrayHandle row_vec = args[2].getAs >(); int32_t rsize = args[3].getAs(); int32_t csize = static_cast(row_vec.sizeOfDim(0)); if(rsize < 1){ throw std::invalid_argument( "invalid argument - block size should be positive"); } // FIXME: construct_array functions circumvent the abstraction layer. These // should be replaced with appropriate Allocator:: calls. MutableArrayHandle state(NULL); if (args[0].isNull()){ int32_t dims[2] = {rsize, csize}; int lbs[2] = {1, 1}; state = madlib_construct_md_array( NULL, NULL, 2, dims, lbs, FLOAT8OID, sizeof(double), true, 'd'); }else{ state = args[0].getAs >(); } // database represents row_id in [1, row_dim] memcpy(state.ptr() + ((row_id - 1) % rsize) * csize, row_vec.ptr(), csize * sizeof(double)); return state; } AnyType matrix_mem_mult::run(AnyType & args) { ArrayHandle a = args[0].getAs >(); ArrayHandle b = args[1].getAs >(); bool trans_b = args[2].getAs(); if (a.dims() != 2 || b.dims() !=2){ throw std::invalid_argument( "invalid argument - 2-d array expected"); } int row_a = static_cast(a.sizeOfDim(0)); int col_a = static_cast(a.sizeOfDim(1)); int row_b = static_cast(b.sizeOfDim(0)); int col_b = static_cast(b.sizeOfDim(1)); if ((!trans_b && col_a != row_b) || (trans_b && col_a != col_b)){ throw std::invalid_argument( "invalid argument - dimension mismatch"); } int dims[2] = {row_a, col_b}; if (trans_b) dims[1] = row_b; int lbs[2] = {1, 1}; // FIXME: construct_array functions circumvent the abstraction layer. These // should be replaced with appropriate Allocator:: calls. MutableArrayHandle r = madlib_construct_md_array( NULL, NULL, 2, dims, lbs, FLOAT8OID, sizeof(double), true, 'd'); for (int i = 0; i < row_a; i++){ for(int j = 0; j < col_a; j++){ for(int k = 0; k < dims[1]; k++){ *(r.ptr() + i * dims[1] + k) += trans_b ? *(a.ptr() + i * col_a + j) * *(b.ptr() + k * col_b + j) : *(a.ptr() + i * col_a + j) * *(b.ptr() + j * col_b + k); } } } return r; } AnyType matrix_mem_trans::run(AnyType & args) { ArrayHandle m = args[0].getAs >(); if (m.dims() != 2){ throw std::invalid_argument( "invalid argument - 2-d array expected"); } int row_m = static_cast(m.sizeOfDim(0)); int col_m = static_cast(m.sizeOfDim(1)); // FIXME: construct_array functions circumvent the abstraction layer. These // should be replaced with appropriate Allocator:: calls. int dims[2] = {col_m, row_m}; int lbs[2] = {1, 1}; MutableArrayHandle r = madlib_construct_md_array( NULL, NULL, 2, dims, lbs, FLOAT8OID, sizeof(double), true, 'd'); for (int i = 0; i < row_m; i++){ for(int j = 0; j < col_m; j++){ *(r.ptr() + j * row_m + i) = *(m.ptr() + i * col_m + j); } } return r; } AnyType rand_vector::run(AnyType & args) { int dim = args[0].getAs(); if (dim < 1) { throw std::invalid_argument("invalid argument - dim should be positive"); } MutableArrayHandle r = madlib_construct_array( NULL, dim, INT4OID, sizeof(int32_t), true, 'i'); for (int i = 0; i < dim; i++){ *(r.ptr() + i) = (int)(drand48() * 1000); } return r; } AnyType normal_vector::run(AnyType & args) { int dim = args[0].getAs(); double mu = args[1].getAs(); double sigma = args[2].getAs(); int seed = args[3].getAs(); if (dim < 1) { throw std::invalid_argument("invalid argument - dim should be positive"); } ColumnVector res(dim); boost::minstd_rand generator(seed); boost::normal_distribution<> nd_dist(mu, sigma); boost::variate_generator > nd(generator, nd_dist); for (int i = 0; i < dim; i++){ res(i) = (double)nd(); } return res; } AnyType bernoulli_vector::run(AnyType & args) { int dim = args[0].getAs(); double upper_val = args[1].getAs(); double lower_val = args[2].getAs(); double prob = args[3].getAs(); int seed = args[4].getAs(); if (dim < 1) { throw std::invalid_argument("invalid argument - dim should be positive"); } if (prob > 1 || prob < 0) { throw std::invalid_argument("invalid argument - probability should be in [0,1]"); } ColumnVector res(dim); boost::minstd_rand generator(seed); boost::bernoulli_distribution<> bn_dist(prob); boost::variate_generator > bn(generator, bn_dist); for (int i = 0; i < dim; i++) { res(i) = bn() ? upper_val : lower_val; } return res; } AnyType uniform_vector::run(AnyType & args) { int dim = args[0].getAs(); double min_ = args[1].getAs(); double max_ = args[2].getAs(); int seed = args[3].getAs(); if (dim < 1) { throw std::invalid_argument("invalid argument - dim should be positive"); } ColumnVector res(dim); boost::minstd_rand generator(seed); boost::uniform_real<> uni_dist(min_, max_); boost::variate_generator > uni(generator, uni_dist); for (int i = 0; i < dim; i++){ res(i) = (double)uni(); } return res; } AnyType matrix_vec_mult_in_mem_2d::run(AnyType & args){ MappedColumnVector vec = args[0].getAs(); MappedMatrix mat = args[1].getAs(); // Note mat is constructed in the column-first order // which means that mat is actually transposed if(vec.size() != mat.cols()){ throw std::invalid_argument( "dimensions mismatch: vec.size() != matrix.rows()"); }; // trans(vec) * trans(mat) = mat * vec Matrix r = mat * vec; ColumnVector v = r.col(0); return v; } AnyType matrix_vec_mult_in_mem_1d::run(AnyType & args){ MappedColumnVector vec1 = args[0].getAs(); // matrix stored as a 1-d array MappedColumnVector vec2 = args[1].getAs(); if(vec2.size() % vec1.size() != 0){ throw std::invalid_argument( "dimensions mismatch: matrix.size() is not multiples of vec.size()"); }; MappedMatrix mat; // the rebinding happens in column-major mat.rebind(vec2.memoryHandle(), vec1.size(), vec2.size()/vec1.size()); Matrix r = trans(mat) * vec1; ColumnVector v = r.col(0); return v; } AnyType row_fold::run(AnyType & args){ MappedColumnVector vec = args[0].getAs(); MappedIntegerVector pat = args[1].getAs(); if (vec.size() != pat.sum()) { throw std::invalid_argument( "dimensions mismatch: row_in.size() != pattern.sum()"); } ColumnVector r(pat.size()); for (int i = 0, j = 0; i < pat.size(); j += pat[i++]) r[i] = vec.segment(j, pat[i]).prod(); return r; } AnyType rand_block::run(AnyType & args) { int row_dim = args[0].getAs(); int col_dim = args[1].getAs(); if (row_dim < 1 || col_dim < 1) { throw std::invalid_argument("invalid argument - row_dim and col_dim \ should be positive"); } // FIXME: construct_array functions circumvent the abstraction layer. These // should be replaced with appropriate Allocator:: calls. int dims[2] = {row_dim, col_dim}; int lbs[2] = {1, 1}; MutableArrayHandle r = madlib_construct_md_array( NULL, NULL, 2, dims, lbs, INT4OID, sizeof(int32_t), true, 'i'); for (int i = 0; i < row_dim; i++){ for(int j = 0; j < col_dim; j++){ *(r.ptr() + i * col_dim + j) = (int32_t)(drand48() * 1000); } } return r; } typedef struct __sr_ctx1{ const double * inarray; int32_t dim; int32_t maxcall; int32_t size; int32_t curcall; } sr_ctx1; // @brief: split a vector into multiple vectors void * row_split::SRF_init(AnyType &args) { // vector to be split ArrayHandle inarray = args[0].getAs >(); // size of each split int32_t size = args[1].getAs(); if (size < 1) { // throw std::invalid_argument will cause crash elog(ERROR, "invalid argument - the spliting size should be positive"); } sr_ctx1 * ctx = new sr_ctx1; ctx->inarray = inarray.ptr(); // length of inarray ctx->dim = static_cast(inarray.sizeOfDim(0)); ctx->size = size; // max number of splits to be formed ctx->maxcall = static_cast( ceil(static_cast(ctx->dim) / size)); // current split index ctx->curcall = 0; return ctx; } /** * @brief The function is used to return the next row by the SRF. **/ AnyType row_split::SRF_next(void *user_fctx, bool *is_last_call) { sr_ctx1 * ctx = (sr_ctx1 *) user_fctx; if (ctx->maxcall == 0) { *is_last_call = true; return Null(); } int32_t size = ctx->size; if (ctx->maxcall == 1 && ctx->dim % ctx->size != 0) { // for last split we might not have enough elements to fill the whole split // in this case we update size to the number of residual elements in last split size = ctx->dim % ctx->size; } // FIXME: construct_array functions circumvent the abstraction layer. These // should be replaced with appropriate Allocator:: calls. MutableArrayHandle outarray( madlib_construct_array( NULL, size, FLOAT8OID, sizeof(double), true, 'd')); memcpy(outarray.ptr(), ctx->inarray + ctx->curcall * ctx->size, size * sizeof(double)); ctx->curcall++; ctx->maxcall--; *is_last_call = false; return outarray; } AnyType matrix_unblockize_sfunc::run(AnyType & args) { if (args[1].isNull() || args[2].isNull() || args[3].isNull()) return args[0]; int32_t total_col_dim = args[1].getAs(); int32_t col_id = args[2].getAs(); ArrayHandle row_vec = args[3].getAs >(); int32_t col_dim = static_cast(row_vec.sizeOfDim(0)); if(total_col_dim < 1){ throw std::invalid_argument( "invalid argument - total_col_dim should be positive"); } if(col_id <= 0){ throw std::invalid_argument( "invalid argument - col_id should be positive"); } if(col_id > total_col_dim){ throw std::invalid_argument( "invalid argument - col_id should be in the range of [1, total_col_dim]"); } // FIXME: construct_array functions circumvent the abstraction layer. These // should be replaced with appropriate Allocator:: calls. MutableArrayHandle state(NULL); if (args[0].isNull()){ state = madlib_construct_array( NULL, total_col_dim, FLOAT8OID, sizeof(double), true, 'd'); }else{ state = args[0].getAs >(); } memcpy(state.ptr() + col_id - 1, row_vec.ptr(), col_dim * sizeof(double)); return state; } typedef struct __sr_ctx2{ const double * inarray; int32_t maxcall; int32_t dim; int32_t curcall; } sr_ctx2; void * unnest_block::SRF_init(AnyType &args) { ArrayHandle inarray = args[0].getAs >(); if(inarray.dims() != 2) throw std::invalid_argument("invalid dimension"); sr_ctx2 * ctx = new sr_ctx2; ctx->inarray = inarray.ptr(); ctx->maxcall = static_cast(inarray.sizeOfDim(0)); ctx->dim = static_cast(inarray.sizeOfDim(1)); ctx->curcall = 0; return ctx; } AnyType unnest_block::SRF_next(void *user_fctx, bool *is_last_call) { sr_ctx2 * ctx = (sr_ctx2 *) user_fctx; if (ctx->maxcall == 0) { *is_last_call = true; return Null(); } // FIXME: construct_array functions circumvent the abstraction layer. These // should be replaced with appropriate Allocator:: calls. MutableArrayHandle outarray( madlib_construct_array( NULL, ctx->dim, FLOAT8OID, sizeof(double), true, 'd')); memcpy( outarray.ptr(), ctx->inarray + ctx->curcall * ctx->dim, ctx->dim * sizeof(double)); ctx->curcall++; ctx->maxcall--; *is_last_call = false; return outarray; } } } }