/* ----------------------------------------------------------------------- *//** * * @file assoc_rules.cpp * * @brief Functions for Apriori algorithm in MADlib * * @date August 1, 2012 *//* ----------------------------------------------------------------------- */ #include #include "assoc_rules.hpp" namespace madlib { namespace modules { namespace assoc_rules { using madlib::dbconnector::postgres::madlib_get_typlenbyvalalign; typedef struct perm_fctx { bool* flags; char* positions; int32 pos_len; int32 num_elems; int32 max_LHS_size; int32 max_RHS_size; int32 num_calls; /* type information for the result type*/ int16 typlen; bool typbyval; char typalign; } perm_fctx; /** * @brief The init function for gen_rules_from_cfp. * * @param args Two-element array. * args[0] is the text form of a closed frequent pattern. * args[1] is the number of items in the pattern. args[2] is the max number of elements in the lhs of the rule args[3] is the max number of elements in the rhs of the rule * @param max_call The number of will be generated. * * @return The struct including the variables which will be used * in the next call. * */ void * gen_rules_from_cfp::SRF_init(AnyType &args) { perm_fctx *myfctx = NULL; char *positions = args[0].getAs(); // allocate memory for user context myfctx = new perm_fctx(); myfctx->positions = positions; myfctx->pos_len = static_cast(strlen(positions)); myfctx->num_elems = args[1].getAs(); myfctx->max_LHS_size = args[2].getAs(); myfctx->max_RHS_size = args[3].getAs(); myfctx->num_calls = (1 << myfctx->num_elems) - 2; myfctx->flags = new bool[myfctx->num_elems]; memset(myfctx->flags, 0, sizeof(bool) * myfctx->num_elems); // return type id is TEXTOID, get the related information madlib_get_typlenbyvalalign (TEXTOID, &myfctx->typlen, &myfctx->typbyval, &myfctx->typalign); return myfctx; } /** * @brief The next function for gen_rules_from_cfp. * * @param user_fctx The pointer points to the struct including the * variables will be used in this function. * @param is_last_call Indicates if it's the last call. * * @return A two-element array, corresponding to * the left and right parts of an association rule. * */ AnyType gen_rules_from_cfp::SRF_next(void *user_fctx, bool *is_last_call) { perm_fctx *myfctx = (perm_fctx*)user_fctx; int i = 0; char *pre_text; char *post_text; char *p_pre; char *p_post; char *p_begin; char *p_cur; bool is_cont; Datum *result; char **p_sel_pos; int len = 0; if (!is_last_call) throw std::invalid_argument("the parameter is_last_class should not be null"); if (myfctx->num_calls <= 0) { *is_last_call = true; return Null(); } *is_last_call = false; // find the next permutation of the closed frequent pattern for (i = 0; i < myfctx->num_elems; ++i) { if (!myfctx->flags[i]) { myfctx->flags[i] = true; break; } else { myfctx->flags[i] = false; } } // If the target max size is greater than the current number of elements to // consider, there is no need to actually check for lhs or rhs sizes. if (myfctx->max_LHS_size <= myfctx->num_elems || myfctx->max_RHS_size <= myfctx->num_elems){ int countLHS = 0; int countRHS = 0; // flags[i]=True means that element is on the lhs (and vice versa) for (i = 0; i < myfctx->num_elems; ++i) { if (!myfctx->flags[i]) { countRHS ++; } else { countLHS ++; } } // If this rule is not viable (one side is larger than the limit) // Reduce the num_calls to indicate that it is processed and // return Null to skip the operation if (countLHS > myfctx->max_LHS_size || countRHS > myfctx->max_RHS_size){ --myfctx->num_calls; return Null(); } } pre_text = new char[myfctx->pos_len]; post_text = new char[myfctx->pos_len]; result = new Datum[2]; p_pre = pre_text; p_post = post_text; p_begin = myfctx->positions; p_cur = p_begin; is_cont = myfctx->flags[0]; memset(pre_text, 0, myfctx->pos_len * sizeof(char)); memset(post_text, 0, myfctx->pos_len * sizeof(char)); // get the left and right parts of the association rule, corresponding // to the current permutation for (i = 1; i < myfctx->num_elems; ++i) { /* move to the next comma */ for (; *p_cur != ','; ++p_cur) ; /* the same with the last one, no need to copy */ if (myfctx->flags[i] == is_cont) { ++p_cur; continue; } else { ++p_cur; len = static_cast(p_cur - p_begin); p_sel_pos = is_cont ? &p_pre : &p_post; memcpy(*p_sel_pos, p_begin, len * sizeof(char)); *p_sel_pos += len; p_begin = p_cur; is_cont = !is_cont; } } if (is_cont) { p_sel_pos = &p_pre; // remove the last comma *(p_post - 1) = '\0'; } else { p_sel_pos = &p_post; // remove the last comma *(p_pre - 1) = '\0'; } /* copy the remind text */ memcpy(*p_sel_pos, p_begin, (myfctx->pos_len - (p_begin - myfctx->positions)) * sizeof(char)); result[0] = PointerGetDatum(cstring_to_text(pre_text)); result[1] = PointerGetDatum(cstring_to_text(post_text)); ArrayHandle arr(construct_array(result, 2, TEXTOID, myfctx->typlen, myfctx->typbyval, myfctx->typalign)); delete[] pre_text; delete[] post_text; --myfctx->num_calls; return arr; } } // namespace assoc_rules } // namespace modules } // namespace madlib