/* ----------------------------------------------------------------------- *//** * * @file linear_crf.cpp * * @brief Linear-chain Conditional Random Field functions * * We implement limited-memory BFGS method. * *//* ----------------------------------------------------------------------- */ #include #include #include #include "linear_crf.hpp" namespace madlib { // Use Eigen using namespace dbal::eigen_integration; namespace modules { // Import names from other MADlib modules using dbal::NoSolutionFoundException; namespace crf { // Internal functions AnyType stateToResult(const Allocator &inAllocator, const HandleMap > &incoef, double loglikelihood, const uint32_t &iterations); /** * @brief Inter- and intra-iteration state for lbfgs method for * linear-chain conditional random field * * TransitionState encapsualtes the transition state during the * linear-chain crf aggregate function. To the database, the state is * exposed as a single DOUBLE PRECISION array, to the C++ code it is a proper * object containing scalars and vectors. * * Note: We assume that the DOUBLE PRECISION array is initialized by the * database with length at least 58, and all elemenets are 0. */ template class LinCrfLBFGSTransitionState { template friend class LinCrfLBFGSTransitionState; public: LinCrfLBFGSTransitionState(const AnyType &inArray) : mStorage(inArray.getAs()) { rebind(static_cast(mStorage[1])); } /** * @brief Convert to backend representation * * We define this function so that we can use State in the * argument list and as a return type. */ inline operator AnyType() const { return mStorage; } /** * @brief Initialize the lbfgs state. * * This function is only called for the first row. */ inline void initialize(const Allocator &inAllocator, uint32_t inWidthOfX, uint32_t tagSize) { mStorage = inAllocator.allocateArray(arraySize(inWidthOfX)); rebind(inWidthOfX); num_features = inWidthOfX; num_labels = tagSize; if(iteration == 0) diag.fill(1); } /** * @brief We need to support assigning the previous state */ template LinCrfLBFGSTransitionState &operator=( const LinCrfLBFGSTransitionState &inOtherState) { for (size_t i = 0; i < mStorage.size(); i++) mStorage[i] = inOtherState.mStorage[i]; return *this; } /** * @brief Merge with another State object by copying the intra-iteration * fields */ template LinCrfLBFGSTransitionState &operator+=( const LinCrfLBFGSTransitionState &inOtherState) { if (mStorage.size() != inOtherState.mStorage.size()) throw std::logic_error("Internal error: Incompatible transition " "states"); numRows += inOtherState.numRows; grad += inOtherState.grad; loglikelihood += inOtherState.loglikelihood; return *this; } /** * @brief Reset the inter-iteration fields. */ inline void reset() { numRows = 0; grad.fill(0); loglikelihood = 0; } static const int m=7;// The number of corrections used in the LBFGS update. private: static inline uint32_t arraySize(const uint32_t num_features) { return 52 + 3 * num_features + num_features*(2*m+1)+2*m; } void rebind(uint32_t inWidthOfFeature) { iteration.rebind(&mStorage[0]); num_features.rebind(&mStorage[1]); num_labels.rebind(&mStorage[2]); coef.rebind(&mStorage[3], inWidthOfFeature); diag.rebind(&mStorage[3 + inWidthOfFeature], inWidthOfFeature); grad.rebind(&mStorage[3 + 2 * inWidthOfFeature], inWidthOfFeature); ws.rebind(&mStorage[3 + 3 * inWidthOfFeature], inWidthOfFeature*(2*m+1)+2*m); numRows.rebind(&mStorage[3 + 3 * inWidthOfFeature + inWidthOfFeature*(2*m+1)+2*m]); loglikelihood.rebind(&mStorage[4 + 3 * inWidthOfFeature + inWidthOfFeature*(2*m+1)+2*m]); lbfgs_state.rebind(&mStorage[5 + 3 * inWidthOfFeature + inWidthOfFeature*(2*m+1)+2*m], 21); mcsrch_state.rebind(&mStorage[26 + 3 * inWidthOfFeature + inWidthOfFeature*(2*m+1)+2*m], 25); } Handle mStorage; public: typename HandleTraits::ReferenceToUInt32 iteration; typename HandleTraits::ReferenceToUInt32 num_features; typename HandleTraits::ReferenceToUInt32 num_labels; typename HandleTraits::ColumnVectorTransparentHandleMap coef; typename HandleTraits::ColumnVectorTransparentHandleMap diag; typename HandleTraits::ColumnVectorTransparentHandleMap grad; typename HandleTraits::ColumnVectorTransparentHandleMap ws; typename HandleTraits::ReferenceToUInt64 numRows; typename HandleTraits::ReferenceToDouble loglikelihood; typename HandleTraits::ColumnVectorTransparentHandleMap lbfgs_state; typename HandleTraits::ColumnVectorTransparentHandleMap mcsrch_state; }; /** This class contains code for the limited-memory Broyden-Fletcher-Goldfarb-Shanno * (LBFGS) algorithm for large-scale multidimensional unconstrained minimization problems. * This following class is a translation of Fortran code written by Jorge Nocedal. */ class LBFGS { public: // shared variable in lbfgs double stp1, ftol, stp, sq, yr, beta; // iflag A return with iflag < 0 indicates an error, // and iflag = 0 indicates that the routine has // terminated without detecting errors. On a return with // iflag = 1, the user must evaluate the function // f and gradient g. On a return with // iflag is negative , lbfgs failed int iflag, iter, nfun, point, ispt, iypt, maxfev, info, bound, npt, cp, nfev, inmc, iycn, iscn; // shared varibles in mcscrch int infoc; double dg, dgm, dginit, dgtest, dgx, dgxm, dgy, dgym, finit, ftest1, fm, fx, fxm, fy, fym, p5, p66, stx, sty, stmin, stmax, width, width1, xtrapf; bool brackt, stage1, finish; ColumnVector w;// ColumnVector x;// solution vector ColumnVector diag; LBFGS(LinCrfLBFGSTransitionState >&); void save_state(LinCrfLBFGSTransitionState > &state); void mcstep (double&, double& , double&, double&, double& , double&, double&, double, double, bool&, double, double, int&); void mcsrch (int, Eigen::VectorXd&, double, Eigen::VectorXd&, const Eigen::VectorXd&, double&, double, double, int, int&, int&, Eigen::VectorXd&); void lbfgs(int, int, double, Eigen::VectorXd, double, double); }; /** *@brief initialize state of current lbfgs iteration with the state of last iteration */ LBFGS::LBFGS(LinCrfLBFGSTransitionState >& state) { w = state.ws; diag = state.diag; x = state.coef; stp1 = state.lbfgs_state(0); ftol = state.lbfgs_state(1); stp = state.lbfgs_state(2); sq = state.lbfgs_state(3); yr = state.lbfgs_state(4); beta = state.lbfgs_state(5); iflag = static_cast(state.lbfgs_state(6)); iter = static_cast(state.lbfgs_state(7)); nfun = static_cast(state.lbfgs_state(8)); point = static_cast(state.lbfgs_state(9)); ispt = static_cast(state.lbfgs_state(10)); iypt = static_cast(state.lbfgs_state(11)); maxfev = static_cast(state.lbfgs_state(12)); info = static_cast(state.lbfgs_state(13)); bound = static_cast(state.lbfgs_state(14)); npt = static_cast(state.lbfgs_state(15)); cp = static_cast(state.lbfgs_state(16)); nfev = static_cast(state.lbfgs_state(17)); inmc = static_cast(state.lbfgs_state(18)); iycn = static_cast(state.lbfgs_state(19)); iscn = static_cast(state.lbfgs_state(20)); infoc = static_cast(state.mcsrch_state(0)); dg = state.mcsrch_state(1); dgm = state.mcsrch_state(2); dginit = state.mcsrch_state(3); dgtest = state.mcsrch_state(4); dgx = state.mcsrch_state(5); dgxm = state.mcsrch_state(6); dgy = state.mcsrch_state(7); dgym = state.mcsrch_state(8); finit = state.mcsrch_state(9); ftest1 = state.mcsrch_state(10); fm = state.mcsrch_state(11); fx = state.mcsrch_state(12); fxm = state.mcsrch_state(13); fy = state.mcsrch_state(14); fym = state.mcsrch_state(15); stx = state.mcsrch_state(16); sty = state.mcsrch_state(17); stmin = state.mcsrch_state(18); stmax = state.mcsrch_state(19); width = state.mcsrch_state(20); width1 = state.mcsrch_state(21); brackt = (state.mcsrch_state(22) == 1.0 ? true: false); stage1 = (state.mcsrch_state(23) == 1.0 ? true: false); finish = (state.mcsrch_state(24) == 1.0 ? true: false); } /** *@brief save current lbfgs state for the next lbfgs iteration */ void LBFGS::save_state(LinCrfLBFGSTransitionState > &state) { state.ws = w ; state.diag = diag ; state.coef = x ; state.lbfgs_state(0) = stp1; state.lbfgs_state(1) = ftol; state.lbfgs_state(2) = stp; state.lbfgs_state(3) = sq; state.lbfgs_state(4) = yr; state.lbfgs_state(5) = beta; state.lbfgs_state(6) = iflag; state.lbfgs_state(7) = iter; state.lbfgs_state(8) = nfun; state.lbfgs_state(9) = point; state.lbfgs_state(10) = ispt; state.lbfgs_state(11) = iypt; state.lbfgs_state(12) = maxfev; state.lbfgs_state(13) = info; state.lbfgs_state(14) = bound; state.lbfgs_state(15) = npt; state.lbfgs_state(16) = cp; state.lbfgs_state(17) = nfev; state.lbfgs_state(18) = inmc; state.lbfgs_state(19) = iycn; state.lbfgs_state(20) = iscn; state.mcsrch_state(0) = infoc; state.mcsrch_state(1) = dg; state.mcsrch_state(2) = dgm; state.mcsrch_state(3) = dginit; state.mcsrch_state(4) = dgtest; state.mcsrch_state(5) = dgx; state.mcsrch_state(6) = dgxm; state.mcsrch_state(7) = dgy; state.mcsrch_state(8) = dgym; state.mcsrch_state(9) = finit; state.mcsrch_state(10) = ftest1; state.mcsrch_state(11) = fm; state.mcsrch_state(12) = fx; state.mcsrch_state(13) = fxm; state.mcsrch_state(14) = fy; state.mcsrch_state(15) = fym; state.mcsrch_state(16) = stx; state.mcsrch_state(17) = sty; state.mcsrch_state(18) = stmin; state.mcsrch_state(19) = stmax; state.mcsrch_state(20) = width; state.mcsrch_state(21) = width1; state.mcsrch_state(22) = (brackt == true ? 1.0 : 0.0); state.mcsrch_state(23) = (stage1 == true ? 1.0 : 0.0); state.mcsrch_state(24) = (finish == true ? 1.0 : 0.0); } void LBFGS::mcstep(double& stx, double& fx, double& dx, double& sty, double& fy, double& dy, double& stp, double fp, double dp, bool& brackt, double stmin, double stmax, int& info) { bool bound; double gamma, p, q, r, sgnd, stpc, stpf, stpq, theta, s; info = 0; if ((brackt && ((stp <= std::min(stx, sty)) || (stp >= std::max(stx, sty)))) || (dx * (stp - stx) >= 0) || (stmax < stmin)) { return; } sgnd = dp*(dx/fabs(dx)); if (fp > fx) { info = 1; bound = true; theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp; s = std::max(fabs(theta), std::max(fabs(dx), fabs(dp))); gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s)); if (stp < stx) { gamma = -gamma; } p = gamma - dx + theta; q = gamma - dx + gamma + dp; r = p / q; stpc = stx + r * (stp - stx); stpq = stx + dx/((fx - fp)/(stp - stx) + dx)/2 * (stp - stx); if (fabs(stpc - stx) < fabs(stpq - stx)) { stpf = stpc; } else { stpf = stpc + (stpq - stpc)/2; } brackt = true; } else if (sgnd < 0.0) { info = 2; bound = false; theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp; s = std::max(fabs(theta), std::max(fabs(dx), fabs(dp))); gamma = s * sqrt((theta / s) * (theta / s) - (dx / s) * (dp / s)); if (stp > stx) { gamma = -gamma; } p = gamma - dp + theta; q = gamma - dp + gamma + dx; r = p / q; stpc = stp + r * (stx - stp); stpq = stp + dp / (dp - dx) * (stx - stp); stpf = (fabs(stpc - stp) > fabs(stpq - stp)) ? stpc : stpq; brackt = true; } else if (fabs(dp) < fabs(dx)) { info = 3; bound = true; theta = 3.0 * (fx - fp) / (stp - stx) + dx + dp; s = std::max(fabs(theta), std::max(fabs(dx), fabs(dp))); gamma = s * sqrt(std::max(0.0, (theta/s)*(theta/s) - (dx/s)*(dp/s))); if (stp > stx) { gamma = -gamma; } p = gamma - dp + theta; q = gamma + (dx - dp) + gamma; r = p / q; if ((r < 0.0) && (gamma != 0.0)) { stpc = stp + r * (stx - stp); } else { stpc = (stp > stx) ? stmax : stmin; } stpq = stp + dp / (dp - dx) * (stx - stp); if (brackt) { stpf = (fabs(stp - stpc) < fabs(stp - stpq)) ? stpc : stpq; } else { stpf = (fabs(stp - stpc) > fabs(stp - stpq)) ? stpc : stpq; } } else { info = 4; bound = false; if (brackt) { theta = 3.0 * (fp - fy) / (sty - stp) + dy + dp; s = std::max(fabs(theta), std::max(fabs(dy), fabs(dp))); gamma = s * sqrt((theta/s)*(theta/s) - (dy/s)*(dp/s)); if (stp > sty) { gamma = -gamma; } p = gamma - dp + theta; q = gamma - dp + gamma + dy; r = p / q; stpc = stp + r * (sty - stp); stpf = stpc; } else { stpf = (stp > stx) ? stmax : stmin; } } if (fp > fx) { sty = stp; fy = fp; dy = dp; } else { if (sgnd < 0.0) { sty = stx; fy = fx; dy = dx; } stx = stp; fx = fp; dx = dp; } stp = std::max(stmin, std::min(stmax, stpf)); if (brackt && bound) { if (sty > stx) { stp = std::min(stx + 0.66*(sty - stx), stp); } else { stp = std::max(stx + 0.66*(sty - stx), stp); } } return; } void LBFGS::mcsrch(int n, Eigen::VectorXd& x, double f, Eigen::VectorXd& g, const Eigen::VectorXd& s, double& stp, double ftol, double xtol, int maxfev, int& info, int& nfev, Eigen::VectorXd& wa) { double stpmin = 1e-20; double stpmax = 1e20; double p5 = 0.5; double p66 = 0.66; double xtrapf = 4.0; double gtol = 0.9; if(info != -1) { infoc = 1; if (n <= 0 || stp <= 0 || ftol < 0 || gtol < 0 || xtol < 0 || stpmin < 0 || stpmax < stpmin || maxfev <= 0 ) return; dginit = g.dot(s); if (dginit >= 0.0) { std::cout << "The search direction is not a descent direction." << std::endl; return; } brackt = false; stage1 = true; nfev = 0; finit = f; dgtest = ftol * dginit; width = stpmax - stpmin; width1 = width/p5; wa = x; stx = 0.0; fx = finit; dgx = dginit; sty = 0.0; fy = finit; dgy = dginit; } while(true) { if(info != -1) { if (brackt) { if (stx < sty) { stmin = stx; stmax = sty; } else { stmin = sty; stmax = stx; } } else { stmin = stx; stmax = stp + xtrapf * (stp - stx); } stp = std::max(stpmin, std::min(stpmax, stp)); if ((brackt && ((stp <= stmin) || (stp >= stmax))) || (nfev == maxfev - 1) || (!infoc) || (brackt && ((stmax - stmin) <= xtol * stmax))) { stp = stx; } x = wa + stp * s; info = -1; return; } info = 0; nfev= nfev + 1; dg = g.dot(s); ftest1 = finit + stp * dgtest; if ((brackt && ((stp <= stmin) || (stp >= stmax))) || (!infoc)) { info = 6; } if ((stp == stpmax) && (f <= ftest1) && (dg <= dgtest)) { info = 5; } if ((stp == stpmin) && ((f >= ftest1) || (dg >= dgtest))) { info = 4; } if (nfev >= maxfev) { info = 3; } if (brackt && (stmax - stmin <= xtol * stmax)) { info = 2; } if ((f <= ftest1) && (fabs(dg) <= -gtol * dginit)) { info = 1; } if (info !=0 ) return ; if ( stage1 && f <= ftest1 && dg >= std::min(ftol , gtol) * dginit ) stage1 = false; if (stage1 && f <= fx && f > ftest1) { fm = f - stp * dgtest; fxm = fx - stx * dgtest; fym = fy - sty * dgtest; dgm = dg - dgtest; dgxm = dgx - dgtest; dgym = dgy - dgtest; mcstep(stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, brackt, stmin, stmax, infoc); fx = fxm + stx * dgtest; fy = fym + sty * dgtest; dgx = dgxm + dgtest; dgy = dgym + dgtest; } else { mcstep(stx, fx, dgx, sty, fy, dgy, stp, f, dg, brackt, stmin, stmax, infoc); } if (brackt) { if (fabs(sty - stx) >= p66 * width1) { stp = stx + p5 * (sty - stx); } width1 = width; width = fabs(sty - stx); } } } void LBFGS::lbfgs(int n, int m, double f, Eigen::VectorXd g, double eps, double xtol) { bool execute_entire_while_loop = false; if(iflag == 0) { iter=0; if ( n <= 0 || m <= 0 ) { iflag= -3; } nfun= 1; point = 0; finish = false; ispt = n + 2*m; iypt = ispt + n*m; npt = 0; w.segment(ispt, n) = (-g).cwiseProduct(diag); stp1 = 1.0 / g.norm(); ftol= 0.0001; maxfev= 20; execute_entire_while_loop = true; } while(true) { if(execute_entire_while_loop) { iter++; info = 0; bound=iter-1; if (iter!=1) { if (iter > m) bound = m; double ys = w.segment(iypt + npt, n).dot(w.segment(ispt + npt, n)); double yy = w.segment(iypt + npt, n).squaredNorm(); diag.setConstant(ys / yy); cp = point; if (point ==0 ) cp =m; w[n + cp-1] = 1.0 / ys; w.head(n) = -g; cp = point; for (int i = 0; i < bound; i++) { cp -= 1; if (cp == -1) { cp = m - 1; } sq = w.segment(ispt + cp *n,n).dot(w.head(n)); inmc = n + m + cp; iycn = iypt + cp * n; w[inmc] = sq * w[n + cp]; w.head(n) -= w[inmc] * w.segment(iycn, n); } w.head(n)=w.head(n).cwiseProduct(diag); for (int i = 0; i < bound; i++) { yr = w.segment(iypt + cp * n, n).dot(w.head(n)); inmc = n + m + cp; beta = w[inmc] - w[n + cp] * yr; iscn = ispt + cp * n; w.head(n) += beta * w.segment(iscn, n); cp += 1; if (cp == m) { cp = 0; } } w.segment(ispt + point * n, n) = w.head(n); } nfev = 0; stp = (iter == 1) ? stp1 : 1.0; w.head(n) = g; } mcsrch(n, x, f, g, w.segment(ispt + point * n, n), stp, ftol, xtol, maxfev, info, nfev, diag); if(info == -1) { iflag = 1; return; } else { iflag = -1; } nfun = nfun + nfev; npt = point * n; w.segment(ispt + npt,n) *=stp; w.segment(iypt + npt,n) =g - w.head(n); point = point + 1; if (point == m) { point = 0; } if(g.norm()/std::max(1.0,x.norm())<=eps) { finish = true; } if (finish) { iflag = 0; return; } execute_entire_while_loop = true; } } /** *@brief compute exponential of Mi and Vi */ void compute_exp_Mi(int num_labels, Eigen::MatrixXd &Mi, Eigen::VectorXd &Vi) { // take exponential operator for (int m = 0; m < num_labels; m++) { Vi(m) = std::exp(Vi(m)); for (int n = 0; n < num_labels; n++) { Mi(m, n) = std::exp(Mi(m, n)); } } } Eigen::VectorXd mult(Eigen::MatrixXd Mi, Eigen::VectorXd Vi, bool trans, int num_label) { int i=0,j=0,r=0,c=0; Eigen::VectorXd z(num_label); z.fill(0); for (i = 0; i < num_label; i++ ) { for (j=0; j< num_label; j++) { r= i; c=j; if(trans) { r= j; c=i; } z(r)+=Mi(i,j)*Vi(c); } } return z; } /** *@brief compute loglikelihood and gradient using forward-backward algorithm */ void validate_label(int label_id, int num_labels) { if ((label_id < 0) || (label_id >= num_labels)) throw std::runtime_error("Out of bound label ids found in feature table."); } void compute_logli_gradient(LinCrfLBFGSTransitionState >& state, MappedColumnVector& sparse_r, MappedColumnVector& dense_m, MappedColumnVector& sparse_m) { int r_size = static_cast(sparse_r.size()); int sparse_m_size = static_cast(sparse_m.size()); int seq_len = static_cast(sparse_r(r_size-2)) + 1; Eigen::MatrixXd betas(static_cast(state.num_labels), seq_len); Eigen::VectorXd scale(seq_len); Eigen::MatrixXd Mi(static_cast(state.num_labels), static_cast(state.num_labels)); Eigen::VectorXd Vi(state.num_labels); Eigen::VectorXd alpha(state.num_labels); Eigen::VectorXd next_alpha(state.num_labels); Eigen::VectorXd temp(state.num_labels); Eigen::VectorXd ExpF(state.num_features); betas.fill(0); scale.fill(0); alpha.fill(1); next_alpha.fill(0); temp.fill(0); ExpF.fill(0); // compute beta values in a backward fashion // also scale beta-values to 1 to avoid numerical problems scale(seq_len - 1) = state.num_labels; betas.col(seq_len - 1).fill(1.0 / scale(seq_len - 1)); int index = r_size-1; for (int i = seq_len - 1; i > 0; i--) { Mi.fill(0); Vi.fill(0); // examine all features at position "pos" //(prev_labe, curr_label, f_index, start_pos, exist) while (index-4>=0 && sparse_r(index-1) == i) { int curr_label = static_cast(sparse_r(index-3)); validate_label(curr_label, state.num_labels); int f_index = static_cast(sparse_r(index-2)); Vi(curr_label) += state.coef(f_index); index-=5; } //(f_index, prev_label, curr_label) for(int n=0; n+2(sparse_m(n+1)); int curr_label = static_cast(sparse_m(n+2)); validate_label(prev_label, state.num_labels); validate_label(curr_label, state.num_labels); Mi(prev_label, curr_label) += state.coef(static_cast(sparse_m(n))); } compute_exp_Mi(state.num_labels, Mi, Vi); temp=betas.col(i); temp=temp.cwiseProduct(Vi); betas.col(i -1) = mult(Mi,temp,false,state.num_labels); // scale for the next (backward) beta values scale(i - 1)=betas.col(i-1).sum(); betas.col(i - 1)*=(1.0 / scale(i - 1)); } // end of beta values computation index = 0; // start to compute the log-likelihood of the current sequence for (int j = 0; j < seq_len; j++) { Mi.fill(0); Vi.fill(0); // examine all features at position "pos" int ori_index = index; while (((index+4) <= (r_size-1)) && sparse_r(index+3) == j) { int curr_label = static_cast(sparse_r(index+1)); int f_index = static_cast(sparse_r(index+2)); Vi(curr_label) += state.coef(f_index); index+=5; } if(j>=1) { for(int n=0; n+2(sparse_m(n+1)); int curr_label = static_cast(sparse_m(n+2)); Mi(prev_label, curr_label) += state.coef(static_cast(sparse_m(n))); } } compute_exp_Mi(state.num_labels, Mi, Vi); if(j>0) { temp = alpha; next_alpha=mult(Mi,temp,true,state.num_labels); next_alpha=next_alpha.cwiseProduct(Vi); } else { next_alpha=Vi; } index = ori_index; while (((index+4) <= (r_size-1)) && sparse_r(index+3) == j) { int curr_label = (int)sparse_r(index+1); int f_index = (int)sparse_r(index+2); int exist = (int)sparse_r(index+4); if (exist == 1) { state.grad(f_index) += 1; state.loglikelihood += state.coef(f_index); } ExpF(f_index) += next_alpha(curr_label) * betas(curr_label,j); index+=5; } // Edge feature if(j>=1) { int f_index = (int)dense_m((j-1)*5+2); state.grad(f_index) += 1; state.loglikelihood += state.coef(f_index); //(f_index, prev_label, curr_label) for(int n=0; n+2(sparse_m(n+1)); int curr_label = static_cast(sparse_m(n+2)); ExpF(f_index) += alpha[prev_label] * Vi(curr_label) * Mi(prev_label,curr_label) * betas(curr_label, j); } } alpha = next_alpha; alpha*=(1.0 / scale(j)); } // Zx = sum(alpha_i_n) where i = 1..num_labels, n = seq_len double Zx = alpha.sum(); state.loglikelihood -= std::log(Zx); // re-correct the value of seq_logli because Zx was computed from // scaled alpha values for (int k = 0; k < seq_len; k++) { state.loglikelihood -= std::log(scale(k)); } // update the gradient vector for (size_t k = 0; k < state.num_features; k++) { state.grad(k) -= ExpF(k) / Zx; } } /** * @brief Compute the log likelihood and gradient vector for each tuple */ AnyType lincrf_lbfgs_step_transition::run(AnyType &args) { LinCrfLBFGSTransitionState > state = args[0]; MappedColumnVector sparse_r = args[1].getAs(); MappedColumnVector dense_m = args[2].getAs(); MappedColumnVector sparse_m = args[3].getAs(); if (state.numRows == 0) { state.initialize(*this, static_cast(args[4].getAs()), static_cast(args[5].getAs())); if (!args[6].isNull()) { LinCrfLBFGSTransitionState > previousState = args[6]; state = previousState; state.reset(); } } state.numRows++; compute_logli_gradient(state, sparse_r, dense_m, sparse_m); return state; } /** * @brief Perform the perliminary aggregation function: Merge transition states */ AnyType lincrf_lbfgs_step_merge_states::run(AnyType &args) { LinCrfLBFGSTransitionState > stateLeft = args[0]; LinCrfLBFGSTransitionState > stateRight = args[1]; // We first handle the trivial case where this function is called with one // of the states being the initial state if (stateLeft.numRows == 0) return stateRight; else if (stateRight.numRows == 0) return stateLeft; // Merge states together and return stateLeft += stateRight; return stateLeft; } /** * @brief Perform the licrf_lbfgs final step */ AnyType lincrf_lbfgs_step_final::run(AnyType &args) { // We request a mutable object. Depending on the backend, this might perform // a deep copy. LinCrfLBFGSTransitionState > state = args[0]; // Aggregates that haven't seen any data just return Null. if (state.numRows == 0) return Null(); // To avoid overfitting, penalize the likelihood with a spherical Gaussian // weight prior double sigma_square = 100; state.loglikelihood -= (state.coef.dot(state.coef)/(2 * sigma_square)); state.grad -= state.coef/sigma_square; // the lbfgs minimize function, we want to maximize loglikelihood state.loglikelihood = state.loglikelihood * -1; state.grad = -state.grad; double eps = 0.001; //accuracy of the solution to be found double xtol = 1.0e-16; //an estimate of the machine precision assert((state.m > 0) && (state.m <= state.num_features) && (eps >= 0.0)); LBFGS instance(state);// initialize the lbfgs with state of last iteration instance.lbfgs(state.num_features, state.m, state.loglikelihood, state.grad, eps, xtol);// lbfgs optimization instance.save_state(state);//save current state for the next iteration of lbfgs switch(instance.iflag) { case -1: throw std::logic_error("The line search rountine mcsch failed"); break; case -2: throw std::logic_error("The i-th diagonal element of the diagonal inverse Hessian" "approximation, given in DIAG, is not positive"); break; case -3: throw std::logic_error("Improper input parameters for LBFGS n or m are not positive"); break; } if(!state.coef.is_finite()) throw NoSolutionFoundException("Over- or underflow in " "L-BFGS step, while updating coefficients. Input data " "is likely of poor numerical condition."); state.iteration++; return state; } /** * @brief Return iflag which indicates whether L-BFGS converge or not */ AnyType internal_lincrf_lbfgs_converge::run(AnyType &args) { LinCrfLBFGSTransitionState > state = args[0]; return state.lbfgs_state(6); } /** * @brief Return the coefficients and diagnostic statistics of the state */ AnyType internal_lincrf_lbfgs_result::run(AnyType &args) { LinCrfLBFGSTransitionState > state = args[0]; return stateToResult(*this, state.coef, state.loglikelihood, state.iteration); } /** * @brief Return the results for the L-BFGS method. */ AnyType stateToResult( const Allocator &inAllocator, const HandleMap > &incoef, double loglikelihood, const uint32_t &iterations) { // FIXME: We currently need to copy the coefficient to a native array // This should be transparent to user code MutableNativeColumnVector coef( inAllocator.allocateArray(incoef.size())); coef = incoef; // Return all coefficients, loglikelihood in a tuple AnyType tuple; tuple << coef << loglikelihood << iterations; return tuple; } } // namespace crf } // namespace modules } // namespace madlib