/* ----------------------------------------------------------------------- *//** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * * @file logistic.sql_in * * @brief SQL functions for logistic regression * @date January 2011 * * @sa For a brief introduction to logistic regression, see the * module description \ref grp_logreg. * *//* ----------------------------------------------------------------------- */ m4_include(`SQLCommon.m4') DROP TYPE IF EXISTS MADLIB_SCHEMA.__logregr_simple_result CASCADE; CREATE TYPE MADLIB_SCHEMA.__logregr_simple_result AS ( coef DOUBLE PRECISION[], log_likelihood DOUBLE PRECISION, std_err DOUBLE PRECISION[], z_stats DOUBLE PRECISION[], p_values DOUBLE PRECISION[], odds_ratios DOUBLE PRECISION[], vcov DOUBLE PRECISION[], condition_no DOUBLE PRECISION, status INTEGER, num_processed BIGINT, num_iterations INTEGER ); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__logregr_simple_step_transition( DOUBLE PRECISION[], BOOLEAN, /*+ y */ DOUBLE PRECISION[], /*+ x */ DOUBLE PRECISION[]) /*+ previous state */ RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'logregr_simple_step_transition' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); ------------------------------------------------------------------------ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__logregr_simple_step_merge_states( state1 DOUBLE PRECISION[], state2 DOUBLE PRECISION[]) RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'logregr_simple_step_merge_states' LANGUAGE C IMMUTABLE STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); ------------------------------------------------------------------------ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__logregr_simple_step_final( state DOUBLE PRECISION[]) RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'logregr_simple_step_final' LANGUAGE C IMMUTABLE STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); ------------------------------------------------------------------------ /** * @internal * @brief Perform one iteration of the conjugate-gradient method for computing * logistic regression */ DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__logregr_simple_step( BOOLEAN, DOUBLE PRECISION[], DOUBLE PRECISION[]); CREATE AGGREGATE MADLIB_SCHEMA.__logregr_simple_step( /*+ y */ BOOLEAN, /*+ x */ DOUBLE PRECISION[], /*+ previous_state */ DOUBLE PRECISION[]) ( STYPE=DOUBLE PRECISION[], SFUNC=MADLIB_SCHEMA.__logregr_simple_step_transition, m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.__logregr_simple_step_merge_states,') FINALFUNC=MADLIB_SCHEMA.__logregr_simple_step_final, INITCOND='{0,0,0,0,0,0}' ); ------------------------------------------------------------------------ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__logregr_simple_step_distance( /*+ state1 */ DOUBLE PRECISION[], /*+ state2 */ DOUBLE PRECISION[]) RETURNS DOUBLE PRECISION AS 'MODULE_PATHNAME', 'internal_logregr_simple_step_distance' LANGUAGE c IMMUTABLE STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); ------------------------------------------------------------------------ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__logregr_simple_finalizer( /*+ state */ DOUBLE PRECISION[]) RETURNS MADLIB_SCHEMA.__logregr_simple_result AS 'MODULE_PATHNAME', 'internal_logregr_simple_result' LANGUAGE c IMMUTABLE STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); ------------------------------------------------------------------------ /** * @brief Compute logistic-regression coefficients and diagnostic statistics * * To include an intercept in the model, set one coordinate in the * independentVariables array to 1. * * @param source_table Name of the source relation containing the training data * @param out_table Name of the output relation to store the model results * * Columns of the output relation are as follows: * - coef FLOAT8[] - Array of coefficients, \f$ \boldsymbol c \f$ * - log_likelihood FLOAT8 - Log-likelihood \f$ l(\boldsymbol c) \f$ * - std_err FLOAT8[] - Array of standard errors, * \f$ \mathit{se}(c_1), \dots, \mathit{se}(c_k) \f$ * - z_stats FLOAT8[] - Array of Wald z-statistics, \f$ \boldsymbol z \f$ * - p_values FLOAT8[] - Array of Wald p-values, \f$ \boldsymbol p \f$ * - odds_ratios FLOAT8[]: Array of odds ratios, * \f$ \mathit{odds}(c_1), \dots, \mathit{odds}(c_k) \f$ * - condition_no FLOAT8 - The condition number of * matrix \f$ X^T A X \f$ during the iteration * immediately preceding convergence * (i.e., \f$ A \f$ is computed using the coefficients * of the previous iteration) * @param dependent_varname Name of the dependent column (of type BOOLEAN) * @param independent_varname Name of the independent column (of type DOUBLE * PRECISION[]) * @param max_iter The maximum number of iterations * @param tolerance The difference between log-likelihood values in successive * iterations that should indicate convergence. This value should be * non-negative and a zero value here disables the convergence criterion, * and execution will only stop after \c maxNumIterations iterations. * @param verbose If true, any error or warning message will be printed to the * console (irrespective of the 'client_min_messages' set by server). * If false, no error/warning message is printed to console. * * * @usage * - Get vector of coefficients \f$ \boldsymbol c \f$ and all diagnostic * statistics:\n *
SELECT logregr_train('sourceName', 'outName'
 *           'dependentVariable', 'independentVariables');
 *          SELECT * from outName;
 *    
* - Get vector of coefficients \f$ \boldsymbol c \f$:\n *
SELECT coef from outName;
* - Get a subset of the output columns, e.g., only the array of coefficients * \f$ \boldsymbol c \f$, the log-likelihood of determination * \f$ l(\boldsymbol c) \f$, and the array of p-values \f$ \boldsymbol p \f$: *
SELECT coef, log_likelihood, p_values FROM outName;
* * @note This function starts an iterative algorithm. It is not an aggregate * function. Source, output, and column names have to be passed as strings * (due to limitations of the SQL syntax). * * @internal * @sa This function is a wrapper for logistic::compute_logregr(), which * sets the default values. */ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.logregr_simple_train ( source_table VARCHAR, out_table VARCHAR, dependent_varname VARCHAR, independent_varname VARCHAR, max_iter INTEGER, tolerance DOUBLE PRECISION, verbose BOOLEAN ) RETURNS VOID AS $$ PythonFunction(hello_world, simple_logistic, logregr_simple_train) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); ------------------------------------------------------------------------ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.logregr_simple_train ( source_table VARCHAR, out_table VARCHAR, dependent_varname VARCHAR, independent_varname VARCHAR) RETURNS VOID AS $$ SELECT MADLIB_SCHEMA.logregr_simple_train($1, $2, $3, $4, 20, 0.0001, False); $$ LANGUAGE sql VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); ------------------------------------------------------------------------ -- Help messages ------------------------------------------------------- CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.logregr_simple_train( message TEXT ) RETURNS TEXT AS $$ PythonFunction(hello_world, simple_logistic, logregr_simple_help_msg) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.logregr_simple_train() RETURNS TEXT AS $$ SELECT MADLIB_SCHEMA.logregr_simple_train(NULL::TEXT); $$ LANGUAGE SQL IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); -------------------------------------------------------------