/* ----------------------------------------------------------------------- *//** * * @file svec_util.sql_in * * @brief SQL utility functions for sparse vector data type * svec * * @sa For an introduction to the sparse vector implementation, see the module * description \ref grp_svec. * *//* ----------------------------------------------------------------------- */ m4_include(`SQLCommon.m4') --! Creates sparse vector representation given an array of indexes, respective values and --! size of the required vector. The function just does the bucket filling with the values --! at the respective indexes for the vector given the length. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gen_svec(BIGINT[], FLOAT8[], BIGINT) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'generate_sparse_vector' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL'); --! Creates the output table containing the sparse vector representation for the documents --! given the dictionary table, documents tables and names of the respective columns. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gen_doc_svecs( output_tbl TEXT, dictionary_tbl TEXT, dict_id_col TEXT, dict_term_col TEXT, documents_tbl TEXT, doc_id_col TEXT, doc_term_col TEXT, doc_term_info_col TEXT ) RETURNS TEXT AS $$ PythonFunction(svec_util, generate_svec, generate_doc_svecs) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); --! Helper function for MADLIB_SCHEMA.gen_doc_svec UDF. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gen_doc_svecs() RETURNS TEXT AS $$ PythonFunction(svec_util, generate_svec, generate_doc_svecs_help) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL'); --! Basic floating point scalar operator: MIN. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dmin(float8,float8) RETURNS float8 AS 'MODULE_PATHNAME', 'float8_min' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Basic floating point scalar operator: MAX. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dmax(float8,float8) RETURNS float8 AS 'MODULE_PATHNAME', 'float8_max' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Counts the number of non-zero entries in the input vector; the second argument is capped at 1, then added to the first; used as the sfunc in the svec_count_nonzero() aggregate below. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_count(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_count' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the logarithm of each element of the input SVEC. CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_log(MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_log' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Returns true if two SVECs are equal, not counting zeros (zero equals anything). If the two SVECs are of different size, then the function essentially zero-pads the shorter one and performs the comparison. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_eq_non_zero(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS boolean AS 'MODULE_PATHNAME', 'svec_eq_non_zero' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Returns true if left svec contains right one, meaning that every non-zero value in the right svec equals left one --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_contains(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS boolean AS 'MODULE_PATHNAME', 'svec_contains' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the l2norm of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2norm(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_l2norm' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the l2norm of a float8 array. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2norm(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_l2norm' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the l2norm distance between two SVECs. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l2norm(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_l2norm' LANGUAGE C STRICT IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the l1norm distance between two SVECs. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l1norm(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_l1norm' LANGUAGE C STRICT IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the l1norm of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l1norm(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_l1norm' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the l1norm of a float8 array. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l1norm(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_l1norm' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the angle between two SVECs in radians. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.angle(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_angle' LANGUAGE C STRICT IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the Tanimoto distance between two SVECs. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.tanimoto_distance(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_tanimoto_distance' LANGUAGE C STRICT IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Unnests an SVEC into a table of uncompressed values --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_unnest(MADLIB_SCHEMA.svec) RETURNS setof float8 AS 'MODULE_PATHNAME', 'svec_unnest' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Appends an element to the back of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_pivot(MADLIB_SCHEMA.svec,float8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_pivot' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Sums the elements of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_elsum(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_summate' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Sums the elements of a float8 array. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_elsum(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_summate' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the median element of a float8 array. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_median(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_median' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the median element of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_median(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_median' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Compares an SVEC to a float8, and returns positions of all elements not equal to the float as an array. Element index here starts at 0. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_nonbase_positions(MADLIB_SCHEMA.svec, FLOAT8) RETURNS INT8[] AS 'MODULE_PATHNAME', 'svec_nonbase_positions' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Compares an SVEC to a float8, and returns values of all elements not equal to the float as an array. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_nonbase_values(MADLIB_SCHEMA.svec, FLOAT8) RETURNS FLOAT8[] AS 'MODULE_PATHNAME', 'svec_nonbase_values' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Returns the dimension of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dimension(MADLIB_SCHEMA.svec) RETURNS integer AS 'MODULE_PATHNAME', 'svec_dimension' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Applies a given function to each element of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_lapply(text,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_lapply' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Appends a run-length block to the back of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_append(MADLIB_SCHEMA.svec,float8,int8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_append' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Projects onto an element of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_proj(MADLIB_SCHEMA.svec,int4) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_proj' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Extracts a subvector of an SVEC given the subvector's start and end indices. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_subvec(MADLIB_SCHEMA.svec,int4,int4) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_subvec' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Reverses the elements of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_reverse(MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_reverse' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Replaces the subvector of a given SVEC at a given start index with another SVEC. Note that element index should start at 1. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_change(MADLIB_SCHEMA.svec,int4,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_change' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the hash of an SVEC. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_hash(MADLIB_SCHEMA.svec) RETURNS int4 AS 'MODULE_PATHNAME', 'svec_hash' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Computes the word-occurence vector of a document --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_sfv(text[], text[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'gp_extract_feature_histogram' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Sorts an array of texts. This function should be in MADlib common. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_sort(text[]) RETURNS text[] AS $$ SELECT array(SELECT unnest($1::text[]) ORDER BY 1); $$ LANGUAGE SQL m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `'); --! Converts an svec to a text string --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_to_string(MADLIB_SCHEMA.svec) RETURNS text AS 'MODULE_PATHNAME', 'svec_to_string' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Converts a text string to an svec --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_from_string(text) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_from_string' STRICT LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Transition function for mean(svec) aggregate --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_transition( FLOAT[], MADLIB_SCHEMA.svec) RETURNS FLOAT[] AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Preliminary merge function for mean(svec) aggregate --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_prefunc( FLOAT[], FLOAT[]) RETURNS FLOAT[] AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Final function for mean(svec) aggregate --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_final( FLOAT[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); --! Aggregate that computes the element-wise mean of a list of vectors. --! DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.mean( MADLIB_SCHEMA.svec) CASCADE; CREATE AGGREGATE MADLIB_SCHEMA.mean( MADLIB_SCHEMA.svec) ( SFUNC = MADLIB_SCHEMA.svec_mean_transition, m4_ifdef(`__POSTGRESQL__', `', `prefunc = MADLIB_SCHEMA.svec_mean_prefunc,') FINALFUNC = MADLIB_SCHEMA.svec_mean_final, STYPE = FLOAT[] ); --! Aggregate that provides the element-wise sum of a list of vectors. --! -- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_sum(MADLIB_SCHEMA.svec); DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_sum (MADLIB_SCHEMA.svec) CASCADE; CREATE AGGREGATE MADLIB_SCHEMA.svec_sum (MADLIB_SCHEMA.svec) ( SFUNC = MADLIB_SCHEMA.svec_plus, m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.svec_plus,') INITCOND = '{1}:{0.}', -- Zero STYPE = MADLIB_SCHEMA.svec ); --! Aggregate that provides a tally of nonzero entries in a list of vectors. --! -- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_count_nonzero(MADLIB_SCHEMA.svec); DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_count_nonzero (MADLIB_SCHEMA.svec) CASCADE; CREATE AGGREGATE MADLIB_SCHEMA.svec_count_nonzero (MADLIB_SCHEMA.svec) ( SFUNC = MADLIB_SCHEMA.svec_count, m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.svec_plus,') INITCOND = '{1}:{0.}', -- Zero STYPE = MADLIB_SCHEMA.svec ); --! Aggregate that turns a list of float8 values into an SVEC. --! DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_agg (float8) CASCADE; CREATE m4_ifdef(`__POSTGRESQL__', `', `ORDERED') AGGREGATE MADLIB_SCHEMA.svec_agg (float8) ( SFUNC = MADLIB_SCHEMA.svec_pivot, STYPE = MADLIB_SCHEMA.svec ); --! Aggregate that computes the median element of a list of float8 values. --! -- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_median_inmemory(float8); DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_median_inmemory (float8) CASCADE; CREATE AGGREGATE MADLIB_SCHEMA.svec_median_inmemory (float8) ( SFUNC = MADLIB_SCHEMA.svec_pivot, m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.svec_concat,') FINALFUNC = MADLIB_SCHEMA.svec_median, STYPE = MADLIB_SCHEMA.svec ); --! Normalizes an SVEC that is divides all elements by its norm/magnitude. --! CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.normalize(MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_normalize' LANGUAGE C IMMUTABLE STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');