Datum vec_to_var_samp_transfn(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(vec_to_var_samp_transfn); /** * Returns an of n elements, * which each element is the sample variance value found in that position * from all input arrays. * * We use the sample approach as the built in VAR_SAMP aggregate. * See float8_var_samp in backend/utils/adt/float.c for details. * Our transition function records N, sum(X), and sum(X^2) for each array element, * and then the final function uses those to compute the sample variance, * based on: * * s^2 = \frac{ \sum{}X^2 - \frac{(\sum{}X)^2}{N} }{N - 1} * * aka * Σ X^2 - ((Σ X)^2)/N * s^2 = ------------------- * N - 1 * * Using this formula rather than the more common one * lets us keep just 3 numbers (for each array position) as we go, * whereas to find X - mean(X) for each X * we'd have to keep all the values around until we knew the mean. * * This can over/underflow, but we report an error if that happens, * and it's no worse than the built-in aggregate. * * by Paul A. Jungwirth */ Datum vec_to_var_samp_transfn(PG_FUNCTION_ARGS) { Oid elemTypeId; int16 elemTypeWidth; bool elemTypeByValue; char elemTypeAlignmentCode; int currentLength; MemoryContext aggContext; VecArrayBuildState *state = NULL; ArrayType *currentArray; int arrayLength; Datum *currentVals; bool *currentNulls; int i; float8 tmp_f; if (!AggCheckCallContext(fcinfo, &aggContext)) { elog(ERROR, "vec_to_var_samp_transfn called in non-aggregate context"); } // PG_ARGISNULL tests for SQL NULL, // but after the first pass we can have a // value that is non-SQL-NULL but still is C NULL. if (!PG_ARGISNULL(0)) { state = (VecArrayBuildState *)PG_GETARG_POINTER(0); } if (PG_ARGISNULL(1)) { // just return the current state unchanged (possibly still NULL) PG_RETURN_POINTER(state); } currentArray = PG_GETARG_ARRAYTYPE_P(1); if (state == NULL) { // Since we have our first not-null argument // we can initialize the state to match its length. elemTypeId = ARR_ELEMTYPE(currentArray); if (elemTypeId != INT2OID && elemTypeId != INT4OID && elemTypeId != INT8OID && elemTypeId != FLOAT4OID && elemTypeId != FLOAT8OID) { ereport(ERROR, (errmsg("vec_to_var_samp input must be array of SMALLINT, INTEGER, BIGINT, REAL, or DOUBLE PRECISION"))); } if (ARR_NDIM(currentArray) != 1) { ereport(ERROR, (errmsg("One-dimensional arrays are required"))); } arrayLength = (ARR_DIMS(currentArray))[0]; // Just start with all NULLs and let the comparisons below replace them: state = initVecArrayResultWithNulls(elemTypeId, FLOAT8OID, aggContext, arrayLength); } else { elemTypeId = state->inputElementType; arrayLength = state->state.nelems; } get_typlenbyvalalign(elemTypeId, &elemTypeWidth, &elemTypeByValue, &elemTypeAlignmentCode); deconstruct_array(currentArray, elemTypeId, elemTypeWidth, elemTypeByValue, elemTypeAlignmentCode, ¤tVals, ¤tNulls, ¤tLength); if (currentLength != arrayLength) { ereport(ERROR, (errmsg("All arrays must be the same length, but we got %d vs %d", currentLength, arrayLength))); } for (i = 0; i < arrayLength; i++) { if (currentNulls[i]) { // do nothing: nulls can't change the result. } else if (state->veccounts[i] == 0) { state->veccounts[i] = 1; switch (elemTypeId) { case INT2OID: state->vecvalues[i].f8 = DatumGetInt16(currentVals[i]); break; case INT4OID: state->vecvalues[i].f8 = DatumGetInt32(currentVals[i]); break; case INT8OID: state->vecvalues[i].f8 = DatumGetInt64(currentVals[i]); break; case FLOAT4OID: state->vecvalues[i].f8 = DatumGetFloat4(currentVals[i]); break; case FLOAT8OID: state->vecvalues[i].f8 = DatumGetFloat8(currentVals[i]); break; default: elog(ERROR, "Unknown elemTypeId!"); } state->vectmpvalues[i].f8 = state->vecvalues[i].f8 * state->vecvalues[i].f8; } else { state->veccounts[i] += 1; switch (elemTypeId) { case INT2OID: tmp_f = DatumGetInt16(currentVals[i]); break; case INT4OID: tmp_f = DatumGetInt32(currentVals[i]); break; case INT8OID: tmp_f = DatumGetInt64(currentVals[i]); break; case FLOAT4OID: tmp_f = DatumGetFloat4(currentVals[i]); break; case FLOAT8OID: tmp_f = DatumGetFloat8(currentVals[i]); break; default: elog(ERROR, "Unknown elemTypeId!"); } state->vecvalues[i].f8 += tmp_f; state->vectmpvalues[i].f8 += tmp_f * tmp_f; } } PG_RETURN_POINTER(state); } Datum vec_to_var_samp_finalfn(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(vec_to_var_samp_finalfn); Datum vec_to_var_samp_finalfn(PG_FUNCTION_ARGS) { Datum result; VecArrayBuildState *state; int dims[1]; int lbs[1]; int i; float8 numerator; Assert(AggCheckCallContext(fcinfo, NULL)); state = PG_ARGISNULL(0) ? NULL : (VecArrayBuildState *)PG_GETARG_POINTER(0); if (state == NULL) PG_RETURN_NULL(); // Convert from our pgnums to Datums: for (i = 0; i < state->state.nelems; i++) { // Sample variance is undefined if N is 0 *or* 1: if (state->veccounts[i] > 1) { state->state.dnulls[i] = false; numerator = (float8)state->veccounts[i] * state->vectmpvalues[i].f8 - state->vecvalues[i].f8 * state->vecvalues[i].f8; CHECKFLOATVAL(numerator, isinf(state->vectmpvalues[i].f8) || isinf(state->vecvalues[i].f8), true); /* Watch out for roundoff error producing a negative numerator */ if (numerator <= 0.0) { state->state.dvalues[i] = Float8GetDatum(0.0); } else { state->state.dvalues[i] = Float8GetDatum(numerator / (float8)(state->veccounts[i] * (state->veccounts[i] - 1.0))); } } } dims[0] = state->state.nelems; lbs[0] = 1; result = makeMdArrayResult(&state->state, 1, dims, lbs, CurrentMemoryContext, false); PG_RETURN_DATUM(result); }