/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

#include "aod_sketch_c_adapter.h"
#include "allocator.h"
#include "postgres_h_substitute.h"
#include "kll_float_sketch_c_adapter.h"

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <exception>
#include <new>
#include <ostream>
#include <stdexcept>
#include <utility>
#include <vector>

#include <array_of_doubles_sketch.hpp>
#include <array_of_doubles_union.hpp>
#include <array_of_doubles_intersection.hpp>
#include <array_of_doubles_a_not_b.hpp>

#include <boost/accumulators/accumulators.hpp>
#include <boost/accumulators/statistics/stats.hpp>
#include <boost/accumulators/statistics/mean.hpp>
#include <boost/accumulators/statistics/variance.hpp>
#include <boost/math/distributions/students_t.hpp>

// Array-of-doubles sketch types instantiated with the palloc-backed allocator.
using update_aod_sketch_pg = datasketches::update_array_of_doubles_sketch_alloc<palloc_allocator<double>>;
using compact_aod_sketch_pg = datasketches::compact_array_of_doubles_sketch_alloc<palloc_allocator<double>>;
using aod_union_pg = datasketches::array_of_doubles_union_alloc<palloc_allocator<double>>;
// using the union policy in the intersection since this is how it is done in Druid
using aod_intersection_pg = datasketches::array_of_doubles_intersection<
  datasketches::array_of_doubles_union_policy_alloc<palloc_allocator<double>>,
  palloc_allocator<double>
>;
using aod_a_not_b_pg = datasketches::array_of_doubles_a_not_b_alloc<palloc_allocator<double>>;

// Writes the values of v as "(v0, v1, ...)" to os and returns os.
std::ostream& operator<<(std::ostream& os, const datasketches::aod<palloc_allocator<double>>& v) {
  os << "(";
  for (size_t i = 0; i < v.size(); ++i) {
    if (i != 0) os << ", ";
    os << v[i];
  }
  os << ")";
  return os;
}

// Constructs an update sketch holding num_values doubles per entry in palloc'ed storage,
// leaving all other builder settings at their defaults.
// Any std::exception is forwarded to pg_error().
void* aod_sketch_new(unsigned num_values) {
  try {
    return new (palloc(sizeof(update_aod_sketch_pg)))
      update_aod_sketch_pg(update_aod_sketch_pg::builder(num_values).build());
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Same as aod_sketch_new(), additionally passing lg_k to the builder.
void* aod_sketch_new_lgk(unsigned num_values, unsigned lg_k) {
  try {
    return new (palloc(sizeof(update_aod_sketch_pg)))
      update_aod_sketch_pg(update_aod_sketch_pg::builder(num_values).set_lg_k(lg_k).build());
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Same as aod_sketch_new_lgk(), additionally passing the sampling probability p to the builder.
void* aod_sketch_new_lgk_p(unsigned num_values, unsigned lg_k, float p) {
  try {
    return new (palloc(sizeof(update_aod_sketch_pg)))
      update_aod_sketch_pg(update_aod_sketch_pg::builder(num_values).set_lg_k(lg_k).set_p(p).build());
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Destroys an update sketch created by one of the aod_sketch_new*() functions and frees its storage.
void update_aod_sketch_delete(void* sketchptr) {
  try {
    // The object was placement-constructed, so it is destroyed explicitly before pfree().
    static_cast<update_aod_sketch_pg*>(sketchptr)->~update_aod_sketch_pg();
    pfree(sketchptr);
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
}

// Destroys a compact sketch and frees its storage.
void compact_aod_sketch_delete(void* sketchptr) {
  try {
    static_cast<compact_aod_sketch_pg*>(sketchptr)->~compact_aod_sketch_pg();
    pfree(sketchptr);
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
}

// Updates the update sketch with a key given as `length` bytes at `data` and the array `values`.
// NOTE(review): `values` presumably holds as many doubles as the sketch was built with - confirm against callers.
void aod_sketch_update(void* sketchptr, const void* data, unsigned length, const double* values) {
  try {
    static_cast<update_aod_sketch_pg*>(sketchptr)->update(data, length, values);
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
}

// Converts an update sketch into a newly allocated compact sketch.
// The update sketch is destroyed and freed; the caller must not use sketchptr afterwards.
void* aod_sketch_compact(void* sketchptr) {
  try {
    auto* update_sketch = static_cast<update_aod_sketch_pg*>(sketchptr);
    auto* compact_sketch = new (palloc(sizeof(compact_aod_sketch_pg)))
      compact_aod_sketch_pg(update_sketch->compact());
    update_sketch->~update_aod_sketch_pg();
    pfree(sketchptr);
    return compact_sketch;
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Returns the estimate of an update sketch.
double update_aod_sketch_get_estimate(const void* sketchptr) {
  try {
    return static_cast<const update_aod_sketch_pg*>(sketchptr)->get_estimate();
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Returns the estimate of a compact sketch.
double compact_aod_sketch_get_estimate(const void* sketchptr) {
  try {
    return static_cast<const compact_aod_sketch_pg*>(sketchptr)->get_estimate();
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Returns a palloc'ed array of 3 float8 Datums: {estimate, lower bound, upper bound},
// with the bounds computed for num_std_devs standard deviations.
// NOTE(review): assumes sketchptr refers to a compact sketch - confirm against callers.
Datum* aod_sketch_get_estimate_and_bounds(const void* sketchptr, unsigned num_std_devs) {
  try {
    const auto& sketch = *static_cast<const compact_aod_sketch_pg*>(sketchptr);
    Datum* est_and_bounds = static_cast<Datum*>(palloc(sizeof(Datum) * 3));
    est_and_bounds[0] = pg_float8_get_datum(sketch.get_estimate());
    est_and_bounds[1] = pg_float8_get_datum(sketch.get_lower_bound(num_std_devs));
    est_and_bounds[2] = pg_float8_get_datum(sketch.get_upper_bound(num_std_devs));
    return est_and_bounds;
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Returns the sketch's to_string(print_entries) output as a palloc'ed, null-terminated C string.
// NOTE(review): assumes sketchptr refers to a compact sketch - confirm against callers.
char* aod_sketch_to_string(const void* sketchptr, bool print_entries) {
  try {
    auto str = static_cast<const compact_aod_sketch_pg*>(sketchptr)->to_string(print_entries);
    const size_t len = str.length() + 1; // +1 for the terminating null
    char* buffer = static_cast<char*>(palloc(len));
    strncpy(buffer, str.c_str(), len);
    return buffer;
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Serializes a compact sketch, passing header_size through to serialize(),
// and returns the data pointer and size of the resulting byte vector.
ptr_with_size aod_sketch_serialize(const void* sketchptr, unsigned header_size) {
  try {
    ptr_with_size p;
    // The byte vector is placement-constructed in palloc'ed storage and is not destroyed
    // in this function, so the returned data pointer remains valid after return.
    auto* bytes = new (palloc(sizeof(compact_aod_sketch_pg::vector_bytes))) compact_aod_sketch_pg::vector_bytes(
      static_cast<const compact_aod_sketch_pg*>(sketchptr)->serialize(header_size)
    );
    p.ptr = bytes->data();
    p.size = bytes->size();
    return p;
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Deserializes a compact sketch from `length` bytes at `buffer` into palloc'ed storage.
void* aod_sketch_deserialize(const char* buffer, unsigned length) {
  try {
    return new (palloc(sizeof(compact_aod_sketch_pg)))
      compact_aod_sketch_pg(compact_aod_sketch_pg::deserialize(buffer, length));
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Constructs a union for sketches with num_values doubles per entry in palloc'ed storage.
void* aod_union_new(unsigned num_values) {
  try {
    return new (palloc(sizeof(aod_union_pg))) aod_union_pg(aod_union_pg::builder(num_values).build());
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Same as aod_union_new(), additionally passing lg_k to the builder.
void* aod_union_new_lgk(unsigned num_values, unsigned lg_k) {
  try {
    return new (palloc(sizeof(aod_union_pg)))
      aod_union_pg(aod_union_pg::builder(num_values).set_lg_k(lg_k).build());
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Destroys a union and frees its storage.
void aod_union_delete(void* unionptr) {
  try {
    static_cast<aod_union_pg*>(unionptr)->~aod_union_pg();
    pfree(unionptr);
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
}

// Feeds a compact sketch into the union.
void aod_union_update(void* unionptr, const void* sketchptr) {
  try {
    // NOTE(review): sketchptr points to const, so std::move() yields a const rvalue here;
    // the source sketch cannot be modified through it.
    static_cast<aod_union_pg*>(unionptr)->update(std::move(*static_cast<const compact_aod_sketch_pg*>(sketchptr)));
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
}

// Returns the current result of the union as a newly allocated compact sketch.
void* aod_union_get_result(const void* unionptr) {
  try {
    return new (palloc(sizeof(compact_aod_sketch_pg)))
      compact_aod_sketch_pg(static_cast<const aod_union_pg*>(unionptr)->get_result());
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Constructs an intersection with the default seed and num_values in palloc'ed storage.
void* aod_intersection_new(unsigned num_values) {
  try {
    return new (palloc(sizeof(aod_intersection_pg))) aod_intersection_pg(datasketches::DEFAULT_SEED, num_values);
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Destroys an intersection and frees its storage.
void aod_intersection_delete(void* interptr) {
  try {
    static_cast<aod_intersection_pg*>(interptr)->~aod_intersection_pg();
    pfree(interptr);
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
}

// Feeds a compact sketch into the intersection.
void aod_intersection_update(void* interptr, const void* sketchptr) {
  try {
    static_cast<aod_intersection_pg*>(interptr)->update(*static_cast<const compact_aod_sketch_pg*>(sketchptr));
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
}

// Returns the current result of the intersection as a newly allocated compact sketch.
void* aod_intersection_get_result(const void* interptr) {
  try {
    return new (palloc(sizeof(compact_aod_sketch_pg)))
      compact_aod_sketch_pg(static_cast<const aod_intersection_pg*>(interptr)->get_result());
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Computes the set difference (sketch 1 and not sketch 2) of two compact sketches
// and returns it as a newly allocated compact sketch.
void* aod_a_not_b(const void* sketchptr1, const void* sketchptr2) {
  try {
    aod_a_not_b_pg a_not_b;
    return new (palloc(sizeof(compact_aod_sketch_pg))) compact_aod_sketch_pg(a_not_b.compute(
      *static_cast<const compact_aod_sketch_pg*>(sketchptr1),
      *static_cast<const compact_aod_sketch_pg*>(sketchptr2)
    ));
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Builds a KLL float sketch (parameter k) from the values found at column_index
// in every retained entry of the compact sketch.
// Reports an error if column_index is not less than the sketch's number of values.
void* aod_sketch_to_kll_float_sketch(const void* sketchptr, unsigned column_index, unsigned k) {
  try {
    const auto& sketch = *static_cast<const compact_aod_sketch_pg*>(sketchptr);
    // Validate before allocating anything: indexing past the per-entry array would read out of bounds.
    if (column_index >= sketch.get_num_values()) {
      throw std::out_of_range("aod_sketch_to_kll_float_sketch: column index out of range");
    }
    auto kllptr = kll_float_sketch_new(k);
    for (const auto& entry: sketch) {
      kll_float_sketch_update(kllptr, entry.second[column_index]);
    }
    return kllptr;
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Two-sided t-test p-value for two samples with unequal variances.
// m, v and n are the mean, variance and size of each sample.
// The (n - 1) divisors mean that a sample size below 2 yields a non-finite
// degrees-of-freedom value, which is then handed to the Student's t distribution as is.
double t_test_unequal_sd(double m1, double v1, uint64_t n1, double m2, double v2, uint64_t n2) {
  const double scaled_var1 = v1 / n1;
  const double scaled_var2 = v2 / n2;
  const double scaled_var_sum = scaled_var1 + scaled_var2;

  // degrees of freedom = (v1/n1 + v2/n2)^2 / ((v1/n1)^2 / (n1 - 1) + (v2/n2)^2 / (n2 - 1))
  const double t1 = scaled_var1 * scaled_var1 / (n1 - 1);
  const double t2 = scaled_var2 * scaled_var2 / (n2 - 1);
  const double degrees_of_freedom = scaled_var_sum * scaled_var_sum / (t1 + t2);

  const double t_stat = (m1 - m2) / std::sqrt(scaled_var_sum);

  using boost::math::students_t;
  students_t distribution(degrees_of_freedom);
  return 2 * cdf(complement(distribution, std::fabs(t_stat))); // double to match 2-sided test in Java (commons-math3)
}

// Runs t_test_unequal_sd() column by column on two compact sketches.
// Returns a palloc'ed array of float8 Datums (one p-value per column) and stores its length in *arr_len_out.
// Reports an error if the sketches have different numbers of values.
Datum* aod_sketch_students_t_test(const void* sketchptr1, const void* sketchptr2, unsigned* arr_len_out) {
  try {
    const auto& sketch1 = *static_cast<const compact_aod_sketch_pg*>(sketchptr1);
    const auto& sketch2 = *static_cast<const compact_aod_sketch_pg*>(sketchptr2);
    if (sketch1.get_num_values() != sketch2.get_num_values()) pg_error("aod_sketch_students_t_test: number of values mismatch");
    unsigned num_values = sketch1.get_num_values();
    Datum* p_values = static_cast<Datum*>(palloc(sizeof(Datum) * num_values));
    *arr_len_out = num_values;

    using namespace boost::accumulators;
    using Accum = accumulator_set<double, stats<tag::mean, tag::variance>>;

    // One accumulator per column, fed with that column's value from every retained entry.
    std::vector<Accum, palloc_allocator<Accum>> stats1(num_values);
    for (const auto& entry: sketch1) {
      for (unsigned i = 0; i < num_values; ++i) stats1[i](entry.second[i]);
    }

    std::vector<Accum, palloc_allocator<Accum>> stats2(num_values);
    for (const auto& entry: sketch2) {
      for (unsigned i = 0; i < num_values; ++i) stats2[i](entry.second[i]);
    }

    // The number of retained entries is used as the sample size of each column.
    for (unsigned i = 0; i < num_values; ++i) {
      p_values[i] = pg_float8_get_datum(t_test_unequal_sd(
        mean(stats1[i]), variance(stats1[i]), sketch1.get_num_retained(),
        mean(stats2[i]), variance(stats2[i]), sketch2.get_num_retained()
      ));
    }
    return p_values;
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Returns a palloc'ed array of float8 Datums holding the mean of each column over the
// retained entries of the compact sketch; stores the array length in *arr_len_out.
Datum* aod_sketch_to_means(const void* sketchptr, unsigned* arr_len_out) {
  try {
    const auto& sketch = *static_cast<const compact_aod_sketch_pg*>(sketchptr);
    unsigned num_values = sketch.get_num_values();
    Datum* means = static_cast<Datum*>(palloc(sizeof(Datum) * num_values));
    *arr_len_out = num_values;

    using namespace boost::accumulators;
    using Accum = accumulator_set<double, stats<tag::mean>>;

    std::vector<Accum, palloc_allocator<Accum>> stats(num_values);
    for (const auto& entry: sketch) {
      for (unsigned i = 0; i < num_values; ++i) stats[i](entry.second[i]);
    }

    for (unsigned i = 0; i < num_values; ++i) {
      means[i] = pg_float8_get_datum(mean(stats[i]));
    }
    return means;
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}

// Returns a palloc'ed array of float8 Datums holding the variance of each column over the
// retained entries of the compact sketch; stores the array length in *arr_len_out.
Datum* aod_sketch_to_variances(const void* sketchptr, unsigned* arr_len_out) {
  try {
    const auto& sketch = *static_cast<const compact_aod_sketch_pg*>(sketchptr);
    unsigned num_values = sketch.get_num_values();
    Datum* variances = static_cast<Datum*>(palloc(sizeof(Datum) * num_values));
    *arr_len_out = num_values;

    using namespace boost::accumulators;
    using Accum = accumulator_set<double, stats<tag::variance>>;

    std::vector<Accum, palloc_allocator<Accum>> stats(num_values);
    for (const auto& entry: sketch) {
      for (unsigned i = 0; i < num_values; ++i) stats[i](entry.second[i]);
    }

    for (unsigned i = 0; i < num_values; ++i) {
      variances[i] = pg_float8_get_datum(variance(stats[i]));
    }
    return variances;
  } catch (const std::exception& e) {
    pg_error(e.what());
  }
  pg_unreachable();
}