/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #ifndef KOLMOGOROV_SMIRNOV_IMPL_HPP_ #define KOLMOGOROV_SMIRNOV_IMPL_HPP_ #include #include namespace datasketches { template double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) { auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2 auto view1 = sketch1.get_sorted_view(); auto view2 = sketch2.get_sorted_view(); auto it1 = view1.begin(); auto it2 = view2.begin(); const auto n1 = sketch1.get_n(); const auto n2 = sketch2.get_n(); double delta = 0; while (it1 != view1.end() && it2 != view2.end()) { const double norm_cum_wt1 = static_cast(it1.get_cumulative_weight(false)) / n1; const double norm_cum_wt2 = static_cast(it2.get_cumulative_weight(false)) / n2; delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2)); if (comparator((*it1).first, (*it2).first)) { ++it1; } else if (comparator((*it2).first, (*it1).first)) { ++it2; } else { ++it1; ++it2; } } const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast(it1.get_cumulative_weight(false)) / n1; const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast(it2.get_cumulative_weight(false)) / n2; delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2)); return delta; } template double kolmogorov_smirnov::threshold(const Sketch& sketch1, const Sketch& sketch2, double p) { const double r1 = sketch1.get_num_retained(); const double r2 = sketch2.get_num_retained(); const double alpha_factor = sqrt(-0.5 * log(0.5 * p)); const double delta_area_threshold = alpha_factor * sqrt((r1 + r2) / (r1 * r2)); const double eps1 = sketch1.get_normalized_rank_error(false); const double eps2 = sketch2.get_normalized_rank_error(false); return delta_area_threshold + eps1 + eps2; } template bool kolmogorov_smirnov::test(const Sketch& sketch1, const Sketch& sketch2, double p) { return delta(sketch1, sketch2) > threshold(sketch1, sketch2, p); } } /* namespace datasketches */ #endif