/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #include #include namespace datasketches { using three_doubles = std::tuple; // this is needed for a test below, but should be defined here std::ostream& operator<<(std::ostream& os, const three_doubles& tuple) { os << std::get<0>(tuple) << ", " << std::get<1>(tuple) << ", " << std::get<2>(tuple); return os; } } #include #include namespace datasketches { TEST_CASE("tuple sketch float: builder", "[tuple_sketch]") { auto builder = update_tuple_sketch::builder(); builder.set_lg_k(10).set_p(0.5f).set_resize_factor(theta_constants::resize_factor::X2).set_seed(123); auto sketch = builder.build(); REQUIRE(sketch.get_lg_k() == 10); REQUIRE(sketch.get_theta() == 1.0); // empty sketch should have theta 1.0 REQUIRE(sketch.get_rf() == theta_constants::resize_factor::X2); REQUIRE(sketch.get_seed_hash() == compute_seed_hash(123)); sketch.update(1, 0); REQUIRE(sketch.get_theta() == 0.5); // theta = p } TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") { auto update_sketch = update_tuple_sketch::builder().build(); std::cout << "sizeof(update_tuple_sketch)=" << sizeof(update_sketch) << std::endl; REQUIRE(update_sketch.is_empty()); REQUIRE(!update_sketch.is_estimation_mode()); REQUIRE(update_sketch.get_estimate() == 0); REQUIRE(update_sketch.get_lower_bound(1) == 0); REQUIRE(update_sketch.get_lower_bound(1, 1) == 0); REQUIRE(update_sketch.get_lower_bound(1, update_sketch.get_num_retained()) == 0); REQUIRE(update_sketch.get_lower_bound(1, update_sketch.get_num_retained()+1) == 0); REQUIRE(update_sketch.get_upper_bound(1) == 0); REQUIRE(update_sketch.get_upper_bound(1, 1) == 0); REQUIRE(update_sketch.get_upper_bound(1, update_sketch.get_num_retained()) == 0); REQUIRE(update_sketch.get_upper_bound(1, update_sketch.get_num_retained()+1) == 0); REQUIRE(update_sketch.get_theta() == 1); REQUIRE(update_sketch.get_num_retained() == 0); REQUIRE(update_sketch.is_ordered()); auto compact_sketch = update_sketch.compact(); std::cout << "sizeof(compact_tuple_sketch)=" << sizeof(compact_sketch) << std::endl; REQUIRE(compact_sketch.is_empty()); REQUIRE(!compact_sketch.is_estimation_mode()); REQUIRE(compact_sketch.get_estimate() == 0); REQUIRE(compact_sketch.get_lower_bound(1) == 0); REQUIRE(compact_sketch.get_lower_bound(1, 1) == 0); REQUIRE(compact_sketch.get_lower_bound(1, update_sketch.get_num_retained()) == 0); REQUIRE(compact_sketch.get_upper_bound(1) == 0); REQUIRE(compact_sketch.get_upper_bound(1, 1) == 0); REQUIRE(compact_sketch.get_upper_bound(1, update_sketch.get_num_retained()) == 0); REQUIRE(compact_sketch.get_theta() == 1); REQUIRE(compact_sketch.get_num_retained() == 0); REQUIRE(compact_sketch.is_ordered()); // empty is forced to be ordered REQUIRE(update_sketch.compact(false).is_ordered()); } TEST_CASE("tuple sketch: single item", "[tuple_sketch]") { auto update_sketch = update_tuple_sketch::builder().build(); update_sketch.update(1, 1.0f); REQUIRE_FALSE(update_sketch.is_empty()); REQUIRE_FALSE(update_sketch.is_estimation_mode()); REQUIRE(update_sketch.get_theta() == 1.0); REQUIRE(update_sketch.get_estimate() == 1.0); REQUIRE(update_sketch.get_lower_bound(1) == 1.0); REQUIRE(update_sketch.get_upper_bound(1) == 1.0); REQUIRE(update_sketch.is_ordered()); // one item is ordered auto compact_sketch = update_sketch.compact(); REQUIRE_FALSE(compact_sketch.is_empty()); REQUIRE_FALSE(compact_sketch.is_estimation_mode()); REQUIRE(compact_sketch.get_theta() == 1.0); REQUIRE(compact_sketch.get_estimate() == 1.0); REQUIRE(compact_sketch.get_lower_bound(1) == 1.0); REQUIRE(compact_sketch.get_upper_bound(1) == 1.0); REQUIRE(compact_sketch.is_ordered()); // single item is forced to be ordered REQUIRE(update_sketch.compact(false).is_ordered()); } TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") { auto update_sketch = update_tuple_sketch::builder().build(); update_sketch.update(1, 1.0f); update_sketch.update(2, 2.0f); update_sketch.update(1, 1.0f); // std::cout << update_sketch.to_string(true); REQUIRE_FALSE(update_sketch.is_empty()); REQUIRE_FALSE(update_sketch.is_estimation_mode()); REQUIRE(update_sketch.get_estimate() == 2); REQUIRE(update_sketch.get_lower_bound(1) == 2); REQUIRE(update_sketch.get_lower_bound(1, 1) == 1); REQUIRE(update_sketch.get_lower_bound(1, update_sketch.get_num_retained()) == 2); REQUIRE(update_sketch.get_upper_bound(1) == 2); REQUIRE(update_sketch.get_upper_bound(1, 1) == 1); REQUIRE(update_sketch.get_upper_bound(1, update_sketch.get_num_retained()) == 2); REQUIRE(update_sketch.get_theta() == 1); REQUIRE(update_sketch.get_num_retained() == 2); REQUIRE_FALSE(update_sketch.is_ordered()); int count = 0; for (const auto& entry: update_sketch) { REQUIRE(entry.second == 2); ++count; } REQUIRE(count == 2); auto compact_sketch = update_sketch.compact(); // std::cout << compact_sketch.to_string(true); REQUIRE_FALSE(compact_sketch.is_empty()); REQUIRE_FALSE(compact_sketch.is_estimation_mode()); REQUIRE(compact_sketch.get_estimate() == 2); REQUIRE(compact_sketch.get_lower_bound(1) == 2); REQUIRE(compact_sketch.get_lower_bound(1, 1) == 1); REQUIRE(compact_sketch.get_lower_bound(1, compact_sketch.get_num_retained()) == 2); REQUIRE(compact_sketch.get_upper_bound(1) == 2); REQUIRE(compact_sketch.get_upper_bound(1, 1) == 1); REQUIRE(compact_sketch.get_upper_bound(1, compact_sketch.get_num_retained()) == 2); REQUIRE(compact_sketch.get_theta() == 1); REQUIRE(compact_sketch.get_num_retained() == 2); REQUIRE(compact_sketch.is_ordered()); count = 0; for (const auto& entry: compact_sketch) { REQUIRE(entry.second == 2); ++count; } REQUIRE(count == 2); { // stream std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); compact_sketch.serialize(s); auto deserialized_sketch = compact_tuple_sketch::deserialize(s); REQUIRE(!deserialized_sketch.is_empty()); REQUIRE(!deserialized_sketch.is_estimation_mode()); REQUIRE(deserialized_sketch.get_estimate() == 2); REQUIRE(deserialized_sketch.get_lower_bound(1) == 2); REQUIRE(deserialized_sketch.get_lower_bound(1, 1) == 1); REQUIRE(deserialized_sketch.get_lower_bound(1, deserialized_sketch.get_num_retained()) == 2); REQUIRE(deserialized_sketch.get_upper_bound(1) == 2); REQUIRE(deserialized_sketch.get_upper_bound(1, 1) == 1); REQUIRE(deserialized_sketch.get_upper_bound(1, deserialized_sketch.get_num_retained()) == 2); REQUIRE(deserialized_sketch.get_theta() == 1); REQUIRE(deserialized_sketch.get_num_retained() == 2); REQUIRE(deserialized_sketch.is_ordered()); // std::cout << "deserialized sketch:" << std::endl; // std::cout << deserialized_sketch.to_string(true); } { // bytes auto bytes = compact_sketch.serialize(); auto deserialized_sketch = compact_tuple_sketch::deserialize(bytes.data(), bytes.size()); REQUIRE(!deserialized_sketch.is_empty()); REQUIRE(!deserialized_sketch.is_estimation_mode()); REQUIRE(deserialized_sketch.get_estimate() == 2); REQUIRE(deserialized_sketch.get_lower_bound(1) == 2); REQUIRE(deserialized_sketch.get_lower_bound(1, 1) == 1); REQUIRE(deserialized_sketch.get_lower_bound(1, deserialized_sketch.get_num_retained()) == 2); REQUIRE(deserialized_sketch.get_upper_bound(1) == 2); REQUIRE(deserialized_sketch.get_upper_bound(1, 1) == 1); REQUIRE(deserialized_sketch.get_upper_bound(1, deserialized_sketch.get_num_retained()) == 2); REQUIRE(deserialized_sketch.get_theta() == 1); REQUIRE(deserialized_sketch.get_num_retained() == 2); REQUIRE(deserialized_sketch.is_ordered()); // std::cout << deserialized_sketch.to_string(true); } // mixed { auto bytes = compact_sketch.serialize(); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); s.write(reinterpret_cast(bytes.data()), bytes.size()); auto deserialized_sketch = compact_tuple_sketch::deserialize(s); auto it = deserialized_sketch.begin(); for (const auto& entry: compact_sketch) { REQUIRE(entry.first == (*it).first); REQUIRE(entry.second == (*it).second); ++it; } } update_sketch.reset(); REQUIRE(update_sketch.is_empty()); REQUIRE_FALSE(update_sketch.is_estimation_mode()); REQUIRE(update_sketch.get_estimate() == 0); REQUIRE(update_sketch.get_lower_bound(1) == 0); REQUIRE(update_sketch.get_upper_bound(1) == 0); REQUIRE(update_sketch.get_theta() == 1); REQUIRE(update_sketch.get_num_retained() == 0); REQUIRE(update_sketch.is_ordered()); } template class max_value_policy { public: max_value_policy(const T& initial_value): initial_value(initial_value) {} T create() const { return initial_value; } void update(T& summary, const T& update) const { summary = std::max(summary, update); } private: T initial_value; }; using max_float_update_tuple_sketch = update_tuple_sketch>; TEST_CASE("tuple sketch: float, custom policy", "[tuple_sketch]") { auto update_sketch = max_float_update_tuple_sketch::builder(max_value_policy(5)).build(); update_sketch.update(1, 1.0f); update_sketch.update(1, 2.0f); update_sketch.update(2, 10.0f); update_sketch.update(3, 3.0f); update_sketch.update(3, 7.0f); // std::cout << update_sketch.to_string(true); int count = 0; float sum = 0; for (const auto& entry: update_sketch) { sum += entry.second; ++count; } REQUIRE(count == 3); REQUIRE(sum == 22); // 5 + 10 + 7 } struct three_doubles_update_policy { std::tuple create() const { return std::tuple(0, 0, 0); } void update(std::tuple& summary, const std::tuple& update) const { std::get<0>(summary) += std::get<0>(update); std::get<1>(summary) += std::get<1>(update); std::get<2>(summary) += std::get<2>(update); } }; TEST_CASE("tuple sketch: tuple of doubles", "[tuple_sketch]") { using three_doubles_update_tuple_sketch = update_tuple_sketch; auto update_sketch = three_doubles_update_tuple_sketch::builder().build(); update_sketch.update(1, three_doubles(1, 2, 3)); // std::cout << update_sketch.to_string(true); const auto& entry = *update_sketch.begin(); REQUIRE(std::get<0>(entry.second) == 1.0); REQUIRE(std::get<1>(entry.second) == 2.0); REQUIRE(std::get<2>(entry.second) == 3.0); auto compact_sketch = update_sketch.compact(); // std::cout << compact_sketch.to_string(true); REQUIRE(compact_sketch.get_num_retained() == 1); } TEST_CASE("tuple sketch: float, update with different types of keys", "[tuple_sketch]") { auto sketch = update_tuple_sketch::builder().build(); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 1); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 1); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 1); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 1); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 1); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 1); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 1); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 1); sketch.update(1.0, 1.0f); REQUIRE(sketch.get_num_retained() == 2); sketch.update(static_cast(1), 1.0f); REQUIRE(sketch.get_num_retained() == 2); sketch.update("a", 1.0f); REQUIRE(sketch.get_num_retained() == 3); } } /* namespace datasketches */