/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #include #include #include #include #include "frequent_items_sketch.hpp" #ifdef TEST_BINARY_INPUT_PATH static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH; #else static std::string testBinaryInputPath = "test/"; #endif namespace datasketches { TEST_CASE("frequent items: invalid k", "[frequent_items_sketch]") { REQUIRE_THROWS_AS(frequent_items_sketch(2), std::invalid_argument); } TEST_CASE("frequent items: empty", "[frequent_items_sketch]") { frequent_items_sketch sketch(3); REQUIRE(sketch.is_empty()); REQUIRE(sketch.get_num_active_items() == 0); REQUIRE(sketch.get_total_weight() == 0); } TEST_CASE("frequent items: one item", "[frequent_items_sketch]") { frequent_items_sketch sketch(3); sketch.update("a"); REQUIRE_FALSE(sketch.is_empty()); REQUIRE(sketch.get_num_active_items() == 1); REQUIRE(sketch.get_total_weight() == 1); REQUIRE(sketch.get_estimate("a") == 1); REQUIRE(sketch.get_lower_bound("a") == 1); REQUIRE(sketch.get_upper_bound("a") == 1); } TEST_CASE("frequent items: several items, no resize, no purge", "[frequent_items_sketch]") { frequent_items_sketch sketch(3); sketch.update("a"); sketch.update("b"); sketch.update("c"); sketch.update("d"); sketch.update("b"); sketch.update("c"); sketch.update("b"); REQUIRE_FALSE(sketch.is_empty()); REQUIRE(sketch.get_total_weight() == 7); REQUIRE(sketch.get_num_active_items() == 4); REQUIRE(sketch.get_estimate("a") == 1); REQUIRE(sketch.get_estimate("b") == 3); REQUIRE(sketch.get_estimate("c") == 2); REQUIRE(sketch.get_estimate("d") == 1); REQUIRE(sketch.get_maximum_error() == 0); } TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_items_sketch]") { frequent_items_sketch sketch(4); sketch.update("a"); sketch.update("b"); sketch.update("c"); sketch.update("d"); sketch.update("b"); sketch.update("c"); sketch.update("b"); sketch.update("e"); sketch.update("f"); sketch.update("g"); sketch.update("h"); sketch.update("i"); sketch.update("j"); sketch.update("k"); sketch.update("l"); REQUIRE_FALSE(sketch.is_empty()); REQUIRE(sketch.get_total_weight() == 15); REQUIRE(sketch.get_num_active_items() == 12); REQUIRE(sketch.get_estimate("a") == 1); REQUIRE(sketch.get_estimate("b") == 3); REQUIRE(sketch.get_estimate("c") == 2); REQUIRE(sketch.get_estimate("d") == 1); REQUIRE(sketch.get_maximum_error() == 0); } TEST_CASE("frequent items: estimation mode", "[frequent_items_sketch]") { frequent_items_sketch sketch(3); sketch.update(1, 10); sketch.update(2); sketch.update(3); sketch.update(4); sketch.update(5); sketch.update(6); sketch.update(7, 15); sketch.update(8); sketch.update(9); sketch.update(10); sketch.update(11); sketch.update(12); REQUIRE(sketch.get_maximum_error() > 0); // estimation mode REQUIRE_FALSE(sketch.is_empty()); REQUIRE(sketch.get_total_weight() == 35); auto items = sketch.get_frequent_items(frequent_items_error_type::NO_FALSE_POSITIVES); REQUIRE(items.size() == 2); // only 2 items (1 and 7) should have counts more than 1 REQUIRE(items[0].get_item() == 7); REQUIRE(items[0].get_estimate() == 15); REQUIRE(items[1].get_item() == 1); REQUIRE(items[1].get_estimate() == 10); items = sketch.get_frequent_items(frequent_items_error_type::NO_FALSE_NEGATIVES); REQUIRE(2 <= items.size()); // at least 2 items REQUIRE(12 >= items.size()); // but not more than 12 items } TEST_CASE("frequent items: merge exact mode", "[frequent_items_sketch]") { frequent_items_sketch sketch1(3); sketch1.update(1); sketch1.update(2); sketch1.update(3); sketch1.update(4); frequent_items_sketch sketch2(3); sketch1.update(2); sketch1.update(3); sketch1.update(2); sketch1.merge(sketch2); REQUIRE_FALSE(sketch1.is_empty()); REQUIRE(sketch1.get_total_weight() == 7); REQUIRE(sketch1.get_num_active_items() == 4); REQUIRE(sketch1.get_estimate(1) == 1); REQUIRE(sketch1.get_estimate(2) == 3); REQUIRE(sketch1.get_estimate(3) == 2); REQUIRE(sketch1.get_estimate(4) == 1); REQUIRE(sketch1.get_maximum_error() == 0); } TEST_CASE("frequent items: merge estimation mode", "[frequent_items_sketch]") { frequent_items_sketch sketch1(4); sketch1.update(1, 9); // to make sure it survives the purge sketch1.update(2); sketch1.update(3); sketch1.update(4); sketch1.update(5); sketch1.update(6); sketch1.update(7); sketch1.update(8); sketch1.update(9); sketch1.update(10); sketch1.update(11); sketch1.update(12); sketch1.update(13); sketch1.update(14); REQUIRE(sketch1.get_maximum_error() > 0); // estimation mode frequent_items_sketch sketch2(4); sketch2.update(8); sketch2.update(9); sketch2.update(10); sketch2.update(11); sketch2.update(12); sketch2.update(13); sketch2.update(14); sketch2.update(15); sketch2.update(16); sketch2.update(17); sketch2.update(18); sketch2.update(19); sketch2.update(20); sketch2.update(21, 11); // to make sure it survives the purge REQUIRE(sketch2.get_maximum_error() > 0); // estimation mode sketch1.merge(sketch2); REQUIRE_FALSE(sketch1.is_empty()); REQUIRE(sketch1.get_total_weight() == 46); REQUIRE(2 <= sketch1.get_num_active_items()); auto items = sketch1.get_frequent_items(frequent_items_error_type::NO_FALSE_POSITIVES, 2); REQUIRE(items.size() == 2); // only 2 items (1 and 21) should be above threshold REQUIRE(items[0].get_item() == 21); REQUIRE(11 <= items[0].get_estimate()); // always overestimated REQUIRE(items[1].get_item() == 1); REQUIRE(9 <= items[1].get_estimate()); // always overestimated } TEST_CASE("frequent items: deserialize long64 stream", "[frequent_items_sketch]") { frequent_items_sketch sketch1(3); sketch1.update(1, 1); sketch1.update(2, 2); sketch1.update(3, 3); sketch1.update(4, 4); sketch1.update(5, 5); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch1.serialize(s); auto sketch2 = frequent_items_sketch::deserialize(s); REQUIRE_FALSE(sketch2.is_empty()); REQUIRE(sketch2.get_total_weight() == 15); REQUIRE(sketch2.get_num_active_items() == 5); REQUIRE(sketch2.get_estimate(1) == 1); REQUIRE(sketch2.get_estimate(2) == 2); REQUIRE(sketch2.get_estimate(3) == 3); REQUIRE(sketch2.get_estimate(4) == 4); REQUIRE(sketch2.get_estimate(5) == 5); } TEST_CASE("frequent items: serialize deserialiation long64 bytes", "[frequent_items_sketch]") { frequent_items_sketch sketch1(3); sketch1.update(1, 1); sketch1.update(2, 2); sketch1.update(3, 3); sketch1.update(4, 4); sketch1.update(5, 5); auto bytes = sketch1.serialize(); auto sketch2 = frequent_items_sketch::deserialize(bytes.data(), bytes.size()); REQUIRE_FALSE(sketch2.is_empty()); REQUIRE(sketch2.get_total_weight() == 15); REQUIRE(sketch2.get_num_active_items() == 5); REQUIRE(sketch2.get_estimate(1) == 1); REQUIRE(sketch2.get_estimate(2) == 2); REQUIRE(sketch2.get_estimate(3) == 3); REQUIRE(sketch2.get_estimate(4) == 4); REQUIRE(sketch2.get_estimate(5) == 5); } TEST_CASE("frequent items: serialize deserialize string stream", "[frequent_items_sketch]") { frequent_items_sketch sketch1(3); sketch1.update("aaaaaaaaaaaaaaaa", 1); sketch1.update("bbbbbbbbbbbbbbbb", 2); sketch1.update("cccccccccccccccc", 3); sketch1.update("dddddddddddddddd", 4); sketch1.update("eeeeeeeeeeeeeeee", 5); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch1.serialize(s); auto sketch2 = frequent_items_sketch::deserialize(s); REQUIRE_FALSE(sketch2.is_empty()); REQUIRE(sketch2.get_total_weight() == 15); REQUIRE(sketch2.get_num_active_items() == 5); REQUIRE(sketch2.get_estimate("aaaaaaaaaaaaaaaa") == 1); REQUIRE(sketch2.get_estimate("bbbbbbbbbbbbbbbb") == 2); REQUIRE(sketch2.get_estimate("cccccccccccccccc") == 3); REQUIRE(sketch2.get_estimate("dddddddddddddddd") == 4); REQUIRE(sketch2.get_estimate("eeeeeeeeeeeeeeee") == 5); } TEST_CASE("frequent items: serialize deserialize string bytes", "[frequent_items_sketch]") { frequent_items_sketch sketch1(3); sketch1.update("aaaaaaaaaaaaaaaa", 1); sketch1.update("bbbbbbbbbbbbbbbb", 2); sketch1.update("cccccccccccccccc", 3); sketch1.update("dddddddddddddddd", 4); sketch1.update("eeeeeeeeeeeeeeee", 5); auto bytes = sketch1.serialize(); auto sketch2 = frequent_items_sketch::deserialize(bytes.data(), bytes.size()); REQUIRE_FALSE(sketch2.is_empty()); REQUIRE(sketch2.get_total_weight() == 15); REQUIRE(sketch2.get_num_active_items() == 5); REQUIRE(sketch2.get_estimate("aaaaaaaaaaaaaaaa") == 1); REQUIRE(sketch2.get_estimate("bbbbbbbbbbbbbbbb") == 2); REQUIRE(sketch2.get_estimate("cccccccccccccccc") == 3); REQUIRE(sketch2.get_estimate("dddddddddddddddd") == 4); REQUIRE(sketch2.get_estimate("eeeeeeeeeeeeeeee") == 5); } TEST_CASE("frequent items: serialize deserialize string, utf-8 stream", "[frequent_items_sketch]") { frequent_items_sketch sketch1(3); sketch1.update("абвгд", 1); sketch1.update("еёжзи", 2); sketch1.update("йклмн", 3); sketch1.update("опрст", 4); sketch1.update("уфхцч", 5); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch1.serialize(s); auto sketch2 = frequent_items_sketch::deserialize(s); REQUIRE_FALSE(sketch2.is_empty()); REQUIRE(sketch2.get_total_weight() == 15); REQUIRE(sketch2.get_num_active_items() == 5); REQUIRE(sketch2.get_estimate("абвгд") == 1); REQUIRE(sketch2.get_estimate("еёжзи") == 2); REQUIRE(sketch2.get_estimate("йклмн") == 3); REQUIRE(sketch2.get_estimate("опрст") == 4); REQUIRE(sketch2.get_estimate("уфхцч") == 5); } TEST_CASE("frequent items: int64 deserialize single item buffer overrun", "[frequent_items_sketch]") { frequent_items_sketch sketch(3); sketch.update(1); auto bytes = sketch.serialize(); REQUIRE_THROWS_AS(frequent_items_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range); } TEST_CASE("frequent items: string deserialize single item buffer overrun", "[frequent_items_sketch]") { frequent_items_sketch sketch(3); sketch.update("a"); auto bytes = sketch.serialize(); REQUIRE_THROWS_AS(frequent_items_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range); } } /* namespace datasketches */