/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #include <cstring> #include <cppunit/TestFixture.h> #include <cppunit/extensions/HelperMacros.h> #include "cpc_sketch.hpp" namespace datasketches { static const double RELATIVE_ERROR_FOR_LG_K_11 = 0.02; class cpc_sketch_test: public CppUnit::TestFixture { CPPUNIT_TEST_SUITE(cpc_sketch_test); CPPUNIT_TEST(lg_k_limits); CPPUNIT_TEST(empty); CPPUNIT_TEST(one_value); CPPUNIT_TEST(many_values); CPPUNIT_TEST(serialize_deserialize_empty); CPPUNIT_TEST(serialize_deserialize_sparse); CPPUNIT_TEST(serialize_deserialize_hybrid); CPPUNIT_TEST(serialize_deserialize_pinned); CPPUNIT_TEST(serialize_deserialize_sliding); CPPUNIT_TEST(serialize_deserialize_empty_bytes); CPPUNIT_TEST(serialize_deserialize_sparse_bytes); CPPUNIT_TEST(serialize_deserialize_hybrid_bytes); CPPUNIT_TEST(serialize_deserialize_pinned_bytes); CPPUNIT_TEST(serialize_deserialize_sliding_bytes); CPPUNIT_TEST(serialize_deserialize_empty_custom_seed); CPPUNIT_TEST(copy); CPPUNIT_TEST(kappa_range); CPPUNIT_TEST(validate_fail); CPPUNIT_TEST(serialize_both_ways); CPPUNIT_TEST(update_int_equivalence); CPPUNIT_TEST(update_float_equivalience); CPPUNIT_TEST(update_string_equivalence); CPPUNIT_TEST_SUITE_END(); void lg_k_limits() { cpc_sketch s1(CPC_MIN_LG_K); // this should work cpc_sketch s2(CPC_MAX_LG_K); // this should work CPPUNIT_ASSERT_THROW(cpc_sketch s3(CPC_MIN_LG_K - 1), std::invalid_argument); CPPUNIT_ASSERT_THROW(cpc_sketch s4(CPC_MAX_LG_K + 1), std::invalid_argument); } void empty() { cpc_sketch sketch(11); CPPUNIT_ASSERT(sketch.is_empty()); CPPUNIT_ASSERT_EQUAL(0.0, sketch.get_estimate()); CPPUNIT_ASSERT_EQUAL(0.0, sketch.get_lower_bound(1)); CPPUNIT_ASSERT_EQUAL(0.0, sketch.get_upper_bound(1)); CPPUNIT_ASSERT(sketch.validate()); } void one_value() { cpc_sketch sketch(11); sketch.update(1); CPPUNIT_ASSERT(!sketch.is_empty()); CPPUNIT_ASSERT_DOUBLES_EQUAL(1, sketch.get_estimate(), RELATIVE_ERROR_FOR_LG_K_11); CPPUNIT_ASSERT(sketch.get_estimate() >= sketch.get_lower_bound(1)); CPPUNIT_ASSERT(sketch.get_estimate() <= sketch.get_upper_bound(1)); CPPUNIT_ASSERT(sketch.validate()); } void many_values() { cpc_sketch sketch(11); const int n(10000); for (int i = 0; i < n; i++) sketch.update(i); CPPUNIT_ASSERT(!sketch.is_empty()); CPPUNIT_ASSERT_DOUBLES_EQUAL(n, sketch.get_estimate(), n * RELATIVE_ERROR_FOR_LG_K_11); CPPUNIT_ASSERT(sketch.get_estimate() >= sketch.get_lower_bound(1)); CPPUNIT_ASSERT(sketch.get_estimate() <= sketch.get_upper_bound(1)); CPPUNIT_ASSERT(sketch.validate()); } void serialize_deserialize_empty() { cpc_sketch sketch(11); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch.serialize(s); auto sketch_ptr(cpc_sketch::deserialize(s)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); std::ofstream os("cpc-empty.bin"); sketch.serialize(os); } void serialize_deserialize_sparse() { cpc_sketch sketch(11); const int n(100); for (int i = 0; i < n; i++) sketch.update(i); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch.serialize(s); auto sketch_ptr(cpc_sketch::deserialize(s)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // updating again with the same values should not change the sketch for (int i = 0; i < n; i++) sketch_ptr->update(i); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); std::ofstream os("cpc-sparse.bin"); sketch.serialize(os); } void serialize_deserialize_hybrid() { cpc_sketch sketch(11); const int n(200); for (int i = 0; i < n; i++) sketch.update(i); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch.serialize(s); auto sketch_ptr(cpc_sketch::deserialize(s)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // updating again with the same values should not change the sketch for (int i = 0; i < n; i++) sketch_ptr->update(i); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); std::ofstream os("cpc-hybrid.bin"); sketch.serialize(os); } void serialize_deserialize_pinned() { cpc_sketch sketch(11); const int n(2000); for (int i = 0; i < n; i++) sketch.update(i); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch.serialize(s); auto sketch_ptr(cpc_sketch::deserialize(s)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // updating again with the same values should not change the sketch for (int i = 0; i < n; i++) sketch_ptr->update(i); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); std::ofstream os("cpc-pinned.bin"); sketch.serialize(os); } void serialize_deserialize_sliding() { cpc_sketch sketch(11); const int n(20000); for (int i = 0; i < n; i++) sketch.update(i); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch.serialize(s); auto sketch_ptr(cpc_sketch::deserialize(s)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // updating again with the same values should not change the sketch for (int i = 0; i < n; i++) sketch_ptr->update(i); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); std::ofstream os("cpc-sliding.bin"); sketch.serialize(os); } void serialize_deserialize_empty_bytes() { cpc_sketch sketch(11); auto data = sketch.serialize(); auto sketch_ptr(cpc_sketch::deserialize(data.first.get(), data.second)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); std::ofstream os("cpc-empty.bin"); sketch.serialize(os); } void serialize_deserialize_hybrid_bytes() { cpc_sketch sketch(11); const int n(200); for (int i = 0; i < n; i++) sketch.update(i); auto data = sketch.serialize(); auto sketch_ptr(cpc_sketch::deserialize(data.first.get(), data.second)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // updating again with the same values should not change the sketch for (int i = 0; i < n; i++) sketch_ptr->update(i); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); } void serialize_deserialize_sparse_bytes() { cpc_sketch sketch(11); const int n(100); for (int i = 0; i < n; i++) sketch.update(i); auto data = sketch.serialize(); auto sketch_ptr(cpc_sketch::deserialize(data.first.get(), data.second)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // updating again with the same values should not change the sketch for (int i = 0; i < n; i++) sketch_ptr->update(i); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); } void serialize_deserialize_pinned_bytes() { cpc_sketch sketch(11); const int n(2000); for (int i = 0; i < n; i++) sketch.update(i); auto data = sketch.serialize(); auto sketch_ptr(cpc_sketch::deserialize(data.first.get(), data.second)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // updating again with the same values should not change the sketch for (int i = 0; i < n; i++) sketch_ptr->update(i); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); } void serialize_deserialize_sliding_bytes() { cpc_sketch sketch(11); const int n(20000); for (int i = 0; i < n; i++) sketch.update(i); auto data = sketch.serialize(); auto sketch_ptr(cpc_sketch::deserialize(data.first.get(), data.second)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // updating again with the same values should not change the sketch for (int i = 0; i < n; i++) sketch_ptr->update(i); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); } void copy() { cpc_sketch s1(11); s1.update(1); cpc_sketch s2 = s1; // copy constructor CPPUNIT_ASSERT(!s2.is_empty()); CPPUNIT_ASSERT_DOUBLES_EQUAL(1, s2.get_estimate(), RELATIVE_ERROR_FOR_LG_K_11); s2.update(2); s1 = s2; // operator= CPPUNIT_ASSERT_DOUBLES_EQUAL(2, s1.get_estimate(), RELATIVE_ERROR_FOR_LG_K_11); } void serialize_deserialize_empty_custom_seed() { cpc_sketch sketch(11, 123); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch.serialize(s); auto sketch_ptr(cpc_sketch::deserialize(s, 123)); CPPUNIT_ASSERT_EQUAL(sketch.is_empty(), sketch_ptr->is_empty()); CPPUNIT_ASSERT_EQUAL(sketch.get_estimate(), sketch_ptr->get_estimate()); CPPUNIT_ASSERT(sketch_ptr->validate()); // incompatible seed CPPUNIT_ASSERT_THROW(cpc_sketch::deserialize(s), std::invalid_argument); } void kappa_range() { cpc_sketch s(11); CPPUNIT_ASSERT_EQUAL(0.0, s.get_lower_bound(1)); CPPUNIT_ASSERT_EQUAL(0.0, s.get_upper_bound(1)); CPPUNIT_ASSERT_EQUAL(0.0, s.get_lower_bound(2)); CPPUNIT_ASSERT_EQUAL(0.0, s.get_upper_bound(2)); CPPUNIT_ASSERT_EQUAL(0.0, s.get_lower_bound(3)); CPPUNIT_ASSERT_EQUAL(0.0, s.get_upper_bound(3)); CPPUNIT_ASSERT_THROW(s.get_lower_bound(4), std::invalid_argument); CPPUNIT_ASSERT_THROW(s.get_upper_bound(4), std::invalid_argument); } void validate_fail() { cpc_sketch sketch(11); const int n(2000); for (int i = 0; i < n; i++) sketch.update(i); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch.serialize(s); s.seekp(700); // the stream should be 856 bytes long. corrupt it somewhere before the end s << "corrupt data"; auto sketch_ptr(cpc_sketch::deserialize(s)); CPPUNIT_ASSERT(!sketch_ptr->validate()); } void serialize_both_ways() { cpc_sketch sketch(11); const int n(2000); for (int i = 0; i < n; i++) sketch.update(i); const int header_size_bytes = 4; auto data(sketch.serialize(header_size_bytes)); std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); sketch.serialize(s); CPPUNIT_ASSERT_EQUAL(data.second - header_size_bytes, (size_t) s.tellp()); char* pp = new char[s.tellp()]; s.read(pp, s.tellp()); CPPUNIT_ASSERT(std::memcmp(pp, static_cast<char*>(data.first.get()) + header_size_bytes, data.second - header_size_bytes) == 0); } void update_int_equivalence() { cpc_sketch sketch(11); sketch.update((uint64_t) -1); sketch.update((int64_t) -1); sketch.update((uint32_t) -1); sketch.update((int32_t) -1); sketch.update((uint16_t) -1); sketch.update((int16_t) -1); sketch.update((uint8_t) -1); sketch.update((int8_t) -1); CPPUNIT_ASSERT_DOUBLES_EQUAL(1, sketch.get_estimate(), RELATIVE_ERROR_FOR_LG_K_11); std::ofstream os("cpc-negative-one.bin"); // to compare with Java sketch.serialize(os); } void update_float_equivalience() { cpc_sketch sketch(11); sketch.update((float) 1); sketch.update((double) 1); CPPUNIT_ASSERT_DOUBLES_EQUAL(1, sketch.get_estimate(), RELATIVE_ERROR_FOR_LG_K_11); } void update_string_equivalence() { cpc_sketch sketch(11); const std::string a("a"); sketch.update(a); sketch.update(a.c_str(), a.length()); CPPUNIT_ASSERT_DOUBLES_EQUAL(1, sketch.get_estimate(), RELATIVE_ERROR_FOR_LG_K_11); } }; CPPUNIT_TEST_SUITE_REGISTRATION(cpc_sketch_test); } /* namespace datasketches */