/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #ifndef _COUPONLIST_INTERNAL_HPP_ #define _COUPONLIST_INTERNAL_HPP_ #include "CouponList.hpp" #include "CubicInterpolation.hpp" #include "HllUtil.hpp" #include "count_zeros.hpp" #include #include #include namespace datasketches { template CouponList::CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator): HllSketchImpl(lgConfigK, tgtHllType, mode, false), couponCount_(0), oooFlag_(false), coupons_(1ULL << (mode == hll_mode::LIST ? hll_constants::LG_INIT_LIST_SIZE : hll_constants::LG_INIT_SET_SIZE), 0, allocator) {} template CouponList::CouponList(const CouponList& that, const target_hll_type tgtHllType): HllSketchImpl(that.lgConfigK_, tgtHllType, that.mode_, false), couponCount_(that.couponCount_), oooFlag_(that.oooFlag_), coupons_(that.coupons_) {} template std::function*)> CouponList::get_deleter() const { return [](HllSketchImpl* ptr) { CouponList* cl = static_cast*>(ptr); ClAlloc cla(cl->getAllocator()); cl->~CouponList(); cla.deallocate(cl, 1); }; } template CouponList* CouponList::copy() const { ClAlloc cla(coupons_.get_allocator()); return new (cla.allocate(1)) CouponList(*this); } template CouponList* CouponList::copyAs(target_hll_type tgtHllType) const { ClAlloc cla(coupons_.get_allocator()); return new (cla.allocate(1)) CouponList(*this, tgtHllType); } template CouponList* CouponList::newList(const void* bytes, size_t len, const A& allocator) { if (len < hll_constants::LIST_INT_ARR_START) { throw std::out_of_range("Input data length insufficient to hold CouponHashSet"); } const uint8_t* data = static_cast(bytes); if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::LIST_PREINTS) { throw std::invalid_argument("Incorrect number of preInts in input stream"); } if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) { throw std::invalid_argument("Wrong ser ver in input stream"); } if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) { throw std::invalid_argument("Input stream is not an HLL sketch"); } hll_mode mode = HllSketchImpl::extractCurMode(data[hll_constants::MODE_BYTE]); if (mode != LIST) { throw std::invalid_argument("Calling list constructor with non-list mode data"); } target_hll_type tgtHllType = HllSketchImpl::extractTgtHllType(data[hll_constants::MODE_BYTE]); const uint8_t lgK = data[hll_constants::LG_K_BYTE]; const bool compact = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false); const bool oooFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false); const bool emptyFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::EMPTY_FLAG_MASK) ? true : false); const uint32_t couponCount = data[hll_constants::LIST_COUNT_BYTE]; const uint32_t couponsInArray = (compact ? couponCount : (1 << HllUtil::computeLgArrInts(LIST, couponCount, lgK))); const size_t expectedLength = hll_constants::LIST_INT_ARR_START + (couponsInArray * sizeof(uint32_t)); if (len < expectedLength) { throw std::out_of_range("Byte array too short for sketch. Expected " + std::to_string(expectedLength) + ", found: " + std::to_string(len)); } ClAlloc cla(allocator); CouponList* sketch = new (cla.allocate(1)) CouponList(lgK, tgtHllType, mode, allocator); sketch->couponCount_ = couponCount; sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST if (!emptyFlag) { // only need to read valid coupons, unlike in stream case std::memcpy(sketch->coupons_.data(), data + hll_constants::LIST_INT_ARR_START, couponCount * sizeof(uint32_t)); } return sketch; } template CouponList* CouponList::newList(std::istream& is, const A& allocator) { uint8_t listHeader[8]; read(is, listHeader, 8 * sizeof(uint8_t)); if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::LIST_PREINTS) { throw std::invalid_argument("Incorrect number of preInts in input stream"); } if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) { throw std::invalid_argument("Wrong ser ver in input stream"); } if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) { throw std::invalid_argument("Input stream is not an HLL sketch"); } hll_mode mode = HllSketchImpl::extractCurMode(listHeader[hll_constants::MODE_BYTE]); if (mode != LIST) { throw std::invalid_argument("Calling list constructor with non-list mode data"); } const target_hll_type tgtHllType = HllSketchImpl::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]); const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE]; const bool compact = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false); const bool oooFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false); const bool emptyFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::EMPTY_FLAG_MASK) ? true : false); ClAlloc cla(allocator); CouponList* sketch = new (cla.allocate(1)) CouponList(lgK, tgtHllType, mode, allocator); using coupon_list_ptr = std::unique_ptr, std::function*)>>; coupon_list_ptr ptr(sketch, sketch->get_deleter()); const uint32_t couponCount = listHeader[hll_constants::LIST_COUNT_BYTE]; sketch->couponCount_ = couponCount; sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST if (!emptyFlag) { // For stream processing, need to read entire number written to stream so read // pointer ends up set correctly. // If not compact, still need to read empty items even though in order. const uint32_t numToRead = (compact ? couponCount : static_cast(sketch->coupons_.size())); read(is, sketch->coupons_.data(), numToRead * sizeof(uint32_t)); } if (!is.good()) throw std::runtime_error("error reading from std::istream"); return ptr.release(); } template auto CouponList::serialize(bool compact, unsigned header_size_bytes) const -> vector_bytes { const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes; vector_bytes byteArr(sketchSizeBytes, 0, getAllocator()); uint8_t* bytes = byteArr.data() + header_size_bytes; bytes[hll_constants::PREAMBLE_INTS_BYTE] = static_cast(getPreInts()); bytes[hll_constants::SER_VER_BYTE] = static_cast(hll_constants::SER_VER); bytes[hll_constants::FAMILY_BYTE] = static_cast(hll_constants::FAMILY_ID); bytes[hll_constants::LG_K_BYTE] = static_cast(this->lgConfigK_); bytes[hll_constants::LG_ARR_BYTE] = count_trailing_zeros_in_u32(static_cast(coupons_.size())); bytes[hll_constants::FLAGS_BYTE] = this->makeFlagsByte(compact); bytes[hll_constants::LIST_COUNT_BYTE] = static_cast(this->mode_ == LIST ? couponCount_ : 0); bytes[hll_constants::MODE_BYTE] = this->makeModeByte(); if (this->mode_ == SET) { std::memcpy(bytes + hll_constants::HASH_SET_COUNT_INT, &couponCount_, sizeof(couponCount_)); } // coupons // isCompact() is always false for now const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0); switch (sw) { case 0: { // src updatable, dst updatable std::memcpy(bytes + getMemDataStart(), coupons_.data(), coupons_.size() * sizeof(uint32_t)); break; } case 1: { // src updatable, dst compact bytes += getMemDataStart(); // reusing pointer for incremental writes for (const uint32_t coupon: *this) { std::memcpy(bytes, &coupon, sizeof(coupon)); bytes += sizeof(coupon); } break; } default: throw std::runtime_error("Impossible condition when serializing"); } return byteArr; } template void CouponList::serialize(std::ostream& os, const bool compact) const { // header const uint8_t preInts = getPreInts(); write(os, preInts); const uint8_t serialVersion(hll_constants::SER_VER); write(os, serialVersion); const uint8_t familyId(hll_constants::FAMILY_ID); write(os, familyId); const uint8_t lgKByte = this->lgConfigK_; write(os, lgKByte); const uint8_t lgArrIntsByte = count_trailing_zeros_in_u32(static_cast(coupons_.size())); write(os, lgArrIntsByte); const uint8_t flagsByte = this->makeFlagsByte(compact); write(os, flagsByte); if (this->mode_ == LIST) { const uint8_t listCount = static_cast(couponCount_); write(os, listCount); } else { // mode == SET const uint8_t unused = 0; write(os, unused); } const uint8_t modeByte = this->makeModeByte(); write(os, modeByte); if (this->mode_ == SET) { // writing as int, already stored as int write(os, couponCount_); } // coupons // isCompact() is always false for now const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0); switch (sw) { case 0: { // src updatable, dst updatable write(os, coupons_.data(), coupons_.size() * sizeof(uint32_t)); break; } case 1: { // src updatable, dst compact for (const uint32_t coupon: *this) { write(os, coupon); } break; } default: throw std::runtime_error("Impossible condition when serializing"); } return; } template HllSketchImpl* CouponList::couponUpdate(uint32_t coupon) { for (size_t i = 0; i < coupons_.size(); ++i) { // search for empty slot const uint32_t couponAtIdx = coupons_[i]; if (couponAtIdx == hll_constants::EMPTY) { coupons_[i] = coupon; // the actual update ++couponCount_; if (couponCount_ == static_cast(coupons_.size())) { // array full if (this->lgConfigK_ < 8) { return promoteHeapListOrSetToHll(*this); } return promoteHeapListToSet(*this); } return this; } // cell not empty if (couponAtIdx == coupon) { return this; // duplicate } // cell not empty and not a duplicate, continue } throw std::runtime_error("Array invalid: no empties and no duplicates"); } template double CouponList::getCompositeEstimate() const { return getEstimate(); } template double CouponList::getEstimate() const { const double est = CubicInterpolation::usingXAndYTables(couponCount_); return fmax(est, couponCount_); } template double CouponList::getLowerBound(uint8_t numStdDev) const { HllUtil::checkNumStdDev(numStdDev); const double est = CubicInterpolation::usingXAndYTables(couponCount_); const double tmp = est / (1.0 + (numStdDev * hll_constants::COUPON_RSE)); return fmax(tmp, couponCount_); } template double CouponList::getUpperBound(uint8_t numStdDev) const { HllUtil::checkNumStdDev(numStdDev); const double est = CubicInterpolation::usingXAndYTables(couponCount_); const double tmp = est / (1.0 - (numStdDev * hll_constants::COUPON_RSE)); return fmax(tmp, couponCount_); } template bool CouponList::isEmpty() const { return getCouponCount() == 0; } template uint32_t CouponList::getUpdatableSerializationBytes() const { return getMemDataStart() + static_cast(coupons_.size()) * sizeof(uint32_t); } template uint32_t CouponList::getCouponCount() const { return couponCount_; } template uint32_t CouponList::getCompactSerializationBytes() const { return getMemDataStart() + (couponCount_ << 2); } template uint32_t CouponList::getMemDataStart() const { return hll_constants::LIST_INT_ARR_START; } template uint8_t CouponList::getPreInts() const { return hll_constants::LIST_PREINTS; } template bool CouponList::isCompact() const { return false; } template bool CouponList::isOutOfOrderFlag() const { return oooFlag_; } template void CouponList::putOutOfOrderFlag(bool oooFlag) { oooFlag_ = oooFlag; } template A CouponList::getAllocator() const { return coupons_.get_allocator(); } template HllSketchImpl* CouponList::promoteHeapListToSet(CouponList& list) { return HllSketchImplFactory::promoteListToSet(list); } template HllSketchImpl* CouponList::promoteHeapListOrSetToHll(CouponList& src) { return HllSketchImplFactory::promoteListOrSetToHll(src); } template coupon_iterator CouponList::begin(bool all) const { return coupon_iterator(coupons_.data(), coupons_.size(), 0, all); } template coupon_iterator CouponList::end() const { return coupon_iterator(coupons_.data(), coupons_.size(), coupons_.size(), false); } } #endif // _COUPONLIST_INTERNAL_HPP_