/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #ifndef _COUPONLIST_INTERNAL_HPP_ #define _COUPONLIST_INTERNAL_HPP_ #include "CouponList.hpp" #include "CubicInterpolation.hpp" #include "HllUtil.hpp" #include "IntArrayPairIterator.hpp" #include #include #include #include namespace datasketches { template CouponList::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode) : HllSketchImpl(lgConfigK, tgtHllType, mode, false) { if (mode == hll_mode::LIST) { lgCouponArrInts = HllUtil::LG_INIT_LIST_SIZE; oooFlag = false; } else { // mode == SET lgCouponArrInts = HllUtil::LG_INIT_SET_SIZE; oooFlag = true; } const int arrayLen = 1 << lgCouponArrInts; typedef typename std::allocator_traits::template rebind_alloc intAlloc; couponIntArr = intAlloc().allocate(arrayLen); std::fill(couponIntArr, couponIntArr + arrayLen, 0); couponCount = 0; } template CouponList::CouponList(const CouponList& that) : HllSketchImpl(that.lgConfigK, that.tgtHllType, that.mode, false), lgCouponArrInts(that.lgCouponArrInts), couponCount(that.couponCount), oooFlag(that.oooFlag) { const int numItems = 1 << lgCouponArrInts; typedef typename std::allocator_traits::template rebind_alloc intAlloc; couponIntArr = intAlloc().allocate(numItems); std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr); } template CouponList::CouponList(const CouponList& that, const target_hll_type tgtHllType) : HllSketchImpl(that.lgConfigK, tgtHllType, that.mode, false), lgCouponArrInts(that.lgCouponArrInts), couponCount(that.couponCount), oooFlag(that.oooFlag) { const int numItems = 1 << lgCouponArrInts; typedef typename std::allocator_traits::template rebind_alloc intAlloc; couponIntArr = intAlloc().allocate(numItems); std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr); } template CouponList::~CouponList() { typedef typename std::allocator_traits::template rebind_alloc intAlloc; intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts); } template std::function*)> CouponList::get_deleter() const { return [](HllSketchImpl* ptr) { CouponList* cl = static_cast*>(ptr); cl->~CouponList(); clAlloc().deallocate(cl, 1); }; } template CouponList* CouponList::copy() const { return new (clAlloc().allocate(1)) CouponList(*this); } template CouponList* CouponList::copyAs(target_hll_type tgtHllType) const { return new (clAlloc().allocate(1)) CouponList(*this, tgtHllType); } template CouponList* CouponList::newList(const void* bytes, size_t len) { if (len < HllUtil::LIST_INT_ARR_START) { throw std::invalid_argument("Input data length insufficient to hold CouponHashSet"); } const uint8_t* data = static_cast(bytes); if (data[HllUtil::PREAMBLE_INTS_BYTE] != HllUtil::LIST_PREINTS) { throw std::invalid_argument("Incorrect number of preInts in input stream"); } if (data[HllUtil::SER_VER_BYTE] != HllUtil::SER_VER) { throw std::invalid_argument("Wrong ser ver in input stream"); } if (data[HllUtil::FAMILY_BYTE] != HllUtil::FAMILY_ID) { throw std::invalid_argument("Input stream is not an HLL sketch"); } hll_mode mode = HllSketchImpl::extractCurMode(data[HllUtil::MODE_BYTE]); if (mode != LIST) { throw std::invalid_argument("Calling set construtor with non-list mode data"); } target_hll_type tgtHllType = HllSketchImpl::extractTgtHllType(data[HllUtil::MODE_BYTE]); const int lgK = data[HllUtil::LG_K_BYTE]; const bool compact = ((data[HllUtil::FLAGS_BYTE] & HllUtil::COMPACT_FLAG_MASK) ? true : false); const bool oooFlag = ((data[HllUtil::FLAGS_BYTE] & HllUtil::OUT_OF_ORDER_FLAG_MASK) ? true : false); const bool emptyFlag = ((data[HllUtil::FLAGS_BYTE] & HllUtil::EMPTY_FLAG_MASK) ? true : false); const int couponCount = data[HllUtil::LIST_COUNT_BYTE]; const int couponsInArray = (compact ? couponCount : (1 << HllUtil::computeLgArrInts(LIST, couponCount, lgK))); const size_t expectedLength = HllUtil::LIST_INT_ARR_START + (couponsInArray * sizeof(int)); if (len < expectedLength) { throw std::invalid_argument("Byte array too short for sketch. Expected " + std::to_string(expectedLength) + ", found: " + std::to_string(len)); } CouponList* sketch = new (clAlloc().allocate(1)) CouponList(lgK, tgtHllType, mode); sketch->couponCount = couponCount; sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST if (!emptyFlag) { // only need to read valid coupons, unlike in stream case std::memcpy(sketch->couponIntArr, data + HllUtil::LIST_INT_ARR_START, couponCount * sizeof(int)); } return sketch; } template CouponList* CouponList::newList(std::istream& is) { uint8_t listHeader[8]; is.read((char*)listHeader, 8 * sizeof(uint8_t)); if (listHeader[HllUtil::PREAMBLE_INTS_BYTE] != HllUtil::LIST_PREINTS) { throw std::invalid_argument("Incorrect number of preInts in input stream"); } if (listHeader[HllUtil::SER_VER_BYTE] != HllUtil::SER_VER) { throw std::invalid_argument("Wrong ser ver in input stream"); } if (listHeader[HllUtil::FAMILY_BYTE] != HllUtil::FAMILY_ID) { throw std::invalid_argument("Input stream is not an HLL sketch"); } hll_mode mode = HllSketchImpl::extractCurMode(listHeader[HllUtil::MODE_BYTE]); if (mode != LIST) { throw std::invalid_argument("Calling list construtor with non-list mode data"); } const target_hll_type tgtHllType = HllSketchImpl::extractTgtHllType(listHeader[HllUtil::MODE_BYTE]); const int lgK = (int) listHeader[HllUtil::LG_K_BYTE]; const bool compact = ((listHeader[HllUtil::FLAGS_BYTE] & HllUtil::COMPACT_FLAG_MASK) ? true : false); const bool oooFlag = ((listHeader[HllUtil::FLAGS_BYTE] & HllUtil::OUT_OF_ORDER_FLAG_MASK) ? true : false); const bool emptyFlag = ((listHeader[HllUtil::FLAGS_BYTE] & HllUtil::EMPTY_FLAG_MASK) ? true : false); CouponList* sketch = new (clAlloc().allocate(1)) CouponList(lgK, tgtHllType, mode); const int couponCount = listHeader[HllUtil::LIST_COUNT_BYTE]; sketch->couponCount = couponCount; sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST if (!emptyFlag) { // For stream processing, need to read entire number written to stream so read // pointer ends up set correctly. // If not compact, still need to read empty items even though in order. const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts)); is.read((char*)sketch->couponIntArr, numToRead * sizeof(int)); } return sketch; } template std::pair>, const size_t> CouponList::serialize(bool compact, unsigned header_size_bytes) const { const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes; typedef typename std::allocator_traits::template rebind_alloc uint8Alloc; std::unique_ptr> byteArr( uint8Alloc().allocate(sketchSizeBytes), [sketchSizeBytes](uint8_t* p){ uint8Alloc().deallocate(p, sketchSizeBytes); } ); uint8_t* bytes = byteArr.get() + header_size_bytes; bytes[HllUtil::PREAMBLE_INTS_BYTE] = static_cast(getPreInts()); bytes[HllUtil::SER_VER_BYTE] = static_cast(HllUtil::SER_VER); bytes[HllUtil::FAMILY_BYTE] = static_cast(HllUtil::FAMILY_ID); bytes[HllUtil::LG_K_BYTE] = static_cast(this->lgConfigK); bytes[HllUtil::LG_ARR_BYTE] = static_cast(lgCouponArrInts); bytes[HllUtil::FLAGS_BYTE] = this->makeFlagsByte(compact); bytes[HllUtil::LIST_COUNT_BYTE] = static_cast(this->mode == LIST ? couponCount : 0); bytes[HllUtil::MODE_BYTE] = this->makeModeByte(); if (this->mode == SET) { std::memcpy(bytes + HllUtil::HASH_SET_COUNT_INT, &couponCount, sizeof(couponCount)); } // coupons // isCompact() is always false for now const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0); switch (sw) { case 0: { // src updatable, dst updatable std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int)); break; } case 1: { // src updatable, dst compact pair_iterator_with_deleter itr = getIterator(); bytes += getMemDataStart(); // reusing ponter for incremental writes while (itr->nextValid()) { const int pairValue = itr->getPair(); std::memcpy(bytes, &pairValue, sizeof(pairValue)); bytes += sizeof(pairValue); } break; } default: throw std::runtime_error("Impossible condition when serializing"); } return std::make_pair(std::move(byteArr), sketchSizeBytes); } template void CouponList::serialize(std::ostream& os, const bool compact) const { // header const uint8_t preInts(getPreInts()); os.write((char*)&preInts, sizeof(preInts)); const uint8_t serialVersion(HllUtil::SER_VER); os.write((char*)&serialVersion, sizeof(serialVersion)); const uint8_t familyId(HllUtil::FAMILY_ID); os.write((char*)&familyId, sizeof(familyId)); const uint8_t lgKByte((uint8_t) this->lgConfigK); os.write((char*)&lgKByte, sizeof(lgKByte)); const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts); os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte)); const uint8_t flagsByte(this->makeFlagsByte(compact)); os.write((char*)&flagsByte, sizeof(flagsByte)); if (this->mode == LIST) { const uint8_t listCount((uint8_t) couponCount); os.write((char*)&listCount, sizeof(listCount)); } else { // mode == SET const uint8_t unused(0); os.write((char*)&unused, sizeof(unused)); } const uint8_t modeByte(this->makeModeByte()); os.write((char*)&modeByte, sizeof(modeByte)); if (this->mode == SET) { // writing as int, already stored as int os.write((char*)&couponCount, sizeof(couponCount)); } // coupons // isCompact() is always false for now const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0); switch (sw) { case 0: { // src updatable, dst updatable os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int)); break; } case 1: { // src updatable, dst compact pair_iterator_with_deleter itr = getIterator(); while (itr->nextValid()) { const int pairValue = itr->getPair(); os.write((char*)&pairValue, sizeof(pairValue)); } break; } default: throw std::runtime_error("Impossible condition when serializing"); } return; } template HllSketchImpl* CouponList::couponUpdate(int coupon) { const int len = 1 << lgCouponArrInts; for (int i = 0; i < len; ++i) { // search for empty slot const int couponAtIdx = couponIntArr[i]; if (couponAtIdx == HllUtil::EMPTY) { couponIntArr[i] = coupon; // the actual update ++couponCount; if (couponCount >= len) { // array full if (this->lgConfigK < 8) { return promoteHeapListOrSetToHll(*this); // oooFlag = false } return promoteHeapListToSet(*this); // oooFlag = true; } return this; } // cell not empty if (couponAtIdx == coupon) { return this; // duplicate } // cell not empty and not a duplicate, continue } throw std::runtime_error("Array invalid: no empties and no duplicates"); } template double CouponList::getCompositeEstimate() const { return getEstimate(); } template double CouponList::getEstimate() const { const int couponCount = getCouponCount(); const double est = CubicInterpolation::usingXAndYTables(couponCount); return fmax(est, couponCount); } template double CouponList::getLowerBound(const int numStdDev) const { HllUtil::checkNumStdDev(numStdDev); const int couponCount = getCouponCount(); const double est = CubicInterpolation::usingXAndYTables(couponCount); const double tmp = est / (1.0 + (numStdDev * HllUtil::COUPON_RSE)); return fmax(tmp, couponCount); } template double CouponList::getUpperBound(const int numStdDev) const { HllUtil::checkNumStdDev(numStdDev); const int couponCount = getCouponCount(); const double est = CubicInterpolation::usingXAndYTables(couponCount); const double tmp = est / (1.0 - (numStdDev * HllUtil::COUPON_RSE)); return fmax(tmp, couponCount); } template bool CouponList::isEmpty() const { return getCouponCount() == 0; } template int CouponList::getUpdatableSerializationBytes() const { return getMemDataStart() + (4 << getLgCouponArrInts()); } template int CouponList::getCouponCount() const { return couponCount; } template int CouponList::getCompactSerializationBytes() const { return getMemDataStart() + (couponCount << 2); } template int CouponList::getMemDataStart() const { return HllUtil::LIST_INT_ARR_START; } template int CouponList::getPreInts() const { return HllUtil::LIST_PREINTS; } template bool CouponList::isCompact() const { return false; } template bool CouponList::isOutOfOrderFlag() const { return oooFlag; } template void CouponList::putOutOfOrderFlag(bool oooFlag) { this->oooFlag = oooFlag; } template int CouponList::getLgCouponArrInts() const { return lgCouponArrInts; } template int* CouponList::getCouponIntArr() const { return couponIntArr; } template pair_iterator_with_deleter CouponList::getIterator() const { typedef typename std::allocator_traits::template rebind_alloc> iapiAlloc; IntArrayPairIterator* itr = new (iapiAlloc().allocate(1)) IntArrayPairIterator(couponIntArr, 1 << lgCouponArrInts, this->lgConfigK); return pair_iterator_with_deleter( itr, [](PairIterator* ptr) { IntArrayPairIterator* iapi = static_cast*>(ptr); iapi->~IntArrayPairIterator(); iapiAlloc().deallocate(iapi, 1); } ); } template HllSketchImpl* CouponList::promoteHeapListToSet(CouponList& list) { return HllSketchImplFactory::promoteListToSet(list); } template HllSketchImpl* CouponList::promoteHeapListOrSetToHll(CouponList& src) { return HllSketchImplFactory::promoteListOrSetToHll(src); } } #endif // _COUPONLIST_INTERNAL_HPP_