/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #ifndef _AUXHASHMAP_INTERNAL_HPP_ #define _AUXHASHMAP_INTERNAL_HPP_ #include "HllUtil.hpp" #include "AuxHashMap.hpp" #include "PairIterator.hpp" #include "IntArrayPairIterator.hpp" #include #include #include namespace datasketches { template AuxHashMap::AuxHashMap(int lgAuxArrInts, int lgConfigK) : lgConfigK(lgConfigK), lgAuxArrInts(lgAuxArrInts), auxCount(0) { typedef typename std::allocator_traits::template rebind_alloc intAlloc; const int numItems = 1 << lgAuxArrInts; auxIntArr = intAlloc().allocate(numItems); std::fill(auxIntArr, auxIntArr + numItems, 0); } template AuxHashMap* AuxHashMap::newAuxHashMap(int lgAuxArrInts, int lgConfigK) { return new (ahmAlloc().allocate(1)) AuxHashMap(lgAuxArrInts, lgConfigK); } template AuxHashMap::AuxHashMap(const AuxHashMap& that) : lgConfigK(that.lgConfigK), lgAuxArrInts(that.lgAuxArrInts), auxCount(that.auxCount) { typedef typename std::allocator_traits::template rebind_alloc intAlloc; const int numItems = 1 << lgAuxArrInts; auxIntArr = intAlloc().allocate(numItems); std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr); } template AuxHashMap* AuxHashMap::newAuxHashMap(const AuxHashMap& that) { return new (ahmAlloc().allocate(1)) AuxHashMap(that); } template AuxHashMap* AuxHashMap::deserialize(const void* bytes, size_t len, int lgConfigK, int auxCount, int lgAuxArrInts, bool srcCompact) { int lgArrInts = lgAuxArrInts; if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input lgArrInts = HllUtil::computeLgArrInts(HLL, auxCount, lgConfigK); } else { // updatable lgArrInts = lgAuxArrInts; } int configKmask = (1 << lgConfigK) - 1; AuxHashMap* auxHashMap; const int* auxPtr = static_cast(bytes); if (srcCompact) { if (len < auxCount * sizeof(int)) { throw std::invalid_argument("Input array too small to hold AuxHashMap image"); } auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap(lgArrInts, lgConfigK); for (int i = 0; i < auxCount; ++i) { int pair = auxPtr[i]; int slotNo = HllUtil::getLow26(pair) & configKmask; int value = HllUtil::getValue(pair); auxHashMap->mustAdd(slotNo, value); } } else { // updatable int itemsToRead = 1 << lgAuxArrInts; if (len < itemsToRead * sizeof(int)) { throw std::invalid_argument("Input array too small to hold AuxHashMap image"); } auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap(lgArrInts, lgConfigK); for (int i = 0; i < itemsToRead; ++i) { int pair = auxPtr[i]; if (pair == HllUtil::EMPTY) { continue; } int slotNo = HllUtil::getLow26(pair) & configKmask; int value = HllUtil::getValue(pair); auxHashMap->mustAdd(slotNo, value); } } if (auxHashMap->getAuxCount() != auxCount) { make_deleter()(auxHashMap); throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries"); } return auxHashMap; } template AuxHashMap* AuxHashMap::deserialize(std::istream& is, const int lgConfigK, const int auxCount, const int lgAuxArrInts, const bool srcCompact) { int lgArrInts = lgAuxArrInts; if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input lgArrInts = HllUtil::computeLgArrInts(HLL, auxCount, lgConfigK); } else { // updatable lgArrInts = lgAuxArrInts; } // TODO: truncated stream will throw exception without freeing memory AuxHashMap* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap(lgArrInts, lgConfigK); int configKmask = (1 << lgConfigK) - 1; if (srcCompact) { int pair; for (int i = 0; i < auxCount; ++i) { is.read((char*)&pair, sizeof(pair)); int slotNo = HllUtil::getLow26(pair) & configKmask; int value = HllUtil::getValue(pair); auxHashMap->mustAdd(slotNo, value); } } else { // updatable int itemsToRead = 1 << lgAuxArrInts; int pair; for (int i = 0; i < itemsToRead; ++i) { is.read((char*)&pair, sizeof(pair)); if (pair == HllUtil::EMPTY) { continue; } int slotNo = HllUtil::getLow26(pair) & configKmask; int value = HllUtil::getValue(pair); auxHashMap->mustAdd(slotNo, value); } } if (auxHashMap->getAuxCount() != auxCount) { make_deleter()(auxHashMap); throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries"); } return auxHashMap; } template AuxHashMap::~AuxHashMap() { // should be no way to have an object without a valid array typedef typename std::allocator_traits::template rebind_alloc intAlloc; intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts); } template std::function*)> AuxHashMap::make_deleter() { return [](AuxHashMap* ptr) { ptr->~AuxHashMap(); ahmAlloc().deallocate(ptr, 1); }; } template AuxHashMap* AuxHashMap::copy() const { return new (ahmAlloc().allocate(1)) AuxHashMap(*this); } template int AuxHashMap::getAuxCount() const { return auxCount; } template int* AuxHashMap::getAuxIntArr(){ return auxIntArr; } template int AuxHashMap::getLgAuxArrInts() const { return lgAuxArrInts; } template int AuxHashMap::getCompactSizeBytes() const { return auxCount << 2; } template int AuxHashMap::getUpdatableSizeBytes() const { return 4 << lgAuxArrInts; } template std::unique_ptr, std::function*)>> AuxHashMap::getIterator() const { typedef typename std::allocator_traits::template rebind_alloc> iapiAlloc; IntArrayPairIterator* itr = new (iapiAlloc().allocate(1)) IntArrayPairIterator(auxIntArr, 1 << lgAuxArrInts, lgConfigK); return std::unique_ptr, std::function*)>>( itr, [](PairIterator* ptr) { IntArrayPairIterator* itr = static_cast*>(ptr); itr->~IntArrayPairIterator(); iapiAlloc().deallocate(itr, 1); } ); } template void AuxHashMap::mustAdd(const int slotNo, const int value) { const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo); const int entry_pair = HllUtil::pair(slotNo, value); if (index >= 0) { throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: " + std::to_string(slotNo) + ", Value: " + std::to_string(value)); } // found empty entry auxIntArr[~index] = entry_pair; ++auxCount; checkGrow(); } template int AuxHashMap::mustFindValueFor(const int slotNo) { const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo); if (index >= 0) { return HllUtil::getValue(auxIntArr[index]); } throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo)); } template void AuxHashMap::mustReplace(const int slotNo, const int value) { const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo); if (idx >= 0) { auxIntArr[idx] = HllUtil::pair(slotNo, value); return; } throw std::invalid_argument("Pair not found: SlotNo: " + std::to_string(slotNo) + ", Value: " + std::to_string(value)); } template void AuxHashMap::checkGrow() { if ((HllUtil::RESIZE_DENOM * auxCount) > (HllUtil::RESIZE_NUMER * (1 << lgAuxArrInts))) { growAuxSpace(); } } template void AuxHashMap::growAuxSpace() { int* oldArray = auxIntArr; const int oldArrLen = 1 << lgAuxArrInts; const int configKmask = (1 << lgConfigK) - 1; const int newArrLen = 1 << ++lgAuxArrInts; typedef typename std::allocator_traits::template rebind_alloc intAlloc; auxIntArr = intAlloc().allocate(newArrLen); std::fill(auxIntArr, auxIntArr + newArrLen, 0); for (int i = 0; i < oldArrLen; ++i) { const int fetched = oldArray[i]; if (fetched != HllUtil::EMPTY) { // find empty in new array const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, fetched & configKmask); auxIntArr[~idx] = fetched; } } intAlloc().deallocate(oldArray, oldArrLen); } //Searches the Aux arr hash table for an empty or a matching slotNo depending on the context. //If entire entry is empty, returns one's complement of index = found empty. //If entry contains given slotNo, returns its index = found slotNo. //Continues searching. //If the probe comes back to original index, throws an exception. template int AuxHashMap::find(const int* auxArr, const int lgAuxArrInts, const int lgConfigK, const int slotNo) { const int auxArrMask = (1 << lgAuxArrInts) - 1; const int configKmask = (1 << lgConfigK) - 1; int probe = slotNo & auxArrMask; const int loopIndex = probe; do { const int arrVal = auxArr[probe]; if (arrVal == HllUtil::EMPTY) { //Compares on entire entry return ~probe; //empty } else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo return probe; //found given slotNo, return probe = index into aux array } const int stride = (slotNo >> lgAuxArrInts) | 1; probe = (probe + stride) & auxArrMask; } while (probe != loopIndex); throw std::runtime_error("Key not found and no empty slots!"); } } #endif // _AUXHASHMAP_INTERNAL_HPP_