/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ #ifndef _EBPPS_SAMPLE_HPP_ #define _EBPPS_SAMPLE_HPP_ #include "common_defs.hpp" #include "optional.hpp" #include "serde.hpp" #include #include namespace datasketches { template< typename T, typename A = std::allocator > class ebpps_sample { public: explicit ebpps_sample(uint32_t k, const A& allocator = A()); // constructor used to create a sample to merge one itme template ebpps_sample(TT&& item, double theta, const A& allocator = A()); // for deserialization class items_deleter; ebpps_sample(std::vector&& data, optional&& partial_item, double c, const A& allocator = A()); void reset(); void downsample(double theta); template void merge(FwdSample&& other); // standard way to query the sample using result_type = std::vector; result_type get_sample() const; double get_c() const; // intended for internal use // returns only full items result_type get_full_items() const; // intended for internal use // handles only the partial item bool has_partial_item() const; T get_partial_item() const; string to_string() const; /** * @brief Returns the number of items contained in the sample * Computes the number of items, full or partial, currently in the sample. * The result should match ceiling(c); * @return the number of items contained in the sample */ inline uint32_t get_num_retained_items() const; /** * Computes size needed to serialize the current state of the sample. * This version is for fixed-size arithmetic types (integral and floating point). * @param sd instance of a SerDe * @return size in bytes needed to serialize the items in this sample */ template, typename std::enable_if::value, int>::type = 0> inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const; /** * Computes size needed to serialize the items in the sample. * This version is for all other types and can be expensive since every item needs to be looked at. * @param sd instance of a SerDe * @return size in bytes needed to serialize the items in this sample */ template, typename std::enable_if::value, int>::type = 0> inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const; // This is a convenience alias for users // The type returned by the following serialize method using vector_bytes = std::vector::template rebind_alloc>; /** * This method serializes the sample as a vector of bytes. * @param ptr pointer to start of pre-allocated memory to use * @param end_ptr pointer to end of pre-allocated memory to use * @param sd instance of a SerDe * @return number of bytes written */ template> size_t serialize(uint8_t* ptr, const uint8_t* end_ptr, const SerDe& sd = SerDe()) const; /** * This method serializes the sample into a given stream in a binary form * @param os output stream * @param sd instance of a SerDe */ template> void serialize(std::ostream& os, const SerDe& sd = SerDe()) const; /** * This method deserializes a sample from a given array of bytes. * @param ptr pointer to the array of bytes * @param size the size of the array * @param sd instance of a SerDe * @param allocator instance of an allocator * @return a std::pair with a sample and the number of bytes read */ template> static std::pair deserialize(const uint8_t* ptr, size_t size, const SerDe& sd = SerDe(), const A& allocator = A()); /** * This method deserializes a sample from a given stream. * @param is input stream * @param sd instance of a SerDe * @param allocator instance of an allocator * @return an instance of a sample */ template> static ebpps_sample deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A()); class const_iterator; /** * Iterator pointing to the first item in the sample. * If the sample is empty, the returned iterator must not be dereferenced or incremented * @param force_partial forces the inclusion of the partial item, if one exists * @return iterator pointing to the first item in the sample */ const_iterator begin() const; /** * Iterator pointing to the past-the-end item in the sample. * The past-the-end item is the hypothetical item that would follow the last item. * It does not point to any item, and must not be dereferenced or incremented. * @return iterator pointing to the past-the-end item in the sample */ const_iterator end() const; private: A allocator_; double c_; // Current sample size, including fractional part optional partial_item_; // a sample item corresponding to a partial weight std::vector data_; // stored sampled items template inline void set_partial(FwdItem&& item); void swap_with_partial(); void move_one_to_partial(); void subsample(uint32_t num_samples); static inline uint32_t random_idx(uint32_t max); static inline double next_double(); friend class const_iterator; }; template class ebpps_sample::const_iterator { public: using iterator_category = std::input_iterator_tag; using value_type = const T&; using difference_type = void; using pointer = const return_value_holder; using reference = value_type; const_iterator(const const_iterator& other); const_iterator& operator++(); const_iterator& operator++(int); bool operator==(const const_iterator& other) const; bool operator!=(const const_iterator& other) const; reference operator*() const; pointer operator->() const; private: static const size_t PARTIAL_IDX = static_cast(-1); // default iterator over sample const_iterator(const ebpps_sample* sample); const ebpps_sample* sample_; size_t idx_; bool use_partial_; friend class ebpps_sample; }; } // namespace datasketches #include "ebpps_sample_impl.hpp" #endif // _EBPPS_SAMPLE_HPP_