Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added get_serialized_size_bytes() #428

Merged
merged 5 commits into from
Mar 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions tdigest/include/tdigest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@
#ifndef _TDIGEST_HPP_
#define _TDIGEST_HPP_

#include <type_traits>
#include <cstddef>
#include <limits>
#include <type_traits>
#include <vector>

#include "common_defs.hpp"

Expand Down Expand Up @@ -84,6 +86,7 @@ class tdigest {
T mean_;
W weight_;
};
using vector_t = std::vector<T, Allocator>;
using vector_centroid = std::vector<centroid, typename std::allocator_traits<Allocator>::template rebind_alloc<centroid>>;
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>>;

Expand Down Expand Up @@ -165,20 +168,29 @@ class tdigest {
*/
string<Allocator> to_string(bool print_centroids = false) const;

/**
* Computes the size in bytes needed to serialize the current state.
* @param with_buffer if true, compute the size for serializing buffered values as-is, avoiding compression
* @return size in bytes needed to serialize this t-Digest
*/
size_t get_serialized_size_bytes(bool with_buffer = false) const;

/**
* Serializes this t-Digest into the given stream in binary form.
* @param os output stream
* @param with_buffer if true, serialize buffered values as-is, avoiding compression
*/
void serialize(std::ostream& os) const;
void serialize(std::ostream& os, bool with_buffer = false) const;

/**
* Serializes this t-Digest as a vector of bytes.
* An optional header can be reserved in front of the sketch.
* The header is an uninitialized space of the given size.
* @param header_size_bytes number of bytes to reserve in front of the sketch
* @param with_buffer if true, serialize buffered values as-is, avoiding compression
* @return serialized sketch as a vector of bytes
*/
vector_bytes serialize(unsigned header_size_bytes = 0) const;
vector_bytes serialize(unsigned header_size_bytes = 0, bool with_buffer = false) const;

/**
* This method deserializes t-Digest from a given stream.
Expand All @@ -198,7 +210,6 @@ class tdigest {
static tdigest deserialize(const void* bytes, size_t size, const Allocator& allocator = Allocator());

private:
Allocator allocator_;
bool reverse_merge_;
uint16_t k_;
uint16_t internal_k_;
Expand All @@ -208,8 +219,9 @@ class tdigest {
vector_centroid centroids_;
uint64_t centroids_weight_;
size_t buffer_capacity_;
vector_centroid buffer_;
uint64_t buffered_weight_;
vector_t buffer_;

static const size_t BUFFER_MULTIPLIER = 4;

static const uint8_t PREAMBLE_LONGS_EMPTY_OR_SINGLE = 1;
static const uint8_t PREAMBLE_LONGS_MULTIPLE = 2;
Expand All @@ -222,11 +234,11 @@ class tdigest {
enum flags { IS_EMPTY, IS_SINGLE_VALUE, REVERSE_MERGE };

bool is_single_value() const;
uint8_t get_preamble_longs() const;
void merge(vector_centroid& buffer, W weight);

// for deserialize
tdigest(bool reverse_merge, uint16_t k, T min, T max, vector_centroid&& centroids, uint64_t total_weight_, const Allocator& allocator);

void merge_buffered();
tdigest(bool reverse_merge, uint16_t k, T min, T max, vector_centroid&& centroids, uint64_t total_weight_, vector_t&& buffer);

static double weighted_average(double x1, double w1, double x2, double w2);

Expand Down
Loading
Loading