Statistics (DDSketch, Histogram)

Namespace: dftracer::utils::utilities::common::statistics

class DDSketch

Public Functions

explicit DDSketch(double relative_accuracy = 0.01)
void add(double value, double weight = 1.0)
void merge(const DDSketch &other)
double quantile(double q) const
void reset()
inline std::uint64_t count() const
inline bool empty() const
inline double min() const
inline double max() const
std::size_t memory_usage() const
std::vector<std::uint8_t> serialize() const
void serialize_into(std::vector<std::uint8_t> &buf) const

Public Static Functions

static DDSketch deserialize(const std::uint8_t *data, std::size_t len)

Public Static Attributes

static constexpr int MAX_BINS = 128
struct FittedDistribution

Public Members

DistributionKind kind
std::array<double, 3> params = {}
double ks_stat = 1.0
double log_likelihood = 0.0
double bic = 0.0
bool valid = false
struct FittedMixture

Public Members

std::vector<double> weights
std::vector<GmmComponent> components
double log_likelihood = 0.0
double bic = 0.0
int iterations = 0
bool converged = false
bool valid = false
struct GmmComponent

Public Members

double mean = 0.0
double stddev = 0.0
class Log2Histogram

Log2-scale histogram with 65 fixed bins.

Bin 0 holds value == 0; bin k (1 <= k <= 64) holds values in [2^(k-1), 2^k).

Covers the full uint64_t range (0 to 2^64 - 1). Suitable for both microsecond durations and byte sizes.

Public Functions

Log2Histogram() = default
void add(std::uint64_t value, std::uint64_t count = 1)
void merge(const Log2Histogram &other)
double approx_percentile(double p) const
std::string render_ascii(std::size_t max_width, const std::string &unit) const
std::string render_blocks(std::size_t max_width, const std::string &unit, const std::string &indent = "      ") const
std::string to_json() const
inline std::uint64_t total_count() const
inline const std::array<std::uint64_t, NUM_BINS> &bins() const

Public Static Functions

static Log2Histogram from_json(const std::string &json)
static std::size_t bin_index(std::uint64_t value)
static std::uint64_t bin_lower(std::size_t bin)
static std::uint64_t bin_upper(std::size_t bin)

Public Static Attributes

static constexpr std::size_t NUM_BINS = 65
struct MixtureFitOptions

Public Members

int max_iter = 200
double tol = 1e-6
double variance_floor = 1e-12
std::uint64_t seed = 0xC0FFEE
struct ModelSelection

Public Members

BestModel model
double bic = 0.0
int free_params = 0
class Statistic

Public Functions

Statistic() = default
inline void attach_sketch(std::shared_ptr<const DDSketch> sketch)
inline void update(double value)
inline double quantile(double q) const
inline double min() const
inline double max() const
inline double mean() const
inline std::uint64_t count() const
class TimestampHistogram

Public Functions

TimestampHistogram() = default
void add(std::uint64_t timestamp_us)
void merge(const TimestampHistogram &other)
std::uint64_t count_in_range(std::uint64_t ts_start_us, std::uint64_t ts_end_us) const
double selectivity(std::uint64_t ts_start_us, std::uint64_t ts_end_us) const
std::vector<double> expansion_weights(std::uint64_t bucket_start_us, std::uint64_t bucket_end_us, std::size_t num_sub_buckets) const
std::vector<std::uint8_t> serialize() const
inline std::uint64_t total_count() const
inline bool empty() const
inline std::size_t num_bins() const
inline const std::vector<std::pair<std::uint64_t, std::uint64_t>> &bins() const

Public Static Functions

static TimestampHistogram deserialize(const std::uint8_t *data, std::size_t len)
static inline std::uint64_t bin_index(std::uint64_t timestamp_us)
static inline std::uint64_t bin_start_us(std::uint64_t bin_idx)
static inline std::uint64_t bin_end_us(std::uint64_t bin_idx)

Public Static Attributes

static constexpr std::uint64_t BIN_WIDTH_US = 100'000