DFTracer Statistics

Namespace: dftracer::utils::utilities::composites::dft::statistics

struct ChunkDetailScanInput

Public Members

std::string file_path
std::string index_path
std::size_t checkpoint_size = 0
std::size_t start_byte = 0
std::size_t end_byte = 0
std::uint64_t checkpoint_idx = 0
std::size_t batch_size = 4 * 1024 * 1024
const std::vector<std::string> *filter_names = nullptr
const std::vector<std::string> *filter_categories = nullptr
const std::vector<std::string> *group_by = nullptr
struct ChunkDetailScanOutput

Public Members

DetailedStatistics stats
bool success = false
class ChunkDetailScannerUtility : public dftracer::utils::utilities::Utility<ChunkDetailScanInput, ChunkDetailScanOutput, utilities::tags::Parallelizable>

Public Functions

ChunkDetailScannerUtility() = default
coro::CoroTask<ChunkDetailScanOutput> process(const ChunkDetailScanInput &input) override
struct DetailedStatistics

Accumulates distribution data during on-demand chunk scanning.

Supports optional group-by dimensions (name, cat, pid, tid, fhash, hhash, pid_tid). When group_by is empty, only the global duration distribution is tracked.

Public Functions

void merge(const DetailedStatistics &other)
std::string to_json() const

Public Members

DistributionStats duration
StringViewMap<DistributionStats> grouped_duration
StringViewMap<IOEventMetrics> grouped_io
StringViewMap<std::string> group_key_category
std::uint64_t events_scanned = 0
std::uint64_t chunks_scanned = 0
std::uint64_t chunks_skipped = 0
struct DistributionStats

Reusable building block combining a histogram, a sketch, and running sums (sum and sum of squares).

Public Functions

void update(double value)
void merge(const DistributionStats &other)
std::uint64_t count() const
double mean() const
double stddev() const

Public Members

Log2Histogram histogram
common::statistics::DDSketch sketch = {0.01}
double sum = 0.0
double sum_sq = 0.0
struct EntrySnapshot

Public Members

std::size_t file_index
int file_id
std::string file_path
struct IOEventMetrics

Per-event I/O metrics: duration, size, bandwidth, offset.

Public Functions

void merge(const IOEventMetrics &other)

Public Members

DistributionStats duration
DistributionStats size
DistributionStats bandwidth
DistributionStats offset
struct SharedIndexBatchRows

Public Members

std::unordered_map<int, std::uint64_t> num_chunks
std::unordered_map<int, utilities::indexer::MergedStatisticsResult> fallback_merged_stats
std::unordered_map<int, utilities::indexer::ChunkStatistics> merged_stats
std::vector<EntrySnapshot> entries_snapshot
class SharedIndexStatisticsReader

Public Functions

SharedIndexStatisticsReader() = default
inline coro::CoroTask<SharedIndexBatchRows> query(std::string index_path, std::vector<indexing::ResolvedFile> entries, StatisticsQueryType query_type) const

Public Static Functions

template<typename Callback>
static inline void process_batch_results(SharedIndexBatchRows &batch_rows, Callback &callback)
struct StatisticsAggregatorBatchInput

Public Members

std::vector<std::string> file_paths
std::string index_path
struct StatisticsAggregatorInput

Public Members

std::string file_path
std::string index_path
std::string index_dir
class StatisticsAggregatorUtility : public dftracer::utils::utilities::Utility<StatisticsAggregatorInput, TraceStatistics, utilities::tags::Parallelizable>

Public Functions

StatisticsAggregatorUtility() = default
coro::CoroTask<TraceStatistics> process(const StatisticsAggregatorInput &input) override
coro::CoroTask<std::vector<TraceStatistics>> process_batch(const StatisticsAggregatorBatchInput &input)
struct StatisticsQueryInput

Public Members

TraceStatistics stats
StatisticsQueryType query_type = StatisticsQueryType::SUMMARY
std::uint64_t top_n = 10
struct StatisticsQueryOutput

Public Functions

std::string to_json() const

Public Members

std::vector<std::pair<std::string, std::uint64_t>> results
std::uint64_t total_events = 0
std::uint64_t min_timestamp_us = 0
std::uint64_t max_timestamp_us = 0
double time_span_seconds = 0.0
std::uint64_t duration_count = 0
double duration_mean_us = 0.0
double duration_stddev_us = 0.0
std::uint64_t duration_min_us = 0
std::uint64_t duration_max_us = 0
std::string query_type_name
class StatisticsQueryUtility : public dftracer::utils::utilities::Utility<StatisticsQueryInput, StatisticsQueryOutput, utilities::tags::Parallelizable>

Public Functions

StatisticsQueryUtility() = default
coro::CoroTask<StatisticsQueryOutput> process(const StatisticsQueryInput &input) override
struct TraceStatistics

Public Functions

std::uint64_t total_events() const
double time_span_seconds() const
double duration_mean_us() const
double duration_stddev_us() const
std::size_t num_categories() const
std::size_t num_unique_names() const
std::size_t num_pid_tids() const
std::string to_json() const

Public Members

std::string file_path
std::string index_path
ChunkStatistics merged
std::uint64_t num_chunks = 0
bool success = false
std::string error_message