Visitors¶
Namespace: dftracer::utils::utilities::composites::dft::visitors
-
class BloomVisitor : public dftracer::utils::utilities::composites::dft::DftEventVisitor¶
Public Types
-
enum FixedBloom¶
Fixed bloom filter slots. Indices match DEFAULT_BLOOM_DIMENSIONS order: name, cat, pid, tid, hhash, fhash, shash.
Values:
-
enumerator BF_NAME¶
-
enumerator BF_CAT¶
-
enumerator BF_PID¶
-
enumerator BF_TID¶
-
enumerator BF_HHASH¶
-
enumerator BF_FHASH¶
-
enumerator BF_SHASH¶
-
enumerator BF_COUNT¶
-
enumerator BF_NAME¶
-
enum FixedDim¶
Fixed dimension_stats slots. Superset of bloom dims plus pid_tid, ts, dur (which are observed for range stats but not hashed).
Values:
-
enumerator FD_NAME¶
-
enumerator FD_CAT¶
-
enumerator FD_PID¶
-
enumerator FD_TID¶
-
enumerator FD_PID_TID¶
-
enumerator FD_HHASH¶
-
enumerator FD_FHASH¶
-
enumerator FD_SHASH¶
-
enumerator FD_TS¶
-
enumerator FD_DUR¶
-
enumerator FD_COUNT¶
-
enumerator FD_NAME¶
-
using HashResolutions = indexing::HashResolutions¶
-
using ChunkStatistics = indexing::ChunkStatistics¶
-
using ChunkDimensionStats = indexing::ChunkDimensionStats¶
-
using ChunkIndexerConfig = indexing::ChunkIndexerConfig¶
Public Functions
-
BloomVisitor(ChunkIndexerConfig config, std::vector<std::string> dimensions)¶
-
BloomVisitor(const BloomVisitor&) = delete¶
-
BloomVisitor &operator=(const BloomVisitor&) = delete¶
-
BloomVisitor(BloomVisitor&&) noexcept = default¶
-
BloomVisitor &operator=(BloomVisitor&&) noexcept = default¶
-
virtual void begin(std::size_t num_checkpoints) override¶
-
virtual void on_checkpoint(std::size_t checkpoint_idx) override¶
-
virtual void on_event(const EventRecord &record) override¶
-
virtual std::unique_ptr<DftEventVisitor> create_parallel_slice() const override¶
-
virtual void merge_parallel_slice(DftEventVisitor &slice) override¶
-
void finalize(indexer::IndexDatabaseWriterContext &writer, int file_id)¶
-
void finalize_sink_only(indexer::IndexBatchSink &sink, int file_id)¶
Emit bloom / stats / dimension records plus name dictionary/postings to a sink backend. Skips ROOT_* summaries (rebuilt separately by
IndexDatabase::rebuild_root_summaries()). Works for both the RocksDB-backed writer and the SST writer.
-
void flush_per_checkpoint_to_sink(indexer::IndexBatchSink &sink, int file_id)¶
Emit per-checkpoint chunk records (bloom, stats, dim_stats, name_chunk_postings) using the current
chunks_ buffer, merge their state into the persistent file-level accumulator, then clear chunks_ and advance the base index. Used for mid-chunk slice rotation when chunks_ would otherwise grow unbounded.
-
void finalize_file_to_sink(indexer::IndexBatchSink &sink, int file_id)¶
Emit file-level records (file_bloom, scalar_stats, counts, dimensions, name_dictionary, name_file_postings) from the persistent accumulator. Call once at end-of-file.
-
inline std::size_t num_chunks() const¶
-
inline std::uint64_t total_events() const¶
Total event count across already-flushed chunks plus the currently buffered ones. Reflects all events ingested via on_event() so far.
-
struct ChunkState¶
Public Functions
-
ChunkState()¶
Public Members
-
std::array<indexing::BloomFilter, BF_COUNT> fixed_blooms¶
-
std::array<ChunkDimensionStats, FD_COUNT> fixed_dim_stats¶
-
std::vector<indexing::BloomFilter> extra_blooms¶
-
std::vector<ChunkDimensionStats> extra_dim_stats¶
-
ChunkStatistics statistics¶
-
HashResolutions hash_resolutions¶
-
std::size_t events_processed = 0¶
-
ChunkState()¶
-
enum FixedBloom¶
-
class HashTableVisitor : public dftracer::utils::utilities::composites::dft::DftEventVisitor¶
Captures FH/HH/SH/PR metadata events during indexing and stores them in the HASH_TABLES column family with bidirectional lookups:
Forward (hash -> name): for resolving hashes in output
Reverse (name -> hash): for query DSL like
file_name == "/path/..."
Public Types
Public Functions
-
HashTableVisitor() = default¶
-
HashTableVisitor(const HashTableVisitor&) = delete¶
-
HashTableVisitor &operator=(const HashTableVisitor&) = delete¶
-
HashTableVisitor(HashTableVisitor&&) noexcept = default¶
-
HashTableVisitor &operator=(HashTableVisitor&&) noexcept = default¶
-
virtual void begin(std::size_t num_checkpoints) override¶
-
virtual void on_checkpoint(std::size_t checkpoint_idx) override¶
-
virtual void on_event(const EventRecord &record) override¶
-
virtual std::unique_ptr<DftEventVisitor> create_parallel_slice() const override¶
-
virtual void merge_parallel_slice(DftEventVisitor &slice) override¶
-
void finalize(indexer::IndexBatchSink &writer, int file_id)¶
-
std::size_t num_entries() const¶
-
class ManifestVisitor : public dftracer::utils::utilities::composites::dft::DftEventVisitor¶
Public Functions
-
ManifestVisitor() = default¶
-
virtual void begin(std::size_t num_checkpoints) override¶
-
virtual void on_checkpoint(std::size_t checkpoint_idx) override¶
-
virtual void on_event(const EventRecord &record) override¶
-
virtual std::unique_ptr<DftEventVisitor> create_parallel_slice() const override¶
-
virtual void merge_parallel_slice(DftEventVisitor &slice) override¶
-
inline virtual void set_line_offset(std::size_t offset) override¶
In parallel-flush mode, slices receive events with slice-local line numbers (0..N-1). The dispatcher calls this on the slice before merge_parallel_slice with the cumulative successful-event count of prior slices, so the slice can renumber its stored line indices.
-
inline virtual std::size_t parallel_event_count() const override¶
Successful events processed by this slice. Used by the dispatcher to propagate line offsets across slices in byte order.
-
void finalize(indexer::IndexBatchSink &writer, int file_id)¶
-
void flush_per_checkpoint_to_sink(indexer::IndexBatchSink &sink, int file_id)¶
Emit per-checkpoint event/metadata line records and clear the vectors. Used for mid-chunk slice rotation.
-
void finalize_file_to_sink(indexer::IndexBatchSink &sink, int file_id)¶
Emit file-level records (observed pids). Call once at end-of-file.
-
ManifestVisitor() = default¶