DFTracer Reorganization
Namespace: dftracer::utils::utilities::composites::dft::reorganize
-
struct ChunkMemberLayout¶
-
struct EventRouterConfig¶
-
struct EventRouterResult¶
-
struct ExtractionPlan¶
Public Members
-
std::vector<PredicateGroup> groups¶
-
std::vector<SourceFileInfo> source_files¶
-
std::vector<ExtractionTask> tasks¶
-
std::size_t total_events = 0¶
-
struct ExtractionTask¶
-
struct GroupWriterConfig¶
Public Members
-
std::string group_name¶
-
std::string group_query¶
-
std::string output_dir¶
-
std::size_t chunk_size_bytes = 256 * 1024 * 1024¶
-
bool compress = true¶
-
int compression_level = -1¶
-
const std::vector<SourceFileInfo> *source_files = nullptr¶
-
bool build_output_index = true¶
-
std::string index_dir¶
-
bool with_aggregation = false¶
-
double agg_time_interval_us = 5'000'000.0¶
-
std::vector<std::string> bloom_dimensions¶
-
indexing::ChunkIndexerConfig bloom_config¶
-
std::string staging_root¶
-
std::shared_ptr<moodycamel::ConcurrentQueue<indexer::IndexDatabaseSstWriterContext::Artifacts>> artifacts_queue¶
-
std::shared_ptr<std::atomic<std::size_t>> batch_counter¶
-
struct GroupWriterResult¶
Public Members
-
std::string group_name¶
-
std::size_t events_written = 0¶
-
std::size_t bytes_written = 0¶
-
std::size_t chunks_created = 0¶
-
std::vector<std::string> output_files¶
-
std::vector<ChunkMemberLayout> chunk_layouts¶
Gzip-member layout of each output chunk file, captured directly from the writer so that downstream indexing can skip re-scanning the gzip headers after the write.
-
bool indexed_inline = false¶
-
bool success = false¶
-
std::string error_message¶
-
struct LineBatch¶
Public Functions
-
inline void reserve(std::size_t n)¶
-
inline std::size_t size() const¶
-
inline bool empty() const¶
-
inline void clear()¶
-
inline std::string_view line_view(std::size_t i) const¶
-
inline void append_line(std::string_view line, std::size_t source_file_idx, std::size_t checkpoint_idx, std::size_t source_line_number)¶
-
struct LineRecord¶
-
struct ManifestExtractorConfig¶
-
struct ManifestExtractorResult¶
-
class OrganizeVisitor : public dftracer::utils::utilities::composites::dft::DftEventVisitor¶
Public Functions
-
explicit OrganizeVisitor(OrganizeVisitorConfig config)¶
-
virtual void begin(std::size_t num_checkpoints) override¶
-
virtual void on_checkpoint(std::size_t checkpoint_idx) override¶
-
virtual void on_event(const EventRecord &record) override¶
-
virtual bool wants_drain() const noexcept override¶
-
virtual std::unique_ptr<DftEventVisitor> create_parallel_slice() const override¶
-
virtual void merge_parallel_slice(DftEventVisitor &slice) override¶
-
inline std::size_t events_routed() const¶
-
inline std::size_t events_unmatched() const¶
-
struct OrganizeVisitorConfig¶
-
struct OriginalFileReconstruction¶
Public Members
-
std::string original_path¶
-
int num_checkpoints¶
-
std::string event_hash¶
-
std::map<int, std::vector<ReconstructionSegment>> checkpoint_segments¶
-
struct PredicateGroup¶
-
struct ProvenanceRecord¶
-
class ProvenanceTracker¶
Public Functions
-
ProvenanceTracker() = default¶
-
void record(int source_file_idx, int checkpoint_idx, int output_chunk_idx, int output_line_start, int output_line_end, int event_count)¶
-
coro::CoroTask<void> flush_to_db(const ExtractionPlan &plan, const std::string &group_name, const std::string &group_query, const std::vector<fileio::ChunkInfo> &chunks, const std::string &output_dir)¶
-
inline std::size_t record_count() const¶
-
inline const std::vector<ProvenanceRecord> &records() const¶
-
struct ReconstructedFileInfo¶
-
struct ReconstructionPlan¶
Public Members
-
std::map<std::string, OriginalFileReconstruction> files¶
-
std::size_t total_segments = 0¶
-
std::size_t total_events = 0¶
-
struct ReconstructionPlannerInput¶
-
class ReconstructionPlannerUtility : public dftracer::utils::utilities::Utility<ReconstructionPlannerInput, ReconstructionPlan>¶
Public Functions
-
ReconstructionPlannerUtility() = default¶
-
coro::CoroTask<ReconstructionPlan> process(const ReconstructionPlannerInput &input) override¶
-
struct ReconstructionSegment¶
-
struct ReconstructorInput¶
Public Functions
-
ReconstructorInput &with_input_dir(std::string dir)¶
-
ReconstructorInput &with_output_dir(std::string dir)¶
-
ReconstructorInput &with_checkpoint_size(std::size_t sz)¶
-
ReconstructorInput &with_parallelism(std::size_t n)¶
-
ReconstructorInput &with_compress(bool c)¶
-
struct ReconstructorResult¶
-
class ReconstructorUtility : public dftracer::utils::utilities::Utility<ReconstructorInput, ReconstructorResult, utilities::tags::NeedsContext>¶
Public Functions
-
coro::CoroTask<ReconstructorResult> process(const ReconstructorInput &input) override¶
-
struct ReorganizationPlannerInput¶
Public Members
-
std::vector<std::string> source_files¶
-
std::vector<PredicateGroup> groups¶
-
std::string index_dir¶
-
std::size_t checkpoint_size = 0¶
-
class ReorganizationPlannerUtility : public dftracer::utils::utilities::Utility<ReorganizationPlannerInput, ExtractionPlan, utilities::tags::NeedsContext>¶
Public Functions
-
ReorganizationPlannerUtility() = default¶
-
coro::CoroTask<ExtractionPlan> process(const ReorganizationPlannerInput &input) override¶
-
struct SourceFileInfo¶