16 #ifndef ROOT_INTERNAL_ML_RDATALOADERENGINE
17 #define ROOT_INTERNAL_ML_RDATALOADERENGINE
19 #include <condition_variable>
48 template <
typename... Args>
76 std::vector<ROOT::RDF::RNode>
fRdfs;
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Loads TTree/RNTuple clusters from one or more RDataFrames into RFlat2DMatrix buffers for ML training ...
In this class, the processes of loading clusters (see RClusterLoader) and creating batches from those...
std::size_t ValidationRemainderRows()
RFlat2DMatrix fValidationDataset
bool fTrainingEpochActive
bool IsValidationActive()
std::vector< std::string > fCols
void ActivateTrainingEpoch()
Activate the training epoch by starting the batchloader.
std::size_t fValidationClusterIdx
RFlat2DMatrix GetTrainBatch()
Loads a training batch from the queue.
RFlat2DMatrix fTrainingDataset
std::unique_ptr< RSampler > fValidationSampler
std::size_t fNumTrainingEntries
std::unique_ptr< RFlat2DMatrixOperators > fTensorOperators
RFlat2DMatrix fSampledValidationDataset
std::size_t fTrainingEpochCount
std::condition_variable fLoadingCondition
void CreateValidationBatches()
Creates validation batches by first loading a chunk (see RClusterLoader) and then splitting it into batches ...
std::unique_ptr< RSampler > fTrainingSampler
std::size_t NumberOfValidationBatches()
std::vector< RFlat2DMatrix > fValidationDatasets
std::vector< std::size_t > fVecSizes
void LoadData()
Main loop for loading clusters and creating batches.
void CreateTrainBatches()
Creates training batches by first loading a chunk (see RClusterLoader) and then splitting it into batches (see ...
std::size_t fLowWatermark
RFlat2DMatrix GetValidationBatch()
Loads a validation batch from the queue.
RFlat2DMatrix fSampledTrainingDataset
std::unique_ptr< RBatchLoader > fTrainingBatchLoader
std::vector< ROOT::RDF::RNode > fRdfs
void DeActivateTrainingEpoch()
std::size_t fTrainingClusterIdx
std::size_t fHighWatermark
std::unique_ptr< RBatchLoader > fValidationBatchLoader
void Activate()
Activate the loading process by spawning the loading thread.
std::size_t fNumValidationEntries
std::size_t fValidationEpochCount
std::vector< RFlat2DMatrix > fTrainingDatasets
void ActivateValidationEpoch()
std::size_t fBatchesInMemory
std::unique_ptr< RDatasetLoader< Args... > > fDatasetLoader
std::size_t fBufferCapacity
std::size_t TrainRemainderRows()
std::size_t NumberOfTrainingBatches()
bool fValidationEpochActive
std::unique_ptr< std::thread > fLoadingThread
void DeActivateValidationEpoch()
RDataLoaderEngine(const std::vector< ROOT::RDF::RNode > &rdfs, const std::size_t batchSize, const std::size_t batchesInMemory, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0, const float testSize=0.0, bool shuffle=true, bool dropRemainder=true, const std::size_t setSeed=0, bool loadEager=false, std::string sampleType="", float sampleRatio=1.0, bool replacement=false)
std::unique_ptr< RClusterLoader< Args... > > fClusterLoader
Load the whole dataset into memory.
void SplitDatasets()
Split the dataframes into a training and a validation dataset.
const_iterator end() const
Wrapper around ROOT::RVec<float> representing a 2D matrix.