Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROOT::Experimental::Internal::ML::RChunkLoader< Args > Class Template Reference

template<typename... Args>
class ROOT::Experimental::Internal::ML::RChunkLoader< Args >

Building and loading the chunks from the blocks and chunks constructed in RChunkConstructor.

In this class the blocks are stitched together to form chunks that are loaded into memory. The blocks used to create each chunk come from different parts of the dataset. This is achieved by shuffling the blocks before distributing them into chunks. The purpose of this process is to reduce bias during machine learning training by ensuring that the data is well mixed. The dataset is also split into training and validation sets according to the user-defined validation split fraction.

Definition at line 105 of file RChunkLoader.hxx.

Public Member Functions

 RChunkLoader (ROOT::RDF::RNode &rdf, const std::size_t chunkSize, const std::size_t blockSize, const float validationSplit, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0, bool shuffle=true, const std::size_t setSeed=0)
 
void CheckIfOverlap (RFlat2DMatrix &Tensor1, RFlat2DMatrix &Tensor2)
 
void CheckIfUnique (RFlat2DMatrix &Tensor)
 
void CreateTrainingChunksIntervals ()
 Create training chunks consisting of block intervals of different types.
 
void CreateValidationChunksIntervals ()
 Create validation chunks consisting of block intervals of different types.
 
std::size_t GetNumTrainingChunks ()
 
std::size_t GetNumTrainingEntries ()
 
std::size_t GetNumValidationChunks ()
 
std::size_t GetNumValidationEntries ()
 
std::vector< std::size_t > GetTrainingChunkSizes ()
 
std::vector< std::size_t > GetValidationChunkSizes ()
 
void LoadTrainingChunk (RFlat2DMatrix &TrainChunkTensor, std::size_t chunk)
 Load the nth chunk from the training dataset into a tensor.
 
void LoadValidationChunk (RFlat2DMatrix &ValidationChunkTensor, std::size_t chunk)
 Load the nth chunk from the validation dataset into a tensor.
 
void ResetDataframe ()
 
void SplitDataset ()
 Distribute the blocks into training and validation datasets.
 

Private Attributes

ROOT::RDF::RNode & f_rdf
 
std::size_t fBlockSize
 
std::size_t fChunkSize
 
std::vector< std::string > fCols
 
ROOT::RDF::RResultPtr< std::vector< ULong64_t > > fEntries
 
bool fNotFiltered
 
std::size_t fNumChunkCols
 
std::size_t fNumCols
 
std::size_t fNumEntries
 
std::size_t fNumTrainEntries
 
std::size_t fNumValidationEntries
 
std::size_t fSetSeed
 
bool fShuffle
 
std::size_t fSumVecSizes
 
std::unique_ptr< RFlat2DMatrixOperators > fTensorOperators
 
std::unique_ptr< RChunkConstructor > fTraining
 
std::unique_ptr< RChunkConstructor > fValidation
 
float fValidationSplit
 
std::size_t fVecPadding
 
std::vector< std::size_t > fVecSizes
 

#include <ROOT/ML/RChunkLoader.hxx>

Constructor & Destructor Documentation

◆ RChunkLoader()

template<typename... Args>
ROOT::Experimental::Internal::ML::RChunkLoader< Args >::RChunkLoader ( ROOT::RDF::RNode & rdf,
const std::size_t chunkSize,
const std::size_t blockSize,
const float validationSplit,
const std::vector< std::string > & cols,
const std::vector< std::size_t > & vecSizes = {},
const float vecPadding = 0.0,
bool shuffle = true,
const std::size_t setSeed = 0 )
inline

Definition at line 135 of file RChunkLoader.hxx.

Member Function Documentation

◆ CheckIfOverlap()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::CheckIfOverlap ( RFlat2DMatrix & Tensor1,
RFlat2DMatrix & Tensor2 )
inline

Definition at line 441 of file RChunkLoader.hxx.

◆ CheckIfUnique()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::CheckIfUnique ( RFlat2DMatrix & Tensor)
inline

Definition at line 433 of file RChunkLoader.hxx.

◆ CreateTrainingChunksIntervals()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::CreateTrainingChunksIntervals ( )
inline

Create training chunks consisting of block intervals of different types.

Definition at line 248 of file RChunkLoader.hxx.

◆ CreateValidationChunksIntervals()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::CreateValidationChunksIntervals ( )
inline

Create validation chunks consisting of block intervals of different types.

Definition at line 288 of file RChunkLoader.hxx.

◆ GetNumTrainingChunks()

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetNumTrainingChunks ( )
inline

Definition at line 464 of file RChunkLoader.hxx.

◆ GetNumTrainingEntries()

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetNumTrainingEntries ( )
inline

Definition at line 430 of file RChunkLoader.hxx.

◆ GetNumValidationChunks()

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetNumValidationChunks ( )
inline

Definition at line 466 of file RChunkLoader.hxx.

◆ GetNumValidationEntries()

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetNumValidationEntries ( )
inline

Definition at line 431 of file RChunkLoader.hxx.

◆ GetTrainingChunkSizes()

template<typename... Args>
std::vector< std::size_t > ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetTrainingChunkSizes ( )
inline

Definition at line 427 of file RChunkLoader.hxx.

◆ GetValidationChunkSizes()

template<typename... Args>
std::vector< std::size_t > ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetValidationChunkSizes ( )
inline

Definition at line 428 of file RChunkLoader.hxx.

◆ LoadTrainingChunk()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::LoadTrainingChunk ( RFlat2DMatrix & TrainChunkTensor,
std::size_t chunk )
inline

Load the nth chunk from the training dataset into a tensor.

Parameters
[in]  TrainChunkTensor  RTensor for the training chunk
[in]  chunk  Index of the chunk in the dataset

Definition at line 326 of file RChunkLoader.hxx.

◆ LoadValidationChunk()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::LoadValidationChunk ( RFlat2DMatrix & ValidationChunkTensor,
std::size_t chunk )
inline

Load the nth chunk from the validation dataset into a tensor.

Parameters
[in]  ValidationChunkTensor  RTensor for the validation chunk
[in]  chunk  Index of the chunk in the dataset

Definition at line 378 of file RChunkLoader.hxx.

◆ ResetDataframe()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::ResetDataframe ( )
inline

Definition at line 425 of file RChunkLoader.hxx.

◆ SplitDataset()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::SplitDataset ( )
inline

Distribute the blocks into training and validation datasets.

Definition at line 173 of file RChunkLoader.hxx.

Member Data Documentation

◆ f_rdf

template<typename... Args>
ROOT::RDF::RNode& ROOT::Experimental::Internal::ML::RChunkLoader< Args >::f_rdf
private

Definition at line 121 of file RChunkLoader.hxx.

◆ fBlockSize

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fBlockSize
private

Definition at line 109 of file RChunkLoader.hxx.

◆ fChunkSize

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fChunkSize
private

Definition at line 108 of file RChunkLoader.hxx.

◆ fCols

template<typename... Args>
std::vector<std::string> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fCols
private

Definition at line 122 of file RChunkLoader.hxx.

◆ fEntries

template<typename... Args>
ROOT::RDF::RResultPtr<std::vector<ULong64_t> > ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fEntries
private

Definition at line 129 of file RChunkLoader.hxx.

◆ fNotFiltered

template<typename... Args>
bool ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNotFiltered
private

Definition at line 126 of file RChunkLoader.hxx.

◆ fNumChunkCols

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumChunkCols
private

Definition at line 115 of file RChunkLoader.hxx.

◆ fNumCols

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumCols
private

Definition at line 123 of file RChunkLoader.hxx.

◆ fNumEntries

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumEntries
private

Definition at line 107 of file RChunkLoader.hxx.

◆ fNumTrainEntries

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumTrainEntries
private

Definition at line 117 of file RChunkLoader.hxx.

◆ fNumValidationEntries

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumValidationEntries
private

Definition at line 118 of file RChunkLoader.hxx.

◆ fSetSeed

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fSetSeed
private

Definition at line 124 of file RChunkLoader.hxx.

◆ fShuffle

template<typename... Args>
bool ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fShuffle
private

Definition at line 127 of file RChunkLoader.hxx.

◆ fSumVecSizes

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fSumVecSizes
private

Definition at line 113 of file RChunkLoader.hxx.

◆ fTensorOperators

template<typename... Args>
std::unique_ptr<RFlat2DMatrixOperators> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fTensorOperators
private

Definition at line 119 of file RChunkLoader.hxx.

◆ fTraining

template<typename... Args>
std::unique_ptr<RChunkConstructor> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fTraining
private

Definition at line 131 of file RChunkLoader.hxx.

◆ fValidation

template<typename... Args>
std::unique_ptr<RChunkConstructor> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fValidation
private

Definition at line 132 of file RChunkLoader.hxx.

◆ fValidationSplit

template<typename... Args>
float ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fValidationSplit
private

Definition at line 110 of file RChunkLoader.hxx.

◆ fVecPadding

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fVecPadding
private

Definition at line 114 of file RChunkLoader.hxx.

◆ fVecSizes

template<typename... Args>
std::vector<std::size_t> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fVecSizes
private

Definition at line 112 of file RChunkLoader.hxx.

  • tree/ml/inc/ROOT/ML/RChunkLoader.hxx