doc/master/RChunkLoader_8hxx_source.html

// Author: Dante Niewenhuis, VU Amsterdam 07/2023

// Author: Kristupas Pranckietis, Vilnius University 05/2024

// Author: Nopphakorn Subsa-Ard, King Mongkut's University of Technology Thonburi (KMUTT) (TH) 08/2024

// Author: Vincenzo Eduardo Padulano, CERN 10/2024

// Author: Martin Føll, University of Oslo (UiO) & CERN 05/2025


/*************************************************************************

 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers.               *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


#ifndef ROOT_INTERNAL_ML_RCHUNKLOADER

#define ROOT_INTERNAL_ML_RCHUNKLOADER


#include <algorithm>

#include <iostream>

#include <iterator>

#include <memory>

#include <numeric>

#include <random>

#include <set>

#include <string>

#include <type_traits>

#include <utility>

#include <vector>


#include "ROOT/ML/RChunkConstructor.hxx"

#include "ROOT/ML/RFlat2DMatrix.hxx"

#include "ROOT/ML/RFlat2DMatrixOperators.hxx"

#include "ROOT/RDataFrame.hxx"

#include "ROOT/RDF/Utils.hxx"


namespace ROOT::Experimental::Internal::ML {

/**

\class ROOT::Experimental::Internal::ML::RChunkLoaderFunctor


\brief Loading chunks made in RChunkLoader into tensors from data from RDataFrame.

*/


template <typename... ColTypes>


class RChunkLoaderFunctor {

   std::size_t fOffset{};

   std::size_t fVecSizeIdx{};

   float fVecPadding{};

   std::vector<std::size_t> fMaxVecSizes{};

   RFlat2DMatrix &fChunkTensor;


   std::size_t fNumChunkCols;


   int fI;

   int fNumColumns;


   //////////////////////////////////////////////////////////////////////////

   /// \brief Copy the content of a column into RTensor when the column consits of vectors

   template <typename T, std::enable_if_t<ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>


   void AssignToTensor(const T &vec, int i, int numColumns)

   {

      std::size_t max_vec_size = fMaxVecSizes[fVecSizeIdx++];

      std::size_t vec_size = vec.size();


      float *dst = fChunkTensor.GetData() + fOffset + numColumns * i;

      if (vec_size < max_vec_size) // Padding vector column to max_vec_size with fVecPadding

      {

         std::copy(vec.begin(), vec.end(), dst);

         std::fill(dst + vec_size, dst + max_vec_size, fVecPadding);

      } else // Copy only max_vec_size length from vector column

      {

         std::copy(vec.begin(), vec.begin() + max_vec_size, dst);

      }

      fOffset += max_vec_size;

   }


   //////////////////////////////////////////////////////////////////////////

   /// \brief Copy the content of a column into RTensor when the column consits of single values

   template <typename T, std::enable_if_t<!ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>


   void AssignToTensor(const T &val, int i, int numColumns)

   {

      fChunkTensor.GetData()[fOffset + numColumns * i] = val;

      fOffset++;

      // fChunkTensor.GetData()[numColumns * i] = val;

   }


public:


   RChunkLoaderFunctor(RFlat2DMatrix &chunkTensor, std::size_t numColumns, const std::vector<std::size_t> &maxVecSizes,

                       float vecPadding, int i)

      : fChunkTensor(chunkTensor), fMaxVecSizes(maxVecSizes), fVecPadding(vecPadding), fI(i), fNumColumns(numColumns)

   {

   }


   void operator()(const ColTypes &...cols)

   {

      fVecSizeIdx = 0;

      (AssignToTensor(cols, fI, fNumColumns), ...);

   }


};


/**

\class ROOT::Experimental::Internal::ML::RChunkLoader


\brief Building and loading the chunks from the blocks and chunks constructed in RChunkConstructor


In this class the blocks are stitched together to form chunks that are loaded into memory. The blocks used to create each

chunk come from different parts of the dataset. This is achieved by shuffling the blocks before distributing them into

chunks. The purpose of this process is to reduce bias during machine learning training by ensuring that the data is well

mixed. The dataset is also split into training and validation sets with the user-defined validation split fraction.

*/


template <typename... Args>


class RChunkLoader {

private:

   std::size_t fNumEntries;

   std::size_t fChunkSize;

   std::size_t fBlockSize;

   float fValidationSplit;


   std::vector<std::size_t> fVecSizes;

   std::size_t fSumVecSizes;

   std::size_t fVecPadding;

   std::size_t fNumChunkCols;


   std::size_t fNumTrainEntries;

   std::size_t fNumValidationEntries;

   std::unique_ptr<RFlat2DMatrixOperators> fTensorOperators;


   ROOT::RDF::RNode &f_rdf;

   std::vector<std::string> fCols;

   std::size_t fNumCols;

   std::size_t fSetSeed;


   bool fNotFiltered;

   bool fShuffle;


   ROOT::RDF::RResultPtr<std::vector<ULong64_t>> fEntries;


   std::unique_ptr<RChunkConstructor> fTraining;

   std::unique_ptr<RChunkConstructor> fValidation;


public:


   RChunkLoader(ROOT::RDF::RNode &rdf, const std::size_t chunkSize, const std::size_t blockSize,

                const float validationSplit, const std::vector<std::string> &cols,

                const std::vector<std::size_t> &vecSizes = {}, const float vecPadding = 0.0, bool shuffle = true,

                const std::size_t setSeed = 0)

      : f_rdf(rdf),

        fCols(cols),

        fVecSizes(vecSizes),

        fVecPadding(vecPadding),

        fChunkSize(chunkSize),

        fBlockSize(blockSize),

        fValidationSplit(validationSplit),

        fNotFiltered(f_rdf.GetFilterNames().empty()),

        fShuffle(shuffle),

        fSetSeed(setSeed)

   {

      fTensorOperators = std::make_unique<RFlat2DMatrixOperators>(fShuffle, fSetSeed);


      fEntries = f_rdf.Take<ULong64_t>("rdfentry_");

      fNumEntries = fEntries->size();


      // add the last element in entries to not go out of range when filling chunks

      fEntries->push_back((*fEntries)[fNumEntries - 1] + 1);


      fNumCols = fCols.size();

      fSumVecSizes = std::accumulate(fVecSizes.begin(), fVecSizes.end(), 0);


      fNumChunkCols = fNumCols + fSumVecSizes - fVecSizes.size();


      // number of training and validation entries after the split

      fNumValidationEntries = static_cast<std::size_t>(fValidationSplit * fNumEntries);

      fNumTrainEntries = fNumEntries - fNumValidationEntries;


      fTraining = std::make_unique<RChunkConstructor>(fNumTrainEntries, fChunkSize, fBlockSize);

      fValidation = std::make_unique<RChunkConstructor>(fNumValidationEntries, fChunkSize, fBlockSize);

   }


   //////////////////////////////////////////////////////////////////////////

   /// \brief Distribute the blocks into training and validation datasets


   void SplitDataset()

   {

      std::random_device rd;

      std::mt19937 g;


      if (fSetSeed == 0) {

         g.seed(rd());

      } else {

         g.seed(fSetSeed);

      }


      std::vector<Long_t> BlockSizes = {};


      // fill the training and validation block sizes

      for (size_t i = 0; i < fTraining->NumberOfDifferentBlocks.size(); i++) {

         BlockSizes.insert(BlockSizes.end(), fTraining->NumberOfDifferentBlocks[i], fTraining->SizeOfBlocks[i]);

      }


      for (size_t i = 0; i < fValidation->NumberOfDifferentBlocks.size(); i++) {

         BlockSizes.insert(BlockSizes.end(), fValidation->NumberOfDifferentBlocks[i], fValidation->SizeOfBlocks[i]);

      }


      // make an identity permutation map

      std::vector<Long_t> indices(BlockSizes.size());


      for (int i = 0; i < indices.size(); ++i) {

         indices[i] = i;

      }


      // shuffle the identity permutation to create a new permutation

      if (fShuffle) {

         std::shuffle(indices.begin(), indices.end(), g);

      }


      // use the permuation to shuffle the vector of block sizes

      std::vector<Long_t> PermutedBlockSizes(BlockSizes.size());

      for (int i = 0; i < BlockSizes.size(); ++i) {

         PermutedBlockSizes[i] = BlockSizes[indices[i]];

      }


      // create a vector for storing the boundaries of the blocks

      std::vector<Long_t> BlockBoundaries(BlockSizes.size());


      // get the boundaries of the blocks with the partial sum of the block sizes

      // insert 0 at the beginning for the lower boundary of the first block

      std::partial_sum(PermutedBlockSizes.begin(), PermutedBlockSizes.end(), BlockBoundaries.begin());

      BlockBoundaries.insert(BlockBoundaries.begin(), 0);


      // distribute the neighbouring block boudaries into pairs to get the intevals for the blocks

      std::vector<std::pair<Long_t, Long_t>> BlockIntervals;

      for (size_t i = 0; i < BlockBoundaries.size() - 1; ++i) {

         BlockIntervals.emplace_back(BlockBoundaries[i], BlockBoundaries[i + 1]);

      }


      // use the inverse of the permutation above to order the block intervals in the same order as

      // the original vector of block sizes

      std::vector<std::pair<Long_t, Long_t>> UnpermutedBlockIntervals(BlockIntervals.size());

      for (int i = 0; i < BlockIntervals.size(); ++i) {

         UnpermutedBlockIntervals[indices[i]] = BlockIntervals[i];

      }


      // distribute the block intervals between training and validation

      fTraining->BlockIntervals.insert(fTraining->BlockIntervals.begin(), UnpermutedBlockIntervals.begin(),

                                       UnpermutedBlockIntervals.begin() + fTraining->NumberOfBlocks);

      fValidation->BlockIntervals.insert(fValidation->BlockIntervals.begin(),

                                         UnpermutedBlockIntervals.begin() + fTraining->NumberOfBlocks,

                                         UnpermutedBlockIntervals.end());


      // distribute the different block intervals types for training and validation

      fTraining->DistributeBlockIntervals();

      fValidation->DistributeBlockIntervals();

   }


   //////////////////////////////////////////////////////////////////////////

   /// \brief Create training chunks consisiting of block intervals of different types


   void CreateTrainingChunksIntervals()

   {


      std::random_device rd;

      std::mt19937 g;


      if (fSetSeed == 0) {

         g.seed(rd());

      } else {

         g.seed(fSetSeed);

      }


      // shuffle the block intervals within each type of block

      if (fShuffle) {

         std::shuffle(fTraining->FullBlockIntervalsInFullChunks.begin(),

                      fTraining->FullBlockIntervalsInFullChunks.end(), g);

         std::shuffle(fTraining->LeftoverBlockIntervalsInFullChunks.begin(),

                      fTraining->LeftoverBlockIntervalsInFullChunks.end(), g);

         std::shuffle(fTraining->FullBlockIntervalsInLeftoverChunks.begin(),

                      fTraining->FullBlockIntervalsInLeftoverChunks.end(), g);

         std::shuffle(fTraining->LeftoverBlockIntervalsInLeftoverChunks.begin(),

                      fTraining->LeftoverBlockIntervalsInLeftoverChunks.end(), g);

      }


      // reset the chunk intervals and sizes before each epoch

      fTraining->ChunksIntervals = {};

      fTraining->ChunksSizes = {};


      // create the chunks each consisiting of block intervals

      fTraining->CreateChunksIntervals();


      if (fShuffle) {

         std::shuffle(fTraining->ChunksIntervals.begin(), fTraining->ChunksIntervals.end(), g);

      }


      fTraining->SizeOfChunks();

   }


   //////////////////////////////////////////////////////////////////////////

   /// \brief Create training chunks consisiting of block intervals of different types


   void CreateValidationChunksIntervals()

   {

      std::random_device rd;

      std::mt19937 g;


      if (fSetSeed == 0) {

         g.seed(rd());

      } else {

         g.seed(fSetSeed);

      }


      if (fShuffle) {

         std::shuffle(fValidation->FullBlockIntervalsInFullChunks.begin(),

                      fValidation->FullBlockIntervalsInFullChunks.end(), g);

         std::shuffle(fValidation->LeftoverBlockIntervalsInFullChunks.begin(),

                      fValidation->LeftoverBlockIntervalsInFullChunks.end(), g);

         std::shuffle(fValidation->FullBlockIntervalsInLeftoverChunks.begin(),

                      fValidation->FullBlockIntervalsInLeftoverChunks.end(), g);

         std::shuffle(fValidation->LeftoverBlockIntervalsInLeftoverChunks.begin(),

                      fValidation->LeftoverBlockIntervalsInLeftoverChunks.end(), g);

      }


      fValidation->ChunksIntervals = {};

      fValidation->ChunksSizes = {};


      fValidation->CreateChunksIntervals();


      if (fShuffle) {

         std::shuffle(fValidation->ChunksIntervals.begin(), fValidation->ChunksIntervals.end(), g);

      }


      fValidation->SizeOfChunks();

   }


   //////////////////////////////////////////////////////////////////////////

   /// \brief Load the nth chunk from the training dataset into a tensor

   /// \param[in] TrainChunkTensor RTensor for the training chunk

   /// \param[in] chunk Index of the chunk in the dataset


   void LoadTrainingChunk(RFlat2DMatrix &TrainChunkTensor, std::size_t chunk)

   {


      std::size_t chunkSize = fTraining->ChunksSizes[chunk];


      if (chunk < fTraining->Chunks) {

         RFlat2DMatrix Tensor(chunkSize, fNumChunkCols);


         // fill a chunk by looping over the blocks in a chunk (see RChunkConstructor)

         std::size_t chunkEntry = 0;

         std::vector<std::pair<Long_t, Long_t>> BlocksInChunk = fTraining->ChunksIntervals[chunk];


         std::sort(

            BlocksInChunk.begin(), BlocksInChunk.end(),

            [](const std::pair<Long_t, Long_t> &a, const std::pair<Long_t, Long_t> &b) { return a.first < b.first; });


         for (std::size_t i = 0; i < BlocksInChunk.size(); i++) {


            // Use the block start and end entry to load into the chunk if the dataframe is not filtered

            if (fNotFiltered) {

               RChunkLoaderFunctor<Args...> func(Tensor, fNumChunkCols, fVecSizes, fVecPadding, chunkEntry);

               ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, BlocksInChunk[i].first, BlocksInChunk[i].second);


               f_rdf.Foreach(func, fCols);

               chunkEntry += BlocksInChunk[i].second - BlocksInChunk[i].first;

            }


            // use the entry column of the dataframe as a map to load the entries that passed the filters

            else {

               std::size_t blockSize = BlocksInChunk[i].second - BlocksInChunk[i].first;

               for (std::size_t j = 0; j < blockSize; j++) {

                  RChunkLoaderFunctor<Args...> func(Tensor, fNumChunkCols, fVecSizes, fVecPadding, chunkEntry);

                  ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, (*fEntries)[BlocksInChunk[i].first + j],

                                                                (*fEntries)[BlocksInChunk[i].first + j + 1]);

                  f_rdf.Foreach(func, fCols);

                  chunkEntry++;

               }

            }

         }


         // reset dataframe

         ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, (*fEntries)[0], (*fEntries)[fNumEntries]);


         // shuffle the data in the chunk tensor

         fTensorOperators->ShuffleTensor(TrainChunkTensor, Tensor);

      }

   }


   //////////////////////////////////////////////////////////////////////////

   /// \brief Load the nth chunk from the validation dataset into a tensor

   /// \param[in] ValidationChunkTensor RTensor for the validation chunk

   /// \param[in] chunk Index of the chunk in the dataset


   void LoadValidationChunk(RFlat2DMatrix &ValidationChunkTensor, std::size_t chunk)

   {


      std::size_t chunkSize = fValidation->ChunksSizes[chunk];


      if (chunk < fValidation->Chunks) {

         RFlat2DMatrix Tensor(chunkSize, fNumChunkCols);


         std::size_t chunkEntry = 0;

         std::vector<std::pair<Long_t, Long_t>> BlocksInChunk = fValidation->ChunksIntervals[chunk];


         std::sort(

            BlocksInChunk.begin(), BlocksInChunk.end(),

            [](const std::pair<Long_t, Long_t> &a, const std::pair<Long_t, Long_t> &b) { return a.first < b.first; });


         for (std::size_t i = 0; i < BlocksInChunk.size(); i++) {


            // use the block start and end entry to load into the chunk if the dataframe is not filtered

            if (fNotFiltered) {

               RChunkLoaderFunctor<Args...> func(Tensor, fNumChunkCols, fVecSizes, fVecPadding, chunkEntry);

               ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, BlocksInChunk[i].first, BlocksInChunk[i].second);

               f_rdf.Foreach(func, fCols);

               chunkEntry += BlocksInChunk[i].second - BlocksInChunk[i].first;

            }


            // use the entry column of the dataframe as a map to load the entries that passed the filters

            else {

               std::size_t blockSize = BlocksInChunk[i].second - BlocksInChunk[i].first;

               for (std::size_t j = 0; j < blockSize; j++) {

                  RChunkLoaderFunctor<Args...> func(Tensor, fNumChunkCols, fVecSizes, fVecPadding, chunkEntry);

                  ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, (*fEntries)[BlocksInChunk[i].first + j],

                                                                (*fEntries)[BlocksInChunk[i].first + j + 1]);


                  f_rdf.Foreach(func, fCols);

                  chunkEntry++;

               }

            }

         }


         // reset dataframe

         ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, (*fEntries)[0], (*fEntries)[fNumEntries]);


         // shuffle the data in the chunk tensor

         fTensorOperators->ShuffleTensor(ValidationChunkTensor, Tensor);

      }

   }


   void ResetDataframe() { ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, 0, fNumEntries); }


   std::vector<std::size_t> GetTrainingChunkSizes() { return fTraining->ChunksSizes; }

   std::vector<std::size_t> GetValidationChunkSizes() { return fValidation->ChunksSizes; }


   std::size_t GetNumTrainingEntries() { return fNumTrainEntries; }

   std::size_t GetNumValidationEntries() { return fNumValidationEntries; }


   void CheckIfUnique(RFlat2DMatrix &Tensor)

   {

      const auto &rvec = Tensor.fRVec;

      if (std::set<float>(rvec.begin(), rvec.end()).size() == rvec.size()) {

         std::cout << "Tensor consists of only unique elements" << std::endl;

      }

   };


   void CheckIfOverlap(RFlat2DMatrix &Tensor1, RFlat2DMatrix &Tensor2)

   {

      std::set<float> result;


      // Call the set_intersection(), which computes the

      // intersection of set1 and set2 and

      // inserts the result into the 'result' set

      std::set<float> set1(Tensor1.fRVec.begin(), Tensor1.fRVec.end());

      std::set<float> set2(Tensor2.fRVec.begin(), Tensor2.fRVec.end());

      std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), std::inserter(result, result.begin()));

      // std::list<int> result = intersection(allEntries1, allEntries2);


      if (result.size() == 0) {

         std::cout << "No overlap between the tensors" << std::endl;

      } else {

         std::cout << "Intersection between tensors: ";

         for (auto num : result) {

            std::cout << num << " ";

         }

         std::cout << std::endl;

      }

   };


   std::size_t GetNumTrainingChunks() { return fTraining->Chunks; }


   std::size_t GetNumValidationChunks() { return fValidation->Chunks; }

};


} // namespace ROOT::Experimental::Internal::ML

#endif // ROOT_INTERNAL_ML_RCHUNKLOADER

RChunkConstructor.hxx

RDataFrame.hxx

RFlat2DMatrixOperators.hxx

RFlat2DMatrix.hxx

b
#define b(i)
Definition RSha256.hxx:100

g
#define g(i)
Definition RSha256.hxx:105

a
#define a(i)
Definition RSha256.hxx:99

TRangeDynCast
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Definition TCollection.h:360

result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Definition TGWin32VirtualXProxy.cxx:174

Utils.hxx

ROOT::Detail::TRangeCast
Definition TCollection.h:313

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor
Loading chunks made in RChunkLoader into tensors from data from RDataFrame.
Definition RChunkLoader.hxx:44

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::fVecSizeIdx
std::size_t fVecSizeIdx
Definition RChunkLoader.hxx:46

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::fNumChunkCols
std::size_t fNumChunkCols
Definition RChunkLoader.hxx:51

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::fOffset
std::size_t fOffset
Definition RChunkLoader.hxx:45

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::AssignToTensor
void AssignToTensor(const T &vec, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of vectors.
Definition RChunkLoader.hxx:59

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::AssignToTensor
void AssignToTensor(const T &val, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of single values.
Definition RChunkLoader.hxx:79

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::fNumColumns
int fNumColumns
Definition RChunkLoader.hxx:54

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::fI
int fI
Definition RChunkLoader.hxx:53

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::fVecPadding
float fVecPadding
Definition RChunkLoader.hxx:47

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::fMaxVecSizes
std::vector< std::size_t > fMaxVecSizes
Definition RChunkLoader.hxx:48

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::fChunkTensor
RFlat2DMatrix & fChunkTensor
Definition RChunkLoader.hxx:49

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::RChunkLoaderFunctor
RChunkLoaderFunctor(RFlat2DMatrix &chunkTensor, std::size_t numColumns, const std::vector< std::size_t > &maxVecSizes, float vecPadding, int i)
Definition RChunkLoader.hxx:87

ROOT::Experimental::Internal::ML::RChunkLoaderFunctor::operator()
void operator()(const ColTypes &...cols)
Definition RChunkLoader.hxx:93

ROOT::Experimental::Internal::ML::RChunkLoader
Building and loading the chunks from the blocks and chunks constructed in RChunkConstructor.
Definition RChunkLoader.hxx:112

ROOT::Experimental::Internal::ML::RChunkLoader::fNumValidationEntries
std::size_t fNumValidationEntries
Definition RChunkLoader.hxx:125

ROOT::Experimental::Internal::ML::RChunkLoader::fValidationSplit
float fValidationSplit
Definition RChunkLoader.hxx:117

ROOT::Experimental::Internal::ML::RChunkLoader::RChunkLoader
RChunkLoader(ROOT::RDF::RNode &rdf, const std::size_t chunkSize, const std::size_t blockSize, const float validationSplit, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0, bool shuffle=true, const std::size_t setSeed=0)
Definition RChunkLoader.hxx:142

ROOT::Experimental::Internal::ML::RChunkLoader::CreateTrainingChunksIntervals
void CreateTrainingChunksIntervals()
Create training chunks consisting of block intervals of different types.
Definition RChunkLoader.hxx:255

ROOT::Experimental::Internal::ML::RChunkLoader::fNumCols
std::size_t fNumCols
Definition RChunkLoader.hxx:130

ROOT::Experimental::Internal::ML::RChunkLoader::CheckIfUnique
void CheckIfUnique(RFlat2DMatrix &Tensor)
Definition RChunkLoader.hxx:440

ROOT::Experimental::Internal::ML::RChunkLoader::fNumChunkCols
std::size_t fNumChunkCols
Definition RChunkLoader.hxx:122

ROOT::Experimental::Internal::ML::RChunkLoader::GetNumValidationChunks
std::size_t GetNumValidationChunks()
Definition RChunkLoader.hxx:473

ROOT::Experimental::Internal::ML::RChunkLoader::SplitDataset
void SplitDataset()
Distribute the blocks into training and validation datasets.
Definition RChunkLoader.hxx:180

ROOT::Experimental::Internal::ML::RChunkLoader::fNumEntries
std::size_t fNumEntries
Definition RChunkLoader.hxx:114

ROOT::Experimental::Internal::ML::RChunkLoader::GetValidationChunkSizes
std::vector< std::size_t > GetValidationChunkSizes()
Definition RChunkLoader.hxx:435

ROOT::Experimental::Internal::ML::RChunkLoader::fCols
std::vector< std::string > fCols
Definition RChunkLoader.hxx:129

ROOT::Experimental::Internal::ML::RChunkLoader::GetNumTrainingEntries
std::size_t GetNumTrainingEntries()
Definition RChunkLoader.hxx:437

ROOT::Experimental::Internal::ML::RChunkLoader::fShuffle
bool fShuffle
Definition RChunkLoader.hxx:134

ROOT::Experimental::Internal::ML::RChunkLoader::f_rdf
ROOT::RDF::RNode & f_rdf
Definition RChunkLoader.hxx:128

ROOT::Experimental::Internal::ML::RChunkLoader::fChunkSize
std::size_t fChunkSize
Definition RChunkLoader.hxx:115

ROOT::Experimental::Internal::ML::RChunkLoader::fNumTrainEntries
std::size_t fNumTrainEntries
Definition RChunkLoader.hxx:124

ROOT::Experimental::Internal::ML::RChunkLoader::ResetDataframe
void ResetDataframe()
Definition RChunkLoader.hxx:432

ROOT::Experimental::Internal::ML::RChunkLoader::GetNumValidationEntries
std::size_t GetNumValidationEntries()
Definition RChunkLoader.hxx:438

ROOT::Experimental::Internal::ML::RChunkLoader::CreateValidationChunksIntervals
void CreateValidationChunksIntervals()
Create validation chunks consisting of block intervals of different types.
Definition RChunkLoader.hxx:295

ROOT::Experimental::Internal::ML::RChunkLoader::LoadTrainingChunk
void LoadTrainingChunk(RFlat2DMatrix &TrainChunkTensor, std::size_t chunk)
Load the nth chunk from the training dataset into a tensor.
Definition RChunkLoader.hxx:333

ROOT::Experimental::Internal::ML::RChunkLoader::fBlockSize
std::size_t fBlockSize
Definition RChunkLoader.hxx:116

ROOT::Experimental::Internal::ML::RChunkLoader::fSumVecSizes
std::size_t fSumVecSizes
Definition RChunkLoader.hxx:120

ROOT::Experimental::Internal::ML::RChunkLoader::fVecPadding
std::size_t fVecPadding
Definition RChunkLoader.hxx:121

ROOT::Experimental::Internal::ML::RChunkLoader::fNotFiltered
bool fNotFiltered
Definition RChunkLoader.hxx:133

ROOT::Experimental::Internal::ML::RChunkLoader::fTraining
std::unique_ptr< RChunkConstructor > fTraining
Definition RChunkLoader.hxx:138

ROOT::Experimental::Internal::ML::RChunkLoader::GetNumTrainingChunks
std::size_t GetNumTrainingChunks()
Definition RChunkLoader.hxx:471

ROOT::Experimental::Internal::ML::RChunkLoader::fSetSeed
std::size_t fSetSeed
Definition RChunkLoader.hxx:131

ROOT::Experimental::Internal::ML::RChunkLoader::fVecSizes
std::vector< std::size_t > fVecSizes
Definition RChunkLoader.hxx:119

ROOT::Experimental::Internal::ML::RChunkLoader::fValidation
std::unique_ptr< RChunkConstructor > fValidation
Definition RChunkLoader.hxx:139

ROOT::Experimental::Internal::ML::RChunkLoader::fEntries
ROOT::RDF::RResultPtr< std::vector< ULong64_t > > fEntries
Definition RChunkLoader.hxx:136

ROOT::Experimental::Internal::ML::RChunkLoader::CheckIfOverlap
void CheckIfOverlap(RFlat2DMatrix &Tensor1, RFlat2DMatrix &Tensor2)
Definition RChunkLoader.hxx:448

ROOT::Experimental::Internal::ML::RChunkLoader::fTensorOperators
std::unique_ptr< RFlat2DMatrixOperators > fTensorOperators
Definition RChunkLoader.hxx:126

ROOT::Experimental::Internal::ML::RChunkLoader::GetTrainingChunkSizes
std::vector< std::size_t > GetTrainingChunkSizes()
Definition RChunkLoader.hxx:434

ROOT::Experimental::Internal::ML::RChunkLoader::LoadValidationChunk
void LoadValidationChunk(RFlat2DMatrix &ValidationChunkTensor, std::size_t chunk)
Load the nth chunk from the validation dataset into a tensor.
Definition RChunkLoader.hxx:385

ROOT::RDF::RInterface
The public interface to the RDataFrame federation of classes.
Definition RInterface.hxx:123

ROOT::RDF::RInterface::Take
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
Definition RInterface.hxx:1845

ROOT::RDF::RInterface::Foreach
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
Definition RInterface.hxx:1695

ROOT::RDF::RResultPtr
Smart pointer for the return type of actions.
Definition RResultPtr.hxx:131

ROOT::RRangeCast::begin
const_iterator begin() const
Definition RRangeCast.hxx:104

ROOT::RRangeCast::end
const_iterator end() const
Definition RRangeCast.hxx:105

ROOT::Experimental::Internal::ML
Definition RBatchGenerator.hxx:38

ROOT::Internal::RDF::ChangeBeginAndEndEntries
void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end)
Definition RInterface.cxx:20

ROOT::Detail::indices
Definition span.hxx:68

ROOT::Experimental::Internal::ML::RFlat2DMatrix
Wrapper around ROOT::RVec<float> representing a 2D matrix.
Definition RFlat2DMatrix.hxx:13

ROOT::Experimental::Internal::ML::RFlat2DMatrix::GetData
float * GetData()
Definition RFlat2DMatrix.hxx:22

vec
Definition civetweb.c:1910