15#ifndef TMVA_RCHUNKLOADER
16#define TMVA_RCHUNKLOADER
30namespace Experimental {
107template <
typename... Args>
179 std::random_device
rd;
191 for (
size_t i = 0; i <
fTraining->NumberOfDifferentBlocks.size(); i++) {
195 for (
size_t i = 0; i <
fValidation->NumberOfDifferentBlocks.size(); i++) {
200 std::vector<Long_t> indices(
BlockSizes.size());
202 for (
int i = 0; i < indices.size(); ++i) {
208 std::shuffle(indices.begin(), indices.end(),
g);
226 std::vector<std::pair<Long_t, Long_t>> BlockIntervals;
234 for (
int i = 0; i < BlockIntervals.size(); ++i) {
255 std::random_device
rd;
266 std::shuffle(
fTraining->FullBlockIntervalsInFullChunks.begin(),
267 fTraining->FullBlockIntervalsInFullChunks.end(),
g);
268 std::shuffle(
fTraining->LeftoverBlockIntervalsInFullChunks.begin(),
269 fTraining->LeftoverBlockIntervalsInFullChunks.end(),
g);
270 std::shuffle(
fTraining->FullBlockIntervalsInLeftoverChunks.begin(),
271 fTraining->FullBlockIntervalsInLeftoverChunks.end(),
g);
272 std::shuffle(
fTraining->LeftoverBlockIntervalsInLeftoverChunks.begin(),
273 fTraining->LeftoverBlockIntervalsInLeftoverChunks.end(),
g);
294 std::random_device
rd;
304 std::shuffle(
fValidation->FullBlockIntervalsInFullChunks.begin(),
306 std::shuffle(
fValidation->LeftoverBlockIntervalsInFullChunks.begin(),
307 fValidation->LeftoverBlockIntervalsInFullChunks.end(),
g);
308 std::shuffle(
fValidation->FullBlockIntervalsInLeftoverChunks.begin(),
309 fValidation->FullBlockIntervalsInLeftoverChunks.end(),
g);
310 std::shuffle(
fValidation->LeftoverBlockIntervalsInLeftoverChunks.begin(),
311 fValidation->LeftoverBlockIntervalsInLeftoverChunks.end(),
g);
343 [](
const std::pair<Long_t, Long_t>&
a,
const std::pair<Long_t, Long_t>&
b) {
344 return a.first < b.first;
361 for (std::size_t
j = 0;
j < blockSize;
j++) {
392 [](
const std::pair<Long_t, Long_t>&
a,
const std::pair<Long_t, Long_t>&
b) {
393 return a.first < b.first;
409 for (std::size_t
j = 0;
j < blockSize;
j++) {
440 std::cout <<
"Tensor consists of only unique elements" << std::endl;
457 std::cout <<
"No overlap between the tensors" << std::endl;
459 std::cout <<
"Intersection between tensors: ";
461 std::cout << num <<
" ";
463 std::cout << std::endl;
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
The public interface to the RDataFrame federation of classes.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
Smart pointer for the return type of actions.
const_iterator begin() const
const_iterator end() const
RFlat2DMatrix & fChunkTensor
std::size_t fNumChunkCols
void AssignToTensor(const T &vec, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of vectors.
std::vector< std::size_t > fMaxVecSizes
void AssignToTensor(const T &val, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of single values.
void operator()(const ColTypes &...cols)
RChunkLoaderFunctor(RFlat2DMatrix &chunkTensor, std::size_t numColumns, const std::vector< std::size_t > &maxVecSizes, float vecPadding, int i)
std::size_t GetNumValidationEntries()
void CheckIfUnique(RFlat2DMatrix &Tensor)
std::unique_ptr< RFlat2DMatrixOperators > fTensorOperators
std::unique_ptr< RChunkConstructor > fValidation
void LoadTrainingChunk(RFlat2DMatrix &TrainChunkTensor, std::size_t chunk)
Load the nth chunk from the training dataset into a tensor.
std::vector< std::string > fCols
std::size_t GetNumTrainingEntries()
std::size_t GetNumValidationChunks()
void CheckIfOverlap(RFlat2DMatrix &Tensor1, RFlat2DMatrix &Tensor2)
void LoadValidationChunk(RFlat2DMatrix &ValidationChunkTensor, std::size_t chunk)
Load the nth chunk from the validation dataset into a tensor.
std::vector< std::size_t > GetTrainingChunkSizes()
ROOT::RDF::RResultPtr< std::vector< ULong64_t > > fEntries
std::vector< std::size_t > GetValidationChunkSizes()
RChunkLoader(ROOT::RDF::RNode &rdf, const std::size_t chunkSize, const std::size_t blockSize, const float validationSplit, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0, bool shuffle=true, const std::size_t setSeed=0)
void SplitDataset()
Distribute the blocks into training and validation datasets.
void CreateValidationChunksIntervals()
Create validation chunks consisting of block intervals of different types.
void CreateTrainingChunksIntervals()
Create training chunks consisting of block intervals of different types.
std::vector< std::size_t > fVecSizes
std::size_t fNumTrainEntries
std::size_t fNumChunkCols
std::unique_ptr< RChunkConstructor > fTraining
std::size_t fNumValidationEntries
std::size_t GetNumTrainingChunks()
void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end)
create variable transformations
Wrapper around ROOT::RVec<float> representing a 2D matrix.