15#ifndef ROOT_INTERNAL_ML_RCHUNKLOADER
16#define ROOT_INTERNAL_ML_RCHUNKLOADER
104template <
typename... Args>
175 std::random_device
rd;
187 for (
size_t i = 0; i <
fTraining->NumberOfDifferentBlocks.size(); i++) {
191 for (
size_t i = 0; i <
fValidation->NumberOfDifferentBlocks.size(); i++) {
198 for (
int i = 0; i <
indices.size(); ++i) {
222 std::vector<std::pair<Long_t, Long_t>> BlockIntervals;
230 for (
int i = 0; i < BlockIntervals.size(); ++i) {
251 std::random_device
rd;
262 std::shuffle(
fTraining->FullBlockIntervalsInFullChunks.begin(),
263 fTraining->FullBlockIntervalsInFullChunks.end(),
g);
264 std::shuffle(
fTraining->LeftoverBlockIntervalsInFullChunks.begin(),
265 fTraining->LeftoverBlockIntervalsInFullChunks.end(),
g);
266 std::shuffle(
fTraining->FullBlockIntervalsInLeftoverChunks.begin(),
267 fTraining->FullBlockIntervalsInLeftoverChunks.end(),
g);
268 std::shuffle(
fTraining->LeftoverBlockIntervalsInLeftoverChunks.begin(),
269 fTraining->LeftoverBlockIntervalsInLeftoverChunks.end(),
g);
290 std::random_device
rd;
300 std::shuffle(
fValidation->FullBlockIntervalsInFullChunks.begin(),
302 std::shuffle(
fValidation->LeftoverBlockIntervalsInFullChunks.begin(),
303 fValidation->LeftoverBlockIntervalsInFullChunks.end(),
g);
304 std::shuffle(
fValidation->FullBlockIntervalsInLeftoverChunks.begin(),
305 fValidation->FullBlockIntervalsInLeftoverChunks.end(),
g);
306 std::shuffle(
fValidation->LeftoverBlockIntervalsInLeftoverChunks.begin(),
307 fValidation->LeftoverBlockIntervalsInLeftoverChunks.end(),
g);
340 [](
const std::pair<Long_t, Long_t> &
a,
const std::pair<Long_t, Long_t> &
b) { return a.first < b.first; });
356 for (std::size_t
j = 0;
j < blockSize;
j++) {
391 [](
const std::pair<Long_t, Long_t> &
a,
const std::pair<Long_t, Long_t> &
b) { return a.first < b.first; });
406 for (std::size_t
j = 0;
j < blockSize;
j++) {
437 std::cout <<
"Tensor consists of only unique elements" << std::endl;
454 std::cout <<
"No overlap between the tensors" << std::endl;
456 std::cout <<
"Intersection between tensors: ";
458 std::cout << num <<
" ";
460 std::cout << std::endl;
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Loading chunks made in RChunkLoader into tensors from data from RDataFrame.
std::size_t fNumChunkCols
void AssignToTensor(const T &vec, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of vectors.
void AssignToTensor(const T &val, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of single values.
std::vector< std::size_t > fMaxVecSizes
RFlat2DMatrix & fChunkTensor
RChunkLoaderFunctor(RFlat2DMatrix &chunkTensor, std::size_t numColumns, const std::vector< std::size_t > &maxVecSizes, float vecPadding, int i)
void operator()(const ColTypes &...cols)
Building and loading the chunks from the blocks and chunks constructed in RChunkConstructor.
std::size_t fNumValidationEntries
RChunkLoader(ROOT::RDF::RNode &rdf, const std::size_t chunkSize, const std::size_t blockSize, const float validationSplit, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0, bool shuffle=true, const std::size_t setSeed=0)
void CreateTrainingChunksIntervals()
Create training chunks consisting of block intervals of different types.
void CheckIfUnique(RFlat2DMatrix &Tensor)
std::size_t fNumChunkCols
std::size_t GetNumValidationChunks()
void SplitDataset()
Distribute the blocks into training and validation datasets.
std::vector< std::size_t > GetValidationChunkSizes()
std::vector< std::string > fCols
std::size_t GetNumTrainingEntries()
std::size_t fNumTrainEntries
std::size_t GetNumValidationEntries()
void CreateValidationChunksIntervals()
Create validation chunks consisting of block intervals of different types.
void LoadTrainingChunk(RFlat2DMatrix &TrainChunkTensor, std::size_t chunk)
Load the nth chunk from the training dataset into a tensor.
std::unique_ptr< RChunkConstructor > fTraining
std::size_t GetNumTrainingChunks()
std::vector< std::size_t > fVecSizes
std::unique_ptr< RChunkConstructor > fValidation
ROOT::RDF::RResultPtr< std::vector< ULong64_t > > fEntries
void CheckIfOverlap(RFlat2DMatrix &Tensor1, RFlat2DMatrix &Tensor2)
std::unique_ptr< RFlat2DMatrixOperators > fTensorOperators
std::vector< std::size_t > GetTrainingChunkSizes()
void LoadValidationChunk(RFlat2DMatrix &ValidationChunkTensor, std::size_t chunk)
Load the nth chunk from the validation dataset into a tensor.
The public interface to the RDataFrame federation of classes.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
Smart pointer for the return type of actions.
const_iterator begin() const
const_iterator end() const
void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end)
Wrapper around ROOT::RVec<float> representing a 2D matrix.