Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RBatchLoader.hxx
Go to the documentation of this file.
1// Author: Dante Niewenhuis, VU Amsterdam 07/2023
2// Author: Kristupas Pranckietis, Vilnius University 05/2024
3// Author: Nopphakorn Subsa-Ard, King Mongkut's University of Technology Thonburi (KMUTT) (TH) 08/2024
4// Author: Vincenzo Eduardo Padulano, CERN 10/2024
5// Author: Martin Føll, University of Oslo (UiO) & CERN 05/2025
6// Author: Silia Taider, CERN 02/2026
7
8/*************************************************************************
9 * Copyright (C) 1995-2025, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT_INTERNAL_ML_RBATCHLOADER
17#define ROOT_INTERNAL_ML_RBATCHLOADER
18
19#include <condition_variable>
20#include <memory>
21#include <mutex>
22#include <queue>
23#include <string>
24#include <vector>
25
27
29/**
30 \class ROOT::Experimental::Internal::ML::RBatchLoader
31
32\brief Building and loading the batches from loaded chunks in RChunkLoader
33
34In this class, the chunks that are loaded into memory (see RChunkLoader) are split into batches for ML training,
35which are then pushed onto a queue. This is done separately for the training and validation chunks.
36*/
37
39private:
40 std::size_t fBatchSize;
41 // needed for calculating the total number of batch columns when vector columns are present
42 std::vector<std::string> fCols;
43 std::mutex &fLock;
44 std::condition_variable &fCV;
45 std::vector<std::size_t> fVecSizes;
46 std::size_t fSumVecSizes;
47 std::size_t fNumColumns;
48 std::size_t fNumEntries;
50
51 std::size_t fNumFullBatches;
52 std::size_t fNumBatches;
53 std::size_t fLeftoverBatchSize;
54
55 bool fIsActive = false;
56 bool fProducerDone = true;
57
58 // queue of flattened tensors (rows * cols)
59 std::queue<std::unique_ptr<RFlat2DMatrix>> fBatchQueue;
60
61 // current batch that is loaded into memory
62 std::unique_ptr<RFlat2DMatrix> fCurrentBatch;
63
64 // primary and secondary leftover batches used to create batches from a chunk
65 std::unique_ptr<RFlat2DMatrix> fPrimaryLeftoverBatch;
66 std::unique_ptr<RFlat2DMatrix> fSecondaryLeftoverBatch;
67
68public:
69 RBatchLoader(std::size_t batchSize, const std::vector<std::string> &cols, std::mutex &sharedMutex,
70 std::condition_variable &sharedCV, const std::vector<std::size_t> &vecSizes = {},
71 std::size_t numEntries = 0, bool dropRemainder = false);
72
73 void Activate();
74 void DeActivate();
75 void Reset();
76 void MarkProducerDone();
77
78 std::unique_ptr<RFlat2DMatrix> CreateBatch(RFlat2DMatrix &chunkTensor, std::size_t idxs);
81
82 bool isProducerDone() { return fProducerDone; }
83 std::size_t GetNumBatches() { return fNumBatches; }
84 std::size_t GetNumEntries() { return fNumEntries; }
85 std::size_t GetNumRemainderRows() { return fLeftoverBatchSize; }
86 std::size_t GetNumBatchQueue() { return fBatchQueue.size(); }
87};
88
89} // namespace ROOT::Experimental::Internal::ML
90
91#endif // ROOT_INTERNAL_ML_RBATCHLOADER
Building and loading the batches from loaded chunks in RChunkLoader.
RFlat2DMatrix GetBatch()
Loading the batch from the queue.
std::unique_ptr< RFlat2DMatrix > fSecondaryLeftoverBatch
std::queue< std::unique_ptr< RFlat2DMatrix > > fBatchQueue
void Reset()
Reset the batchloader state.
void Activate()
Activate the batchloader. This means that batches can be created and loaded.
void MarkProducerDone()
Signal that the producer has finished pushing all batches for this epoch.
void CreateBatches(RFlat2DMatrix &chunkTensor, bool isLastBatch)
Create the batches from a chunk and add them to the queue.
void DeActivate()
DeActivate the batchloader.
std::unique_ptr< RFlat2DMatrix > fPrimaryLeftoverBatch
std::unique_ptr< RFlat2DMatrix > fCurrentBatch
RBatchLoader(std::size_t batchSize, const std::vector< std::string > &cols, std::mutex &sharedMutex, std::condition_variable &sharedCV, const std::vector< std::size_t > &vecSizes={}, std::size_t numEntries=0, bool dropRemainder=false)
std::unique_ptr< RFlat2DMatrix > CreateBatch(RFlat2DMatrix &chunkTensor, std::size_t idxs)
Return a batch of data as a unique pointer.
Wrapper around ROOT::RVec<float> representing a 2D matrix.