Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RChunkLoader.hxx
Go to the documentation of this file.
1#ifndef TMVA_CHUNKLOADER
2#define TMVA_CHUNKLOADER
3
4#include <iostream>
5#include <vector>
6
7#include "TMVA/RTensor.hxx"
8#include "ROOT/RDataFrame.hxx"
9#include "ROOT/RVec.hxx"
10
11#include "ROOT/RLogger.hxx"
12
13namespace TMVA {
14namespace Experimental {
15namespace Internal {
16
17// RChunkLoaderFunctor: functor used to load the content of an RDataFrame into an RTensor.
18template <typename First, typename... Rest>
20
21private:
22 std::size_t fOffset = 0;
23 std::size_t fVecSizeIdx = 0;
24 std::vector<std::size_t> fMaxVecSizes;
25
27
29
30 /// \brief Load the final given value into fChunkTensor
31 /// \tparam First_T
32 /// \param first
33 template <typename First_T>
34 void AssignToTensor(First_T first)
35 {
36 fChunkTensor.GetData()[fOffset++] = first;
37 }
38
39 /// \brief Load the final given value into fChunkTensor
40 /// \tparam VecType
41 /// \param first
42 template <typename VecType>
44 {
45 AssignVector(first);
46 }
47
48 /// \brief Recursively loop through the given values, and load them onto the fChunkTensor
49 /// \tparam First_T
50 /// \tparam ...Rest_T
51 /// \param first
52 /// \param ...rest
53 template <typename First_T, typename... Rest_T>
54 void AssignToTensor(First_T first, Rest_T... rest)
55 {
56 fChunkTensor.GetData()[fOffset++] = first;
57
58 AssignToTensor(std::forward<Rest_T>(rest)...);
59 }
60
61 /// \brief Recursively loop through the given values, and load them onto the fChunkTensor
62 /// \tparam VecType
63 /// \tparam ...Rest_T
64 /// \param first
65 /// \param ...rest
66 template <typename VecType, typename... Rest_T>
67 void AssignToTensor(const ROOT::RVec<VecType> &first, Rest_T... rest)
68 {
69 AssignVector(first);
70
71 AssignToTensor(std::forward<Rest_T>(rest)...);
72 }
73
74 /// \brief Loop through the values of a given vector and load them into the RTensor
75 /// Note: the given vec_size does not have to be the same size as the given vector
76 /// If the size is bigger than the given vector, zeros are used as padding.
77 /// If the size is smaller, the remaining values are ignored.
78 /// \tparam VecType
79 /// \param vec
80 template <typename VecType>
82 {
83 std::size_t max_vec_size = fMaxVecSizes[fVecSizeIdx++];
84 std::size_t vec_size = vec.size();
85
86 for (std::size_t i = 0; i < max_vec_size; i++) {
87 if (i < vec_size) {
89 } else {
91 }
92 }
93 }
94
95public:
97 const std::vector<std::size_t> &maxVecSizes = std::vector<std::size_t>(),
98 const float vecPadding = 0.0)
99 : fChunkTensor(chunkTensor), fMaxVecSizes(maxVecSizes), fVecPadding(vecPadding)
100 {
101 }
102
103 /// \brief Loop through all columns of an event and put their values into an RTensor
104 /// \param first
105 /// \param ...rest
106 void operator()(First first, Rest... rest)
107 {
108 fVecSizeIdx = 0;
109 AssignToTensor(std::forward<First>(first), std::forward<Rest>(rest)...);
110 }
111};
112
113template <typename... Args>
115
116private:
std::string fTreeName;
std::string fFileName;
std::size_t fChunkSize;  // passed to Range() when a chunk is loaded
std::size_t fNumColumns; // number of entries in the column list given to the constructor

std::vector<std::string> fCols; // column names passed to Foreach
std::string fFilters;           // filter expression; the filtered path is taken only when non-empty

std::vector<std::size_t> fVecSizes; // passed to RChunkLoaderFunctor as maxVecSizes
// Padding value passed to RChunkLoaderFunctor. Declared as float to match the `const float vecPadding`
// constructor parameters: an unsigned integer member would drop the fractional part of the padding
// value and cannot represent a negative one.
float fVecPadding;
127
128public:
129 /// \brief Constructor for the RChunkLoader
130 /// \param treeName
131 /// \param fileName
132 /// \param chunkSize
133 /// \param cols
134 /// \param filters
135 /// \param vecSizes
136 /// \param vecPadding
137 RChunkLoader(const std::string &treeName, const std::string &fileName, const std::size_t chunkSize,
138 const std::vector<std::string> &cols, const std::string &filters = "",
139 const std::vector<std::size_t> &vecSizes = {}, const float vecPadding = 0.0)
140 : fTreeName(treeName),
141 fFileName(fileName),
142 fChunkSize(chunkSize),
143 fCols(cols),
145 fVecSizes(vecSizes),
146 fVecPadding(vecPadding),
147 fNumColumns(cols.size())
148 {
149 }
150
151 /// \brief Load a chunk of data using the RChunkLoaderFunctor
152 /// \param chunkTensor
153 /// \param currentRow
154 /// \return A pair of size_t defining the number of events processed and how many passed all filters
155 std::pair<std::size_t, std::size_t>
156 LoadChunk(TMVA::Experimental::RTensor<float> &chunkTensor, const std::size_t currentRow)
157 {
158 RChunkLoaderFunctor<Args...> func(chunkTensor, fVecSizes, fVecPadding);
159
160 // Create TDataFrame of the chunk
161 // Use RDatasetSpec to start reading at the current row
162 long long start_l = currentRow;
166 .WithGlobalRange({start_l, std::numeric_limits<Long64_t>::max()});
167
168 ROOT::RDataFrame x_rdf(x_spec);
169
170 // Load events if filters are given
171 if (fFilters.size() > 0) {
172 return loadFiltered(x_rdf, func);
173 }
174
175 // load events if no filters are given
176 return loadNonFiltered(x_rdf, func);
177 }
178
179private:
180 /// \brief Add filters to the RDataFrame and load a chunk of data
181 /// \param x_rdf
182 /// \param func
183 /// \return A pair of size_t defining the number of events processed and how many passed all filters
184 std::pair<std::size_t, std::size_t> loadFiltered(ROOT::RDataFrame &x_rdf, RChunkLoaderFunctor<Args...> &func)
185 {
186 // Add the given filters to the RDataFrame
187 auto x_filter = x_rdf.Filter(fFilters, "RBatchGenerator_Filter");
188
189 // add range to the DataFrame
190 auto x_ranged = x_filter.Range(fChunkSize);
191 auto myReport = x_ranged.Report();
192
193 // load data
194 x_ranged.Foreach(func, fCols);
195
196 // Use the report to gather the number of events processed and passed.
197 // passed_events is used to determine the starting event of the next chunk
198 // processed_events is used to determine if the end of the database is reached.
199 std::size_t processed_events = myReport.begin()->GetAll();
200 std::size_t passed_events = (myReport.end() - 1)->GetPass();
201
202 return std::make_pair(processed_events, passed_events);
203 }
204
205 /// \brief Loop over the events in the dataframe untill either the end of the dataframe
206 /// is reached, or a full chunk is loaded
207 /// \param x_rdf
208 /// \param func
209 /// \return A pair of size_t defining the number of events processed and how many passed all filters
210 std::pair<std::size_t, std::size_t> loadNonFiltered(ROOT::RDataFrame &x_rdf, RChunkLoaderFunctor<Args...> &func)
211 {
212 // add range
213 auto x_ranged = x_rdf.Range(fChunkSize);
214 // auto x_ranged = x_rdf.Range(currentRow, currentRow + fChunkSize);
215 auto myCount = x_ranged.Count();
216
217 // load data
218 x_ranged.Foreach(func, fCols);
219
220 // get loading info
221 std::size_t processed_events = myCount.GetValue();
222 std::size_t passed_events = myCount.GetValue();
223 return std::make_pair(processed_events, passed_events);
224 }
225};
226
227} // namespace Internal
228} // namespace Experimental
229} // namespace TMVA
230#endif // TMVA_CHUNKLOADER
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
const char * filters[]
The dataset specification for RDataFrame.
RDatasetSpec & AddSample(RSample sample)
Add sample (RSample class object) to the RDatasetSpec object.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1529
void AssignVector(const ROOT::RVec< VecType > &vec)
Loop through the values of a given vector and load them into the RTensor Note: the given vec_size doe...
void AssignToTensor(const ROOT::RVec< VecType > &first, Rest_T... rest)
Recursively loop through the given values, and load them onto the fChunkTensor.
void AssignToTensor(First_T first)
Load the final given value into fChunkTensor.
void AssignToTensor(First_T first, Rest_T... rest)
Recursively loop through the given values, and load them onto the fChunkTensor.
RChunkLoaderFunctor(TMVA::Experimental::RTensor< float > &chunkTensor, const std::vector< std::size_t > &maxVecSizes=std::vector< std::size_t >(), const float vecPadding=0.0)
void operator()(First first, Rest... rest)
Loop through all columns of an event and put their values into an RTensor.
void AssignToTensor(const ROOT::RVec< VecType > &first)
Load the final given value into fChunkTensor.
TMVA::Experimental::RTensor< float > & fChunkTensor
std::pair< std::size_t, std::size_t > loadNonFiltered(ROOT::RDataFrame &x_rdf, RChunkLoaderFunctor< Args... > &func)
Loop over the events in the dataframe until either the end of the dataframe is reached,...
std::pair< std::size_t, std::size_t > loadFiltered(ROOT::RDataFrame &x_rdf, RChunkLoaderFunctor< Args... > &func)
Add filters to the RDataFrame and load a chunk of data.
std::pair< std::size_t, std::size_t > LoadChunk(TMVA::Experimental::RTensor< float > &chunkTensor, const std::size_t currentRow)
Load a chunk of data using the RChunkLoaderFunctor.
RChunkLoader(const std::string &treeName, const std::string &fileName, const std::size_t chunkSize, const std::vector< std::string > &cols, const std::string &filters="", const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0)
Constructor for the RChunkLoader.
RTensor is a container with contiguous memory and shape information.
Definition RTensor.hxx:162
create variable transformations