11#ifndef ROOT_INTERNAL_ML_RDATASETLOADER
12#define ROOT_INTERNAL_ML_RDATASETLOADER
34template <
typename... ColTypes>
49 template <typename T, std::enable_if_t<ROOT::Internal::RDF::IsDataContainer<T>::value,
int> = 0>
53 std::size_t vec_size =
vec.size();
54 if (vec_size < max_vec_size)
68 template <typename T, std::enable_if_t<!ROOT::Internal::RDF::IsDataContainer<T>::value,
int> = 0>
77 const std::vector<std::size_t> &maxVecSizes,
float vecPadding,
int i)
102template <
typename... Args>
134 RDatasetLoader(
const std::vector<ROOT::RDF::RNode> &rdfs,
const float validationSplit,
135 const std::vector<std::string> &cols,
const std::vector<std::size_t> &vecSizes = {},
136 const float vecPadding = 0.0,
bool shuffle =
true,
const std::size_t setSeed = 0)
160 const std::size_t NumEntries = Entries->size();
163 Entries->push_back((*Entries)[NumEntries - 1] + 1);
166 std::size_t NumValidationEntries =
static_cast<std::size_t
>(
fValidationSplit * NumEntries);
167 std::size_t NumTrainingEntries = NumEntries - NumValidationEntries;
178 std::size_t datasetEntry = 0;
179 for (std::size_t j = 0; j < NumEntries; j++) {
205 for (
auto &rdf :
f_rdfs) {
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
Loading chunks made in RDatasetLoader into tensors from data from RDataFrame.
void operator()(const ColTypes &...cols)
RFlat2DMatrix & fDatasetTensor
void AssignToTensor(const T &val, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of single values.
std::size_t fNumDatasetCols
std::vector< std::size_t > fMaxVecSizes
void AssignToTensor(const T &vec, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of vectors.
RDatasetLoaderFunctor(RFlat2DMatrix &datasetTensor, std::size_t numColumns, const std::vector< std::size_t > &maxVecSizes, float vecPadding, int i)
std::vector< std::size_t > fVecSizes
std::vector< ROOT::RDF::RNode > f_rdfs
void SplitDataframe(ROOT::RDF::RNode &rdf, RFlat2DMatrix &TrainingDataset, RFlat2DMatrix &ValidationDataset)
Split an individual dataframe into a training and validation dataset.
std::size_t GetNumValidationEntries()
std::vector< RFlat2DMatrix > fValidationDatasets
std::unique_ptr< RFlat2DMatrixOperators > fTensorOperators
std::vector< RFlat2DMatrix > GetTrainingDatasets()
std::size_t fNumValidationEntries
void SplitDatasets()
Split the dataframes into a training and a validation dataset.
std::vector< std::string > fCols
std::vector< RFlat2DMatrix > GetValidationDatasets()
RFlat2DMatrix GetValidationDataset()
std::vector< RFlat2DMatrix > fTrainingDatasets
RFlat2DMatrix GetTrainingDataset()
ROOT::RDF::RResultPtr< std::vector< ULong64_t > > fEntries
RDatasetLoader(const std::vector< ROOT::RDF::RNode > &rdfs, const float validationSplit, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0, bool shuffle=true, const std::size_t setSeed=0)
RFlat2DMatrix fValidationDataset
void ConcatenateDatasets()
Concatenate the datasets into one dataset.
std::size_t fNumTrainingEntries
RFlat2DMatrix fTrainingDataset
std::size_t fNumDatasetCols
std::size_t GetNumTrainingEntries()
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
Smart pointer for the return type of actions.
void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end)
RInterface<::ROOT::Detail::RDF::RNodeBase > RNode
Wrapper around ROOT::RVec<float> representing a 2D matrix.
std::size_t GetRows() const