Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDatasetSpec.cxx
Go to the documentation of this file.
1// Author: Vincenzo Eduardo Padulano CERN/UPV, Ivan Kabadzhov CERN 06/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12#include <stdexcept> // std::logic_error
13
14namespace ROOT {
15
16namespace RDF {
17
18namespace Experimental {
19
21
23
24RDatasetSpec::REntryRange::REntryRange(Long64_t begin, Long64_t end) : fBegin(begin), fEnd(end)
25{
26 if (fBegin > fEnd)
27 throw std::logic_error("The starting entry cannot be larger than the ending entry in the "
28 "creation of a dataset specification.");
29}
30
31////////////////////////////////////////////////////////////////////////////////
32/// \brief Returns the collection of the dataset's sample names.
33const std::vector<std::string> RDatasetSpec::GetSampleNames() const
34{
35 std::vector<std::string> sampleNames;
36 sampleNames.reserve(fSamples.size());
37 for (const auto &sample : fSamples)
38 sampleNames.emplace_back(sample.GetSampleName());
39 return sampleNames;
40}
41
42////////////////////////////////////////////////////////////////////////////////
43/// \brief Returns the collection of the dataset's tree names.
44const std::vector<std::string> RDatasetSpec::GetTreeNames() const
45{
46 std::vector<std::string> treeNames;
47 for (const auto &sample : fSamples) {
48 const auto &trees = sample.GetTreeNames();
49 treeNames.insert(std::end(treeNames), std::begin(trees), std::end(trees));
50 }
51 return treeNames;
52}
53////////////////////////////////////////////////////////////////////////////////
54/// \brief Returns the collection of the dataset's paths to files, or globs if specified in input.
55const std::vector<std::string> RDatasetSpec::GetFileNameGlobs() const
56{
57 std::vector<std::string> fileNames;
58 for (const auto &sample : fSamples) {
59 const auto &files = sample.GetFileNameGlobs();
60 fileNames.insert(std::end(fileNames), std::begin(files), std::end(files));
61 }
62 return fileNames;
63}
64////////////////////////////////////////////////////////////////////////////////
65/// \brief Returns the collection of the dataset's metadata (RMetaData class objects).
66const std::vector<RMetaData> RDatasetSpec::GetMetaData() const
67{
68 std::vector<RMetaData> metaDatas;
69 metaDatas.reserve(fSamples.size());
70 for (const auto &sample : fSamples)
71 metaDatas.emplace_back(sample.GetMetaData());
72 return metaDatas;
73}
74
75////////////////////////////////////////////////////////////////////////////////
76/// \brief Returns the reference to the friend tree information.
78{
79 return fFriendInfo;
80}
81
82////////////////////////////////////////////////////////////////////////////////
83/// \brief Returns the first entry as defined by the global range provided in the specification.
84/// The first entry is inclusive.
86{
87 return fEntryRange.fBegin;
88}
89
90////////////////////////////////////////////////////////////////////////////////
91/// \brief Returns the last entry as defined by the global range provided in the specification.
92/// The last entry is exclusive.
94{
95 return fEntryRange.fEnd;
96}
97
98////////////////////////////////////////////////////////////////////////////////
99/// \brief Returns a collection of instances of the RSample class.
100/// RSample class represents a sample i.e. a grouping of trees (and their corresponding fileglobs) and, optionally, the
101/// sample's metadata.
102std::vector<RSample> RDatasetSpec::MoveOutSamples()
103{
104 return std::move(fSamples);
105}
106
107////////////////////////////////////////////////////////////////////////////////
108/// \brief Add sample (RSample class object) to the RDatasetSpec object.
109/// \param[in] sample RSample class object.
110/// RSample class represents a sample i.e. a grouping of trees (and their corresponding fileglobs) and, optionally, the
111/// sample's metadata.
112/// ### Example usage:
113/// Our goal is to create an RDataFrame from the RDatasetSpec object.
114/// In order to do that, we need to create an RSample object first.
115/// In order to make this example even fuller, before we create the RSample object,
116/// we also create the RMetaData object which will be associated with our RSample object.
117/// Note that adding this metadata information to the RSample object is optional.
118/// ~~~{.cpp}
119/// // Create the RMetaData object which will be used to create the RSample object.
120/// ROOT::RDF::Experimental::RMetaData meta;
121/// meta.Add("sample_name", "name");
122/// // Create the RSample object "mySample" with sample name "mySampleName", tree name "outputTree1",
123/// // file name "outputFile.root" and associated metadata information.
124/// ROOT::RDF::Experimental::RSample mySample("mySampleName", "outputTree1", "outputFile.root", meta);
125/// // Create the RDatasetSpec object to which we add the sample (RSample object).
126/// ROOT::RDF::Experimental::RDatasetSpec spec;
127/// spec.AddSample(mySample);
128/// // Finally, create an RDataFrame from the RDatasetSpec object.
129/// auto df = ROOT::RDataFrame(spec);
130/// ~~~
132{
133 sample.SetSampleId(fSamples.size());
134 fSamples.push_back(std::move(sample));
135 return *this;
136}
137
138////////////////////////////////////////////////////////////////////////////////
139/// \brief Add friend tree to RDatasetSpec object
140/// \param[in] treeName Name of the tree.
141/// \param[in] fileNameGlob Path to the file in which the tree is stored. Refer to TChain::Add for globbing rules.
142/// \param[in] alias Alias for this friend.
143///
144/// ### Example usage:
145/// ~~~{.cpp}
146/// ROOT::RDF::Experimental::RSample s("mySample", "outputTree1", "outputFile.root");
147/// ROOT::RDF::Experimental::RDatasetSpec spec;
148/// spec.AddSample(s);
149/// // Add friend tree "outputTree2" with the alias "myTreeFriend"
150/// spec.WithGlobalFriends("outputTree2", "outputFile.root", "myTreeFriend");
151/// auto df = ROOT::RDataFrame(spec);
152/// ~~~
154RDatasetSpec::WithGlobalFriends(const std::string &treeName, const std::string &fileNameGlob, const std::string &alias)
155{
156 fFriendInfo.AddFriend(treeName, fileNameGlob, alias);
157 return *this;
158}
159
160////////////////////////////////////////////////////////////////////////////////
161/// \brief Add friend tree to RDatasetSpec object
162///
163/// \param[in] treeName Name of the tree.
164/// \param[in] fileNameGlobs Collection of paths to the files in which the tree is stored. Refer to TChain::Add for
165/// globbing rules. \param[in] alias Alias for this friend.
167 const std::vector<std::string> &fileNameGlobs, const std::string &alias)
168{
169 fFriendInfo.AddFriend(treeName, fileNameGlobs, alias);
170 return *this;
171}
172
173////////////////////////////////////////////////////////////////////////////////
174/// \brief Add friend tree to RDatasetSpec object
175/// \param[in] treeAndFileNameGlobs Collection of pairs of paths to trees and paths to files.
176/// \param[in] alias Alias for this friend.
178RDatasetSpec::WithGlobalFriends(const std::vector<std::pair<std::string, std::string>> &treeAndFileNameGlobs,
179 const std::string &alias)
180{
181 fFriendInfo.AddFriend(treeAndFileNameGlobs, alias);
182 return *this;
183}
184
185////////////////////////////////////////////////////////////////////////////////
186/// \brief Add friend tree to RDatasetSpec object
187/// \param[in] treeNames Collection of paths to trees.
188/// \param[in] fileNameGlobs Collection of paths to files.
189/// \param[in] alias Alias for this friend.
190RDatasetSpec &RDatasetSpec::WithGlobalFriends(const std::vector<std::string> &treeNames,
191 const std::vector<std::string> &fileNameGlobs, const std::string &alias)
192{
193 if (treeNames.size() != 1 && treeNames.size() != fileNameGlobs.size())
194 throw std::logic_error("Mismatch between number of trees and file globs.");
195 std::vector<std::pair<std::string, std::string>> target;
196 target.reserve(fileNameGlobs.size());
197 for (auto i = 0u; i < fileNameGlobs.size(); ++i)
198 target.emplace_back(std::make_pair((treeNames.size() == 1u ? treeNames[0] : treeNames[i]), fileNameGlobs[i]));
200 return *this;
201}
202
203////////////////////////////////////////////////////////////////////////////////
204/// \brief Create an RDatasetSpec object for a given range of entries
205/// \param[in] entryRange
206///
207/// ## Example usage:
208/// ~~~{.cpp}
209/// ROOT::RDF::Experimental::RSample s("mySample", "outputTree1", "outputFile.root");
210/// ROOT::RDF::Experimental::RDatasetSpec spec;
211/// spec.AddSample(s);
212/// // Set the entries range to be processed: including entry 1 and excluding entry 10.
213/// spec.WithGlobalRange({1, 10});
214/// auto df = ROOT::RDataFrame(spec);
215/// ~~~
217{
218 fEntryRange = entryRange;
219 return *this;
220}
221
222} // namespace Experimental
223} // namespace RDF
224} // namespace ROOT
long long Long64_t
Definition RtypesCore.h:69
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t target
The dataset specification for RDataFrame.
const std::vector< std::string > GetFileNameGlobs() const
Returns the collection of the dataset's paths to files, or globs if specified in input.
std::vector< RSample > MoveOutSamples()
Returns a collection of instances of the RSample class.
REntryRange fEntryRange
Start (inclusive) and end (exclusive) entry for the dataset processing.
RDatasetSpec & WithGlobalFriends(const std::string &treeName, const std::string &fileNameGlob, const std::string &alias="")
Add friend tree to RDatasetSpec object.
const std::vector< std::string > GetTreeNames() const
Returns the collection of the dataset's tree names.
const ROOT::TreeUtils::RFriendInfo & GetFriendInfo() const
Returns the reference to the friend tree information.
Long64_t GetEntryRangeBegin() const
Returns the first entry as defined by the global range provided in the specification.
const std::vector< RMetaData > GetMetaData() const
Returns the collection of the dataset's metadata (RMetaData class objects).
RDatasetSpec & AddSample(RSample sample)
Add sample (RSample class object) to the RDatasetSpec object.
ROOT::TreeUtils::RFriendInfo fFriendInfo
List of friends.
RDatasetSpec & WithGlobalRange(const RDatasetSpec::REntryRange &entryRange={})
Create an RDatasetSpec object for a given range of entries.
std::vector< RSample > fSamples
List of samples.
const std::vector< std::string > GetSampleNames() const
Returns the collection of the dataset's sample names.
Long64_t GetEntryRangeEnd() const
Returns the last entry as defined by the global range provided in the specification.
Class representing a sample which is a grouping of trees and their fileglobs, and,...
Definition RSample.hxx:39
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Information about friend trees of a certain TTree or TChain object.
void AddFriend(const std::string &treeName, const std::string &fileNameGlob, const std::string &alias="", Long64_t nEntries=TTree::kMaxEntries, TVirtualIndex *indexInfo=nullptr)
Add information of a single friend.