Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFActionHelpers.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12#include "ROOT/RDF/Utils.hxx" // CacheLineStep
13#include "ROOT/RNTuple.hxx" // ValidateSnapshotRNTupleOutput
14
15namespace ROOT {
16namespace Internal {
17namespace RDF {
18
20{
21 *h = TStatistic();
22}
23// cannot safely re-initialize variations of the result, hence error out
25{
26 throw std::runtime_error(
27 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
28 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
29}
30
32{
33 h->SetDirectory(nullptr);
34}
36
37CountHelper::CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots)
38 : fResultCount(resultCount), fCounts(nSlots, 0)
39{
40}
41
42void CountHelper::Exec(unsigned int slot)
43{
44 fCounts[slot]++;
45}
46
47void CountHelper::Finalize()
48{
49 *fResultCount = 0;
50 for (auto &c : fCounts) {
51 *fResultCount += c;
52 }
53}
54
55ULong64_t &CountHelper::PartialUpdate(unsigned int slot)
56{
57 return fCounts[slot];
58}
59
60void BufferedFillHelper::UpdateMinMax(unsigned int slot, double v)
61{
62 auto &thisMin = fMin[slot * CacheLineStep<BufEl_t>()];
63 auto &thisMax = fMax[slot * CacheLineStep<BufEl_t>()];
64 thisMin = std::min(thisMin, v);
65 thisMax = std::max(thisMax, v);
66}
67
68BufferedFillHelper::BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots)
69 : fResultHist(h), fNSlots(nSlots), fBufSize(fgTotalBufSize / nSlots), fPartialHists(fNSlots),
70 fMin(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::max()),
71 fMax(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::lowest())
72{
73 fBuffers.reserve(fNSlots);
74 fWBuffers.reserve(fNSlots);
75 for (unsigned int i = 0; i < fNSlots; ++i) {
76 Buf_t v;
77 v.reserve(fBufSize);
78 fBuffers.emplace_back(v);
79 fWBuffers.emplace_back(v);
80 }
81}
82
83void BufferedFillHelper::Exec(unsigned int slot, double v)
84{
86 fBuffers[slot].emplace_back(v);
87}
88
89void BufferedFillHelper::Exec(unsigned int slot, double v, double w)
90{
92 fBuffers[slot].emplace_back(v);
93 fWBuffers[slot].emplace_back(w);
94}
95
96Hist_t &BufferedFillHelper::PartialUpdate(unsigned int slot)
97{
99 // TODO it is inefficient to re-create the partial histogram everytime the callback is called
100 // ideally we could incrementally fill it with the latest entries in the buffers
101 partialHist = std::make_unique<Hist_t>(*fResultHist);
102 auto weights = fWBuffers[slot].empty() ? nullptr : fWBuffers[slot].data();
103 partialHist->FillN(fBuffers[slot].size(), fBuffers[slot].data(), weights);
104 return *partialHist;
105}
106
107void BufferedFillHelper::Finalize()
108{
109 for (unsigned int i = 0; i < fNSlots; ++i) {
110 if (!fWBuffers[i].empty() && fBuffers[i].size() != fWBuffers[i].size()) {
111 throw std::runtime_error("Cannot fill weighted histogram with values in containers of different sizes.");
112 }
113 }
114
115 BufEl_t globalMin = *std::min_element(fMin.begin(), fMin.end());
116 BufEl_t globalMax = *std::max_element(fMax.begin(), fMax.end());
117
118 if (fResultHist->CanExtendAllAxes() && globalMin != std::numeric_limits<BufEl_t>::max() &&
119 globalMax != std::numeric_limits<BufEl_t>::lowest()) {
120 fResultHist->SetBins(fResultHist->GetNbinsX(), globalMin, globalMax);
121 }
122
123 for (unsigned int i = 0; i < fNSlots; ++i) {
124 auto weights = fWBuffers[i].empty() ? nullptr : fWBuffers[i].data();
125 fResultHist->FillN(fBuffers[i].size(), fBuffers[i].data(), weights);
126 }
127}
128
129MeanHelper::MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
131{
132}
133
134void MeanHelper::Exec(unsigned int slot, double v)
135{
136 fCounts[slot]++;
137 // Kahan Sum:
138 double y = v - fCompensations[slot];
139 double t = fSums[slot] + y;
140 fCompensations[slot] = (t - fSums[slot]) - y;
141 fSums[slot] = t;
142}
143
144void MeanHelper::Finalize()
145{
146 double sumOfSums = 0;
147 // Kahan Sum:
148 double compensation(0);
149 double y(0);
150 double t(0);
151 for (auto &m : fSums) {
152 y = m - compensation;
153 t = sumOfSums + y;
154 compensation = (t - sumOfSums) - y;
155 sumOfSums = t;
156 }
158 for (auto &c : fCounts)
159 sumOfCounts += c;
161}
162
163double &MeanHelper::PartialUpdate(unsigned int slot)
164{
165 fPartialMeans[slot] = fSums[slot] / fCounts[slot];
166 return fPartialMeans[slot];
167}
168
169StdDevHelper::StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
170 : fNSlots(nSlots), fResultStdDev(meanVPtr), fCounts(nSlots, 0), fMeans(nSlots, 0), fDistancesfromMean(nSlots, 0)
171{
172}
173
174void StdDevHelper::Exec(unsigned int slot, double v)
175{
176 // Applies the Welford's algorithm to the stream of values received by the thread
177 auto count = ++fCounts[slot];
178 auto delta = v - fMeans[slot];
179 auto mean = fMeans[slot] + delta / count;
180 auto delta2 = v - mean;
181 auto distance = fDistancesfromMean[slot] + delta * delta2;
182
183 fCounts[slot] = count;
184 fMeans[slot] = mean;
186}
187
188void StdDevHelper::Finalize()
189{
190 // Evaluates and merges the partial result of each set of data to get the overall standard deviation.
191 double totalElements = 0;
192 for (auto c : fCounts) {
193 totalElements += c;
194 }
195 if (totalElements == 0 || totalElements == 1) {
196 // Std deviation is not defined for 1 element.
197 *fResultStdDev = 0;
198 return;
199 }
200
201 double overallMean = 0;
202 for (unsigned int i = 0; i < fNSlots; ++i) {
203 overallMean += fCounts[i] * fMeans[i];
204 }
206
207 double variance = 0;
208 for (unsigned int i = 0; i < fNSlots; ++i) {
209 if (fCounts[i] == 0) {
210 continue;
211 }
212 auto setVariance = fDistancesfromMean[i] / (fCounts[i]);
213 variance += (fCounts[i]) * (setVariance + std::pow((fMeans[i] - overallMean), 2));
214 }
215
217 *fResultStdDev = std::sqrt(variance);
218}
219
220// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
221#if __GNUC__ > 5
231#endif
232
234 const std::string &fileName)
235{
236 TString fileMode = opts.fMode;
237 fileMode.ToLower();
238 if (fileMode != "update")
239 return;
240
241 // output file opened in "update" mode: must check whether output TTree is already present in file
242 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
243 if (!outFile || outFile->IsZombie())
244 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
245
246 TObject *outTree = outFile->Get(treeName.c_str());
247 if (outTree == nullptr)
248 return;
249
250 // object called treeName is already present in the file
251 if (opts.fOverwriteIfExists) {
252 if (outTree->InheritsFrom("TTree")) {
253 static_cast<TTree *>(outTree)->Delete("all");
254 } else {
255 outFile->Delete(treeName.c_str());
256 }
257 } else {
258 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
259 "\". If you want to delete the original tree and write another, please set "
260 "RSnapshotOptions::fOverwriteIfExists to true.";
261 throw std::invalid_argument(msg);
262 }
263}
264
266 const std::string &fileName)
267{
268 TString fileMode = opts.fMode;
269 fileMode.ToLower();
270 if (fileMode != "update")
271 return;
272
273 // output file opened in "update" mode: must check whether output RNTuple is already present in file
274 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
275 if (!outFile || outFile->IsZombie())
276 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
277
278 auto *outNTuple = outFile->Get<ROOT::RNTuple>(ntupleName.c_str());
279
280 if (outNTuple) {
281 if (opts.fOverwriteIfExists) {
282 outFile->Delete((ntupleName + ";*").c_str());
283 return;
284 } else {
285 const std::string msg = "Snapshot: RNTuple \"" + ntupleName + "\" already present in file \"" + fileName +
286 "\". If you want to delete the original ntuple and write another, please set "
287 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
288 throw std::invalid_argument(msg);
289 }
290 }
291
292 // Also check if there is any object other than an RNTuple with the provided ntupleName.
293 TObject *outObj = outFile->Get(ntupleName.c_str());
294
295 if (!outObj)
296 return;
297
298 // An object called ntupleName is already present in the file.
299 if (opts.fOverwriteIfExists) {
300 if (auto tree = dynamic_cast<TTree *>(outObj)) {
301 tree->Delete("all");
302 } else {
303 outFile->Delete((ntupleName + ";*").c_str());
304 }
305 } else {
306 const std::string msg = "Snapshot: object \"" + ntupleName + "\" already present in file \"" + fileName +
307 "\". If you want to delete the original object and write a new RNTuple, please set "
308 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
309 throw std::invalid_argument(msg);
310 }
311}
312
313} // end NS RDF
314} // end NS Internal
315} // end NS ROOT
316
317namespace {
318void CreateCStyleArrayBranch(TTree *inputTree, TTree &outputTree, ROOT::Internal::RDF::RBranchSet &outputBranches,
319 const std::string &inputBranchName, const std::string &outputBranchName, int basketSize)
320{
321 TBranch *inputBranch = nullptr;
322 if (inputTree) {
323 inputBranch = inputTree->GetBranch(inputBranchName.c_str());
324 if (!inputBranch) // try harder
325 inputBranch = inputTree->FindBranch(inputBranchName.c_str());
326 }
327 if (!inputBranch)
328 return;
329 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
330 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
331 return;
332 // must construct the leaflist for the output branch and create the branch in the output tree
333 const auto *leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
334 if (!leaf)
335 return;
336 const auto bname = leaf->GetName();
337 auto *sizeLeaf = leaf->GetLeafCount();
338 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
339
340 // We proceed only if branch is a fixed-or-variable-sized array
341 if (sizeLeaf || leaf->GetLenStatic() > 1) {
342 if (sizeLeaf && !outputBranches.Get(sizeLeafName)) {
343 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
344 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
345 // branch to be successful. So we create the size leaf here.
347 // Use Original basket size for Existing Branches otherwise use Custom basket Size.
348 const auto bufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
349 auto *outputBranch = outputTree.Branch(sizeLeafName.c_str(), static_cast<void *>(nullptr),
350 (sizeLeafName + '/' + sizeTypeStr).c_str(), bufSize);
352 }
353
354 const auto btype = leaf->GetTypeName();
356 if (rootbtype == ' ') {
357 Warning("Snapshot",
358 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
359 "leaf is of type '%s'. This column will not be written out.",
360 bname, btype);
361 return;
362 }
363
364 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
365 // Use original basket size for existing branches and new basket size for new branches
366 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
367 auto *outputBranch =
368 outputTree.Branch(outputBranchName.c_str(), static_cast<void *>(nullptr), leaflist.c_str(), bufSize);
369 outputBranch->SetTitle(inputBranch->GetTitle());
371 }
372}
373} // namespace
374
375void ROOT::Internal::RDF::SetEmptyBranchesHelper(TTree *inputTree, TTree &outputTree,
376 ROOT::Internal::RDF::RBranchSet &outputBranches,
377 const std::string &inputBranchName,
378 const std::string &outputBranchName, const std::type_info &typeInfo,
379 int basketSize)
380{
381 const auto bufSize = (basketSize > 0) ? basketSize : 32000;
383 if (!classPtr) {
384 // Case of a leaflist of fundamental type, logic taken from
385 // TTree::BranchImpRef(const char* branchname, TClass* ptrClass, EDataType datatype, void* addobj, Int_t bufsize,
386 // Int_t splitlevel)
389 if (rootTypeChar == ' ') {
390 Warning(
391 "Snapshot",
392 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
393 "column will not be written out.",
394 outputBranchName.c_str());
395 return;
396 }
397 std::string leafList{outputBranchName + '/' + rootTypeChar};
398 auto *outputBranch =
399 outputTree.Branch(outputBranchName.c_str(), static_cast<void *>(nullptr), leafList.c_str(), bufSize);
401 return;
402 }
403
404 // Find if there is an input branch, check for cases where we need a leaflist (e.g. C-style arrays)
406
407 // General case
409 auto *outputBranch = outputTree.Branch(outputBranchName.c_str(), classPtr->GetName(), nullptr, bufSize);
411 }
412}
#define c(i)
Definition RSha256.hxx:101
#define h(i)
Definition RSha256.hxx:106
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
Definition RtypesCore.h:70
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:229
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:65
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3074
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4116
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:108
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Mother of all ROOT objects.
Definition TObject.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
Basic string class.
Definition TString.h:139
A TTree represents a columnar dataset.
Definition TTree.h:84
Double_t y[n]
Definition legend1.C:17
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:269
void ResetIfPossible(TStatistic *h)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:129
constexpr std::size_t CacheLineStep()
Stepping through CacheLineStep<T> values in a vector<T> brings you to a new cache line.
Definition Utils.hxx:232
void EnsureValidSnapshotRNTupleOutput(const RSnapshotOptions &opts, const std::string &ntupleName, const std::string &fileName)
void EnsureValidSnapshotTTreeOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
void UnsetDirectoryIfPossible(TH1 *h)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
A collection of options to steer the creation of the dataset on file.
TMarker m
Definition textangle.C:8