Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFActionHelpers.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12#include "ROOT/RDF/Utils.hxx" // CacheLineStep
13
14namespace ROOT {
15namespace Internal {
16namespace RDF {
17
18CountHelper::CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots)
19 : fResultCount(resultCount), fCounts(nSlots, 0)
20{
21}
22
23void CountHelper::Exec(unsigned int slot)
24{
25 fCounts[slot]++;
26}
27
28void CountHelper::Finalize()
29{
30 *fResultCount = 0;
31 for (auto &c : fCounts) {
32 *fResultCount += c;
33 }
34}
35
36ULong64_t &CountHelper::PartialUpdate(unsigned int slot)
37{
38 return fCounts[slot];
39}
40
41void BufferedFillHelper::UpdateMinMax(unsigned int slot, double v)
42{
43 auto &thisMin = fMin[slot * CacheLineStep<BufEl_t>()];
44 auto &thisMax = fMax[slot * CacheLineStep<BufEl_t>()];
45 thisMin = std::min(thisMin, v);
46 thisMax = std::max(thisMax, v);
47}
48
49BufferedFillHelper::BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots)
50 : fResultHist(h), fNSlots(nSlots), fBufSize(fgTotalBufSize / nSlots), fPartialHists(fNSlots),
51 fMin(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::max()),
52 fMax(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::lowest())
53{
54 fBuffers.reserve(fNSlots);
55 fWBuffers.reserve(fNSlots);
56 for (unsigned int i = 0; i < fNSlots; ++i) {
57 Buf_t v;
58 v.reserve(fBufSize);
59 fBuffers.emplace_back(v);
60 fWBuffers.emplace_back(v);
61 }
62}
63
64void BufferedFillHelper::Exec(unsigned int slot, double v)
65{
66 UpdateMinMax(slot, v);
67 fBuffers[slot].emplace_back(v);
68}
69
70void BufferedFillHelper::Exec(unsigned int slot, double v, double w)
71{
72 UpdateMinMax(slot, v);
73 fBuffers[slot].emplace_back(v);
74 fWBuffers[slot].emplace_back(w);
75}
76
77Hist_t &BufferedFillHelper::PartialUpdate(unsigned int slot)
78{
79 auto &partialHist = fPartialHists[slot];
80 // TODO it is inefficient to re-create the partial histogram everytime the callback is called
81 // ideally we could incrementally fill it with the latest entries in the buffers
82 partialHist = std::make_unique<Hist_t>(*fResultHist);
83 auto weights = fWBuffers[slot].empty() ? nullptr : fWBuffers[slot].data();
84 partialHist->FillN(fBuffers[slot].size(), fBuffers[slot].data(), weights);
85 return *partialHist;
86}
87
88void BufferedFillHelper::Finalize()
89{
90 for (unsigned int i = 0; i < fNSlots; ++i) {
91 if (!fWBuffers[i].empty() && fBuffers[i].size() != fWBuffers[i].size()) {
92 throw std::runtime_error("Cannot fill weighted histogram with values in containers of different sizes.");
93 }
94 }
95
96 BufEl_t globalMin = *std::min_element(fMin.begin(), fMin.end());
97 BufEl_t globalMax = *std::max_element(fMax.begin(), fMax.end());
98
99 if (fResultHist->CanExtendAllAxes() && globalMin != std::numeric_limits<BufEl_t>::max() &&
100 globalMax != std::numeric_limits<BufEl_t>::lowest()) {
101 fResultHist->SetBins(fResultHist->GetNbinsX(), globalMin, globalMax);
102 }
103
104 for (unsigned int i = 0; i < fNSlots; ++i) {
105 auto weights = fWBuffers[i].empty() ? nullptr : fWBuffers[i].data();
106 fResultHist->FillN(fBuffers[i].size(), fBuffers[i].data(), weights);
107 }
108}
109
110MeanHelper::MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
111 : fResultMean(meanVPtr), fCounts(nSlots, 0), fSums(nSlots, 0), fPartialMeans(nSlots), fCompensations(nSlots)
112{
113}
114
115void MeanHelper::Exec(unsigned int slot, double v)
116{
117 fCounts[slot]++;
118 // Kahan Sum:
119 double y = v - fCompensations[slot];
120 double t = fSums[slot] + y;
121 fCompensations[slot] = (t - fSums[slot]) - y;
122 fSums[slot] = t;
123}
124
125void MeanHelper::Finalize()
126{
127 double sumOfSums = 0;
128 // Kahan Sum:
129 double compensation(0);
130 double y(0);
131 double t(0);
132 for (auto &m : fSums) {
133 y = m - compensation;
134 t = sumOfSums + y;
135 compensation = (t - sumOfSums) - y;
136 sumOfSums = t;
137 }
138 ULong64_t sumOfCounts = 0;
139 for (auto &c : fCounts)
140 sumOfCounts += c;
141 *fResultMean = sumOfSums / (sumOfCounts > 0 ? sumOfCounts : 1);
142}
143
144double &MeanHelper::PartialUpdate(unsigned int slot)
145{
146 fPartialMeans[slot] = fSums[slot] / fCounts[slot];
147 return fPartialMeans[slot];
148}
149
150template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
151template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
152template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
153template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
154template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
155
156StdDevHelper::StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
157 : fNSlots(nSlots), fResultStdDev(meanVPtr), fCounts(nSlots, 0), fMeans(nSlots, 0), fDistancesfromMean(nSlots, 0)
158{
159}
160
161void StdDevHelper::Exec(unsigned int slot, double v)
162{
163 // Applies the Welford's algorithm to the stream of values received by the thread
164 auto count = ++fCounts[slot];
165 auto delta = v - fMeans[slot];
166 auto mean = fMeans[slot] + delta / count;
167 auto delta2 = v - mean;
168 auto distance = fDistancesfromMean[slot] + delta * delta2;
169
170 fCounts[slot] = count;
171 fMeans[slot] = mean;
172 fDistancesfromMean[slot] = distance;
173}
174
175void StdDevHelper::Finalize()
176{
177 // Evaluates and merges the partial result of each set of data to get the overall standard deviation.
178 double totalElements = 0;
179 for (auto c : fCounts) {
180 totalElements += c;
181 }
182 if (totalElements == 0 || totalElements == 1) {
183 // Std deviation is not defined for 1 element.
184 *fResultStdDev = 0;
185 return;
186 }
187
188 double overallMean = 0;
189 for (unsigned int i = 0; i < fNSlots; ++i) {
190 overallMean += fCounts[i] * fMeans[i];
191 }
192 overallMean = overallMean / totalElements;
193
194 double variance = 0;
195 for (unsigned int i = 0; i < fNSlots; ++i) {
196 if (fCounts[i] == 0) {
197 continue;
198 }
199 auto setVariance = fDistancesfromMean[i] / (fCounts[i]);
200 variance += (fCounts[i]) * (setVariance + std::pow((fMeans[i] - overallMean), 2));
201 }
202
203 variance = variance / (totalElements - 1);
204 *fResultStdDev = std::sqrt(variance);
205}
206
207template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
208template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
209template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
210template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
211template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
212
213// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
214#if __GNUC__ > 5
215template class TakeHelper<bool, bool, std::vector<bool>>;
216template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
217template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
218template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
219template class TakeHelper<int, int, std::vector<int>>;
220template class TakeHelper<long, long, std::vector<long>>;
221template class TakeHelper<long long, long long, std::vector<long long>>;
222template class TakeHelper<float, float, std::vector<float>>;
223template class TakeHelper<double, double, std::vector<double>>;
224#endif
225
226void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
227{
228 TString fileMode = opts.fMode;
229 fileMode.ToLower();
230 if (fileMode != "update")
231 return;
232
233 // output file opened in "update" mode: must check whether output TTree is already present in file
234 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
235 if (!outFile || outFile->IsZombie())
236 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
237
238 TObject *outTree = outFile->Get(treeName.c_str());
239 if (outTree == nullptr)
240 return;
241
242 // object called treeName is already present in the file
243 if (opts.fOverwriteIfExists) {
244 if (outTree->InheritsFrom("TTree")) {
245 static_cast<TTree *>(outTree)->Delete("all");
246 } else {
247 outFile->Delete(treeName.c_str());
248 }
249 } else {
250 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
251 "\". If you want to delete the original tree and write another, please set "
252 "RSnapshotOptions::fOverwriteIfExists to true.";
253 throw std::invalid_argument(msg);
254 }
255}
256
257} // end NS RDF
258} // end NS Internal
259} // end NS ROOT
#define c(i)
Definition RSha256.hxx:101
#define h(i)
Definition RSha256.hxx:106
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
Definition RtypesCore.h:81
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4089
Mother of all ROOT objects.
Definition TObject.h:41
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition TObject.cxx:525
Basic string class.
Definition TString.h:139
void ToLower()
Change string to lower-case.
Definition TString.cxx:1182
A TTree represents a columnar dataset.
Definition TTree.h:79
void Delete(Option_t *option="") override
Delete this tree from memory or/and disk.
Definition TTree.cxx:3747
Double_t y[n]
Definition legend1.C:17
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
constexpr std::size_t CacheLineStep()
Stepping through CacheLineStep<T> values in a vector<T> brings you to a new cache line.
Definition Utils.hxx:222
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
__device__ AFloat max(AFloat x, AFloat y)
Definition Kernels.cuh:207
A collection of options to steer the creation of the dataset on file.
std::string fMode
Mode of creation of output file.
bool fOverwriteIfExists
If fMode is "UPDATE", overwrite object in output file if it already exists.
TMarker m
Definition textangle.C:8