19#ifndef ROOT_RDFOPERATIONS
20#define ROOT_RDFOPERATIONS
75 std::vector<TBranch *> fBranches;
76 std::vector<std::string> fNames;
81 auto it = std::find(fNames.begin(), fNames.end(),
name);
82 if (it == fNames.end())
84 return fBranches[std::distance(fNames.begin(), it)];
87 void Insert(
const std::string &
name,
TBranch *address)
89 if (address ==
nullptr) {
90 throw std::logic_error(
"Trying to insert a null branch address.");
92 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
93 throw std::logic_error(
"Trying to insert a branch address that's already present.");
95 if (std::find(fNames.begin(), fNames.end(),
name) != fNames.end()) {
96 throw std::logic_error(
"Trying to insert a branch name that's already present.");
98 fNames.emplace_back(
name);
99 fBranches.emplace_back(address);
112 [](
TBranch *
b) { return b->GetAddress() == nullptr; });
121 std::string
msg =
"RDataFrame::Snapshot:";
124 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
130 msg.resize(
msg.size() - 2);
132 " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
134 msg +=
" not part of the set of branches that are being written out.";
135 throw std::runtime_error(
msg);
144using Results = std::conditional_t<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>;
158 template <
typename... Args>
159 void Exec(
unsigned int slot, Args &&... args)
162 static_assert(std::is_same<TypeList<std::decay_t<Args>...>, ColumnTypes_t>
::value,
"");
170 std::string GetActionName() {
return "ForeachSlot"; }
190 return std::make_unique<RMergeableCount>(*
fResultCount);
195 std::string GetActionName() {
return "Count"; }
204template <
typename RNode_t>
206 std::shared_ptr<RCutFlowReport>
fReport;
219 void Exec(
unsigned int ) {}
227 std::string GetActionName() {
return "Report"; }
243 using Buf_t = std::vector<BufEl_t>;
248 unsigned int fNSlots;
249 unsigned int fBufSize;
263 void Exec(
unsigned int slot,
double v,
double w);
277 void Exec(
unsigned int slot,
const T &
vs,
const W &ws)
306 void Exec(
unsigned int slot,
const T
v,
const W &ws)
316 Hist_t &PartialUpdate(
unsigned int);
325 return std::make_unique<RMergeableFill<Hist_t>>(*fResultHist);
328 std::string GetActionName()
337 result->SetDirectory(
nullptr);
344template <
typename HIST = Hist_t>
346 std::vector<HIST *> fObjects;
359 throw std::runtime_error(
360 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
361 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
365 h->SetDirectory(
nullptr);
372 auto Merge(std::vector<H *> &
objs,
int )
382 template <
typename H>
383 auto Merge(std::vector<H *> &
objs,
double )
384 ->
decltype(
objs[0]->Merge(std::vector<HIST *>{}),
void())
390 template <
typename T>
393 static_assert(
sizeof(T) < 0,
394 "The type passed to Fill does not provide a Merge(TCollection*) or Merge(const std::vector&) method.");
398 template <
typename T>
423 return std::begin(val);
428 std::size_t GetSize(
const T &)
435 std::size_t GetSize(
const T &val)
437#if __cplusplus >= 201703L
438 return std::size(val);
444 template <std::size_t
ColIdx,
typename End_t,
typename...
Its>
450 auto nop = [](
auto &&...) {};
462 fObjects[0] =
h.get();
464 for (
unsigned int i = 1; i <
nSlots; ++i) {
465 fObjects[i] =
new HIST(*fObjects[0]);
473 template <
typename...
ValTypes, std::enable_if_t<!Disjunction<IsDataContainer<ValTypes>...>
::value,
int> = 0>
476 fObjects[
slot]->Fill(
x...);
480 template <
typename...
Xs, std::enable_if_t<Disjunction<IsDataContainer<Xs>...>
::value,
int> = 0>
484 constexpr std::array<
bool,
sizeof...(Xs)>
isContainer{IsDataContainer<Xs>::value...};
489 static_assert(
colidx <
sizeof...(Xs),
"Error: index of collection-type argument not found.");
495 std::array<std::size_t,
sizeof...(xs)>
sizes = {{GetSize(
xs)...}};
497 for (std::size_t i = 0; i <
sizeof...(xs); ++i) {
499 throw std::runtime_error(
"Cannot fill histogram with values in containers of different sizes.");
506 template <
typename T = HIST>
509 static_assert(
sizeof(T) < 0,
510 "When filling an object with RDataFrame (e.g. via a Fill action) the number or types of the "
511 "columns passed did not match the signature of the object's `Fill` method.");
518 if (fObjects.size() == 1)
524 for (
auto it = ++fObjects.begin(); it != fObjects.end(); ++it)
528 HIST &PartialUpdate(
unsigned int slot) {
return *fObjects[
slot]; }
533 return std::make_unique<RMergeableFill<HIST>>(*fObjects[0]);
538 std::string GetActionName()
540 return std::string(fObjects[0]->
IsA()->GetName()) +
"\n" + std::string(fObjects[0]->GetName());
545 std::string GetActionName()
547 return "Fill custom object";
550 template <
typename H = HIST>
565 std::vector<::TGraph *> fGraphs;
573 fGraphs[0] =
g.get();
575 for (
unsigned int i = 1; i <
nSlots; ++i) {
576 fGraphs[i] =
new TGraph(*fGraphs[0]);
584 template <
typename X0,
typename X1,
585 std::enable_if_t<IsDataContainer<X0>::value && IsDataContainer<X1>::value,
int> = 0>
588 if (
x0s.size() !=
x1s.size()) {
589 throw std::runtime_error(
"Cannot fill Graph with values in containers of different sizes.");
601 template <
typename X0,
typename X1,
602 std::enable_if_t<!IsDataContainer<X0>::value && !IsDataContainer<X1>::value,
int> = 0>
614 throw std::runtime_error(
"Graph was applied to a mix of scalar values and collections. This is not supported.");
619 const auto nSlots = fGraphs.size();
624 l.Add(fGraphs[
slot]);
632 return std::make_unique<RMergeableFill<Result_t>>(*fGraphs[0]);
635 std::string GetActionName() {
return "Graph"; }
637 Result_t &PartialUpdate(
unsigned int slot) {
return *fGraphs[
slot]; }
664 for (
unsigned int i = 1; i <
nSlots; ++i) {
674 typename X,
typename Y,
typename EXL,
typename EXH,
typename EYL,
typename EYH,
675 std::enable_if_t<IsDataContainer<X>::value && IsDataContainer<Y>::value && IsDataContainer<EXL>::value &&
676 IsDataContainer<EXH>::value && IsDataContainer<EYL>::value && IsDataContainer<EYH>::value,
681 if ((
xs.size() !=
ys.size()) || (
xs.size() !=
exls.size()) || (
xs.size() !=
exhs.size()) ||
682 (
xs.size() !=
eyls.size()) || (
xs.size() !=
eyhs.size())) {
683 throw std::runtime_error(
"Cannot fill GraphAsymmErrors with values in containers of different sizes.");
686 auto xsIt = std::begin(
xs);
687 auto ysIt = std::begin(
ys);
692 while (
xsIt != std::end(
xs)) {
701 typename X,
typename Y,
typename EXL,
typename EXH,
typename EYL,
typename EYH,
702 std::enable_if_t<!IsDataContainer<X>::value && !IsDataContainer<Y>::value && !IsDataContainer<EXL>::value &&
703 !IsDataContainer<EXH>::value && !IsDataContainer<EYL>::value && !IsDataContainer<EYH>::value,
715 template <
typename X,
typename Y,
typename EXL,
typename EXH,
typename EYL,
typename EYH,
719 throw std::runtime_error(
720 "GraphAsymmErrors was applied to a mix of scalar values and collections. This is not supported.");
741 std::string GetActionName() {
return "GraphAsymmErrors"; }
747 auto &
result = *
static_cast<std::shared_ptr<TGraphAsymmErrors> *
>(
newResult);
759template <
typename V,
typename COLL>
765template <
typename COLL>
772template <
typename RealT_t,
typename T,
typename COLL>
781 for (
unsigned int i = 1; i <
nSlots; ++i)
782 fColls.emplace_back(std::make_shared<COLL>());
796 for (
unsigned int i = 1; i <
fColls.size(); ++i) {
809 std::string GetActionName() {
return "Take"; }
821template <
typename RealT_t,
typename T>
823 :
public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
831 for (
unsigned int i = 1; i <
nSlots; ++i) {
832 auto v = std::make_shared<std::vector<T>>();
854 for (
unsigned int i = 1; i <
fColls.size(); ++i) {
860 std::vector<T> &PartialUpdate(
unsigned int slot) {
return *
fColls[
slot]; }
862 std::string GetActionName() {
return "Take"; }
866 auto &
result = *
static_cast<std::shared_ptr<std::vector<T>
> *>(
newResult);
874template <
typename RealT_t,
typename COLL>
876 :
public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
884 for (
unsigned int i = 1; i <
nSlots; ++i)
885 fColls.emplace_back(std::make_shared<COLL>());
899 for (
unsigned int i = 1; i <
fColls.size(); ++i) {
901 for (
auto &
v : *
coll) {
907 std::string GetActionName() {
return "Take"; }
919template <
typename RealT_t>
921 :
public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
930 for (
unsigned int i = 1; i <
nSlots; ++i) {
931 auto v = std::make_shared<std::vector<RealT_t>>();
953 for (
unsigned int i = 1; i <
fColls.size(); ++i) {
959 std::string GetActionName() {
return "Take"; }
972template <
typename RealT_t,
typename T,
typename COLL>
974template <
typename RealT_t,
typename T>
976template <
typename RealT_t,
typename COLL>
978template <
typename RealT_t>
994template <
typename ResultType>
1021 *
fResultMin = std::numeric_limits<ResultType>::max();
1029 return std::make_unique<RMergeableMin<ResultType>>(*fResultMin);
1034 std::string GetActionName() {
return "Min"; }
1038 auto &
result = *
static_cast<std::shared_ptr<ResultType> *
>(
newResult);
1050template <
typename ResultType>
1077 *
fResultMax = std::numeric_limits<ResultType>::lowest();
1086 return std::make_unique<RMergeableMax<ResultType>>(*fResultMax);
1091 std::string GetActionName() {
return "Max"; }
1095 auto &
result = *
static_cast<std::shared_ptr<ResultType> *
>(
newResult);
1107template <
typename ResultType>
1116 template <
typename T = ResultType>
1122 template <
typename T = ResultType,
typename Dummy =
int>
1150 for (
auto &&
v :
vs) {
1176 return std::make_unique<RMergeableSum<ResultType>>(*fResultSum);
1181 std::string GetActionName() {
return "Sum"; }
1185 auto &
result = *
static_cast<std::shared_ptr<ResultType> *
>(
newResult);
1193 std::vector<ULong64_t> fCounts;
1194 std::vector<double>
fSums;
1208 for (
auto &&
v :
vs) {
1226 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1230 double &PartialUpdate(
unsigned int slot);
1232 std::string GetActionName() {
return "Mean"; }
1241extern template void MeanHelper::Exec(
unsigned int,
const std::vector<float> &);
1242extern template void MeanHelper::Exec(
unsigned int,
const std::vector<double> &);
1243extern template void MeanHelper::Exec(
unsigned int,
const std::vector<char> &);
1244extern template void MeanHelper::Exec(
unsigned int,
const std::vector<int> &);
1245extern template void MeanHelper::Exec(
unsigned int,
const std::vector<unsigned int> &);
1249 unsigned int fNSlots;
1252 std::vector<ULong64_t> fCounts;
1254 std::vector<double> fMeans;
1268 for (
auto &&
v :
vs) {
1280 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1282 std::inner_product(fMeans.begin(), fMeans.end(), fCounts.begin(), 0.) /
static_cast<Double_t>(
counts);
1286 std::string GetActionName() {
return "StdDev"; }
1295extern template void StdDevHelper::Exec(
unsigned int,
const std::vector<float> &);
1296extern template void StdDevHelper::Exec(
unsigned int,
const std::vector<double> &);
1297extern template void StdDevHelper::Exec(
unsigned int,
const std::vector<char> &);
1298extern template void StdDevHelper::Exec(
unsigned int,
const std::vector<int> &);
1299extern template void StdDevHelper::Exec(
unsigned int,
const std::vector<unsigned int> &);
1301template <
typename PrevNodeType>
1306 std::shared_ptr<PrevNodeType> fPrevNode;
1307 size_t fEntriesToProcess;
1310 DisplayHelper(
size_t nRows,
const std::shared_ptr<Display_t> &
d,
const std::shared_ptr<PrevNodeType> &prevNode)
1314 DisplayHelper(DisplayHelper &&) =
default;
1315 DisplayHelper(
const DisplayHelper &) =
delete;
1318 template <
typename...
Columns>
1321 if (fEntriesToProcess == 0)
1325 --fEntriesToProcess;
1327 if (fEntriesToProcess == 0) {
1332 fPrevNode->StopProcessing();
1340 std::string GetActionName() {
return "Display"; }
1343template <
typename T>
1349template <
typename T>
1355template <
typename T>
1418template <
typename T>
1436 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1437 "be written out as a RVec instead of a TClonesArray. Specify that the type of the branch is "
1438 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.",
1476 const auto bname =
leaf->GetName();
1492 const auto btype =
leaf->GetTypeName();
1496 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. This "
1497 "column will not be written out.",
1515 std::string fFileName;
1516 std::string fDirName;
1517 std::string fTreeName;
1519 std::unique_ptr<TFile> fOutputFile;
1526 std::vector<TBranch *> fBranches;
1527 std::vector<void *> fBranchAddresses;
1529 std::vector<bool> fIsDefine;
1547 if (!fTreeName.empty() && !fOutputFile && fOptions.
fLazy)
1548 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
1570 template <std::size_t...
S>
1579 int expander[] = {(fBranches[
S] && fBranchAddresses[
S] != GetData(values)
1580 ? fBranches[
S]->SetAddress(GetData(values)),
1581 fBranchAddresses[
S] = GetData(values), 0 : 0, 0)...,
1586 template <std::size_t...
S>
1591 fBranches[S], fBranchAddresses[S], &values,
fOutputBranches, fIsDefine[S]),
1604 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
1607 if (!fDirName.empty()) {
1611 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
1613 outputDir = fOutputFile->mkdir(fDirName.c_str());
1617 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.
fSplitLevel,
outputDir);
1626 assert(fOutputFile !=
nullptr);
1632 fOutputFile->Close();
1635 std::string GetActionName() {
return "Snapshot"; }
1655 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
1664 unsigned int fNSlots;
1665 std::unique_ptr<ROOT::TBufferMerger> fMerger;
1666 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>>
fOutputFiles;
1669 std::string fFileName;
1670 std::string fDirName;
1671 std::string fTreeName;
1675 std::vector<TTree *> fInputTrees;
1677 std::vector<std::vector<TBranch *>> fBranches;
1679 std::vector<std::vector<void *>> fBranchAddresses;
1681 std::vector<bool> fIsDefine;
1701 if (!fTreeName.empty() && fOptions.
fLazy &&
1703 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
1714 if (!fDirName.empty()) {
1729 fInputTrees[
slot] =
r->GetTree();
1759 template <std::size_t...
S>
1768 int expander[] = {(fBranches[
slot][
S] && fBranchAddresses[
slot][
S] != GetData(values)
1769 ? fBranches[
slot][
S]->SetAddress(GetData(values)),
1770 fBranchAddresses[
slot][
S] = GetData(values), 0 : 0, 0)...,
1776 template <std::size_t...
S>
1794 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
1795 fMerger = std::make_unique<ROOT::TBufferMerger>(std::unique_ptr<TFile>(
out_file));
1813 "No input entries (input TTree was empty or no entry passed the Filters). Output TTree is empty.");
1821 std::string GetActionName() {
return "Snapshot"; }
1841 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
1847template <
typename Acc,
typename Merge,
typename R,
typename T,
typename U,
1850 :
public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1853 std::shared_ptr<U> fResult;
1874 template <
bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<MustCopyAssign_,
int> = 0>
1880 template <
bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<!MustCopyAssign_,
int> = 0>
1889 bool MergeAll = std::is_same<void, MergeRet>::value>
1890 std::enable_if_t<MergeAll, void> Finalize()
1898 std::enable_if_t<MergeTwoByTwo, void> Finalize(...)
1901 *fResult = fMerge(*fResult,
acc);
1906 std::string GetActionName() {
return "Aggregate"; }
Handle_t Display_t
Display handle.
#define R(a, b, c, d, e, f, g, h, i)
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
#define R__CLING_PTRCHECK(ONOFF)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
TClass * IsA() const override
TTime operator*(const TTime &t1, const TTime &t2)
Base class for action helpers, see RInterface::Book() for more information.
This class is the textual representation of the content of a columnar dataset.
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
A TTree is a list of TBranches.
TClassRef is used to implement a permanent reference to a TClass object.
Collection abstract base class.
TDirectory::TContext keeps track and restore the current directory.
Describe directory structure in memory.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
TGraph with asymmetric error bars.
A TGraph is an object made of two arrays X and Y with npoints each.
1-D histogram with a double per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Statistical variable, defined by its mean and variance (RMS).
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
RooCmdArg Columns(Int_t ncol)
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
constexpr std::size_t FindIdxTrue(const T &arr)
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with an RDataFrame computation graph via e....
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
RooArgSet S(Args_t &&... args)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
bool fLazy
Do not start the event loop when Snapshot is called.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
static uint64_t sum(uint64_t i)