19#ifndef ROOT_RDFOPERATIONS
20#define ROOT_RDFOPERATIONS
75 std::vector<TBranch *> fBranches;
76 std::vector<std::string> fNames;
81 auto it = std::find(fNames.begin(), fNames.end(),
name);
82 if (it == fNames.end())
84 return fBranches[std::distance(fNames.begin(), it)];
87 void Insert(
const std::string &
name,
TBranch *address)
89 if (address ==
nullptr) {
90 throw std::logic_error(
"Trying to insert a null branch address.");
92 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
93 throw std::logic_error(
"Trying to insert a branch address that's already present.");
95 if (std::find(fNames.begin(), fNames.end(),
name) != fNames.end()) {
96 throw std::logic_error(
"Trying to insert a branch name that's already present.");
98 fNames.emplace_back(
name);
99 fBranches.emplace_back(address);
112 [](
TBranch *
b) { return b->GetAddress() == nullptr; });
121 std::string
msg =
"RDataFrame::Snapshot:";
124 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
130 msg.resize(
msg.size() - 2);
132 " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
134 msg +=
" not part of the set of branches that are being written out.";
135 throw std::runtime_error(
msg);
144using Results = std::conditional_t<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>;
158 template <
typename... Args>
159 void Exec(
unsigned int slot, Args &&... args)
162 static_assert(std::is_same<TypeList<std::decay_t<Args>...>, ColumnTypes_t>
::value,
"");
170 std::string GetActionName() {
return "ForeachSlot"; }
190 return std::make_unique<RMergeableCount>(*
fResultCount);
195 std::string GetActionName() {
return "Count"; }
204template <
typename RNode_t>
206 std::shared_ptr<RCutFlowReport>
fReport;
219 void Exec(
unsigned int ) {}
227 std::string GetActionName() {
return "Report"; }
243 using Buf_t = std::vector<BufEl_t>;
245 std::vector<Buf_t> fBuffers;
248 unsigned int fNSlots;
249 unsigned int fBufSize;
263 void Exec(
unsigned int slot,
double v,
double w);
277 void Exec(
unsigned int slot,
const T &
vs,
const W &ws)
306 void Exec(
unsigned int slot,
const T
v,
const W &ws)
316 Hist_t &PartialUpdate(
unsigned int);
325 return std::make_unique<RMergeableFill<Hist_t>>(*fResultHist);
328 std::string GetActionName()
337 result->SetDirectory(
nullptr);
344template <
typename HIST = Hist_t>
346 std::vector<HIST *> fObjects;
359 throw std::runtime_error(
360 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
361 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
365 h->SetDirectory(
nullptr);
372 auto Merge(std::vector<H *> &
objs,
int )
382 template <
typename H>
383 auto Merge(std::vector<H *> &
objs,
double )
384 ->
decltype(
objs[0]->Merge(std::vector<HIST *>{}),
void())
390 template <
typename T>
393 static_assert(
sizeof(T) < 0,
394 "The type passed to Fill does not provide a Merge(TCollection*) or Merge(const std::vector&) method.");
398 template <
typename T>
423 return std::begin(val);
428 std::size_t GetSize(
const T &)
435 std::size_t GetSize(
const T &val)
437#if __cplusplus >= 201703L
438 return std::size(val);
444 template <std::size_t
ColIdx,
typename End_t,
typename...
Its>
450 auto nop = [](
auto &&...) {};
462 fObjects[0] =
h.get();
464 for (
unsigned int i = 1; i <
nSlots; ++i) {
465 fObjects[i] =
new HIST(*fObjects[0]);
473 template <
typename...
ValTypes, std::enable_if_t<!Disjunction<IsDataContainer<ValTypes>...>
::value,
int> = 0>
476 fObjects[
slot]->Fill(
x...);
480 template <
typename...
Xs, std::enable_if_t<Disjunction<IsDataContainer<Xs>...>
::value,
int> = 0>
484 constexpr std::array<
bool,
sizeof...(Xs)>
isContainer{IsDataContainer<Xs>::value...};
489 static_assert(
colidx <
sizeof...(Xs),
"Error: index of collection-type argument not found.");
495 std::array<std::size_t,
sizeof...(xs)>
sizes = {{GetSize(
xs)...}};
497 for (std::size_t i = 0; i <
sizeof...(xs); ++i) {
499 throw std::runtime_error(
"Cannot fill histogram with values in containers of different sizes.");
506 template <
typename T = HIST>
509 static_assert(
sizeof(T) < 0,
510 "When filling an object with RDataFrame (e.g. via a Fill action) the number or types of the "
511 "columns passed did not match the signature of the object's `Fill` method.");
518 if (fObjects.size() == 1)
524 for (
auto it = ++fObjects.begin(); it != fObjects.end(); ++it)
528 HIST &PartialUpdate(
unsigned int slot) {
return *fObjects[
slot]; }
533 return std::make_unique<RMergeableFill<HIST>>(*fObjects[0]);
538 std::string GetActionName()
540 return std::string(fObjects[0]->
IsA()->GetName()) +
"\n" + std::string(fObjects[0]->GetName());
545 std::string GetActionName()
547 return "Fill custom object";
550 template <
typename H = HIST>
565 std::vector<::TGraph *> fGraphs;
573 fGraphs[0] =
g.get();
575 for (
unsigned int i = 1; i <
nSlots; ++i) {
576 fGraphs[i] =
new TGraph(*fGraphs[0]);
584 template <
typename X0,
typename X1,
585 std::enable_if_t<IsDataContainer<X0>::value && IsDataContainer<X1>::value,
int> = 0>
588 if (
x0s.size() !=
x1s.size()) {
589 throw std::runtime_error(
"Cannot fill Graph with values in containers of different sizes.");
601 template <
typename X0,
typename X1,
602 std::enable_if_t<!IsDataContainer<X0>::value && !IsDataContainer<X1>::value,
int> = 0>
614 throw std::runtime_error(
"Graph was applied to a mix of scalar values and collections. This is not supported.");
619 const auto nSlots = fGraphs.size();
624 l.Add(fGraphs[
slot]);
632 return std::make_unique<RMergeableFill<Result_t>>(*fGraphs[0]);
635 std::string GetActionName() {
return "Graph"; }
637 Result_t &PartialUpdate(
unsigned int slot) {
return *fGraphs[
slot]; }
664 for (
unsigned int i = 1; i <
nSlots; ++i) {
674 typename X,
typename Y,
typename EXL,
typename EXH,
typename EYL,
typename EYH,
675 std::enable_if_t<IsDataContainer<X>::value && IsDataContainer<Y>::value && IsDataContainer<EXL>::value &&
676 IsDataContainer<EXH>::value && IsDataContainer<EYL>::value && IsDataContainer<EYH>::value,
681 if ((
xs.size() !=
ys.size()) || (
xs.size() !=
exls.size()) || (
xs.size() !=
exhs.size()) ||
682 (
xs.size() !=
eyls.size()) || (
xs.size() !=
eyhs.size())) {
683 throw std::runtime_error(
"Cannot fill GraphAsymmErrors with values in containers of different sizes.");
686 auto xsIt = std::begin(
xs);
687 auto ysIt = std::begin(
ys);
692 while (
xsIt != std::end(
xs)) {
701 typename X,
typename Y,
typename EXL,
typename EXH,
typename EYL,
typename EYH,
702 std::enable_if_t<!IsDataContainer<X>::value && !IsDataContainer<Y>::value && !IsDataContainer<EXL>::value &&
703 !IsDataContainer<EXH>::value && !IsDataContainer<EYL>::value && !IsDataContainer<EYH>::value,
715 template <
typename X,
typename Y,
typename EXL,
typename EXH,
typename EYL,
typename EYH,
719 throw std::runtime_error(
720 "GraphAsymmErrors was applied to a mix of scalar values and collections. This is not supported.");
741 std::string GetActionName() {
return "GraphAsymmErrors"; }
747 auto &
result = *
static_cast<std::shared_ptr<TGraphAsymmErrors> *
>(
newResult);
759template <
typename V,
typename COLL>
765template <
typename COLL>
772template <
typename RealT_t,
typename T,
typename COLL>
781 for (
unsigned int i = 1; i <
nSlots; ++i)
782 fColls.emplace_back(std::make_shared<COLL>());
796 for (
unsigned int i = 1; i <
fColls.size(); ++i) {
809 std::string GetActionName() {
return "Take"; }
821template <
typename RealT_t,
typename T>
823 :
public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
831 for (
unsigned int i = 1; i <
nSlots; ++i) {
832 auto v = std::make_shared<std::vector<T>>();
854 for (
unsigned int i = 1; i <
fColls.size(); ++i) {
860 std::vector<T> &PartialUpdate(
unsigned int slot) {
return *
fColls[
slot]; }
862 std::string GetActionName() {
return "Take"; }
866 auto &
result = *
static_cast<std::shared_ptr<std::vector<T>
> *>(
newResult);
874template <
typename RealT_t,
typename COLL>
876 :
public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
884 for (
unsigned int i = 1; i <
nSlots; ++i)
885 fColls.emplace_back(std::make_shared<COLL>());
899 for (
unsigned int i = 1; i <
fColls.size(); ++i) {
901 for (
auto &
v : *
coll) {
907 std::string GetActionName() {
return "Take"; }
919template <
typename RealT_t>
921 :
public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
930 for (
unsigned int i = 1; i <
nSlots; ++i) {
931 auto v = std::make_shared<std::vector<RealT_t>>();
953 for (
unsigned int i = 1; i <
fColls.size(); ++i) {
959 std::string GetActionName() {
return "Take"; }
972template <
typename RealT_t,
typename T,
typename COLL>
974template <
typename RealT_t,
typename T>
976template <
typename RealT_t,
typename COLL>
978template <
typename RealT_t>
994template <
typename ResultType>
1021 *
fResultMin = std::numeric_limits<ResultType>::max();
1029 return std::make_unique<RMergeableMin<ResultType>>(*fResultMin);
1034 std::string GetActionName() {
return "Min"; }
1038 auto &
result = *
static_cast<std::shared_ptr<ResultType> *
>(
newResult);
1043template <
typename ResultType>
1070 *
fResultMax = std::numeric_limits<ResultType>::lowest();
1079 return std::make_unique<RMergeableMax<ResultType>>(*fResultMax);
1084 std::string GetActionName() {
return "Max"; }
1088 auto &
result = *
static_cast<std::shared_ptr<ResultType> *
>(
newResult);
1093template <
typename ResultType>
1102 template <
typename T = ResultType>
1108 template <
typename T = ResultType,
typename Dummy =
int>
1136 for (
auto &&
v :
vs) {
1162 return std::make_unique<RMergeableSum<ResultType>>(*fResultSum);
1167 std::string GetActionName() {
return "Sum"; }
1171 auto &
result = *
static_cast<std::shared_ptr<ResultType> *
>(
newResult);
1179 std::vector<ULong64_t> fCounts;
1180 std::vector<double>
fSums;
1194 for (
auto &&
v :
vs) {
1212 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1216 double &PartialUpdate(
unsigned int slot);
1218 std::string GetActionName() {
return "Mean"; }
1229 unsigned int fNSlots;
1232 std::vector<ULong64_t> fCounts;
1234 std::vector<double> fMeans;
1248 for (
auto &&
v :
vs) {
1260 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1262 std::inner_product(fMeans.begin(), fMeans.end(), fCounts.begin(), 0.) /
static_cast<Double_t>(
counts);
1266 std::string GetActionName() {
return "StdDev"; }
1275template <
typename PrevNodeType>
1280 std::shared_ptr<PrevNodeType> fPrevNode;
1281 size_t fEntriesToProcess;
1284 DisplayHelper(
size_t nRows,
const std::shared_ptr<Display_t> &
d,
const std::shared_ptr<PrevNodeType> &prevNode)
1288 DisplayHelper(DisplayHelper &&) =
default;
1289 DisplayHelper(
const DisplayHelper &) =
delete;
1292 template <
typename...
Columns>
1295 if (fEntriesToProcess == 0)
1299 --fEntriesToProcess;
1301 if (fEntriesToProcess == 0) {
1306 fPrevNode->StopProcessing();
1314 std::string GetActionName() {
return "Display"; }
1317template <
typename T>
1323template <
typename T>
1329template <
typename T>
1392template <
typename T>
1410 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1411 "be written out as a RVec instead of a TClonesArray. Specify that the type of the branch is "
1412 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.",
1450 const auto bname =
leaf->GetName();
1466 const auto btype =
leaf->GetTypeName();
1470 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. This "
1471 "column will not be written out.",
1489 std::string fFileName;
1490 std::string fDirName;
1491 std::string fTreeName;
1493 std::unique_ptr<TFile> fOutputFile;
1500 std::vector<TBranch *> fBranches;
1501 std::vector<void *> fBranchAddresses;
1503 std::vector<bool> fIsDefine;
1521 if (!fTreeName.empty() && !fOutputFile && fOptions.
fLazy)
1522 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
1544 template <std::size_t...
S>
1553 int expander[] = {(fBranches[
S] && fBranchAddresses[
S] != GetData(values)
1554 ? fBranches[
S]->SetAddress(GetData(values)),
1555 fBranchAddresses[
S] = GetData(values), 0 : 0, 0)...,
1560 template <std::size_t...
S>
1565 fBranches[S], fBranchAddresses[S], &values,
fOutputBranches, fIsDefine[S]),
1578 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
1581 if (!fDirName.empty()) {
1585 outputDir = fOutputFile->mkdir(fDirName.c_str(),
"",
true);
1587 outputDir = fOutputFile->mkdir(fDirName.c_str());
1591 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.
fSplitLevel,
outputDir);
1600 assert(fOutputFile !=
nullptr);
1606 fOutputFile->Close();
1609 std::string GetActionName() {
return "Snapshot"; }
1629 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
1638 unsigned int fNSlots;
1639 std::unique_ptr<ROOT::TBufferMerger> fMerger;
1640 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>>
fOutputFiles;
1643 std::string fFileName;
1644 std::string fDirName;
1645 std::string fTreeName;
1649 std::vector<TTree *> fInputTrees;
1651 std::vector<std::vector<TBranch *>> fBranches;
1653 std::vector<std::vector<void *>> fBranchAddresses;
1655 std::vector<bool> fIsDefine;
1677 Warning(
"Snapshot",
"A lazy Snapshot action was booked but never triggered.");
1688 if (!fDirName.empty()) {
1703 fInputTrees[
slot] =
r->GetTree();
1733 template <std::size_t...
S>
1742 int expander[] = {(fBranches[
slot][
S] && fBranchAddresses[
slot][
S] != GetData(values)
1743 ? fBranches[
slot][
S]->SetAddress(GetData(values)),
1744 fBranchAddresses[
slot][
S] = GetData(values), 0 : 0, 0)...,
1750 template <std::size_t...
S>
1768 throw std::runtime_error(
"Snapshot: could not create output file " + fFileName);
1769 fMerger = std::make_unique<ROOT::TBufferMerger>(std::unique_ptr<TFile>(
out_file));
1787 "No input entries (input TTree was empty or no entry passed the Filters). Output TTree is empty.");
1795 std::string GetActionName() {
return "Snapshot"; }
1815 const std::string
finalName = *
reinterpret_cast<const std::string *
>(
newName);
1821template <
typename Acc,
typename Merge,
typename R,
typename T,
typename U,
1824 :
public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1827 std::shared_ptr<U> fResult;
1848 template <
bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<MustCopyAssign_,
int> = 0>
1854 template <
bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<!MustCopyAssign_,
int> = 0>
1863 bool MergeAll = std::is_same<void, MergeRet>::value>
1864 std::enable_if_t<MergeAll, void> Finalize()
1872 std::enable_if_t<MergeTwoByTwo, void> Finalize(...)
1875 *fResult = fMerge(*fResult,
acc);
1880 std::string GetActionName() {
return "Aggregate"; }
Handle_t Display_t
Display handle.
#define R(a, b, c, d, e, f, g, h, i)
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
TClass * IsA() const override
TTime operator*(const TTime &t1, const TTime &t2)
Base class for action helpers, see RInterface::Book() for more information.
This class is the textual representation of the content of a columnar dataset.
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
A TTree is a list of TBranches.
TClassRef is used to implement a permanent reference to a TClass object.
Collection abstract base class.
TDirectory::TContext keeps track and restore the current directory.
Describe directory structure in memory.
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
TGraph with asymmetric error bars.
A TGraph is an object made of two arrays X and Y with npoints each.
1-D histogram with a double per channel (see TH1 documentation)
TH1 is the base class of all histogram classes in ROOT.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Statistical variable, defined by its mean and variance (RMS).
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
RooCmdArg Columns(Int_t ncol)
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
constexpr std::size_t FindIdxTrue(const T &arr)
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with an RDataFrame computation graph via e....
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
RooArgSet S(Args_t &&... args)
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
bool fLazy
Do not start the event loop when Snapshot is called.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
static uint64_t sum(uint64_t i)