Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RFilterWithMissingValues.hxx
Go to the documentation of this file.
1// Author: Vincenzo Eduardo Padulano CERN 09/2024
2
3/*************************************************************************
4 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RFilterWithMissingValues
12#define ROOT_RDF_RFilterWithMissingValues
13
17#include "ROOT/RDF/Utils.hxx"
21#include "ROOT/TypeTraits.hxx"
22#include "RtypesCore.h"
23
24#include <algorithm>
25#include <cassert>
26#include <memory>
27#include <string>
28#include <unordered_map>
29#include <utility> // std::index_sequence
30#include <vector>
31
32// fwd decls for RFilterWithMissingValues
34std::shared_ptr<GraphNode> CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr,
35 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap);
36
37std::shared_ptr<GraphNode> AddDefinesToGraph(std::shared_ptr<GraphNode> node,
38 const ROOT::Internal::RDF::RColumnRegister &colRegister,
39 const std::vector<std::string> &prevNodeDefines,
40 std::unordered_map<void *, std::shared_ptr<GraphNode>> &visitedMap);
41} // namespace ROOT::Internal::RDF::GraphDrawing
42
43namespace ROOT::Detail::RDF {
44
// Forward declaration: in this header the type is only named inside std::is_same, to detect a jitted
// previous node and replace it with the RFilterBase base type (see the PrevNode_t alias).
46class RJittedFilter;
47
48/**
49 * \brief implementation of FilterAvailable and FilterMissing operations
50 *
51 * The filter evaluates if the entry is missing a value for the input column.
52 * Depending on which function was called by the user, the entry with the
53 * missing value:
54 * - will be discarded in case the user called FilterAvailable
55 * - will be kept in case the user called FilterMissing
56 */
57template <typename PrevNodeRaw>
58class R__CLING_PTRCHECK(off) RFilterWithMissingValues final : public RFilterBase {
59
60 // If the PrevNode is a RJittedFilter, treat it as a more generic RFilterBase: when dealing with systematic
61 // variations we'll have a RJittedFilter node for the nominal case but other "universes" will use concrete filters,
62 // so we normalize the "previous node type" to the base type RFilterBase.
63 using PrevNode_t = std::conditional_t<std::is_same<PrevNodeRaw, RJittedFilter>::value, RFilterBase, PrevNodeRaw>;
64 const std::shared_ptr<PrevNode_t> fPrevNodePtr;
65
66 // One column reader per slot
67 std::vector<RColumnReaderBase *> fValues;
68
// Whether an entry with a missing value for the input column is discarded (true, i.e. FilterAvailable)
// or is the only kind of entry that is kept (false, i.e. FilterMissing).
// NOTE(review): the type is taken from the `bool discardEntry` constructor argument that initializes it.
bool fDiscardEntryWithMissingValue;

72public:
73 RFilterWithMissingValues(bool discardEntry, std::shared_ptr<PrevNode_t> pd,
74 const RDFInternal::RColumnRegister &colRegister, const ColumnNames_t &columns,
75 std::string_view filterName = "", const std::string &variationName = "nominal")
76 : RFilterBase(pd->GetLoopManagerUnchecked(), filterName, pd->GetLoopManagerUnchecked()->GetNSlots(), colRegister,
77 columns, pd->GetVariations(), variationName),
78 fPrevNodePtr(std::move(pd)),
79 fValues(fPrevNodePtr->GetLoopManagerUnchecked()->GetNSlots()),
80 fDiscardEntryWithMissingValue(discardEntry)
81 {
82 fLoopManager->Register(this);
83 // We suppress errors that TTreeReader prints regarding the missing branch
84 fLoopManager->GetSuppressErrorsForMissingBranches().push_back(fColumnNames[0]);
85 }
86
// NOTE(review): the line(s) introducing this body are absent from this listing (the numbering jumps from 86
// to 92). Judging from the comment below about data members being destroyed, this is presumably the body of
// the class destructor -- confirm against the original header.
92 {
93 // must Deregister objects from the RLoopManager here, before the fPrevNodePtr data member is destroyed:
94 // otherwise if fPrevNodePtr is the RLoopManager, it will be destroyed before the calls to Deregister happen.
95 fLoopManager->Deregister(this);
// Counterpart of the push_back done in the constructor: take our column name out of the list of branches
// for which "missing branch" errors are suppressed.
96 ROOT::Internal::RDF::Erase(fColumnNames[0], fLoopManager->GetSuppressErrorsForMissingBranches());
97 }
98
99 bool CheckFilters(unsigned int slot, Long64_t entry) final
100 {
101 constexpr static auto cacheLineStepLong64_t = RDFInternal::CacheLineStep<Long64_t>();
102 constexpr static auto cacheLineStepint = RDFInternal::CacheLineStep<int>();
103 constexpr static auto cacheLineStepULong64_t = RDFInternal::CacheLineStep<ULong64_t>();
104
105 if (entry != fLastCheckedEntry[slot * cacheLineStepLong64_t]) {
106 if (!fPrevNodePtr->CheckFilters(slot, entry)) {
107 // a filter upstream returned false, cache the result
108 fLastResult[slot * cacheLineStepint] = false;
109 } else {
110 // evaluate this filter, cache the result
111 const bool valueIsMissing = fValues[slot]->template TryGet<void>(entry) == nullptr;
112 if (fDiscardEntryWithMissingValue) {
113 valueIsMissing ? ++fRejected[slot * cacheLineStepULong64_t] : ++fAccepted[slot * cacheLineStepULong64_t];
114 fLastResult[slot * cacheLineStepint] = !valueIsMissing;
115 } else {
116 valueIsMissing ? ++fAccepted[slot * cacheLineStepULong64_t] : ++fRejected[slot * cacheLineStepULong64_t];
117 fLastResult[slot * cacheLineStepint] = valueIsMissing;
118 }
119 }
120 fLastCheckedEntry[slot * cacheLineStepLong64_t] = entry;
121 }
122 return fLastResult[slot * cacheLineStepint];
123 }
124
126 {
127 // Try to check if there is an available reader from the column register first
128 // We do not check that the type of the reader matches the type of the input column of this node,
129 // because this node does not keep track of that anyway
130 if (auto *defineOrVariationReader = fColRegister.GetReaderUnchecked(slot, fColumnNames[0], fVariation))
131 return defineOrVariationReader;
132
133 // Check if we already inserted a reader for this column in the dataset column readers (RDataSource or Tree/TChain
134 // readers)
135 if (auto *datasetColReader = fLoopManager->GetDatasetColumnReader(slot, fColumnNames[0], typeid(void)))
136 return datasetColReader;
137
138 // Create a column reader that does not need type information. It is used only to check if an entry of the
139 // column can be read validly.
140 return fLoopManager->AddTreeColumnReader(
141 slot, fColumnNames[0], std::make_unique<ROOT::Internal::RDF::RTreeOpaqueColumnReader>(*r, fColumnNames[0]),
142 typeid(void));
143 }
144
145 void InitSlot(TTreeReader *r, unsigned int slot) final
146 {
147 fValues[slot] = GetOrCreateColumnReader(r, slot);
148 fLastCheckedEntry[slot * RDFInternal::CacheLineStep<Long64_t>()] = -1;
149 }
150
151 // recursive chain of `Report`s
152 void Report(ROOT::RDF::RCutFlowReport &rep) const final { PartialReport(rep); }
153
155 {
156 fPrevNodePtr->PartialReport(rep);
157 FillReport(rep);
158 }
159
160 void StopProcessing() final
161 {
162 ++fNStopsReceived;
163 if (fNStopsReceived == fNChildren)
164 fPrevNodePtr->StopProcessing();
165 }
166
167 void IncrChildrenCount() final
168 {
169 ++fNChildren;
170 // propagate "children activation" upstream. named filters do the propagation via `TriggerChildrenCount`.
171 if (fNChildren == 1 && fName.empty())
172 fPrevNodePtr->IncrChildrenCount();
173 }
174
// NOTE(review): the signature line of this method is absent from this listing (the numbering jumps from 174
// to 176). The assertion below shows it is meant for named filters only; presumably this is the method that
// named filters use to propagate the children count upstream -- confirm against the original header.
176 {
177 assert(!fName.empty()); // this method is to only be called on named filters
// Forward the "children activation" to the previous node of the graph.
178 fPrevNodePtr->IncrChildrenCount();
179 }
180
181 void AddFilterName(std::vector<std::string> &filters) final
182 {
183 fPrevNodePtr->AddFilterName(filters);
184 auto name = (HasName() ? fName : fDiscardEntryWithMissingValue ? "FilterAvailable" : "FilterMissing");
185 filters.push_back(name);
186 }
187
188 /// Clean-up operations to be performed at the end of a task.
189 void FinalizeSlot(unsigned int slot) final { fValues[slot] = nullptr; }
190
191 std::shared_ptr<RDFGraphDrawing::GraphNode>
192 GetGraph(std::unordered_map<void *, std::shared_ptr<RDFGraphDrawing::GraphNode>> &visitedMap) final
193 {
194 // Recursively call for the previous node.
195 auto prevNode = fPrevNodePtr->GetGraph(visitedMap);
196 const auto &prevColumns = prevNode->GetDefinedColumns();
197
198 auto thisNode = RDFGraphDrawing::CreateFilterNode(this, visitedMap);
199
200 /* If the returned node is not new, there is no need to perform any other operation.
201 * This is a likely scenario when building the entire graph in which branches share
202 * some nodes. */
203 if (!thisNode->IsNew()) {
204 return thisNode;
205 }
206
207 auto upmostNode = AddDefinesToGraph(thisNode, fColRegister, prevColumns, visitedMap);
208
209 // Keep track of the columns defined up to this point.
210 thisNode->AddDefinedColumns(fColRegister.GenerateColumnNames());
211
212 upmostNode->SetPrevNode(prevNode);
213 return thisNode;
214 }
215
216 /// Return a clone of this Filter that works with values in the variationName "universe".
217 std::shared_ptr<RNodeBase> GetVariedFilter(const std::string &variationName) final
218 {
219 // Only the nominal filter should be asked to produce varied filters
220 assert(fVariation == "nominal");
221 // nobody should ask for a varied filter for the nominal variation: they can just
222 // use the nominal filter!
223 assert(variationName != "nominal");
224 // nobody should ask for a varied filter for a variation on which this filter does not depend:
225 // they can just use the nominal filter.
226 assert(RDFInternal::IsStrInVec(variationName, fVariations));
227
228 auto it = fVariedFilters.find(variationName);
229 if (it != fVariedFilters.end())
230 return it->second;
231
232 auto prevNode = fPrevNodePtr;
233 if (static_cast<RNodeBase *>(fPrevNodePtr.get()) != static_cast<RNodeBase *>(fLoopManager) &&
234 RDFInternal::IsStrInVec(variationName, prevNode->GetVariations()))
235 prevNode = std::static_pointer_cast<PrevNode_t>(prevNode->GetVariedFilter(variationName));
236
237 // the varied filters get a copy of the callable object.
238 // TODO document this
239 auto variedFilter = std::unique_ptr<RFilterBase>(new RFilterWithMissingValues<PrevNode_t>(
240 fDiscardEntryWithMissingValue, std::move(prevNode), fColRegister, fColumnNames, fName, variationName));
241 auto e = fVariedFilters.insert({variationName, std::move(variedFilter)});
242 return e.first->second;
243 }
244};
245
246} // namespace ROOT::Detail::RDF
247
248#endif // ROOT_RDF_RFilterWithMissingValues
#define e(i)
Definition RSha256.hxx:103
long long Long64_t
Definition RtypesCore.h:69
const char * filters[]
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
char name[80]
Definition TGX11.cxx:110
implementation of FilterAvailable and FilterMissing operations
RFilterWithMissingValues & operator=(RFilterWithMissingValues &&)=delete
const std::shared_ptr< PrevNode_t > fPrevNodePtr
bool CheckFilters(unsigned int slot, Long64_t entry) final
void FinalizeSlot(unsigned int slot) final
Clean-up operations to be performed at the end of a task.
std::shared_ptr< RDFGraphDrawing::GraphNode > GetGraph(std::unordered_map< void *, std::shared_ptr< RDFGraphDrawing::GraphNode > > &visitedMap) final
std::conditional_t< std::is_same< PrevNodeRaw, RJittedFilter >::value, RFilterBase, PrevNodeRaw > PrevNode_t
ROOT::Detail::RDF::RColumnReaderBase * GetOrCreateColumnReader(TTreeReader *r, unsigned int slot)
std::shared_ptr< RNodeBase > GetVariedFilter(const std::string &variationName) final
Return a clone of this Filter that works with values in the variationName "universe".
RFilterWithMissingValues(const RFilterWithMissingValues &)=delete
void InitSlot(TTreeReader *r, unsigned int slot) final
void AddFilterName(std::vector< std::string > &filters) final
RFilterWithMissingValues & operator=(const RFilterWithMissingValues &)=delete
RFilterWithMissingValues(RFilterWithMissingValues &&)=delete
void PartialReport(ROOT::RDF::RCutFlowReport &rep) const final
void Report(ROOT::RDF::RCutFlowReport &rep) const final
RFilterWithMissingValues(bool discardEntry, std::shared_ptr< PrevNode_t > pd, const RDFInternal::RColumnRegister &colRegister, const ColumnNames_t &columns, std::string_view filterName="", const std::string &variationName="nominal")
Base class for non-leaf nodes of the computational graph.
Definition RNodeBase.hxx:43
A binder for user-defined columns, variations and aliases.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
std::shared_ptr< GraphNode > CreateFilterNode(const ROOT::Detail::RDF::RFilterBase *filterPtr, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
std::shared_ptr< GraphNode > AddDefinesToGraph(std::shared_ptr< GraphNode > node, const RColumnRegister &colRegister, const std::vector< std::string > &prevNodeDefines, std::unordered_map< void *, std::shared_ptr< GraphNode > > &visitedMap)
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:440
void Erase(const T &that, std::vector< T > &v)
Erase that element from vector v
Definition Utils.hxx:189