Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROOT::RDF::RInterface< Proxied, DataSource > Class Template Reference

template<typename Proxied, typename DataSource = void>
class ROOT::RDF::RInterface< Proxied, DataSource >

The public interface to the RDataFrame federation of classes.

Template Parameters
Proxied: One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
DataSource: The type of the RDataSource which is providing the data to the data frame. There is no source by default.

The documentation of each method features a one-liner illustrating how to use the method, for example showing how the majority of the template parameters are automatically deduced, requiring little or no effort from the user.

Definition at line 114 of file RInterface.hxx.

Public Member Functions

 RInterface (const RInterface &)=default
 Copy-ctor for RInterface.
 
template<typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
 RInterface (const std::shared_ptr< RLoopManager > &proxied)
 Build a RInterface from a RLoopManager.
 
 RInterface (RInterface &&)=default
 Move-ctor for RInterface.
 
template<typename AccFun , typename MergeFun , typename R = typename TTraits::CallableTraits<AccFun>::ret_type, typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay, typename U = TTraits::TakeFirstParameter_t<ArgTypes>, typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
RResultPtr< U > Aggregate (AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
 Execute a user-defined accumulation operation on the processed column values in each processing slot.
 
template<typename AccFun , typename MergeFun , typename R = typename TTraits::CallableTraits<AccFun>::ret_type, typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, typename U = TTraits::TakeFirstParameter_t<ArgTypes>, typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
RResultPtr< U > Aggregate (AccFun aggregator, MergeFun merger, std::string_view columnName="")
 Execute a user-defined accumulation operation on the processed column values in each processing slot.
 
RInterface< Proxied, DS_t > Alias (std::string_view alias, std::string_view columnName)
 Allow to refer to a column with a different name.
 
template<typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper >
RResultPtr< typename std::decay_t< Helper >::Result_t > Book (Helper &&helper, const ColumnNames_t &columns={})
 Book execution of a custom action using a user-defined helper object.
 
template<typename... ColumnTypes>
RInterface< RLoopManager > Cache (const ColumnNames_t &columnList)
 Save selected columns in memory.
 
RInterface< RLoopManager > Cache (const ColumnNames_t &columnList)
 Save selected columns in memory.
 
RInterface< RLoopManager > Cache (std::initializer_list< std::string > columnList)
 Save selected columns in memory.
 
RInterface< RLoopManager > Cache (std::string_view columnNameRegexp="")
 Save selected columns in memory.
 
RResultPtr< ULong64_t > Count ()
 Return the number of entries processed (lazy action).
 
template<typename T >
RInterface< Proxied, DS_t > DefaultValueFor (std::string_view column, const T &defaultValue)
 In case the value in the given column is missing, provide a default value.
 
template<typename F , typename std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< Proxied, DS_t > Define (std::string_view name, F expression, const ColumnNames_t &columns={})
 Define a new column.
 
RInterface< Proxied, DS_t > Define (std::string_view name, std::string_view expression)
 Define a new column.
 
template<typename F , typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
RInterface< Proxied, DS_t > DefinePerSample (std::string_view name, F expression)
 Define a new column that is updated when the input sample changes.
 
RInterface< Proxied, DS_t > DefinePerSample (std::string_view name, std::string_view expression)
 Define a new column that is updated when the input sample changes.
 
template<typename F >
RInterface< Proxied, DS_t > DefineSlot (std::string_view name, F expression, const ColumnNames_t &columns={})
 Define a new column with a value dependent on the processing slot.
 
template<typename F >
RInterface< Proxied, DS_t > DefineSlotEntry (std::string_view name, F expression, const ColumnNames_t &columns={})
 Define a new column with a value dependent on the processing slot and the current entry.
 
template<typename... ColumnTypes>
RResultPtr< RDisplay > Display (const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
 Provides a representation of the columns in the dataset.
 
RResultPtr< RDisplay > Display (const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
 Provides a representation of the columns in the dataset.
 
RResultPtr< RDisplay > Display (std::initializer_list< std::string > columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
 Provides a representation of the columns in the dataset.
 
RResultPtr< RDisplay > Display (std::string_view columnNameRegexp="", size_t nRows=5, size_t nMaxCollectionElements=10)
 Provides a representation of the columns in the dataset.
 
template<typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T >
RResultPtr< std::decay_t< T > > Fill (T &&model, const ColumnNames_t &columnList)
 Return an object of type T on which T::Fill will be called once per event (lazy action).
 
template<typename F , std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter (F f, const ColumnNames_t &columns={}, std::string_view name="")
 Append a filter to the call graph.
 
template<typename F >
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter (F f, const std::initializer_list< std::string > &columns)
 Append a filter to the call graph.
 
template<typename F , std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter (F f, std::string_view name)
 Append a filter to the call graph.
 
RInterface< RDFDetail::RJittedFilter, DS_t > Filter (std::string_view expression, std::string_view name="")
 Append a filter to the call graph.
 
RInterface< RDFDetail::RFilterWithMissingValues< Proxied >, DS_t > FilterAvailable (std::string_view column)
 Discard entries with missing values.
 
RInterface< RDFDetail::RFilterWithMissingValues< Proxied >, DS_t > FilterMissing (std::string_view column)
 Keep only the entries that have missing values.
 
template<typename F >
void Foreach (F f, const ColumnNames_t &columns={})
 Execute a user-defined function on each entry (instant action).
 
template<typename F >
void ForeachSlot (F f, const ColumnNames_t &columns={})
 Execute a user-defined function requiring a processing slot index on each entry (instant action).
 
std::vector< std::string > GetFilterNames ()
 Returns the names of the filters created.
 
template<typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
RResultPtr<::TGraph > Graph (std::string_view x="", std::string_view y="")
 Fill and return a TGraph object (lazy action).
 
template<typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType, typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType, typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
RResultPtr<::TGraphAsymmErrors > GraphAsymmErrors (std::string_view x="", std::string_view y="", std::string_view exl="", std::string_view exh="", std::string_view eyl="", std::string_view eyh="")
 Fill and return a TGraphAsymmErrors object (lazy action).
 
template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TH1D > Histo1D (const TH1DModel &model, std::string_view vName, std::string_view wName)
 Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
 
template<typename V , typename W >
RResultPtr<::TH1D > Histo1D (const TH1DModel &model={"", "", 128u, 0., 0.})
 Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
 
template<typename V = RDFDetail::RInferredType>
RResultPtr<::TH1D > Histo1D (const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
 Fill and return a one-dimensional histogram with the values of a column (lazy action).
 
template<typename V = RDFDetail::RInferredType>
RResultPtr<::TH1D > Histo1D (std::string_view vName)
 Fill and return a one-dimensional histogram with the values of a column (lazy action).
 
template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TH1D > Histo1D (std::string_view vName, std::string_view wName)
 Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
 
template<typename V1 , typename V2 , typename W >
RResultPtr<::TH2D > Histo2D (const TH2DModel &model)
 
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TH2D > Histo2D (const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
 Fill and return a weighted two-dimensional histogram (lazy action).
 
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
RResultPtr<::TH2D > Histo2D (const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
 Fill and return a two-dimensional histogram (lazy action).
 
template<typename V1 , typename V2 , typename V3 , typename W >
RResultPtr<::TH3D > Histo3D (const TH3DModel &model)
 
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TH3D > Histo3D (const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
 Fill and return a three-dimensional histogram (lazy action).
 
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename V3 = RDFDetail::RInferredType>
RResultPtr<::TH3D > Histo3D (const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
 Fill and return a three-dimensional histogram (lazy action).
 
template<typename FirstColumn , typename... OtherColumns>
RResultPtr<::THnD > HistoND (const THnDModel &model, const ColumnNames_t &columnList)
 Fill and return an N-dimensional histogram (lazy action).
 
RResultPtr<::THnD > HistoND (const THnDModel &model, const ColumnNames_t &columnList)
 Fill and return an N-dimensional histogram (lazy action).
 
template<typename T = RDFDetail::RInferredType>
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max (std::string_view columnName="")
 Return the maximum of processed column values (lazy action).
 
template<typename T = RDFDetail::RInferredType>
RResultPtr< double > Mean (std::string_view columnName="")
 Return the mean of processed column values (lazy action).
 
template<typename T = RDFDetail::RInferredType>
RResultPtr< RDFDetail::MinReturnType_t< T > > Min (std::string_view columnName="")
 Return the minimum of processed column values (lazy action).
 
 operator RNode () const
 Cast any RDataFrame node to a common type ROOT::RDF::RNode.
 
RInterface & operator= (const RInterface &)=default
 Copy-assignment operator for RInterface.
 
RInterface & operator= (RInterface &&)=default
 Move-assignment operator for RInterface.
 
template<typename V1 , typename V2 , typename W >
RResultPtr<::TProfile > Profile1D (const TProfile1DModel &model)
 Fill and return a one-dimensional profile (lazy action).
 
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TProfile > Profile1D (const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
 Fill and return a one-dimensional profile (lazy action).
 
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
RResultPtr<::TProfile > Profile1D (const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
 Fill and return a one-dimensional profile (lazy action).
 
template<typename V1 , typename V2 , typename V3 , typename W >
RResultPtr<::TProfile2D > Profile2D (const TProfile2DModel &model)
 Fill and return a two-dimensional profile (lazy action).
 
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TProfile2D > Profile2D (const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
 Fill and return a two-dimensional profile (lazy action).
 
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename V3 = RDFDetail::RInferredType>
RResultPtr<::TProfile2D > Profile2D (const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
 Fill and return a two-dimensional profile (lazy action).
 
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range (unsigned int begin, unsigned int end, unsigned int stride=1)
 Creates a node that filters entries based on range: [begin, end).
 
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range (unsigned int end)
 Creates a node that filters entries based on range.
 
template<typename F , std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< Proxied, DS_t > Redefine (std::string_view name, F expression, const ColumnNames_t &columns={})
 Overwrite the value and/or type of an existing column.
 
RInterface< Proxied, DS_t > Redefine (std::string_view name, std::string_view expression)
 Overwrite the value and/or type of an existing column.
 
template<typename F >
RInterface< Proxied, DS_t > RedefineSlot (std::string_view name, F expression, const ColumnNames_t &columns={})
 Overwrite the value and/or type of an existing column.
 
template<typename F >
RInterface< Proxied, DS_t > RedefineSlotEntry (std::string_view name, F expression, const ColumnNames_t &columns={})
 Overwrite the value and/or type of an existing column.
 
template<typename F , typename T = typename TTraits::CallableTraits<F>::ret_type>
RResultPtr< T > Reduce (F f, std::string_view columnName, const T &redIdentity)
 Execute a user-defined reduce operation on the values of a column.
 
template<typename F , typename T = typename TTraits::CallableTraits<F>::ret_type>
RResultPtr< T > Reduce (F f, std::string_view columnName="")
 Execute a user-defined reduce operation on the values of a column.
 
RResultPtr< RCutFlowReport > Report ()
 Gather filtering statistics.
 
template<typename... ColumnTypes>
RResultPtr< RInterface< RLoopManager > > Snapshot (std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
 Save selected columns to disk, in a new TTree treename in file filename.
 
RResultPtr< RInterface< RLoopManager > > Snapshot (std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
 Save selected columns to disk, in a new TTree treename in file filename.
 
RResultPtr< RInterface< RLoopManager > > Snapshot (std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
 Save selected columns to disk, in a new TTree treename in file filename.
 
RResultPtr< RInterface< RLoopManager > > Snapshot (std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
 Save selected columns to disk, in a new TTree treename in file filename.
 
template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr< TStatistic > Stats (std::string_view value, std::string_view weight)
 Return a TStatistic object, filled once per event (lazy action).
 
template<typename V = RDFDetail::RInferredType>
RResultPtr< TStatistic > Stats (std::string_view value="")
 Return a TStatistic object, filled once per event (lazy action).
 
template<typename T = RDFDetail::RInferredType>
RResultPtr< double > StdDev (std::string_view columnName="")
 Return the unbiased standard deviation of processed column values (lazy action).
 
template<typename T = RDFDetail::RInferredType>
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum (std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
 Return the sum of processed column values (lazy action).
 
template<typename T , typename COLL = std::vector<T>>
RResultPtr< COLL > Take (std::string_view column="")
 Return a collection of values of a column (lazy action, returns a std::vector by default).
 
template<typename F >
RInterface< Proxied, DS_t > Vary (const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
 Register systematic variations for multiple existing columns using custom variation tags.
 
template<typename F >
RInterface< Proxied, DS_t > Vary (const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
 Register systematic variations for multiple existing columns using auto-generated variation tags.
 
RInterface< Proxied, DS_t > Vary (const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
 Register systematic variations for multiple existing columns using custom variation tags.
 
RInterface< Proxied, DS_t > Vary (const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
 Register systematic variations for multiple existing columns using auto-generated variation tags.
 
template<typename F >
RInterface< Proxied, DS_t > Vary (std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
 Register systematic variations for multiple existing columns using custom variation tags.
 
template<typename F >
RInterface< Proxied, DS_t > Vary (std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
 Register systematic variations for multiple existing columns using auto-generated variation tags.
 
RInterface< Proxied, DS_t > Vary (std::initializer_list< std::string > colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
 Register systematic variations for multiple existing columns using auto-generated variation tags.
 
template<typename F >
RInterface< Proxied, DS_t > Vary (std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
 Register systematic variations for a single existing column using custom variation tags.
 
template<typename F >
RInterface< Proxied, DS_t > Vary (std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
 Register systematic variations for a single existing column using auto-generated variation tags.
 
RInterface< Proxied, DS_t > Vary (std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
 Register systematic variations for a single existing column using custom variation tags.
 
RInterface< Proxied, DS_t > Vary (std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
 Register systematic variations for a single existing column using auto-generated variation tags.
 
- Public Member Functions inherited from ROOT::RDF::RInterfaceBase
 RInterfaceBase (RDFDetail::RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister)
 
 RInterfaceBase (std::shared_ptr< RDFDetail::RLoopManager > lm)
 
RDFDescription Describe ()
 Return information about the dataframe.
 
ColumnNames_t GetColumnNames ()
 Returns the names of the available columns.
 
std::string GetColumnType (std::string_view column)
 Return the type of a given column as a string.
 
ColumnNames_t GetDefinedColumnNames ()
 Returns the names of the defined columns.
 
unsigned int GetNFiles ()
 
unsigned int GetNRuns () const
 Gets the number of event loops run.
 
unsigned int GetNSlots () const
 Gets the number of data processing slots.
 
RVariationsDescription GetVariations () const
 Return a descriptor for the systematic variations registered in this branch of the computation graph.
 
bool HasColumn (std::string_view columnName)
 Checks if a column is present in the dataset.
 

Protected Member Functions

 RInterface (const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister)
 
const std::shared_ptr< Proxied > & GetProxiedPtr () const
 
- Protected Member Functions inherited from ROOT::RDF::RInterfaceBase
void AddDefaultColumns ()
 
template<typename... ColumnTypes>
void CheckAndFillDSColumns (ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
 
void CheckIMTDisabled (std::string_view callerName)
 
template<typename ActionTag , typename... ColTypes, typename ActionResultType , typename RDFNode , typename HelperArgType = ActionResultType, std::enable_if_t< RDFInternal::RNeedJitting< ColTypes... >::value, int > = 0>
RResultPtr< ActionResultType > CreateAction (const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int nColumns=-1, const bool vector2RVec=true)
 Create RAction object, return RResultPtr for the action. Overload for the case in which one or more column types were not specified (RTTI + jitting).
 
template<typename ActionTag , typename... ColTypes, typename ActionResultType , typename RDFNode , typename HelperArgType = ActionResultType, std::enable_if_t<!RDFInternal::RNeedJitting< ColTypes... >::value, int > = 0>
RResultPtr< ActionResultType > CreateAction (const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
 Create RAction object, return RResultPtr for the action. Overload for the case in which all column types were specified (no jitting).
 
std::string DescribeDataset () const
 
ColumnNames_t GetColumnTypeNamesList (const ColumnNames_t &columnList)
 
RDFDetail::RLoopManager * GetLoopManager () const
 
ColumnNames_t GetValidatedColumnNames (const unsigned int nColumns, const ColumnNames_t &columns)
 
template<typename RetType >
void SanityChecksForVary (const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
 

Private Types

using DS_t = DataSource
 
using RFilterBase = RDFDetail::RFilterBase
 
using RLoopManager = RDFDetail::RLoopManager
 
using RRangeBase = RDFDetail::RRangeBase
 

Private Member Functions

template<typename... ColTypes, std::size_t... S>
RInterface< RLoopManager > CacheImpl (const ColumnNames_t &columnList, std::index_sequence< S... >)
 Implementation of cache.
 
template<typename Helper , typename ActionResultType , typename... Others>
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible (const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
 
template<typename Helper , typename ActionResultType >
auto CallCreateActionWithoutColsIfPossible (const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
 
template<typename F , typename DefineType , typename RetType = typename TTraits::CallableTraits<F>::ret_type>
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl (std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
 
template<typename F , typename DefineType , typename RetType = typename TTraits::CallableTraits<F>::ret_type, bool IsFStringConv = std::is_convertible<F, std::string>::value, bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl (std::string_view, F, const ColumnNames_t &, const std::string &)
 
RInterface< Proxied, DS_t > JittedVaryImpl (const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName, bool isSingleColumn)
 
template<typename... ColumnTypes>
RResultPtr< RInterface< RLoopManager > > SnapshotImpl (std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
 
template<bool IsSingleColumn, typename F >
RInterface< Proxied, DS_t > VaryImpl (const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
 

Private Attributes

std::shared_ptr< Proxied > fProxiedPtr
 Smart pointer to the graph node encapsulated by this RInterface.
 

Friends

template<typename T , typename W >
class RInterface
 
void RDFInternal::ChangeBeginAndEndEntries (const RNode &node, Long64_t start, Long64_t end)
 
void RDFInternal::ChangeEmptyEntryRange (const RNode &node, std::pair< ULong64_t, ULong64_t > &&newRange)
 
void RDFInternal::ChangeSpec (const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec)
 
class RDFInternal::GraphDrawing::GraphCreatorHelper
 
void RDFInternal::TriggerRun (RNode node)
 
std::string ROOT::Internal::RDF::GetDataSourceLabel (const RNode &node)
 

Additional Inherited Members

- Protected Attributes inherited from ROOT::RDF::RInterfaceBase
RDFInternal::RColumnRegister fColRegister
 Contains the columns defined up to this node.
 
RDataSource * fDataSource = nullptr
 Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
 
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
 The RLoopManager at the root of this computation graph. Never null.
 

#include <ROOT/RDF/RInterface.hxx>

Inheritance diagram for ROOT::RDF::RInterface< Proxied, DataSource >:
[legend]

Member Typedef Documentation

◆ DS_t

template<typename Proxied , typename DataSource = void>
using ROOT::RDF::RInterface< Proxied, DataSource >::DS_t = DataSource
private

Definition at line 115 of file RInterface.hxx.

◆ RFilterBase

template<typename Proxied , typename DataSource = void>
using ROOT::RDF::RInterface< Proxied, DataSource >::RFilterBase = RDFDetail::RFilterBase
private

Definition at line 116 of file RInterface.hxx.

◆ RLoopManager

template<typename Proxied , typename DataSource = void>
using ROOT::RDF::RInterface< Proxied, DataSource >::RLoopManager = RDFDetail::RLoopManager
private

Definition at line 118 of file RInterface.hxx.

◆ RRangeBase

template<typename Proxied , typename DataSource = void>
using ROOT::RDF::RInterface< Proxied, DataSource >::RRangeBase = RDFDetail::RRangeBase
private

Definition at line 117 of file RInterface.hxx.

Constructor & Destructor Documentation

◆ RInterface() [1/4]

template<typename Proxied , typename DataSource = void>
ROOT::RDF::RInterface< Proxied, DataSource >::RInterface ( const RInterface< Proxied, DataSource > &  )
default

Copy-ctor for RInterface.

◆ RInterface() [2/4]

template<typename Proxied , typename DataSource = void>
ROOT::RDF::RInterface< Proxied, DataSource >::RInterface ( RInterface< Proxied, DataSource > &&  )
default

Move-ctor for RInterface.

◆ RInterface() [3/4]

template<typename Proxied , typename DataSource = void>
template<typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
ROOT::RDF::RInterface< Proxied, DataSource >::RInterface ( const std::shared_ptr< RLoopManager > &  proxied)
inline

Build a RInterface from a RLoopManager.

This constructor is only available for RInterface<RLoopManager>.

Definition at line 153 of file RInterface.hxx.

◆ RInterface() [4/4]

template<typename Proxied , typename DataSource = void>
ROOT::RDF::RInterface< Proxied, DataSource >::RInterface ( const std::shared_ptr< Proxied > &  proxied,
RLoopManager &  lm,
const RDFInternal::RColumnRegister &  colRegister 
)
inline protected

Definition at line 3314 of file RInterface.hxx.

Member Function Documentation

◆ Aggregate() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename AccFun , typename MergeFun , typename R = typename TTraits::CallableTraits<AccFun>::ret_type, typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay, typename U = TTraits::TakeFirstParameter_t<ArgTypes>, typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
RResultPtr< U > ROOT::RDF::RInterface< Proxied, DataSource >::Aggregate ( AccFun  aggregator,
MergeFun  merger,
std::string_view  columnName,
const U &  aggIdentity 
)
inline

Execute a user-defined accumulation operation on the processed column values in each processing slot.

Template Parameters
AccFun: The type of the aggregator callable. Automatically deduced.
U: The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
T: The type of the column to apply the reduction to. Automatically deduced.
Parameters
[in] aggregator: A callable with signature U(U,T) or void(U&,T), where T is the type of the column, U is the type of the aggregator variable
[in] merger: A callable with signature U(U,U) or void(std::vector<U>&) used to merge the results of the accumulations of each thread
[in] columnName: The column to be aggregated. If omitted, the first default column is used instead.
[in] aggIdentity: The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
Returns
the result of the aggregation wrapped in a RResultPtr.

An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is initialized to aggIdentity or default-constructed if aggIdentity is omitted. This action calls the aggregator callable for each processed entry, passing in the aggregator variable and the value of the column columnName. If the signature is U(U,T) the aggregator variable is then copy-assigned the result of the execution of the callable. Otherwise the signature of aggregator must be void(U&,T).

The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions. If its signature is U(U,U) the aggregator variables of each thread are merged two by two. If its signature is void(std::vector<U>& a) it is assumed that it merges all aggregators in a[0].

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

auto aggregator = [](double acc, double x) { return acc * x; };
// If multithread is enabled, the aggregator function will be called by more threads
// and will produce a vector of partial accumulators.
// The merger function performs the final aggregation of these partial results.
auto merger = [](std::vector<double> &accumulators) {
for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
accumulators[0] *= accumulators[i];
}
};
// The accumulator is initialized at this value by every thread.
double initValue = 1.;
// Multiplies all elements of the column "x"
auto result = d.Aggregate(aggregator, merger, "x", initValue);
#define d(i)
Definition RSha256.hxx:102
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:67
Double_t x[n]
Definition legend1.C:17
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:539

Definition at line 2881 of file RInterface.hxx.

◆ Aggregate() [2/2]

template<typename Proxied , typename DataSource = void>
template<typename AccFun , typename MergeFun , typename R = typename TTraits::CallableTraits<AccFun>::ret_type, typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, typename U = TTraits::TakeFirstParameter_t<ArgTypes>, typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
RResultPtr< U > ROOT::RDF::RInterface< Proxied, DataSource >::Aggregate ( AccFun  aggregator,
MergeFun  merger,
std::string_view  columnName = "" 
)
inline

Execute a user-defined accumulation operation on the processed column values in each processing slot.

Template Parameters
AccFunThe type of the aggregator callable. Automatically deduced.
UThe type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
TThe type of the column to apply the reduction to. Automatically deduced.
Parameters
[in]aggregatorA callable with signature U(U,T) or void(U&,T), where T is the type of the column, U is the type of the aggregator variable
[in]mergerA callable with signature U(U,U) or void(std::vector<U>&) used to merge the results of the accumulations of each thread
[in]columnNameThe column to be aggregated. If omitted, the first default column is used instead.
Returns
the result of the aggregation wrapped in a RResultPtr.

See previous Aggregate overload for more information.

Definition at line 2915 of file RInterface.hxx.

◆ Alias()

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Alias ( std::string_view  alias,
std::string_view  columnName 
)
inline

Allow to refer to a column with a different name.

Parameters
[in]aliasname of the column alias
[in]columnNameName of the column to be aliased
Returns
the first node of the computation graph for which the alias is available.

Aliasing an alias is supported.

Example usage:

auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");

Definition at line 1216 of file RInterface.hxx.

◆ Book()

template<typename Proxied , typename DataSource = void>
template<typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper >
RResultPtr< typename std::decay_t< Helper >::Result_t > ROOT::RDF::RInterface< Proxied, DataSource >::Book ( Helper &&  helper,
const ColumnNames_t columns = {} 
)
inline

Book execution of a custom action using a user-defined helper object.

Template Parameters
FirstColumnThe type of the first column used by this action. Inferred together with OtherColumns if not present.
OtherColumnsA list of the types of the other columns used by this action
HelperThe type of the user-defined helper. See below for the required interface it should expose.
Parameters
[in]helperThe Action Helper to be scheduled.
[in]columnsThe names of the columns on which the helper acts.
Returns
the result of the helper wrapped in a RResultPtr.

This method books a custom action for execution. The behavior of the action is completely dependent on the Helper object provided by the caller. The required interface for the helper is described below (more methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful):

Mandatory interface

  • Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
  • Helper::Result_t: public alias for the type of the result of this action helper. Result_t must be default-constructible.
  • Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
  • std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called before Initialize(), because the RResultPtr is constructed before the event loop is started.
  • void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper, or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
  • void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations; it should not usually serve any purpose for the Helper. This method is often a no-op for simple helpers.
  • void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value: this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of the requested columns for the particular entry being processed.
  • void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
  • std::string GetActionName(): it returns a string identifier for this type of action that RDataFrame will use in diagnostics, SaveGraph(), etc.

Optional methods

If these methods are implemented they enable extra functionality as per the description below.

  • Result_t &PartialUpdate(unsigned int slot): if present, it must return the value of the partial result of this action for the given 'slot'. Different threads might call this method concurrently, but will do so with different 'slot' numbers. RDataFrame leverages this method to implement RResultPtr::OnPartialResult().
  • ROOT::RDF::SampleCallback_t GetSampleCallback(): if present, it must return a callable with the appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing of every sample, as in DefinePerSample().
  • Helper MakeNew(void *newResult): if implemented, it enables varying the action's result with VariationsFor(). It takes a type-erased new result that can be safely cast to a std::shared_ptr<Result_t> * (a pointer to shared pointer) and should be used as the action's output result.

In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Examples

See this tutorial for an example implementation of an action helper.

It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.

Definition at line 2986 of file RInterface.hxx.

◆ Cache() [1/4]

template<typename Proxied , typename DataSource = void>
template<typename... ColumnTypes>
RInterface< RLoopManager > ROOT::RDF::RInterface< Proxied, DataSource >::Cache ( const ColumnNames_t columnList)
inline

Save selected columns in memory.

Template Parameters
ColumnTypesvariadic list of branch/column types.
Parameters
[in]columnListcolumns to be cached in memory.
Returns
a RDataFrame that wraps the cached dataset.

This action returns a new RDataFrame object, completely detached from the originating RDataFrame. The new dataframe only contains the cached columns and stores their content in memory for fast, zero-copy subsequent access.

Use Cache if you know you will only need a subset of the (Filtered) data that fits in memory and that will be accessed many times.

Note
Cache will refuse to process columns with names of the form #columnname. These are special columns made available by some data sources (e.g. RNTupleDS) that represent the size of column columnname, and are not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an Alias(): df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"}).

Example usage:

Types and columns specified:

auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});

Types inferred and columns specified (this invocation relies on jitting):

auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});

Types inferred and columns selected with a regexp (this invocation relies on jitting):

auto cache_all_cols_df = df.Cache(myRegexp);

Definition at line 1461 of file RInterface.hxx.

◆ Cache() [2/4]

template<typename Proxied , typename DataSource = void>
RInterface< RLoopManager > ROOT::RDF::RInterface< Proxied, DataSource >::Cache ( const ColumnNames_t columnList)
inline

Save selected columns in memory.

Parameters
[in]columnListcolumns to be cached in memory
Returns
a RDataFrame that wraps the cached dataset.

See the previous overloads for more information.

Definition at line 1473 of file RInterface.hxx.

◆ Cache() [3/4]

template<typename Proxied , typename DataSource = void>
RInterface< RLoopManager > ROOT::RDF::RInterface< Proxied, DataSource >::Cache ( std::initializer_list< std::string >  columnList)
inline

Save selected columns in memory.

Parameters
[in]columnListcolumns to be cached in memory.
Returns
a RDataFrame that wraps the cached dataset.

See the previous overloads for more information.

Definition at line 1548 of file RInterface.hxx.

◆ Cache() [4/4]

template<typename Proxied , typename DataSource = void>
RInterface< RLoopManager > ROOT::RDF::RInterface< Proxied, DataSource >::Cache ( std::string_view  columnNameRegexp = "")
inline

Save selected columns in memory.

Parameters
[in]columnNameRegexpThe regular expression to match the column names to be selected. The presence of a '^' at the beginning and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
Returns
a RDataFrame that wraps the cached dataset.

The existing columns are matched against the regular expression. If the string provided is empty, all columns are selected. See the previous overloads for more information.

Definition at line 1522 of file RInterface.hxx.

◆ CacheImpl()

template<typename Proxied , typename DataSource = void>
template<typename... ColTypes, std::size_t... S>
RInterface< RLoopManager > ROOT::RDF::RInterface< Proxied, DataSource >::CacheImpl ( const ColumnNames_t columnList,
std::index_sequence< S... >   
)
inlineprivate

Implementation of cache.

Definition at line 3202 of file RInterface.hxx.

◆ CallCreateActionWithoutColsIfPossible() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename Helper , typename ActionResultType , typename... Others>
RResultPtr< ActionResultType > ROOT::RDF::RInterface< Proxied, DataSource >::CallCreateActionWithoutColsIfPossible ( const std::shared_ptr< ActionResultType > &  ,
const std::shared_ptr< Helper > &  ,
Others...   
)
inlineprivate

Definition at line 3303 of file RInterface.hxx.

◆ CallCreateActionWithoutColsIfPossible() [2/2]

template<typename Proxied , typename DataSource = void>
template<typename Helper , typename ActionResultType >
auto ROOT::RDF::RInterface< Proxied, DataSource >::CallCreateActionWithoutColsIfPossible ( const std::shared_ptr< ActionResultType > &  resPtr,
const std::shared_ptr< Helper > &  hPtr,
TTraits::TypeList< RDFDetail::RInferredType  
) -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
inlineprivate

Definition at line 3293 of file RInterface.hxx.

◆ Count()

template<typename Proxied , typename DataSource = void>
RResultPtr< ULong64_t > ROOT::RDF::RInterface< Proxied, DataSource >::Count ( )
inline

Return the number of entries processed (lazy action).

Returns
the number of entries wrapped in a RResultPtr.

Useful e.g. for counting the number of entries passing a certain filter (see also Report). This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

auto nEntriesAfterCuts = myFilteredDf.Count();

Definition at line 1731 of file RInterface.hxx.

◆ DefaultValueFor()

template<typename Proxied , typename DataSource = void>
template<typename T >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::DefaultValueFor ( std::string_view  column,
const T &  defaultValue 
)
inline

In case the value in the given column is missing, provide a default value.

Template Parameters
TThe type of the column
Parameters
[in]columnColumn name where missing values should be replaced by the given default value
[in]defaultValueValue to provide instead of a missing value
Returns
The node of the graph that will provide a default value

This operation is useful in case an entry of the dataset is incomplete, i.e. if one or more of the columns do not have valid values. It does not modify the values of the column, but in case any entry is missing, it will provide the default value to downstream nodes instead.

Use cases include:

  • When processing multiple files, one or more of them is missing a column
  • In horizontal joining with entry matching, a certain dataset has no match for the current entry.

Example usage:

// Assume a dataset with columns [idx, x] matching another dataset with
// columns [idx, y]. For idx == 42, the right-hand dataset has no match
ROOT::RDataFrame df{dataset};
auto df_default = df.DefaultValueFor("y", 33)
.Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
auto colz = df_default.Take<int>("z");
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Double_t y[n]
Definition legend1.C:17
df = ROOT.RDataFrame(dataset)
df_default = df.DefaultValueFor("y", 33).Define("z", "x + y")
colz = df_default.Take[int]("z")

Definition at line 674 of file RInterface.hxx.

◆ Define() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename F , typename std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Define ( std::string_view  name,
F  expression,
const ColumnNames_t columns = {} 
)
inline

Define a new column.

Parameters
[in]nameThe name of the defined column.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the producer function.
Returns
the first node of the computation graph for which the new quantity is defined.

Define a column that will be visible from all subsequent nodes of the functional chain. The expression is only evaluated for entries that pass all the preceding filters. A new variable is created called name, accessible as if it was contained in the dataset from subsequent transformations/actions.

Use cases include:

  • caching the results of complex calculations for easy and efficient multiple access
  • extraction of quantities of interest from complex objects

An exception is thrown if the name of the new column is already in use in this branch of the computation graph.

Example usage:

// assuming a function with signature:
double myComplexCalculation(const RVec<float> &muon_pts);
// we can pass it directly to Define
auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
// alternatively, we can pass the body of the function as a string, as in Filter:
auto df_with_define = df.Define("newColumn", "x*x + y*y");
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1529
Note
If the body of the string expression contains an explicit return statement (even if it is in a nested scope), RDataFrame will not add another one in front of the expression. So this will not work:
df.Define("x2", "Map(v, [](float e) { return e*e; })")
but instead this will:
df.Define("x2", "return Map(v, [](float e) { return e*e; })")

Definition at line 443 of file RInterface.hxx.

◆ Define() [2/2]

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Define ( std::string_view  name,
std::string_view  expression 
)
inline

Define a new column.

Parameters
[in]nameThe name of the defined column.
[in]expressionAn expression in C++ which represents the defined value
Returns
the first node of the computation graph for which the new quantity is defined.

The expression is just-in-time compiled and used to produce the column entries. It must be valid C++ syntax in which variable names are substituted with the names of branches/columns.

Note
If the body of the string expression contains an explicit return statement (even if it is in a nested scope), RDataFrame will not add another one in front of the expression. So this will not work:
df.Define("x2", "Map(v, [](float e) { return e*e; })")
but instead this will:
df.Define("x2", "return Map(v, [](float e) { return e*e; })")

Refer to the first overload of this method for the full documentation.

Definition at line 530 of file RInterface.hxx.

◆ DefineImpl() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename F , typename DefineType , typename RetType = typename TTraits::CallableTraits<F>::ret_type>
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > ROOT::RDF::RInterface< Proxied, DataSource >::DefineImpl ( std::string_view  name,
F &&  expression,
const ColumnNames_t columns,
const std::string &  where 
)
inlineprivate

Definition at line 3102 of file RInterface.hxx.

◆ DefineImpl() [2/2]

template<typename Proxied , typename DataSource = void>
template<typename F , typename DefineType , typename RetType = typename TTraits::CallableTraits<F>::ret_type, bool IsFStringConv = std::is_convertible<F, std::string>::value, bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > ROOT::RDF::RInterface< Proxied, DataSource >::DefineImpl ( std::string_view  ,
F  ,
const ColumnNames_t ,
const std::string &   
)
inlineprivate

Definition at line 3153 of file RInterface.hxx.

◆ DefinePerSample() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename F , typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::DefinePerSample ( std::string_view  name,
F  expression 
)
inline

Define a new column that is updated when the input sample changes.

Parameters
[in]nameThe name of the defined column.
[in]expressionA C++ callable that computes the new value of the defined column.
Returns
the first node of the computation graph for which the new quantity is defined.

The signature of the callable passed as second argument should be T(unsigned int slot, const ROOT::RDF::RSampleInfo &id) where:

  • T is the type of the defined column
  • slot is a number in the range [0, nThreads) that is different for each processing thread. This can simplify the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
  • id is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is being processed (see the class docs for more information).

DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being processed or to inject a callback into the event loop that is only called when the processing of a new sample starts rather than at every entry.

The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.

Example usage:

ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
df.DefinePerSample("weightbysample",
[](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
{ return id.Contains("sample1") ? 1.0f : 2.0f; });
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...

Definition at line 737 of file RInterface.hxx.

◆ DefinePerSample() [2/2]

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::DefinePerSample ( std::string_view  name,
std::string_view  expression 
)
inline

Define a new column that is updated when the input sample changes.

Parameters
[in]nameThe name of the defined column.
[in]expressionA valid C++ expression as a string, which will be used to compute the defined value.
Returns
the first node of the computation graph for which the new quantity is defined.

The expression is just-in-time compiled and used to produce the column entries. It must be valid C++ syntax and the usage of the special variable names rdfslot_ and rdfsampleinfo_ is permitted, where these variables will take the same values as the slot and id parameters described at the DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.

Example usage:

df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
Note
If you have declared some C++ function to the interpreter, the correct syntax to call that function with this overload of DefinePerSample is by calling it explicitly with the special names rdfslot_ and rdfsampleinfo_ as input parameters. This is for example the correct way to call this overload when working in PyROOT:
ROOT.gInterpreter.Declare(
"""
float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
return id.Contains("sample1") ? 1.0f : 2.0f;
}
""")
df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain column names other than those mentioned above: the expression is evaluated once before the processing of the sample even starts, so column values are not accessible.

Definition at line 798 of file RInterface.hxx.

◆ DefineSlot()

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::DefineSlot ( std::string_view  name,
F  expression,
const ColumnNames_t columns = {} 
)
inline

Define a new column with a value dependent on the processing slot.

Parameters
[in]nameThe name of the defined column.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the producer function (excluding the slot number).
Returns
the first node of the computation graph for which the new quantity is defined.

This alternative implementation of Define is meant as a helper to evaluate new column values in a thread-safe manner. The expression must be a callable of signature R(unsigned int, T1, T2, ...) where T1, T2... are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.

The following two calls are equivalent, although DefineSlot is slightly more performant:

int function(unsigned int, double, double);
df.Define("x", function, {"rdfslot_", "column1", "column2"})
df.DefineSlot("x", function, {"column1", "column2"})

See Define() for more information.

Definition at line 472 of file RInterface.hxx.

◆ DefineSlotEntry()

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::DefineSlotEntry ( std::string_view  name,
F  expression,
const ColumnNames_t columns = {} 
)
inline

Define a new column with a value dependent on the processing slot and the current entry.

Parameters
[in]nameThe name of the defined column.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the producer function (excluding slot and entry).
Returns
the first node of the computation graph for which the new quantity is defined.

This alternative implementation of Define is meant as a helper in writing entry-specific, thread-safe custom columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where T1, T2... are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter is reserved for a ULong64_t representing the current entry being processed by the current thread.

The following two Defines are equivalent, although DefineSlotEntry is slightly more performant:

int function(unsigned int, ULong64_t, double, double);
Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
DefineSlotEntry("x", function, {"column1", "column2"})
unsigned long long ULong64_t
Definition RtypesCore.h:70
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.

See Define() for more information.

Definition at line 502 of file RInterface.hxx.

◆ Display() [1/4]

template<typename Proxied , typename DataSource = void>
template<typename... ColumnTypes>
RResultPtr< RDisplay > ROOT::RDF::RInterface< Proxied, DataSource >::Display ( const ColumnNames_t columnList,
size_t  nRows = 5,
size_t  nMaxCollectionElements = 10 
)
inline

Provides a representation of the columns in the dataset.

Template Parameters
ColumnTypesvariadic list of branch/column types.
Parameters
[in]columnListNames of the columns to be displayed.
[in]nRowsNumber of events for each column to be displayed.
[in]nMaxCollectionElementsMaximum number of collection elements to display per row.
Returns
the RDisplay instance wrapped in a RResultPtr.

This function returns a RResultPtr<RDisplay> containing all the entries to be displayed, organized in a tabular form. RDisplay will either print on the standard output a summarized version through RDisplay::Print() or will return a complete version through RDisplay::AsString().

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
auto d1 = rdf.Display("");
// Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
auto d2 = d.Display({"x", "y"}, 128);
// Printing the short representations, the event loop will run
d1->Print();
d2->Print();

Definition at line 3031 of file RInterface.hxx.

◆ Display() [2/4]

template<typename Proxied , typename DataSource = void>
RResultPtr< RDisplay > ROOT::RDF::RInterface< Proxied, DataSource >::Display ( const ColumnNames_t columnList,
size_t  nRows = 5,
size_t  nMaxCollectionElements = 10 
)
inline

Provides a representation of the columns in the dataset.

Parameters
[in]columnListNames of the columns to be displayed.
[in]nRowsNumber of events for each column to be displayed.
[in]nMaxCollectionElementsMaximum number of collection elements to display per row.
Returns
the RDisplay instance wrapped in a RResultPtr.

This overload automatically infers the column types. See the previous overloads for further details.

Invoked when no types are specified to Display

Definition at line 3054 of file RInterface.hxx.

◆ Display() [3/4]

template<typename Proxied , typename DataSource = void>
RResultPtr< RDisplay > ROOT::RDF::RInterface< Proxied, DataSource >::Display ( std::initializer_list< std::string >  columnList,
size_t  nRows = 5,
size_t  nMaxCollectionElements = 10 
)
inline

Provides a representation of the columns in the dataset.

Parameters
[in]columnListNames of the columns to be displayed.
[in]nRowsNumber of events for each column to be displayed.
[in]nMaxCollectionElementsMaximum number of collection elements to display per row.
Returns
the RDisplay instance wrapped in a RResultPtr.

See the previous overloads for further details.

Definition at line 3093 of file RInterface.hxx.

◆ Display() [4/4]

template<typename Proxied , typename DataSource = void>
RResultPtr< RDisplay > ROOT::RDF::RInterface< Proxied, DataSource >::Display ( std::string_view  columnNameRegexp = "",
size_t  nRows = 5,
size_t  nMaxCollectionElements = 10 
)
inline

Provides a representation of the columns in the dataset.

Parameters
[in]columnNameRegexpA regular expression to select the columns.
[in]nRowsNumber of events for each column to be displayed.
[in]nMaxCollectionElementsMaximum number of collection elements to display per row.
Returns
the RDisplay instance wrapped in a RResultPtr.

The existing columns are matched against the regular expression. If the string provided is empty, all columns are selected. See the previous overloads for further details.

Definition at line 3077 of file RInterface.hxx.

◆ Fill()

template<typename Proxied , typename DataSource = void>
template<typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T >
RResultPtr< std::decay_t< T > > ROOT::RDF::RInterface< Proxied, DataSource >::Fill ( T &&  model,
const ColumnNames_t columnList 
)
inline

Return an object of type T on which T::Fill will be called once per event (lazy action).

Type T must provide at least:

  • a copy-constructor
  • a Fill method that accepts as many arguments and with same types as the column names passed as columnList (these types can also be passed as template parameters to this method)
  • a Merge method with signature Merge(TCollection *) or Merge(const std::vector<T *>&) that merges the objects passed as argument into the object on which Merge was called (analogous to TH1::Merge). Note that if the signature that takes a TCollection* is used, then T must inherit from TObject (to allow insertion in the TCollection*).
Template Parameters
FirstColumnThe first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
OtherColumnsA list of the other types of the columns the values of which are used to fill the object.
TThe type of the object to fill. Automatically deduced.
Parameters
[in]modelThe model to be considered to build the new return value.
[in]columnListA list containing the names of the columns that will be passed when calling Fill
Returns
the filled object wrapped in a RResultPtr.

The user gives up ownership of the model object. The list of column names to be used for filling must always be specified. This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

MyClass obj;
// Deduce column types (this invocation needs jitting internally, and in this case
// MyClass needs to be known to the interpreter)
auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
// explicit column types
auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});

Definition at line 2538 of file RInterface.hxx.

◆ Filter() [1/4]

template<typename Proxied , typename DataSource = void>
template<typename F , std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Filter ( F  f,
const ColumnNames_t columns = {},
std::string_view  name = "" 
)
inline

Append a filter to the call graph.

Parameters
[in]fFunction, lambda expression, functor class or any other callable object. It must return a bool signalling whether the event has passed the selection (true) or not (false).
[in]columnsNames of the columns/branches in input to the filter function.
[in]nameOptional name of this filter. See Report.
Returns
the filter node of the computation graph.

Append a filter node at the point of the call graph corresponding to the object this method is called on. The callable f should not have side-effects (e.g. modification of an external or static variable) to ensure correct results when implicit multi-threading is active.

RDataFrame only evaluates filters when necessary: if multiple filters are chained one after another, they are executed in order and the first one returning false causes the event to be discarded. Even if multiple actions or transformations depend on the same filter, it is executed once per entry. If its result is requested more than once, the cached result is served.

Example usage:

// C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
auto filtered = df.Filter(myCut, {"x", "y"});
// String: it must contain valid C++ except that column names can be used instead of variable names
auto filtered = df.Filter("x*y > 0");
Note
If the body of the string expression contains an explicit return statement (even if it is in a nested scope), RDataFrame will not add another one in front of the expression. So this will not work:
df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
but instead this will:
df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")

Definition at line 221 of file RInterface.hxx.

◆ Filter() [2/4]

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Filter ( F  f,
const std::initializer_list< std::string > &  columns 
)
inline

Append a filter to the call graph.

Parameters
[in]fFunction, lambda expression, functor class or any other callable object. It must return a bool signalling whether the event has passed the selection (true) or not (false).
[in]columnsNames of the columns/branches in input to the filter function.
Returns
the filter node of the computation graph.

Refer to the first overload of this method for the full documentation.

Definition at line 260 of file RInterface.hxx.

◆ Filter() [3/4]

template<typename Proxied , typename DataSource = void>
template<typename F , std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Filter ( F  f,
std::string_view  name 
)
inline

Append a filter to the call graph.

Parameters
[in]fFunction, lambda expression, functor class or any other callable object. It must return a bool signalling whether the event has passed the selection (true) or not (false).
[in]nameOptional name of this filter. See Report.
Returns
the filter node of the computation graph.

Refer to the first overload of this method for the full documentation.

Definition at line 244 of file RInterface.hxx.

◆ Filter() [4/4]

template<typename Proxied , typename DataSource = void>
RInterface< RDFDetail::RJittedFilter, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Filter ( std::string_view  expression,
std::string_view  name = "" 
)
inline

Append a filter to the call graph.

Parameters
[in]expressionThe filter expression in C++
[in]nameOptional name of this filter. See Report.
Returns
the filter node of the computation graph.

The expression is just-in-time compiled and used to filter entries. It must be valid C++ syntax in which variable names are substituted with the names of branches/columns.

Example usage:

auto filtered_df = df.Filter("myCollection.size() > 3");
auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
Note
If the body of the string expression contains an explicit return statement (even if it is in a nested scope), RDataFrame will not add another one in front of the expression. So this will not work:
df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
but instead this will:
df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")

Definition at line 290 of file RInterface.hxx.

◆ FilterAvailable()

template<typename Proxied , typename DataSource = void>
RInterface< RDFDetail::RFilterWithMissingValues< Proxied >, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::FilterAvailable ( std::string_view  column)
inline

Discard entries with missing values.

Parameters
[in]columnColumn name whose entries with missing values should be discarded
Returns
The filter node of the computation graph

This operation is useful in case an entry of the dataset is incomplete, i.e. if one or more of the columns do not have valid values. If the value of the input column is missing for an entry, the entire entry will be discarded from the rest of this branch of the computation graph.

Use cases include:

  • When processing multiple files, one or more of them is missing a column
  • In horizontal joining with entry matching, a certain dataset has no match for the current entry.

Example usage:

# Assume a dataset with columns [idx, x] matching another dataset with
# columns [idx, y]. For idx == 42, the right-hand dataset has no match
df = ROOT.RDataFrame(dataset)
df_nomissing = df.FilterAvailable("idx").Define("z", "x + y")
colz = df_nomissing.Take[int]("z")
// Assume a dataset with columns [idx, x] matching another dataset with
// columns [idx, y]. For idx == 42, the right-hand dataset has no match
ROOT::RDataFrame df{dataset};
auto df_nomissing = df.FilterAvailable("idx")
.Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
auto colz = df_nomissing.Take<int>("z");
Note
See FilterMissing() if you want to keep only the entries with missing values instead.

Definition at line 339 of file RInterface.hxx.

◆ FilterMissing()

template<typename Proxied , typename DataSource = void>
RInterface< RDFDetail::RFilterWithMissingValues< Proxied >, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::FilterMissing ( std::string_view  column)
inline

Keep only the entries that have missing values.

Parameters
[in]columnColumn name whose entries with missing values should be kept
Returns
The filter node of the computation graph

This operation is useful in case an entry of the dataset is incomplete, i.e. if one or more of the columns do not have valid values. It only keeps the entries for which the value of the input column is missing.

Use cases include:

  • When processing multiple files, one or more of them is missing a column
  • In horizontal joining with entry matching, a certain dataset has no match for the current entry.

Example usage:

# Assume a dataset made of two files vertically chained together, one has
# column "x" and the other has column "y"
df = ROOT.RDataFrame(dataset)
df_valid_col_x = df.FilterMissing("y")
df_valid_col_y = df.FilterMissing("x")
display_x = df_valid_col_x.Display(("x",))
display_y = df_valid_col_y.Display(("y",))
// Assume a dataset made of two files vertically chained together, one has
// column "x" and the other has column "y"
ROOT::RDataFrame df{dataset};
auto df_valid_col_x = df.FilterMissing("y");
auto df_valid_col_y = df.FilterMissing("x");
auto display_x = df_valid_col_x.Display<int>({"x"});
auto display_y = df_valid_col_y.Display<int>({"y"});
Note
See FilterAvailable() if you want to discard the entries in case there is a missing value instead.

Definition at line 390 of file RInterface.hxx.

◆ Foreach()

template<typename Proxied , typename DataSource = void>
template<typename F >
void ROOT::RDF::RInterface< Proxied, DataSource >::Foreach ( F  f,
const ColumnNames_t columns = {} 
)
inline

Execute a user-defined function on each entry (instant action).

Parameters
[in]fFunction, lambda expression, functor class or any other callable object performing user defined calculations.
[in]columnsNames of the columns/branches in input to the user function.

The callable f is invoked once per entry. This is an instant action: upon invocation, an event loop as well as execution of all scheduled actions is triggered. Users are responsible for the thread-safety of this callable when executing with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).

Example usage:

myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});

Definition at line 1613 of file RInterface.hxx.

◆ ForeachSlot()

template<typename Proxied , typename DataSource = void>
template<typename F >
void ROOT::RDF::RInterface< Proxied, DataSource >::ForeachSlot ( F  f,
const ColumnNames_t columns = {} 
)
inline

Execute a user-defined function requiring a processing slot index on each entry (instant action).

Parameters
[in]fFunction, lambda expression, functor class or any other callable object performing user defined calculations.
[in]columnsNames of the columns/branches in input to the user function.

Same as Foreach, but the user-defined function takes an extra unsigned int as its first parameter, the processing slot index. This slot index will be assigned a different value, 0 to poolSize - 1, for each thread of execution. This is meant as a helper in writing thread-safe Foreach actions when using RDataFrame after ROOT::EnableImplicitMT(). The user-defined processing callable is able to follow different streams of processing indexed by the first parameter. ForeachSlot works just as well with single-thread execution: in that case slot will always be 0.

Example usage:

myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});

Definition at line 1643 of file RInterface.hxx.

◆ GetFilterNames()

template<typename Proxied , typename DataSource = void>
std::vector< std::string > ROOT::RDF::RInterface< Proxied, DataSource >::GetFilterNames ( )
inline

Returns the names of the filters created.

Returns
the container of filters names.

If called on a root node, all the filters in the computation graph will be printed. For any other node, only the filters upstream of that node are printed. Filters without a name are printed as "Unnamed Filter". This is not an action nor a transformation, just a query to the RDataFrame object.

Example usage:

auto filtNames = d.GetFilterNames();
for (auto &&filtName : filtNames) std::cout << filtName << std::endl;

Definition at line 2829 of file RInterface.hxx.

◆ GetProxiedPtr()

template<typename Proxied , typename DataSource = void>
const std::shared_ptr< Proxied > & ROOT::RDF::RInterface< Proxied, DataSource >::GetProxiedPtr ( ) const
inlineprotected

Definition at line 3320 of file RInterface.hxx.

◆ Graph()

template<typename Proxied , typename DataSource = void>
template<typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
RResultPtr<::TGraph > ROOT::RDF::RInterface< Proxied, DataSource >::Graph ( std::string_view  x = "",
std::string_view  y = "" 
)
inline

Fill and return a TGraph object (lazy action).

Template Parameters
XThe type of the column used to fill the x axis.
YThe type of the column used to fill the y axis.
Parameters
[in]xThe name of the column that will fill the x axis.
[in]yThe name of the column that will fill the y axis.
Returns
the TGraph wrapped in a RResultPtr.

Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph is filled with each one of the elements of the container. If multithreading is enabled, the order in which points are inserted is undefined. If the graph has to be drawn, it is suggested to sort it on the x axis before drawing. A name and a title are given to the TGraph based on the input column names.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myGraph1 = myDf.Graph("xValues", "yValues");
// Explicit column types
auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
Note
Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory and the caller is responsible for its lifetime (in particular, a typical source of confusion is that if the result graph goes out of scope before the end of the program, ROOT might display a blank canvas).

Definition at line 2227 of file RInterface.hxx.

◆ GraphAsymmErrors()

template<typename Proxied , typename DataSource = void>
template<typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType, typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType, typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
RResultPtr<::TGraphAsymmErrors > ROOT::RDF::RInterface< Proxied, DataSource >::GraphAsymmErrors ( std::string_view  x = "",
std::string_view  y = "",
std::string_view  exl = "",
std::string_view  exh = "",
std::string_view  eyl = "",
std::string_view  eyh = "" 
)
inline

Fill and return a TGraphAsymmErrors object (lazy action).

Parameters
[in]xThe name of the column that will fill the x axis.
[in]yThe name of the column that will fill the y axis.
[in]exlThe name of the column of X low errors
[in]exhThe name of the column of X high errors
[in]eylThe name of the column of Y low errors
[in]eyhThe name of the column of Y high errors
Returns
the TGraphAsymmErrors wrapped in a RResultPtr.

Columns can be of a container type (e.g. std::vector<double>), in which case the graph is filled with each one of the elements of the container. If Multithreading is enabled, the order in which points are inserted is undefined.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
// Explicit column types
using f = float;
auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
#define f(i)
Definition RSha256.hxx:104
Note
Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory and the caller is responsible for its lifetime (in particular, a typical source of confusion is that if the result graph goes out of scope before the end of the program, ROOT might display a blank canvas).

Definition at line 2280 of file RInterface.hxx.

◆ Histo1D() [1/5]

template<typename Proxied , typename DataSource = void>
template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TH1D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo1D ( const TH1DModel model,
std::string_view  vName,
std::string_view  wName 
)
inline

Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).

Template Parameters
VThe type of the column used to fill the histogram.
WThe type of the column used as weights.
Parameters
[in]modelThe returned histogram will be constructed using this as a model.
[in]vNameThe name of the column that will fill the histogram.
[in]wNameThe name of the column that will provide the weights.
Returns
the monodimensional histogram wrapped in a RResultPtr.

See the description of the first Histo1D() overload for more details.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
// Explicit column type
auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");

Definition at line 1868 of file RInterface.hxx.

◆ Histo1D() [2/5]

template<typename Proxied , typename DataSource = void>
template<typename V , typename W >
RResultPtr<::TH1D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo1D ( const TH1DModel model = {"", "", 128u, 0., 0.})
inline

Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).

Template Parameters
VThe type of the column used to fill the histogram.
WThe type of the column used as weights.
Parameters
[in]modelThe returned histogram will be constructed using this as a model.
Returns
the monodimensional histogram wrapped in a RResultPtr.

This overload will use the first two default columns as column names. See the description of the first Histo1D() overload for more details.

Definition at line 1922 of file RInterface.hxx.

◆ Histo1D() [3/5]

template<typename Proxied , typename DataSource = void>
template<typename V = RDFDetail::RInferredType>
RResultPtr<::TH1D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo1D ( const TH1DModel model = {"", "", 128u, 0., 0.},
std::string_view  vName = "" 
)
inline

Fill and return a one-dimensional histogram with the values of a column (lazy action).

Template Parameters
VThe type of the column used to fill the histogram.
Parameters
[in]modelThe returned histogram will be constructed using this as a model.
[in]vNameThe name of the column that will fill the histogram.
Returns
the monodimensional histogram wrapped in a RResultPtr.

Columns can be of a container type (e.g. std::vector<double>), in which case the histogram is filled with each one of the elements of the container. In case multiple columns of container type are provided (e.g. values and weights) they must have the same length for each one of the events (but possibly different lengths between events). This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
// Explicit column type
auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
Note
Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory and the caller is responsible for its lifetime (in particular, a typical source of confusion is that if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).

Definition at line 1806 of file RInterface.hxx.

◆ Histo1D() [4/5]

template<typename Proxied , typename DataSource = void>
template<typename V = RDFDetail::RInferredType>
RResultPtr<::TH1D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo1D ( std::string_view  vName)
inline

Fill and return a one-dimensional histogram with the values of a column (lazy action).

Template Parameters
VThe type of the column used to fill the histogram.
Parameters
[in]vNameThe name of the column that will fill the histogram.
Returns
the monodimensional histogram wrapped in a RResultPtr.

This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.). The "name" and "title" strings are built starting from the input column name. See the description of the first Histo1D() overload for more details.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto myHist1 = myDf.Histo1D("myColumn");
// Explicit column type
auto myHist2 = myDf.Histo1D<float>("myColumn");

Definition at line 1842 of file RInterface.hxx.

◆ Histo1D() [5/5]

template<typename Proxied , typename DataSource = void>
template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TH1D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo1D ( std::string_view  vName,
std::string_view  wName 
)
inline

Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).

Template Parameters
VThe type of the column used to fill the histogram.
WThe type of the column used as weights.
Parameters
[in]vNameThe name of the column that will fill the histogram.
[in]wNameThe name of the column that will provide the weights.
Returns
the monodimensional histogram wrapped in a RResultPtr.

This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.). The "name" and "title" strings are built starting from the input column names. See the description of the first Histo1D() overload for more details.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myHist1 = myDf.Histo1D("myValue", "myweight");
// Explicit column types
auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");

Definition at line 1902 of file RInterface.hxx.

◆ Histo2D() [1/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 , typename V2 , typename W >
RResultPtr<::TH2D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo2D ( const TH2DModel model)
inline

Definition at line 2017 of file RInterface.hxx.

◆ Histo2D() [2/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TH2D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo2D ( const TH2DModel model,
std::string_view  v1Name,
std::string_view  v2Name,
std::string_view  wName 
)
inline

Fill and return a weighted two-dimensional histogram (lazy action).

Template Parameters
V1The type of the column used to fill the x axis of the histogram.
V2The type of the column used to fill the y axis of the histogram.
WThe type of the column used for the weights of the histogram.
Parameters
[in]modelThe returned histogram will be constructed using this as a model.
[in]v1NameThe name of the column that will fill the x axis.
[in]v2NameThe name of the column that will fill the y axis.
[in]wNameThe name of the column that will provide the weights.
Returns
the bidimensional histogram wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
// Explicit column types
auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");

See the documentation of the first Histo2D() overload for more details.

Definition at line 1999 of file RInterface.hxx.

◆ Histo2D() [3/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
RResultPtr<::TH2D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo2D ( const TH2DModel model,
std::string_view  v1Name = "",
std::string_view  v2Name = "" 
)
inline

Fill and return a two-dimensional histogram (lazy action).

Template Parameters
V1The type of the column used to fill the x axis of the histogram.
V2The type of the column used to fill the y axis of the histogram.
Parameters
[in]modelThe returned histogram will be constructed using this as a model.
[in]v1NameThe name of the column that will fill the x axis.
[in]v2NameThe name of the column that will fill the y axis.
Returns
the bidimensional histogram wrapped in a RResultPtr.

Columns can be of a container type (e.g. std::vector<double>), in which case the histogram is filled with each one of the elements of the container. In case multiple columns of container type are provided (e.g. values and weights) they must have the same length for each one of the events (but possibly different lengths between events). This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
// Explicit column types
auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
Note
Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory and the caller is responsible for its lifetime (in particular, a typical source of confusion is that if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).

Definition at line 1956 of file RInterface.hxx.

◆ Histo3D() [1/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 , typename V2 , typename V3 , typename W >
RResultPtr<::TH3D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo3D ( const TH3DModel model)
inline

Definition at line 2119 of file RInterface.hxx.

◆ Histo3D() [2/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TH3D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo3D ( const TH3DModel model,
std::string_view  v1Name,
std::string_view  v2Name,
std::string_view  v3Name,
std::string_view  wName 
)
inline

Fill and return a weighted three-dimensional histogram (lazy action).

Template Parameters
V1The type of the column used to fill the x axis of the histogram. Inferred if not present.
V2The type of the column used to fill the y axis of the histogram. Inferred if not present.
V3The type of the column used to fill the z axis of the histogram. Inferred if not present.
WThe type of the column used for the weights of the histogram. Inferred if not present.
Parameters
[in]modelThe returned histogram will be constructed using this as a model.
[in]v1NameThe name of the column that will fill the x axis.
[in]v2NameThe name of the column that will fill the y axis.
[in]v3NameThe name of the column that will fill the z axis.
[in]wNameThe name of the column that will provide the weights.
Returns
the tridimensional histogram wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
"myValueX", "myValueY", "myValueZ", "myWeight");
// Explicit column types
using d_t = double;
auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
"myValueX", "myValueY", "myValueZ", "myWeight");

See the documentation of the first Histo3D() overload for more details.

Definition at line 2100 of file RInterface.hxx.

◆ Histo3D() [3/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename V3 = RDFDetail::RInferredType>
RResultPtr<::TH3D > ROOT::RDF::RInterface< Proxied, DataSource >::Histo3D ( const TH3DModel model,
std::string_view  v1Name = "",
std::string_view  v2Name = "",
std::string_view  v3Name = "" 
)
inline

Fill and return a three-dimensional histogram (lazy action).

Template Parameters
V1The type of the column used to fill the x axis of the histogram. Inferred if not present.
V2The type of the column used to fill the y axis of the histogram. Inferred if not present.
V3The type of the column used to fill the z axis of the histogram. Inferred if not present.
Parameters
[in]modelThe returned histogram will be constructed using this as a model.
[in]v1NameThe name of the column that will fill the x axis.
[in]v2NameThe name of the column that will fill the y axis.
[in]v3NameThe name of the column that will fill the z axis.
Returns
the tridimensional histogram wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
"myValueX", "myValueY", "myValueZ");
// Explicit column types
auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
"myValueX", "myValueY", "myValueZ");
Note
Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory and the caller is responsible for its lifetime (in particular, a typical source of confusion is that if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).

Definition at line 2051 of file RInterface.hxx.

◆ HistoND() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename FirstColumn , typename... OtherColumns>
RResultPtr<::THnD > ROOT::RDF::RInterface< Proxied, DataSource >::HistoND ( const THnDModel model,
const ColumnNames_t columnList 
)
inline

Fill and return an N-dimensional histogram (lazy action).

Template Parameters
FirstColumnThe first type of the column the values of which are used to fill the object. Inferred if not present.
OtherColumnsA list of the other types of the columns the values of which are used to fill the object.
Parameters
[in]modelThe returned histogram will be constructed using this as a model.
[in]columnListA list containing the names of the columns that will be passed when calling Fill. (N columns for unweighted filling, or N+1 columns for weighted filling)
Returns
the N-dimensional histogram wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.

Example usage:

auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
{40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
{"col0", "col1", "col2", "col3"});

Definition at line 2147 of file RInterface.hxx.

◆ HistoND() [2/2]

template<typename Proxied , typename DataSource = void>
RResultPtr<::THnD > ROOT::RDF::RInterface< Proxied, DataSource >::HistoND ( const THnDModel model,
const ColumnNames_t columnList 
)
inline

Fill and return an N-dimensional histogram (lazy action).

Parameters
[in]modelThe returned histogram will be constructed using this as a model.
[in]columnListA list containing the names of the columns that will be passed when calling Fill (N columns for unweighted filling, or N+1 columns for weighted filling)
Returns
the N-dimensional histogram wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

auto myFilledObj = myDf.HistoND({"name","title", 4,
{40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
{"col0", "col1", "col2", "col3"});

Definition at line 2181 of file RInterface.hxx.

◆ JittedVaryImpl()

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::JittedVaryImpl ( const std::vector< std::string > &  colNames,
std::string_view  expression,
const std::vector< std::string > &  variationTags,
std::string_view  variationName,
bool  isSingleColumn 
)
inlineprivate

Definition at line 3257 of file RInterface.hxx.

◆ Max()

template<typename Proxied , typename DataSource = void>
template<typename T = RDFDetail::RInferredType>
RResultPtr< RDFDetail::MaxReturnType_t< T > > ROOT::RDF::RInterface< Proxied, DataSource >::Max ( std::string_view  columnName = "")
inline

Return the maximum of processed column values (lazy action).

Template Parameters
TThe type of the branch/column.
Parameters
[in]columnNameThe name of the branch/column to be treated.
Returns
the maximum value of the selected column wrapped in a RResultPtr.

If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct template specialization of this method. If the type of the column is inferred, the return type is double, the type of the column otherwise.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto maxVal0 = myDf.Max("values");
// Explicit column type
auto maxVal1 = myDf.Max<double>("values");

Definition at line 2672 of file RInterface.hxx.

◆ Mean()

template<typename Proxied , typename DataSource = void>
template<typename T = RDFDetail::RInferredType>
RResultPtr< double > ROOT::RDF::RInterface< Proxied, DataSource >::Mean ( std::string_view  columnName = "")
inline

Return the mean of processed column values (lazy action).

Template Parameters
TThe type of the branch/column.
Parameters
[in]columnNameThe name of the branch/column to be treated.
Returns
the mean value of the selected column wrapped in a RResultPtr.

If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct template specialization of this method.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto meanVal0 = myDf.Mean("values");
// Explicit column type
auto meanVal1 = myDf.Mean<double>("values");

Definition at line 2701 of file RInterface.hxx.

◆ Min()

template<typename Proxied , typename DataSource = void>
template<typename T = RDFDetail::RInferredType>
RResultPtr< RDFDetail::MinReturnType_t< T > > ROOT::RDF::RInterface< Proxied, DataSource >::Min ( std::string_view  columnName = "")
inline

Return the minimum of processed column values (lazy action).

Template Parameters
TThe type of the branch/column.
Parameters
[in]columnNameThe name of the branch/column to be treated.
Returns
the minimum value of the selected column wrapped in a RResultPtr.

If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct template specialization of this method. If the type of the column is inferred, the return type is double, the type of the column otherwise.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto minVal0 = myDf.Min("values");
// Explicit column type
auto minVal1 = myDf.Min<double>("values");

Definition at line 2642 of file RInterface.hxx.

◆ operator RNode()

template<typename Proxied , typename DataSource = void>
ROOT::RDF::RInterface< Proxied, DataSource >::operator RNode ( ) const
inline

Cast any RDataFrame node to a common type ROOT::RDF::RNode.

Different RDataFrame methods return different C++ types. All nodes, however, can be cast to this common type at the cost of a small performance penalty. This allows, for example, storing RDataFrame nodes in a vector, or passing them around via (non-template, C++11) helper functions. Example usage:

// a function that conditionally adds a Range to a RDataFrame node.
RNode MaybeAddRange(RNode df, bool mustAddRange)
{
return mustAddRange ? df.Range(1) : df;
}
// use as :
auto maybeRanged = MaybeAddRange(df, true);
The public interface to the RDataFrame federation of classes.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).

Note that it is not a problem to pass RNode's by value.

Definition at line 175 of file RInterface.hxx.

◆ operator=() [1/2]

template<typename Proxied , typename DataSource = void>
RInterface & ROOT::RDF::RInterface< Proxied, DataSource >::operator= ( const RInterface< Proxied, DataSource > &  )
default

Copy-assignment operator for RInterface.

◆ operator=() [2/2]

template<typename Proxied , typename DataSource = void>
RInterface & ROOT::RDF::RInterface< Proxied, DataSource >::operator= ( RInterface< Proxied, DataSource > &&  )
default

Move-assignment operator for RInterface.

◆ Profile1D() [1/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 , typename V2 , typename W >
RResultPtr<::TProfile > ROOT::RDF::RInterface< Proxied, DataSource >::Profile1D ( const TProfile1DModel model)
inline

Fill and return a one-dimensional profile (lazy action).

See the first Profile1D() overload for more details.

Definition at line 2394 of file RInterface.hxx.

◆ Profile1D() [2/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TProfile > ROOT::RDF::RInterface< Proxied, DataSource >::Profile1D ( const TProfile1DModel model,
std::string_view  v1Name,
std::string_view  v2Name,
std::string_view  wName 
)
inline

Fill and return a one-dimensional profile (lazy action).

Template Parameters
V1The type of the column the values of which are used to fill the profile. Inferred if not present.
V2The type of the column the values of which are used to fill the profile. Inferred if not present.
WThe type of the column the weights of which are used to fill the profile. Inferred if not present.
Parameters
[in]modelThe model to be considered to build the new return value.
[in]v1NameThe name of the column that will fill the x axis.
[in]v2NameThe name of the column that will fill the y axis.
[in]wNameThe name of the column that will provide the weights.
Returns
the monodimensional profile wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
// Explicit column types
auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
"xValues", "yValues", "weight");

See the first Profile1D() overload for more details.

Definition at line 2372 of file RInterface.hxx.

◆ Profile1D() [3/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
RResultPtr<::TProfile > ROOT::RDF::RInterface< Proxied, DataSource >::Profile1D ( const TProfile1DModel model,
std::string_view  v1Name = "",
std::string_view  v2Name = "" 
)
inline

Fill and return a one-dimensional profile (lazy action).

Template Parameters
V1The type of the column the values of which are used to fill the profile. Inferred if not present.
V2The type of the column the values of which are used to fill the profile. Inferred if not present.
Parameters
[in]modelThe model to be considered to build the new return value.
[in]v1NameThe name of the column that will fill the x axis.
[in]v2NameThe name of the column that will fill the y axis.
Returns
the monodimensional profile wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
// Explicit column types
auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
Note
Differently from other ROOT interfaces, the returned profile is not associated to gDirectory and the caller is responsible for its lifetime (in particular, a typical source of confusion is that if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).

Definition at line 2327 of file RInterface.hxx.

◆ Profile2D() [1/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 , typename V2 , typename V3 , typename W >
RResultPtr<::TProfile2D > ROOT::RDF::RInterface< Proxied, DataSource >::Profile2D ( const TProfile2DModel model)
inline

Fill and return a two-dimensional profile (lazy action).

See the first Profile2D() overload for more details.

Definition at line 2498 of file RInterface.hxx.

◆ Profile2D() [2/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr<::TProfile2D > ROOT::RDF::RInterface< Proxied, DataSource >::Profile2D ( const TProfile2DModel model,
std::string_view  v1Name,
std::string_view  v2Name,
std::string_view  v3Name,
std::string_view  wName 
)
inline

Fill and return a two-dimensional profile (lazy action).

Template Parameters
V1The type of the column used to fill the x axis of the histogram. Inferred if not present.
V2The type of the column used to fill the y axis of the histogram. Inferred if not present.
V3The type of the column used to fill the z axis of the histogram. Inferred if not present.
WThe type of the column used for the weights of the histogram. Inferred if not present.
Parameters
[in]modelThe returned profile will be constructed using this as a model.
[in]v1NameThe name of the column that will fill the x axis.
[in]v2NameThe name of the column that will fill the y axis.
[in]v3NameThe name of the column that will fill the z axis.
[in]wNameThe name of the column that will provide the weights.
Returns
the bidimensional profile wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
"xValues", "yValues", "zValues", "weight");
// Explicit column types
auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
"xValues", "yValues", "zValues", "weight");

See the first Profile2D() overload for more details.

Definition at line 2476 of file RInterface.hxx.

◆ Profile2D() [3/3]

template<typename Proxied , typename DataSource = void>
template<typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, typename V3 = RDFDetail::RInferredType>
RResultPtr<::TProfile2D > ROOT::RDF::RInterface< Proxied, DataSource >::Profile2D ( const TProfile2DModel model,
std::string_view  v1Name = "",
std::string_view  v2Name = "",
std::string_view  v3Name = "" 
)
inline

Fill and return a two-dimensional profile (lazy action).

Template Parameters
V1The type of the column used to fill the x axis of the histogram. Inferred if not present.
V2The type of the column used to fill the y axis of the histogram. Inferred if not present.
V3The type of the column used to fill the z axis of the histogram. Inferred if not present.
Parameters
[in]modelThe returned profile will be constructed using this as a model.
[in]v1NameThe name of the column that will fill the x axis.
[in]v2NameThe name of the column that will fill the y axis.
[in]v3NameThe name of the column that will fill the z axis.
Returns
the bidimensional profile wrapped in a RResultPtr.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
"xValues", "yValues", "zValues");
// Explicit column types
auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
"xValues", "yValues", "zValues");
Note
Differently from other ROOT interfaces, the returned profile is not associated to gDirectory and the caller is responsible for its lifetime (in particular, a typical source of confusion is that if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).

Definition at line 2428 of file RInterface.hxx.

◆ Range() [1/2]

template<typename Proxied , typename DataSource = void>
RInterface< RDFDetail::RRange< Proxied >, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Range ( unsigned int  begin,
unsigned int  end,
unsigned int  stride = 1 
)
inline

Creates a node that filters entries based on range: [begin, end).

Parameters
[in]beginInitial entry number considered for this range.
[in]endFinal entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
[in]strideProcess one entry of the [begin, end) range every stride entries. Must be strictly greater than 0.
Returns
the first node of the computation graph for which the event loop is limited to a certain range of entries.

Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset. Ranges are only available if EnableImplicitMT has not been called. Multi-thread ranges are not supported.

Example usage:

auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3

Definition at line 1572 of file RInterface.hxx.

◆ Range() [2/2]

template<typename Proxied , typename DataSource = void>
RInterface< RDFDetail::RRange< Proxied >, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Range ( unsigned int  end)
inline

Creates a node that filters entries based on range.

Parameters
[in]endFinal entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
Returns
a node of the computation graph for which the range is defined.

See the other Range overload for a detailed description.

Definition at line 1593 of file RInterface.hxx.

◆ Redefine() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename F , std::enable_if_t<!std::is_convertible< F, std::string >::value, int > = 0>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Redefine ( std::string_view  name,
F  expression,
const ColumnNames_t columns = {} 
)
inline

Overwrite the value and/or type of an existing column.

Parameters
[in]nameThe name of the column to redefine.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the expression.
Returns
the first node of the computation graph for which the quantity is redefined.

The old value of the column can be used as an input for the expression.

An exception is thrown in case the column to redefine does not already exist. See Define() for more information.

Definition at line 562 of file RInterface.hxx.

◆ Redefine() [2/2]

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Redefine ( std::string_view  name,
std::string_view  expression 
)
inline

Overwrite the value and/or type of an existing column.

Parameters
[in]nameThe name of the column to redefine.
[in]expressionAn expression in C++ which represents the defined value
Returns
the first node of the computation graph for which the new quantity is defined.

The expression is just-in-time compiled and used to produce the column entries. It must be valid C++ syntax in which variable names are substituted with the names of branches/columns.

The old value of the column can be used as an input for the expression. An exception is thrown in case the column to re-define does not already exist.

Aliases cannot be overridden. See the corresponding Define() overload for more information.

Definition at line 620 of file RInterface.hxx.

◆ RedefineSlot()

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::RedefineSlot ( std::string_view  name,
F  expression,
const ColumnNames_t columns = {} 
)
inline

Overwrite the value and/or type of an existing column.

Parameters
[in]nameThe name of the column to redefine.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the producer function (excluding slot).
Returns
the first node of the computation graph for which the new quantity is defined.

The old value of the column can be used as an input for the expression. An exception is thrown in case the column to redefine does not already exist.

See DefineSlot() for more information.

Definition at line 581 of file RInterface.hxx.

◆ RedefineSlotEntry()

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::RedefineSlotEntry ( std::string_view  name,
F  expression,
const ColumnNames_t columns = {} 
)
inline

Overwrite the value and/or type of an existing column.

Parameters
[in]nameThe name of the column to redefine.
[in]expressionFunction, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
[in]columnsNames of the columns/branches in input to the producer function (excluding slot and entry).
Returns
the first node of the computation graph for which the new quantity is defined.

The old value of the column can be used as an input for the expression. An exception is thrown in case the column to re-define does not already exist.

See DefineSlotEntry() for more information.

Definition at line 600 of file RInterface.hxx.

◆ Reduce() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename F , typename T = typename TTraits::CallableTraits<F>::ret_type>
RResultPtr< T > ROOT::RDF::RInterface< Proxied, DataSource >::Reduce ( F  f,
std::string_view  columnName,
const T &  redIdentity 
)
inline

Execute a user-defined reduce operation on the values of a column.

Template Parameters
FThe type of the reduce callable. Automatically deduced.
TThe type of the column to apply the reduction to. Automatically deduced.
Parameters
[in]fA callable with signature T(T,T)
[in]columnNameThe column to be reduced. If omitted, the first default column is used instead.
[in]redIdentityThe reduced object of each thread is initialized to this value.
Returns
the reduced quantity wrapped in a RResultPtr.

Example usage:

auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);

See the description of the first Reduce overload for more information.

Definition at line 1713 of file RInterface.hxx.

◆ Reduce() [2/2]

template<typename Proxied , typename DataSource = void>
template<typename F , typename T = typename TTraits::CallableTraits<F>::ret_type>
RResultPtr< T > ROOT::RDF::RInterface< Proxied, DataSource >::Reduce ( F  f,
std::string_view  columnName = "" 
)
inline

Execute a user-defined reduce operation on the values of a column.

Template Parameters
FThe type of the reduce callable. Automatically deduced.
TThe type of the column to apply the reduction to. Automatically deduced.
Parameters
[in]fA callable with signature T(T,T)
[in]columnNameThe column to be reduced. If omitted, the first default column is used instead.
Returns
the reduced quantity wrapped in a ROOT::RDF::RResultPtr.

A reduction takes two values of a column and merges them into one (e.g. by summing them, taking the maximum, etc). This action performs the specified reduction operation on all processed column values, returning a single value of the same type. The callable f must satisfy the general requirements of a processing function besides having signature T(T,T) where T is the type of column columnName.

The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a default-constructed T object. This is commonly expected to be the neutral/identity element for the specific reduction operation f (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this requirement, users should explicitly specify an initialization value for T by calling the appropriate Reduce overload.

Example usage:

auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Definition at line 1690 of file RInterface.hxx.

◆ Report()

template<typename Proxied , typename DataSource = void>
RResultPtr< RCutFlowReport > ROOT::RDF::RInterface< Proxied, DataSource >::Report ( )
inline

Gather filtering statistics.

Returns
the resulting RCutFlowReport instance wrapped in a RResultPtr.

Calling Report on the main RDataFrame object gathers stats for all named filters in the call graph. Calling this method on a stored chain state (i.e. a graph node different from the first) gathers the stats for all named filters in the chain section between the original RDataFrame and that node (included). Stats are gathered in the same order as the named filters have been added to the graph. A RResultPtr<RCutFlowReport> is returned to allow inspection of the effects cuts had.

This action is lazy: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.

Example usage:

auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
auto cutReport = filtered.Report();
cutReport->Print();
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.

Definition at line 2794 of file RInterface.hxx.

◆ Snapshot() [1/4]

template<typename Proxied , typename DataSource = void>
template<typename... ColumnTypes>
RResultPtr< RInterface< RLoopManager > > ROOT::RDF::RInterface< Proxied, DataSource >::Snapshot ( std::string_view  treename,
std::string_view  filename,
const ColumnNames_t columnList,
const RSnapshotOptions options = RSnapshotOptions() 
)
inline

Save selected columns to disk, in a new TTree treename in file filename.

Template Parameters
ColumnTypesvariadic list of branch/column types.
Parameters
[in]treenameThe name of the output TTree.
[in]filenameThe name of the output TFile.
[in]columnListThe list of names of the columns/branches to be written.
[in]optionsRSnapshotOptions struct with extra options to pass to TFile and TTree.
Returns
a RDataFrame that wraps the snapshotted dataset.

Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.') characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot. When writing a variable size array through Snapshot, it is required that the column indicating its size is also written out and it appears before the array in the columnList.

By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are present, by default all friend top-level branches that have names that do not collide with names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out friend branches with the same names as branches in the main TTree/TChain, using names of the form <friendname>_<branchname> in order to differentiate them from the branches in the main tree/chain.

Writing to a sub-directory

Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to the TTree as part of the TTree name, e.g. df.Snapshot("subdir/t", "f.root") writes TTree t in the sub-directory subdir of file f.root (creating file and sub-directory as needed).

Attention
In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will error out if such a "shuffled" TTree is used in a friendship.
Note
In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
Snapshot will refuse to process columns with names of the form #columnname. These are special columns made available by some data sources (e.g. RNTupleDS) that represent the size of column columnname, and are not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an Alias(): df.Alias("nbar", "#bar").Snapshot(..., {"nbar"}).

Example invocations:

// without specifying template parameters (column types automatically deduced)
df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
// specifying template parameters ("x" is `int`, "y" is `float`)
df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});

To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in RSnapshotOptions:

RSnapshotOptions opts;
opts.fLazy = true;
df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.

Definition at line 1301 of file RInterface.hxx.

◆ Snapshot() [2/4]

template<typename Proxied , typename DataSource = void>
RResultPtr< RInterface< RLoopManager > > ROOT::RDF::RInterface< Proxied, DataSource >::Snapshot ( std::string_view  treename,
std::string_view  filename,
const ColumnNames_t columnList,
const RSnapshotOptions options = RSnapshotOptions() 
)
inline

Save selected columns to disk, in a new TTree treename in file filename.

Parameters
[in]treenameThe name of the output TTree.
[in]filenameThe name of the output TFile.
[in]columnListThe list of names of the columns/branches to be written.
[in]optionsRSnapshotOptions struct with extra options to pass to TFile and TTree.
Returns
a RDataFrame that wraps the snapshotted dataset.

This function returns a RDataFrame built with the output tree as a source. The types of the columns are automatically inferred and do not need to be specified.

See above for a more complete description and example usages.

Definition at line 1319 of file RInterface.hxx.

◆ Snapshot() [3/4]

template<typename Proxied , typename DataSource = void>
RResultPtr< RInterface< RLoopManager > > ROOT::RDF::RInterface< Proxied, DataSource >::Snapshot ( std::string_view  treename,
std::string_view  filename,
std::initializer_list< std::string >  columnList,
const RSnapshotOptions options = RSnapshotOptions() 
)
inline

Save selected columns to disk, in a new TTree treename in file filename.

Parameters
[in]treenameThe name of the output TTree.
[in]filenameThe name of the output TFile.
[in]columnListThe list of names of the columns/branches to be written.
[in]optionsRSnapshotOptions struct with extra options to pass to TFile and TTree.
Returns
a RDataFrame that wraps the snapshotted dataset.

This function returns a RDataFrame built with the output tree as a source. The types of the columns are automatically inferred and do not need to be specified.

See above for a more complete description and example usages.

Definition at line 1417 of file RInterface.hxx.

◆ Snapshot() [4/4]

template<typename Proxied , typename DataSource = void>
RResultPtr< RInterface< RLoopManager > > ROOT::RDF::RInterface< Proxied, DataSource >::Snapshot ( std::string_view  treename,
std::string_view  filename,
std::string_view  columnNameRegexp = "",
const RSnapshotOptions options = RSnapshotOptions() 
)
inline

Save selected columns to disk, in a new TTree treename in file filename.

Parameters
[in]treenameThe name of the output TTree.
[in]filenameThe name of the output TFile.
[in]columnNameRegexpThe regular expression to match the column names to be selected. The presence of a '^' at the beginning and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
[in]optionsRSnapshotOptions struct with extra options to pass to TFile and TTree
Returns
a RDataFrame that wraps the snapshotted dataset.

This function returns a RDataFrame built with the output tree as a source. The types of the columns are automatically inferred and do not need to be specified.

See above for a more complete description and example usages.

Definition at line 1377 of file RInterface.hxx.

◆ SnapshotImpl()

template<typename Proxied , typename DataSource = void>
template<typename... ColumnTypes>
RResultPtr< RInterface< RLoopManager > > ROOT::RDF::RInterface< Proxied, DataSource >::SnapshotImpl ( std::string_view  fullTreeName,
std::string_view  filename,
const ColumnNames_t columnList,
const RSnapshotOptions options 
)
inlineprivate

Definition at line 3161 of file RInterface.hxx.

◆ Stats() [1/2]

template<typename Proxied , typename DataSource = void>
template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
RResultPtr< TStatistic > ROOT::RDF::RInterface< Proxied, DataSource >::Stats ( std::string_view  value,
std::string_view  weight 
)
inline

Return a TStatistic object, filled once per event (lazy action).

Template Parameters
VThe type of the value column
WThe type of the weight column
Parameters
[in]valueThe name of the column with the values to fill the statistics with.
[in]weightThe name of the column with the weights to fill the statistics with.
Returns
the filled TStatistic object wrapped in a RResultPtr.

Example usage:

// Deduce column types (this invocation needs jitting internally)
auto stats0 = myDf.Stats("values", "weights");
// Explicit column types
auto stats1 = myDf.Stats<int, float>("values", "weights");

Definition at line 2596 of file RInterface.hxx.

◆ Stats() [2/2]

template<typename Proxied , typename DataSource = void>
template<typename V = RDFDetail::RInferredType>
RResultPtr< TStatistic > ROOT::RDF::RInterface< Proxied, DataSource >::Stats ( std::string_view  value = "")
inline

Return a TStatistic object, filled once per event (lazy action).

Template Parameters
VThe type of the value column
Parameters
[in]valueThe name of the column with the values to fill the statistics with.
Returns
the filled TStatistic object wrapped in a RResultPtr.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto stats0 = myDf.Stats("values");
// Explicit column type
auto stats1 = myDf.Stats<float>("values");

Definition at line 2564 of file RInterface.hxx.

◆ StdDev()

template<typename Proxied , typename DataSource = void>
template<typename T = RDFDetail::RInferredType>
RResultPtr< double > ROOT::RDF::RInterface< Proxied, DataSource >::StdDev ( std::string_view  columnName = "")
inline

Return the unbiased standard deviation of processed column values (lazy action).

Template Parameters
TThe type of the branch/column.
Parameters
[in]columnNameThe name of the branch/column to be treated.
Returns
the standard deviation value of the selected column wrapped in a RResultPtr.

If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct template specialization of this method.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto stdDev0 = myDf.StdDev("values");
// Explicit column type
auto stdDev1 = myDf.StdDev<double>("values");

Definition at line 2729 of file RInterface.hxx.

◆ Sum()

template<typename Proxied , typename DataSource = void>
template<typename T = RDFDetail::RInferredType>
RResultPtr< RDFDetail::SumReturnType_t< T > > ROOT::RDF::RInterface< Proxied, DataSource >::Sum ( std::string_view  columnName = "",
const RDFDetail::SumReturnType_t< T > &  initValue = RDFDetail::SumReturnType_t<T>{} 
)
inline

Return the sum of processed column values (lazy action).

Template Parameters
TThe type of the branch/column.
Parameters
[in]columnNameThe name of the branch/column.
[in]initValueOptional initial value for the sum. If not present, the column values must be default-constructible.
Returns
the sum of the selected column wrapped in a RResultPtr.

If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct template specialization of this method. If the type of the column is inferred, the return type is double, the type of the column otherwise.

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Example usage:

// Deduce column type (this invocation needs jitting internally)
auto sum0 = myDf.Sum("values");
// Explicit column type
auto sum1 = myDf.Sum<double>("values");

Definition at line 2761 of file RInterface.hxx.

◆ Take()

template<typename Proxied , typename DataSource = void>
template<typename T , typename COLL = std::vector<T>>
RResultPtr< COLL > ROOT::RDF::RInterface< Proxied, DataSource >::Take ( std::string_view  column = "")
inline

Return a collection of values of a column (lazy action, returns a std::vector by default).

Template Parameters
TThe type of the column.
COLLThe type of collection used to store the values.
Parameters
[in]columnThe name of the column to collect the values of.
Returns
the content of the selected column wrapped in a RResultPtr.

The collection type to be specified for C-style array columns is RVec<T>: in this case the returned collection is a std::vector<RVec<T>>.

Example usage:

// In this case intCol is a std::vector<int>
auto intCol = rdf.Take<int>("integerColumn");
// Same content as above but in this case taken as a RVec<int>
auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
// In this case cArrayIntCol is a std::vector<RVec<int>>, a collection of collections
auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");

This action is lazy: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.

Definition at line 1763 of file RInterface.hxx.

◆ Vary() [1/11]

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( const std::vector< std::string > &  colNames,
F &&  expression,
const ColumnNames_t inputColumns,
const std::vector< std::string > &  variationTags,
std::string_view  variationName 
)
inline

Register systematic variations for multiple existing columns using custom variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation"

This overload of Vary takes a list of column names as first argument and requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each affected column. The variationTags are defined as {"down", "up"}.

Example usage:

// produce variations "ptAndEta:down" and "ptAndEta:up"
auto nominal_hx =
df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
[](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
{"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
{"down", "up"}, // variation tags
"ptAndEta") // variation name
.Histo1D("pt", "eta");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["ptAndEta:down"].Draw("SAME");
hx["ptAndEta:up"].Draw("SAME");
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
TPaveText * pt
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
See also
This Vary() overload for more information.

Definition at line 952 of file RInterface.hxx.

◆ Vary() [2/11]

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( const std::vector< std::string > &  colNames,
F &&  expression,
const ColumnNames_t inputColumns,
std::size_t  nVariations,
std::string_view  variationName 
)
inline

Register systematic variations for multiple existing columns using auto-generated tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.

This overload of Vary takes a list of column names as first argument. It takes an nVariations parameter instead of a list of tag names (variationTags). Tag names will be auto-generated as the sequence 0...nVariations-1.

Example usage:

auto nominal_hx =
df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
[](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
{"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
2, // auto-generated variation tags
"ptAndEta") // variation name
.Histo1D("pt", "eta");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["ptAndEta:0"].Draw("SAME");
hx["ptAndEta:1"].Draw("SAME");
See also
This Vary() overload for more information.

Definition at line 1014 of file RInterface.hxx.

◆ Vary() [3/11]

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( const std::vector< std::string > &  colNames,
std::string_view  expression,
const std::vector< std::string > &  variationTags,
std::string_view  variationName 
)
inline

Register systematic variations for multiple existing columns using custom variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona string containing valid C++ code that evaluates to an RVec of RVecs containing the varied values for the specified columns.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation".

This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as {"down", "up"}.

auto nominal_hx =
df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
.Histo1D("x", "y");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["xy:down"].Draw("SAME");
hx["xy:up"].Draw("SAME");
See also
This Vary() overload for more information.

Definition at line 1198 of file RInterface.hxx.

◆ Vary() [4/11]

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( const std::vector< std::string > &  colNames,
std::string_view  expression,
std::size_t  nVariations,
std::string_view  variationName 
)
inline

Register systematic variations for multiple existing columns using auto-generated variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona string containing valid C++ code that evaluates to an RVec of RVecs containing the varied values for the specified columns.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation".

This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time compiled. It takes an nVariations parameter instead of a list of tag names. The varied results will be accessible via the keys of the dictionary with the form variationName:N where N is the corresponding sequential tag starting at 0 and going up to nVariations - 1. The example below shows how Vary() is used while dealing with multiple columns.

auto nominal_hx =
df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
.Histo1D("x", "y");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["xy:0"].Draw("SAME");
hx["xy:1"].Draw("SAME");
See also
This Vary() overload for more information.

Definition at line 1146 of file RInterface.hxx.

◆ Vary() [5/11]

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( std::initializer_list< std::string >  colNames,
F &&  expression,
const ColumnNames_t inputColumns,
const std::vector< std::string > &  variationTags,
std::string_view  variationName 
)
inline

Register systematic variations for multiple existing columns using custom variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.
Note
This overload avoids the ambiguity, arising in C++20, between constructing a std::string and a std::vector<std::string> from an initializer list.
See also
This Vary() overload for more information.

Definition at line 974 of file RInterface.hxx.

◆ Vary() [6/11]

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( std::initializer_list< std::string >  colNames,
F &&  expression,
const ColumnNames_t inputColumns,
std::size_t  nVariations,
std::string_view  variationName 
)
inline

Register systematic variations for multiple existing columns using auto-generated variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.
Note
This overload avoids the ambiguity, arising in C++20, between constructing a std::string and a std::vector<std::string> from an initializer list.
See also
This Vary() overload for more information.

Definition at line 1045 of file RInterface.hxx.

◆ Vary() [7/11]

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( std::initializer_list< std::string >  colNames,
std::string_view  expression,
std::size_t  nVariations,
std::string_view  variationName 
)
inline

Register systematic variations for multiple existing columns using auto-generated variation tags.

Parameters
[in]colNamesset of names of the columns for which varied values are provided.
[in]expressiona string containing valid C++ code that evaluates to an RVec of RVecs containing the varied values for the specified columns.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.
Note
This overload avoids the ambiguity, arising in C++20, between constructing a std::string and a std::vector<std::string> from an initializer list.
See also
This Vary() overload for more information.

Definition at line 1170 of file RInterface.hxx.

◆ Vary() [8/11]

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( std::string_view  colName,
F &&  expression,
const ColumnNames_t inputColumns,
const std::vector< std::string > &  variationTags,
std::string_view  variationName = "" 
)
inline

Register systematic variations for a single existing column using custom variation tags.

Parameters
[in]colNamename of the column for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation".

Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for results that depend on any varied quantity, a map/dictionary of varied results can be produced with ROOT::RDF::Experimental::VariationsFor (see the example below).

The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and values for each of the systematic variations that affected the result (via upstream Filters or via direct or indirect dependencies of the column values on some registered variations). The keys will be a composition of variation names and tags, e.g. "pt:up" and "pt:down" for the example below.

In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt. We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):

auto nominal_hx =
df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"down", "up"})
.Filter("pt > k")
.Define("x", someFunc, {"pt"})
.Histo1D("x");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["pt:down"].Draw("SAME");
hx["pt:up"].Draw("SAME");

RDataFrame computes all variations as part of a single loop over the data. In particular, this means that I/O and computation of values shared among variations only happen once for all variations. Thus, the event loop run-time typically scales much better than linearly with the number of variations.

RDataFrame lazily computes the varied values required to produce the outputs of VariationsFor(). If VariationsFor() was not called for a result, the computations are only run for the nominal case.

See other overloads for examples when variations are added for multiple existing columns, or when the tags are auto-generated instead of being directly defined.

Definition at line 864 of file RInterface.hxx.

◆ Vary() [9/11]

template<typename Proxied , typename DataSource = void>
template<typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( std::string_view  colName,
F &&  expression,
const ColumnNames_t inputColumns,
std::size_t  nVariations,
std::string_view  variationName = "" 
)
inline

Register systematic variations for a single existing column using auto-generated variation tags.

Parameters
[in]colNamename of the column for which varied values are provided.
[in]expressiona callable that evaluates the varied values for the specified columns. The callable can take any column values as input, similarly to what happens during Filter and Define calls. It must return an RVec of varied values, one for each variation tag, in the same order as the tags.
[in]inputColumnsthe names of the columns to be passed to the callable.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.

This overload of Vary takes an nVariations parameter instead of a list of tag names. The varied results will be accessible via the keys of the dictionary with the form variationName:N where N is the corresponding sequential tag starting at 0 and going up to nVariations - 1.

Example usage:

auto nominal_hx =
df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, 2)
.Histo1D("x");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["pt:0"].Draw("SAME");
hx["pt:1"].Draw("SAME");
See also
This Vary() overload for more information.

Definition at line 903 of file RInterface.hxx.

◆ Vary() [10/11]

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( std::string_view  colName,
std::string_view  expression,
const std::vector< std::string > &  variationTags,
std::string_view  variationName = "" 
)
inline

Register systematic variations for a single existing column using custom variation tags.

Parameters
[in]colNamename of the column for which varied values are provided.
[in]expressiona string containing valid C++ code that evaluates to an RVec containing the varied values for the specified column.
[in]variationTagsnames for each of the varied values, e.g. "up" and "down".
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.

This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are defined as {"down", "up"}.

auto nominal_hx =
df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
.Filter("pt > k")
.Define("x", someFunc, {"pt"})
.Histo1D("x");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["pt:down"].Draw("SAME");
hx["pt:up"].Draw("SAME");
See also
This Vary() overload for more information.

Definition at line 1076 of file RInterface.hxx.

◆ Vary() [11/11]

template<typename Proxied , typename DataSource = void>
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::Vary ( std::string_view  colName,
std::string_view  expression,
std::size_t  nVariations,
std::string_view  variationName = "" 
)
inline

Register systematic variations for a single existing column using auto-generated variation tags.

Parameters
[in]colNamename of the column for which varied values are provided.
[in]expressiona string containing valid C++ code that evaluates to an RVec containing the varied values for the specified column.
[in]nVariationsnumber of variations returned by the expression. The corresponding tags will be "0", "1", etc.
[in]variationNamea generic name for this set of varied values, e.g. "ptvariation". colName is used if none is provided.

This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are auto-generated.

auto nominal_hx =
df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2)
.Histo1D("pt");
auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
hx["nominal"].Draw();
hx["pt:0"].Draw("SAME");
hx["pt:1"].Draw("SAME");
See also
This Vary() overload for more information.

Definition at line 1109 of file RInterface.hxx.

◆ VaryImpl()

template<typename Proxied , typename DataSource = void>
template<bool IsSingleColumn, typename F >
RInterface< Proxied, DS_t > ROOT::RDF::RInterface< Proxied, DataSource >::VaryImpl ( const std::vector< std::string > &  colNames,
F &&  expression,
const ColumnNames_t inputColumns,
const std::vector< std::string > &  variationTags,
std::string_view  variationName 
)
inlineprivate

Definition at line 3224 of file RInterface.hxx.

Friends And Related Symbol Documentation

◆ RInterface

template<typename Proxied , typename DataSource = void>
template<typename T , typename W >
friend class RInterface
friend

Definition at line 123 of file RInterface.hxx.

◆ RDFInternal::ChangeBeginAndEndEntries

template<typename Proxied , typename DataSource = void>
void RDFInternal::ChangeBeginAndEndEntries ( const RNode node,
Long64_t  start,
Long64_t  end 
)
friend

◆ RDFInternal::ChangeEmptyEntryRange

template<typename Proxied , typename DataSource = void>
void RDFInternal::ChangeEmptyEntryRange ( const RNode node,
std::pair< ULong64_t, ULong64_t > &&  newRange 
)
friend

◆ RDFInternal::ChangeSpec

template<typename Proxied , typename DataSource = void>
void RDFInternal::ChangeSpec ( const RNode node,
ROOT::RDF::Experimental::RDatasetSpec &&  spec 
)
friend

◆ RDFInternal::GraphDrawing::GraphCreatorHelper

template<typename Proxied , typename DataSource = void>
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
friend

Definition at line 120 of file RInterface.hxx.

◆ RDFInternal::TriggerRun

template<typename Proxied , typename DataSource = void>
void RDFInternal::TriggerRun ( RNode  node)
friend

◆ ROOT::Internal::RDF::GetDataSourceLabel

template<typename Proxied , typename DataSource = void>
std::string ROOT::Internal::RDF::GetDataSourceLabel ( const RNode node)
friend

Member Data Documentation

◆ fProxiedPtr

template<typename Proxied , typename DataSource = void>
std::shared_ptr<Proxied> ROOT::RDF::RInterface< Proxied, DataSource >::fProxiedPtr
private

Smart pointer to the graph node encapsulated by this RInterface.

Definition at line 130 of file RInterface.hxx.

  • tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx
  • tree/dataframe/inc/ROOT/RDF/RInterface.hxx