Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df037_TTreeEventMatching.py
Go to the documentation of this file.
# \file
# \ingroup tutorial_dataframe
# \notebook -nodraw
#
# This example shows processing of a TTree-based dataset with horizontal
# concatenations (friends) and event matching (based on TTreeIndex). In case
# the current event being processed does not match one (or more) of the friend
# datasets, one can use the FilterAvailable and DefaultValueFor functionalities
# to act upon the situation.
#
# \macro_code
# \macro_output
#
# \date September 2024
# \author Vincenzo Eduardo Padulano (CERN)
16import os
17import ROOT
18import numpy
19
20
class DatasetContext:
    """Create the on-disk dataset used by the tutorial and remove it on exit.

    Instantiating the class writes three ROOT files, each holding one TTree
    with an integer ``idx`` branch and one integer payload branch:

    - main tree ``events``: ``idx`` = 1, 2, 3 with ``x`` = 1, 2, 3
    - first auxiliary tree ``auxdata_1``: ``idx`` = 1, 2 with ``y`` = 4, 5
    - second auxiliary tree ``auxdata_2``: ``idx`` = 1, 3 with ``z`` = 6, 7

    When used as a context manager, the three files are deleted when the
    ``with`` block is left.
    """

    main_file = "df037_TTreeEventMatching_py_main.root"
    aux_file_1 = "df037_TTreeEventMatching_py_aux_1.root"
    aux_file_2 = "df037_TTreeEventMatching_py_aux_2.root"
    main_tree_name = "events"
    aux_tree_name_1 = "auxdata_1"
    aux_tree_name_2 = "auxdata_2"

    def __init__(self):
        """Write the main file and the two auxiliary files to disk."""
        self._write_tree(
            self.main_file, self.main_tree_name, "x", ((1, 1), (2, 2), (3, 3)))

        # The first auxiliary file has matching indices 1 and 2, but not 3
        self._write_tree(
            self.aux_file_1, self.aux_tree_name_1, "y", ((1, 4), (2, 5)))

        # The second auxiliary file has matching indices 1 and 3, but not 2
        self._write_tree(
            self.aux_file_2, self.aux_tree_name_2, "z", ((1, 6), (3, 7)))

    @staticmethod
    def _write_tree(file_name, tree_name, value_branch, rows):
        """Write one TTree with branches ``idx`` and ``value_branch`` to a file.

        Args:
            file_name: Path of the ROOT file to (re)create.
            tree_name: Name (also used as title) of the TTree.
            value_branch: Name of the integer payload branch.
            rows: Iterable of ``(idx, value)`` pairs, one per tree entry.
        """
        with ROOT.TFile(file_name, "RECREATE") as f:
            tree = ROOT.TTree(tree_name, tree_name)
            # The "/I" leaf type is a 32-bit signed integer, so the buffers
            # must be 32-bit as well. numpy's default `int` is 64-bit on most
            # platforms and would only work by accident of byte order.
            idx = numpy.array([0], dtype=numpy.int32)
            value = numpy.array([0], dtype=numpy.int32)
            tree.Branch("idx", idx, "idx/I")
            tree.Branch(value_branch, value, value_branch + "/I")

            for row_idx, row_value in rows:
                idx[0] = row_idx
                value[0] = row_value
                # Without Fill() the buffer values are never stored as an entry
                tree.Fill()

            # Persist the tree explicitly before the file is closed
            f.WriteObject(tree, tree_name)

    def __enter__(self):
        """Return the context itself so callers can read the file/tree names."""
        return self

    def __exit__(self, *_):
        """Delete the three dataset files created by ``__init__``."""
        for path in (self.main_file, self.aux_file_1, self.aux_file_2):
            os.remove(path)
92
93
def df037_TTreeEventMatching(dataset: DatasetContext):
    """Show how RDataFrame handles friend trees with unmatched events.

    Three displays are computed and printed:

    1. every column gets a default value when an auxiliary tree has no match;
    2. entries without a match in the first auxiliary tree are skipped, while
       missing values from the second auxiliary tree get a default value;
    3. only the main-tree entries without a match in the first auxiliary tree
       are kept.

    Args:
        dataset: Provides the file names and tree names of the main tree and
            of the two auxiliary trees.
    """
    # The input dataset has one main TTree and two auxiliary. The 'idx' branch
    # is used as the index to match events between the trees.
    # - The main tree has 3 entries, with 'idx' values (1, 2, 3).
    # - The first auxiliary tree has 2 entries, with 'idx' values (1, 2).
    # - The second auxiliary tree has 2 entries, with 'idx' values (1, 3).
    # The two auxiliary trees are concatenated horizontally with the main one.
    main_chain = ROOT.TChain(dataset.main_tree_name)
    main_chain.Add(dataset.main_file)

    # Each friend needs an index on 'idx' so that entries are matched by value
    # rather than by entry number.
    aux_chain_1 = ROOT.TChain(dataset.aux_tree_name_1)
    aux_chain_1.Add(dataset.aux_file_1)
    aux_chain_1.BuildIndex("idx")

    aux_chain_2 = ROOT.TChain(dataset.aux_tree_name_2)
    aux_chain_2.Add(dataset.aux_file_2)
    aux_chain_2.BuildIndex("idx")

    main_chain.AddFriend(aux_chain_1)
    main_chain.AddFriend(aux_chain_2)

    # Create an RDataFrame to process the input dataset. The DefaultValueFor and
    # FilterAvailable functionalities can be used to decide what to do for
    # the events that do not match entirely according to the index column 'idx'
    df = ROOT.RDataFrame(main_chain)

    aux_tree_1_colidx = dataset.aux_tree_name_1 + ".idx"
    aux_tree_1_coly = dataset.aux_tree_name_1 + ".y"
    aux_tree_2_colidx = dataset.aux_tree_name_2 + ".idx"
    aux_tree_2_colz = dataset.aux_tree_name_2 + ".z"

    # Columns shown by the first two examples
    all_columns = (
        "idx", aux_tree_1_colidx, aux_tree_2_colidx,
        "x", aux_tree_1_coly, aux_tree_2_colz,
    )

    default_value = ROOT.std.numeric_limits[int].min()

    # Example 1: provide default values for all columns in case there was no
    # match
    display_1 = (
        df.DefaultValueFor(aux_tree_1_colidx, default_value)
        .DefaultValueFor(aux_tree_1_coly, default_value)
        .DefaultValueFor(aux_tree_2_colidx, default_value)
        .DefaultValueFor(aux_tree_2_colz, default_value)
        .Display(all_columns)
    )

    # Example 2: skip the entire entry when there was no match for a column
    # in the first auxiliary tree, but keep the entries when there is no match
    # in the second auxiliary tree and provide a default value for those
    display_2 = (
        df.DefaultValueFor(aux_tree_2_colidx, default_value)
        .DefaultValueFor(aux_tree_2_colz, default_value)
        .FilterAvailable(aux_tree_1_coly)
        .Display(all_columns)
    )

    # Example 3: Keep entries from the main tree for which there is no
    # corresponding match in entries of the first auxiliary tree
    display_3 = df.FilterMissing(aux_tree_1_colidx).Display(("idx", "x"))

    print("Example 1: provide default values for all columns")
    display_1.Print()
    print("Example 2: always skip the entry when there is no match")
    display_2.Print()
    print("Example 3: keep entries from the main tree for which there is no match in the auxiliary tree")
    display_3.Print()
159
160
if __name__ == "__main__":
    # Create the input files, run the tutorial, then delete the files on exit
    with DatasetContext() as dataset:
        df037_TTreeEventMatching(dataset)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...