Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df037_TTreeEventMatching.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_dataframe
3## \notebook -nodraw
4##
5## This example shows processing of a TTree-based dataset with horizontal
6## concatenations (friends) and event matching (based on TTreeIndex). In case
7## the current event being processed does not match one (or more) of the friend
8## datasets, one can use the FilterAvailable and DefaultValueFor functionalities
9## to act upon the situation.
10##
11## \macro_code
12## \macro_output
13##
14## \date September 2024
15## \author Vincenzo Eduardo Padulano (CERN)
16
17import array
18import os
19
20import ROOT
21
22
class DatasetContext:
    """A helper class to create the dataset for the tutorial below.

    Constructing an instance writes three ROOT files in the current working
    directory: one with the main tree and two with auxiliary trees. Every tree
    has an integer 'idx' branch plus one integer payload branch ('x', 'y' or
    'z'). Used as a context manager, the three files are removed on exit.
    """

    main_file = "df037_TTreeEventMatching_py_main.root"
    aux_file_1 = "df037_TTreeEventMatching_py_aux_1.root"
    aux_file_2 = "df037_TTreeEventMatching_py_aux_2.root"
    main_tree_name = "events"
    aux_tree_name_1 = "auxdata_1"
    aux_tree_name_2 = "auxdata_2"

    def __init__(self):
        # The main file has indices 1, 2 and 3
        self._write_tree(self.main_file, self.main_tree_name, "x", ((1, 1), (2, 2), (3, 3)))

        # The first auxiliary file has matching indices 1 and 2, but not 3
        self._write_tree(self.aux_file_1, self.aux_tree_name_1, "y", ((1, 4), (2, 5)))

        # The second auxiliary file has matching indices 1 and 3, but not 2
        self._write_tree(self.aux_file_2, self.aux_tree_name_2, "z", ((1, 6), (3, 7)))

    @staticmethod
    def _write_tree(file_name, tree_name, value_name, rows):
        """Write one tree with an 'idx' branch and one payload branch.

        Args:
            file_name: Path of the ROOT file to (re)create.
            tree_name: Used both as name and as title of the tree.
            value_name: Name of the integer payload branch.
            rows: Iterable of (idx, value) pairs, one pair per tree entry.
        """
        with ROOT.TFile(file_name, "RECREATE"):
            tree = ROOT.TTree(tree_name, tree_name)
            idx = array.array("i", [0])  # any array can also be a numpy array
            value = array.array("i", [0])
            tree.Branch("idx", idx, "idx/I")
            tree.Branch(value_name, value, value_name + "/I")

            for index, payload in rows:
                idx[0] = index
                value[0] = payload
                # Without Fill() the values set above never become an entry
                tree.Fill()

            # Persist the tree before the file is closed on leaving the block
            tree.Write()

    def __enter__(self):
        return self

    def __exit__(self, *_):
        # Clean up the files created in __init__
        for file_name in (self.main_file, self.aux_file_1, self.aux_file_2):
            os.remove(file_name)
94
95
def df037_TTreeEventMatching(dataset: DatasetContext):
    """Run three event-matching examples on the tutorial dataset and print them.

    Args:
        dataset: Supplies the file names and tree names of the main tree and
            of the two auxiliary trees.
    """
    # The input dataset has one main TTree and two auxiliary. The 'idx' branch
    # is used as the index to match events between the trees.
    # - The main tree has 3 entries, with 'idx' values (1, 2, 3).
    # - The first auxiliary tree has 2 entries, with 'idx' values (1, 2).
    # - The second auxiliary tree has 2 entries, with 'idx' values (1, 3).
    # The two auxiliary trees are concatenated horizontally with the main one.
    main_chain = ROOT.TChain(dataset.main_tree_name)
    main_chain.Add(dataset.main_file)

    # Each auxiliary chain gets an index built on 'idx', so that its entries
    # are matched to the main tree by index value
    aux_chain_1 = ROOT.TChain(dataset.aux_tree_name_1)
    aux_chain_1.Add(dataset.aux_file_1)
    aux_chain_1.BuildIndex("idx")

    aux_chain_2 = ROOT.TChain(dataset.aux_tree_name_2)
    aux_chain_2.Add(dataset.aux_file_2)
    aux_chain_2.BuildIndex("idx")

    main_chain.AddFriend(aux_chain_1)
    main_chain.AddFriend(aux_chain_2)

    # Create an RDataFrame to process the input dataset. The DefaultValueFor and
    # FilterAvailable functionalities can be used to decide what to do for
    # the events that do not match entirely according to the index column 'idx'
    df = ROOT.RDataFrame(main_chain)

    # Friend columns are addressed as '<tree name>.<branch name>'
    aux_tree_1_colidx = dataset.aux_tree_name_1 + ".idx"
    aux_tree_1_coly = dataset.aux_tree_name_1 + ".y"
    aux_tree_2_colidx = dataset.aux_tree_name_2 + ".idx"
    aux_tree_2_colz = dataset.aux_tree_name_2 + ".z"

    # Value shown in place of a missing friend entry
    default_value = ROOT.std.numeric_limits[int].min()

    # Example 1: provide default values for all columns in case there was no
    # match
    display_1 = (
        df.DefaultValueFor(aux_tree_1_colidx, default_value)
        .DefaultValueFor(aux_tree_1_coly, default_value)
        .DefaultValueFor(aux_tree_2_colidx, default_value)
        .DefaultValueFor(aux_tree_2_colz, default_value)
        .Display(("idx", aux_tree_1_colidx, aux_tree_2_colidx, "x", aux_tree_1_coly, aux_tree_2_colz))
    )

    # Example 2: skip the entire entry when there was no match for a column
    # in the first auxiliary tree, but keep the entries when there is no match
    # in the second auxiliary tree and provide a default value for those
    display_2 = (
        df.DefaultValueFor(aux_tree_2_colidx, default_value)
        .DefaultValueFor(aux_tree_2_colz, default_value)
        .FilterAvailable(aux_tree_1_coly)
        .Display(("idx", aux_tree_1_colidx, aux_tree_2_colidx, "x", aux_tree_1_coly, aux_tree_2_colz))
    )

    # Example 3: Keep entries from the main tree for which there is no
    # corresponding match in entries of the first auxiliary tree
    display_3 = df.FilterMissing(aux_tree_1_colidx).Display(("idx", "x"))

    print("Example 1: provide default values for all columns")
    display_1.Print()
    print("Example 2: always skip the entry when there is no match")
    display_2.Print()
    print("Example 3: keep entries from the main tree for which there is no match in the auxiliary tree")
    display_3.Print()
159
160
if __name__ == "__main__":
    # The context manager creates the input files on construction and removes
    # them again once the tutorial function has returned.
    with DatasetContext() as dataset:
        df037_TTreeEventMatching(dataset)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...