This example shows processing of a TTree-based dataset with horizontal concatenations (friends) and event matching (based on TTreeIndex). In case the current event being processed does not match one (or more) of the friend datasets, one can use the FilterAvailable and DefaultValueFor functionalities to act upon the situation.
import array
import os
import ROOT
class DatasetContext:
"""A helper class to create the dataset for the tutorial below."""
main_file = "df037_TTreeEventMatching_py_main.root"
aux_file_1 = "df037_TTreeEventMatching_py_aux_1.root"
aux_file_2 = "df037_TTreeEventMatching_py_aux_2.root"
main_tree_name = "events"
aux_tree_name_1 = "auxdata_1"
aux_tree_name_2 = "auxdata_2"
def __init__(self):
with ROOT.TFile(self.main_file, "RECREATE"):
main_tree = ROOT.TTree(self.main_tree_name, self.main_tree_name)
idx = array.array("i", [0])
x = array.array("i", [0])
main_tree.Branch("idx", idx, "idx/I")
main_tree.Branch("x", x, "x/I")
idx[0] = 1
x[0] = 1
main_tree.Fill()
idx[0] = 2
x[0] = 2
main_tree.Fill()
idx[0] = 3
x[0] = 3
main_tree.Fill()
main_tree.Write()
with ROOT.TFile(self.aux_file_1, "RECREATE"):
aux_tree_1 = ROOT.TTree(self.aux_tree_name_1, self.aux_tree_name_1)
idx = array.array("i", [0])
y = array.array("i", [0])
aux_tree_1.Branch("idx", idx, "idx/I")
aux_tree_1.Branch("y", y, "y/I")
idx[0] = 1
y[0] = 4
aux_tree_1.Fill()
idx[0] = 2
y[0] = 5
aux_tree_1.Fill()
aux_tree_1.Write()
with ROOT.TFile(self.aux_file_2, "RECREATE"):
aux_tree_2 = ROOT.TTree(self.aux_tree_name_2, self.aux_tree_name_2)
idx = array.array("i", [0])
z = array.array("i", [0])
aux_tree_2.Branch("idx", idx, "idx/I")
aux_tree_2.Branch("z", z, "z/I")
idx[0] = 1
z[0] = 6
aux_tree_2.Fill()
idx[0] = 3
z[0] = 7
aux_tree_2.Fill()
aux_tree_2.Write()
def __enter__(self):
return self
def __exit__(self, *_):
os.remove(self.main_file)
os.remove(self.aux_file_1)
os.remove(self.aux_file_2)
def df037_TTreeEventMatching(dataset: DatasetContext):
main_chain = ROOT.TChain(dataset.main_tree_name)
main_chain.Add(dataset.main_file)
aux_chain_1 = ROOT.TChain(dataset.aux_tree_name_1)
aux_chain_1.Add(dataset.aux_file_1)
aux_chain_1.BuildIndex("idx")
aux_chain_2 = ROOT.TChain(dataset.aux_tree_name_2)
aux_chain_2.Add(dataset.aux_file_2)
aux_chain_2.BuildIndex("idx")
main_chain.AddFriend(aux_chain_1)
main_chain.AddFriend(aux_chain_2)
aux_tree_1_colidx = dataset.aux_tree_name_1 + ".idx"
aux_tree_1_coly = dataset.aux_tree_name_1 + ".y"
aux_tree_2_colidx = dataset.aux_tree_name_2 + ".idx"
aux_tree_2_colz = dataset.aux_tree_name_2 + ".z"
default_value = ROOT.std.numeric_limits[int].min()
display_1 = (
df.DefaultValueFor(aux_tree_1_colidx, default_value)
.DefaultValueFor(aux_tree_1_coly, default_value)
.DefaultValueFor(aux_tree_2_colidx, default_value)
.DefaultValueFor(aux_tree_2_colz, default_value)
.Display(("idx", aux_tree_1_colidx, aux_tree_2_colidx, "x", aux_tree_1_coly, aux_tree_2_colz))
)
display_2 = (
df.DefaultValueFor(aux_tree_2_colidx, default_value)
.DefaultValueFor(aux_tree_2_colz, default_value)
.FilterAvailable(aux_tree_1_coly)
.Display(("idx", aux_tree_1_colidx, aux_tree_2_colidx, "x", aux_tree_1_coly, aux_tree_2_colz))
)
display_3 = df.FilterMissing(aux_tree_1_colidx).Display(("idx", "x"))
print("Example 1: provide default values for all columns")
display_1.Print()
print("Example 2: always skip the entry when there is no match")
display_2.Print()
print("Example 3: keep entries from the main tree for which there is no match in the auxiliary tree")
display_3.Print()
if __name__ == "__main__":
with DatasetContext() as dataset:
df037_TTreeEventMatching(dataset)
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Example 1: provide default values for all columns
Example 2: always skip the entry when there is no match
Example 3: keep entries from the main tree for which there is no match in the auxiliary tree
+-----+-----+---------------+---------------+---+-------------+-------------+
| Row | idx | auxdata_1.idx | auxdata_2.idx | x | auxdata_1.y | auxdata_2.z |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 0 | 1 | 1 | 1 | 1 | 4 | 6 |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 1 | 2 | 2 | -2147483648 | 2 | 5 | -2147483648 |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 2 | 3 | -2147483648 | 3 | 3 | -2147483648 | 7 |
+-----+-----+---------------+---------------+---+-------------+-------------+
+-----+-----+---------------+---------------+---+-------------+-------------+
| Row | idx | auxdata_1.idx | auxdata_2.idx | x | auxdata_1.y | auxdata_2.z |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 0 | 1 | 1 | 1 | 1 | 4 | 6 |
+-----+-----+---------------+---------------+---+-------------+-------------+
| 1 | 2 | 2 | -2147483648 | 2 | 5 | -2147483648 |
+-----+-----+---------------+---------------+---+-------------+-------------+
+-----+-----+---+
| Row | idx | x |
+-----+-----+---+
| 2 | 3 | 3 |
+-----+-----+---+