Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df036_missingBranches.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_dataframe
3## \notebook -nodraw
4##
5## This example shows how to process a dataset where entries might be
6## incomplete due to one or more missing branches in one or more of the files
7## in the dataset. It shows usage of the FilterAvailable and DefaultValueFor
8## RDataFrame functionalities to act upon the missing entries.
9##
10## \macro_code
11## \macro_output
12##
13## \date September 2024
14## \author Vincenzo Eduardo Padulano (CERN)
15
16import array
17import os
18
19import ROOT
20
21
class DatasetContext:
    """
    Build the small multi-file dataset used by the tutorial and clean it up.

    Constructing an instance writes three ROOT files, each holding one TTree
    with `nentries` entries: the first tree has branches (x, y), the second
    only y, the third only x. Used as a context manager, the instance removes
    those files again when the `with` block ends.
    """

    filenames = [
        "df036_missingBranches_py_file_1.root",
        "df036_missingBranches_py_file_2.root",
        "df036_missingBranches_py_file_3.root",
    ]
    treenames = ["tree_1", "tree_2", "tree_3"]
    nentries = 5

    def __init__(self):
        # One layout per output file. Each item describes a branch as
        # (branch name, leaf list, factor); entry number i (counted from 1)
        # stores the value factor * i in that branch.
        layouts = (
            (("x", "x/I", 1), ("y", "y/I", 2)),
            (("y", "y/I", 3),),
            (("x", "x/I", 4),),
        )

        for filename, treename, layout in zip(self.filenames, self.treenames, layouts):
            with ROOT.TFile(filename, "RECREATE"):
                tree = ROOT.TTree(treename, treename)

                # One single-element integer buffer per branch
                # (any array can also be a numpy array).
                buffers = [array.array("i", [0]) for _ in layout]
                for (branch, leaflist, _), buffer in zip(layout, buffers):
                    tree.Branch(branch, buffer, leaflist)

                for entry in range(1, self.nentries + 1):
                    for (_, _, factor), buffer in zip(layout, buffers):
                        buffer[0] = factor * entry
                    tree.Fill()

                tree.Write()

    def __enter__(self):
        """Return the instance itself so it can be bound by `with ... as`."""
        return self

    def __exit__(self, *_):
        """
        Delete every file listed in `filenames` when the context ends.
        The exception details passed by the `with` statement are ignored.
        """
        for path in self.filenames:
            os.remove(path)
81
82
def df036_missingBranches(dataset: DatasetContext):
    """
    Show three ways of dealing with branches that are missing in part of a
    dataset, and print the resulting tables.

    Args:
        dataset: object providing the `filenames` and `treenames` sequences
            of the trees to chain together (paired element by element).
    """
    # The input dataset contains three files, with one TTree each.
    # The first contains branches (x, y), the second only branch y, the third
    # only branch x. The TChain will process the three files, encountering a
    # different missing branch when switching to the next tree
    chain = ROOT.TChain()
    for fname, tname in zip(dataset.filenames, dataset.treenames):
        # Each tree is added as "<file name>?#<tree name>"
        chain.Add(fname + "?#" + tname)

    df = ROOT.RDataFrame(chain)

    # Value substituted for a missing branch; the minimum reported by
    # std::numeric_limits makes the filled-in entries easy to spot.
    default_value = ROOT.std.numeric_limits[int].min()

    # Example 1: provide a default value for all missing branches
    display_1 = (
        df.DefaultValueFor("x", default_value)
        .DefaultValueFor("y", default_value)
        .Display(columnList=("x", "y"), nRows=15)
    )

    # Example 2: provide a default value for branch y, but skip events where
    # branch x is missing
    display_2 = df.DefaultValueFor("y", default_value).FilterAvailable("x").Display(columnList=("x", "y"), nRows=15)

    # Example 3: only keep events where branch y is missing and display values for branch x
    display_3 = df.FilterMissing("y").Display(columnList=("x",), nRows=15)

    # All three displays are booked above before the first one is printed.
    # Each header is followed by the table it announces.
    print("Example 1: provide a default value for all missing branches")
    display_1.Print()
    print("Example 2: provide a default value for branch y, but skip events where branch x is missing")
    display_2.Print()
    print("Example 3: only keep events where branch y is missing and display values for branch x")
    display_3.Print()
116
117
if __name__ == "__main__":
    # The context manager creates the input files on entry to the block and
    # removes them again once the tutorial function has finished.
    with DatasetContext() as dataset:
        df036_missingBranches(dataset)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, CSV and other data formats, in C++ or Python.