Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df036_missingBranches.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_dataframe
3/// \notebook -nodraw
4///
5/// This example shows how to process a dataset where entries might be
6/// incomplete due to one or more missing branches in one or more of the files
7/// in the dataset. It shows usage of the FilterAvailable and DefaultValueFor
8/// RDataFrame functionalities to act upon the missing entries.
9///
10/// \macro_code
11/// \macro_output
12///
13/// \date September 2024
14/// \author Vincenzo Eduardo Padulano (CERN)
15#include <ROOT/RDataFrame.hxx>
16#include <TChain.h>
17#include <TFile.h>
18#include <TTree.h>
19
20#include <iostream>
21#include <numeric>
22
23// A helper class to create the dataset for the tutorial below.
24struct Dataset {
25
26 constexpr static std::array<const char *, 3> fFileNames{"df036_missingBranches_C_file_1.root",
27 "df036_missingBranches_C_file_2.root",
28 "df036_missingBranches_C_file_3.root"};
29 constexpr static std::array<const char *, 3> fTreeNames{"tree_1", "tree_2", "tree_3"};
30 constexpr static auto fTreeEntries{5};
31
32 Dataset()
33 {
34 {
35 TFile f(fFileNames[0], "RECREATE");
36 TTree t(fTreeNames[0], fTreeNames[0]);
37 int x{};
38 int y{};
39 t.Branch("x", &x, "x/I");
40 t.Branch("y", &y, "y/I");
41 for (int i = 1; i <= fTreeEntries; i++) {
42 x = i;
43 y = 2 * i;
44 t.Fill();
45 }
46
47 t.Write();
48 }
49
50 {
51 TFile f(fFileNames[1], "RECREATE");
52 TTree t(fTreeNames[1], fTreeNames[1]);
53 int y{};
54 t.Branch("y", &y, "y/I");
55 for (int i = 1; i <= fTreeEntries; i++) {
56 y = 3 * i;
57 t.Fill();
58 }
59
60 t.Write();
61 }
62
63 {
64 TFile f(fFileNames[2], "RECREATE");
65 TTree t(fTreeNames[2], fTreeNames[2]);
66 int x{};
67 t.Branch("x", &x, "x/I");
68 for (int i = 1; i <= fTreeEntries; i++) {
69 x = 4 * i;
70 t.Fill();
71 }
72
73 t.Write();
74 }
75 }
76
77 ~Dataset()
78 {
79 for (auto &&fileName : fFileNames)
80 std::remove(fileName);
81 }
82};
83
85{
86 // Create the example dataset. Three files are created with one TTree each.
87 // The first contains branches (x, y), the second only branch y, the third
88 // only branch x.
89 Dataset trees{};
90
91 // The TChain will process the three files, encountering a different missing
92 // branch when switching to the next tree
93 TChain c{};
94 for (auto i = 0; i < trees.fFileNames.size(); i++) {
95 const auto fullPath = std::string(trees.fFileNames[i]) + "?#" + trees.fTreeNames[i];
96 c.Add(fullPath.c_str());
97 }
98
100
101 constexpr static auto defaultValue = std::numeric_limits<int>::min();
102
103 // Example 1: provide a default value for all missing branches
104 auto display1 = df.DefaultValueFor("x", defaultValue)
105 .DefaultValueFor("y", defaultValue)
106 .Display<int, int>({"x", "y"}, /*nRows*/ 15);
107
108 // Example 2: provide a default value for branch y, but skip events where
109 // branch x is missing
110 auto display2 =
111 df.DefaultValueFor("y", defaultValue).FilterAvailable("x").Display<int, int>({"x", "y"}, /*nRows*/ 15);
112
113 // Example 3: only keep events where branch y is missing and display values for branch x
114 auto display3 = df.FilterMissing("y").Display<int>({"x"}, /*nRows*/ 15);
115
116 std::cout << "Example 1: provide a default value for all missing branches\n";
117 display1->Print();
118
119 std::cout << "Example 2: provide a default value for branch y, but skip events where branch x is missing\n";
120 display2->Print();
121
122 std::cout << "Example 3: only keep events where branch y is missing and display values for branch x\n";
123 display3->Print();
124}
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, ...
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition TChain.cxx:219
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
A TTree represents a columnar dataset.
Definition TTree.h:79
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17