This example shows how to process a dataset whose entries might be incomplete because one or more branches are missing in one or more of the files of the dataset. It shows how to use the FilterAvailable, FilterMissing and DefaultValueFor RDataFrame functionalities to act upon the missing entries.
#include <iostream>
#include <numeric>
// Paths of the three ROOT files used by this example: each one is opened with
// "RECREATE" when the dataset is written and passed to std::remove on cleanup.
constexpr static std::array<const char *, 3> fFileNames{"df036_missingBranches_C_file_1.root",
"df036_missingBranches_C_file_2.root",
"df036_missingBranches_C_file_3.root"};
// Tree names, index-aligned with fFileNames; each entry is passed as both
// constructor arguments of the TTree written to the file with the same index.
constexpr static std::array<const char *, 3> fTreeNames{"tree_1", "tree_2", "tree_3"};
// Number of Fill() calls performed on each tree.
constexpr static auto fTreeEntries{5};
// Writes the three input files of the example. Each inner scope creates one
// file and one tree, fills the tree fTreeEntries times and writes it out.
// The files deliberately differ in which branches their tree contains.
// NOTE(review): `x` and `y`, whose addresses are handed to Branch(), are not
// declared in the visible code, and nothing visible assigns them before
// Fill() - presumably those lines are missing here; confirm against the
// complete file.
{
// File 0: the tree has both an "x" and a "y" branch.
{
TFile f(fFileNames[0],
"RECREATE");
TTree t(fTreeNames[0], fTreeNames[0]);
t.Branch(
"x", &
x,
"x/I");
t.Branch(
"y", &
y,
"y/I");
// One Fill() per iteration; i runs from 1 to fTreeEntries inclusive.
for (int i = 1; i <= fTreeEntries; i++) {
t.Fill();
}
t.Write();
}
// File 1: the tree has only a "y" branch, so "x" is missing in this file.
{
TFile f(fFileNames[1],
"RECREATE");
TTree t(fTreeNames[1], fTreeNames[1]);
t.Branch(
"y", &
y,
"y/I");
for (int i = 1; i <= fTreeEntries; i++) {
t.Fill();
}
t.Write();
}
// File 2: the tree has only an "x" branch, so "y" is missing in this file.
{
TFile f(fFileNames[2],
"RECREATE");
TTree t(fTreeNames[2], fTreeNames[2]);
t.Branch(
"x", &
x,
"x/I");
for (int i = 1; i <= fTreeEntries; i++) {
t.Fill();
}
t.Write();
}
}
// Cleanup: asks std::remove to delete every file listed in fFileNames.
// The return value of std::remove is not checked, so a failed deletion is
// silently ignored.
// NOTE(review): the single-argument std::remove is declared in <cstdio>,
// which is not among the includes visible here - confirm it is included.
{
for (auto &&fileName : fFileNames)
std::remove(fileName);
}
};
// Body of the example: books three Display() requests on `df`, each handling
// the missing "x"/"y" values differently, then prints one heading per example.
// NOTE(review): `trees` and `df` are not declared in the visible code -
// presumably the declarations are missing here; confirm against the
// complete file.
{
// Loops over the indices of fFileNames. The loop body is empty in the visible
// code - presumably the statement using each file/tree pair is missing here.
// NOTE(review): `auto i = 0` deduces int while size() returns an unsigned
// type, so the loop condition is a signed/unsigned comparison.
for (
auto i = 0; i <
trees.fFileNames.size(); i++) {
}
// Sentinel used in place of missing values: the smallest representable int
// (it appears as -2147483648 in the example output).
// NOTE(review): std::numeric_limits is declared in <limits>; only <numeric>
// is included in the visible part of the file - confirm.
constexpr static auto defaultValue = std::numeric_limits<int>::min();
// Example 1: use defaultValue for both "x" and "y" wherever they are missing,
// then request a display of columns x and y (second argument 15: row limit,
// per the ROOT Display() API - confirm).
auto display1 = df.DefaultValueFor(
"x", defaultValue)
.DefaultValueFor("y", defaultValue)
.Display<
int,
int>({
"x",
"y"}, 15);
// Example 2: use defaultValue for a missing "y", but keep only the entries
// where "x" is available.
// NOTE(review): unlike display1 and display3, the result of this chain is not
// stored in a variable - presumably an `auto display2 =` was lost; confirm.
df.DefaultValueFor(
"y", defaultValue).FilterAvailable(
"x").Display<
int,
int>({
"x",
"y"}, 15);
// Example 3: keep only the entries where "y" is missing and display "x".
auto display3 = df.FilterMissing(
"y").Display<
int>({
"x"}, 15);
// Headings of the three examples.
// NOTE(review): no statement that prints display1/display3 is visible between
// or after these headings - presumably those calls are missing here; confirm.
std::cout << "Example 1: provide a default value for all missing branches\n";
std::cout << "Example 2: provide a default value for branch y, but skip events where branch x is missing\n";
std::cout << "Example 3: only keep events where branch y is missing and display values for branch x\n";
}
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, ...
A chain is a collection of files containing TTree objects.
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
A TTree represents a columnar dataset.
Example 1: provide a default value for all missing branches
+-----+-------------+-------------+
| Row | x | y |
+-----+-------------+-------------+
| 0 | 1 | 2 |
+-----+-------------+-------------+
| 1 | 2 | 4 |
+-----+-------------+-------------+
| 2 | 3 | 6 |
+-----+-------------+-------------+
| 3 | 4 | 8 |
+-----+-------------+-------------+
| 4 | 5 | 10 |
+-----+-------------+-------------+
| 5 | -2147483648 | 3 |
+-----+-------------+-------------+
| 6 | -2147483648 | 6 |
+-----+-------------+-------------+
| 7 | -2147483648 | 9 |
+-----+-------------+-------------+
| 8 | -2147483648 | 12 |
+-----+-------------+-------------+
| 9 | -2147483648 | 15 |
+-----+-------------+-------------+
| 10 | 4 | -2147483648 |
+-----+-------------+-------------+
| 11 | 8 | -2147483648 |
+-----+-------------+-------------+
| 12 | 12 | -2147483648 |
+-----+-------------+-------------+
| 13 | 16 | -2147483648 |
+-----+-------------+-------------+
| 14 | 20 | -2147483648 |
+-----+-------------+-------------+
Example 2: provide a default value for branch y, but skip events where branch x is missing
+-----+----+-------------+
| Row | x | y |
+-----+----+-------------+
| 0 | 1 | 2 |
+-----+----+-------------+
| 1 | 2 | 4 |
+-----+----+-------------+
| 2 | 3 | 6 |
+-----+----+-------------+
| 3 | 4 | 8 |
+-----+----+-------------+
| 4 | 5 | 10 |
+-----+----+-------------+
| 10 | 4 | -2147483648 |
+-----+----+-------------+
| 11 | 8 | -2147483648 |
+-----+----+-------------+
| 12 | 12 | -2147483648 |
+-----+----+-------------+
| 13 | 16 | -2147483648 |
+-----+----+-------------+
| 14 | 20 | -2147483648 |
+-----+----+-------------+
Example 3: only keep events where branch y is missing and display values for branch x
+-----+----+
| Row | x |
+-----+----+
| 10 | 4 |
+-----+----+
| 11 | 8 |
+-----+----+
| 12 | 12 |
+-----+----+
| 13 | 16 |
+-----+----+
| 14 | 20 |
+-----+----+