This tutorial illustrates the basic features of the RDataFrame class, a utility that lets you interact with data stored in TTrees using a functional, chained-call approach.
// Defines two columns, "b1" and "b2", on the data frame `d` and snapshots the
// result under the tree name `treeName` into the file `fileName`.
//
// Both columns are driven by one shared counter: "b1" yields the counter as a
// double, "b2" yields its square and then advances the counter.
//
// NOTE(review): `d` is not declared anywhere in the visible text; presumably it
// is a data frame constructed on a line that is missing here -- confirm against
// the original file.
void fill_tree(const char *treeName, const char *fileName)
{
   int counter = 0;

   // "b1": the current counter value, converted to double.
   auto makeB1 = [&counter]() { return static_cast<double>(counter); };

   // "b2": the counter squared. The increment lives here, so the counter moves
   // forward once per evaluation of this column.
   auto makeB2 = [&counter]() {
      const auto squared = counter * counter;
      ++counter;
      return squared;
   };

   d.Define("b1", makeB1).Define("b2", makeB2).Snapshot(treeName, fileName);
}
{
// Body of the tutorial's driver routine: it writes a small tree with fill_tree
// and then runs a series of filters, reductions and per-entry actions on `d`.
// NOTE(review): the function header, the declaration of `d` and the declaration
// of `b1VecCl` are not present in the visible text (presumably lost when the
// page was extracted) -- confirm against the original file.

// Output file and tree names, then create the dataset on disk.
auto fileName = "df001_introduction.root";
auto treeName = "myTree";
fill_tree(treeName, fileName);
// Reusable predicates: cutb1 keeps entries with b1 < 5; cutb1b2 keeps entries
// whose b2 is odd (b2 % 2 is non-zero) and whose b1 is below 4.
auto cutb1 = [](double b1) { return b1 < 5.; };
auto cutb1b2 = [](int b2, double b1) { return b2 % 2 && b1 < 4.; };
// Chain both filters and count what survives. The first Filter names no
// columns -- presumably it relies on default columns configured on `d`; confirm.
// The second one lists its input columns explicitly, in the lambda's argument order.
auto entries1 =
d.Filter(cutb1)
.Filter(cutb1b2, {"b2", "b1"})
.Count();
// The count is read by dereferencing the returned result object.
std::cout << *entries1 << " entries passed all filters" << std::endl;
// The same b1 cut, this time written as a string expression instead of a lambda.
auto entries2 =
d.Filter(
"b1 < 5.").Count();
std::cout << *entries2 << " entries passed the string filter" << std::endl;
// Keep the filtered node in a variable so several reductions can be attached to it.
auto b1b2_cut =
d.Filter(cutb1b2, {
"b2",
"b1"});
// Min, Max and Mean are called without a column name here (presumably they use
// the default column of `d` -- confirm); the last Mean names "b2" explicitly.
auto minVal = b1b2_cut.Min();
auto maxVal = b1b2_cut.Max();
auto meanVal = b1b2_cut.Mean();
// nonDefmeanVal is requested but never read in the visible code.
auto nonDefmeanVal = b1b2_cut.Mean("b2");
std::cout << "The mean is always included between the min and the max: " << *minVal << " <= " << *meanVal
<< " <= " << *maxVal << std::endl;
// Collect the values that pass cutb1, once with the default container of
// Take<double> and once into a std::list<double>.
auto b1_cut =
d.Filter(cutb1);
auto b1Vec = b1_cut.Take<double>();
auto b1List = b1_cut.Take<
double, std::list<double>>();
std::cout << "Selected b1 entries" << std::endl;
for (auto b1_entry : *b1List)
std::cout << b1_entry << " ";
std::cout << std::endl;
// NOTE(review): `b1VecCl` is not declared in the visible text; presumably it is
// a class descriptor obtained for the type of *b1Vec -- confirm.
std::cout << "The type of b1Vec is " << b1VecCl->GetName() << std::endl;
// Fill a histogram from the entries passing cutb1; no column or binning is
// given, so both are left to Histo1D.
auto hist =
d.Filter(cutb1).Histo1D();
std::cout << "Filled h " << hist->GetEntries() << " times, mean: " << hist->GetMean() << std::endl;
// A histogram filled by hand: name and title "h", 12 bins, axis from -1 to 11.
TH1F h(
"h",
"h", 12, -1, 11);
// Keep entries whose b2 is even, then run a callback on each of them that
// fills `h` (captured by reference) with b1. The Foreach call names no columns.
d.Filter([](
int b2) {
return b2 % 2 == 0; }, {
"b2"}).Foreach([&
h](
double b1) {
h.Fill(b1); });
std::cout <<
"Filled h with " <<
h.GetEntries() <<
" entries" << std::endl;
// Build three nodes: two independent filters on `d`, and a third that applies
// cutb1b2 on top of the cutb1 node.
auto cutb1_result =
d.Filter(cutb1);
auto cutb1b2_result =
d.Filter(cutb1b2, {
"b2",
"b1"});
auto cutb1_cutb1b2_result = cutb1_result.Filter(cutb1b2, {"b2", "b1"});
// All three counts are requested before any of them is dereferenced
// (presumably so that they can be computed together -- confirm).
auto evts_cutb1_result = cutb1_result.Count();
auto evts_cutb1b2_result = cutb1b2_result.Count();
auto evts_cutb1_cutb1b2_result = cutb1_cutb1b2_result.Count();
std::cout << "Events passing cutb1: " << *evts_cutb1_result << std::endl
<< "Events passing cutb1b2: " << *evts_cutb1b2_result << std::endl
<< "Events passing both: " << *evts_cutb1_cutb1b2_result << std::endl;
// Add a column "sum" computed by a lambda from b1 and b2, then count entries.
auto entries_sum =
d.Define(
"sum", [](
double b1,
int b2) {
return b2 + b1; }, {
"b1",
"b2"})
.Count();
std::cout << *entries_sum << std::endl;
// Same idea with string expressions: define "sum2" and filter on its value.
auto entries_sum2 =
d.Define(
"sum2",
"b1 + b2").Filter(
"sum2 > 4.2").Count();
std::cout << *entries_sum2 << std::endl;
// Callback that prints the two values it is handed; the Foreach below feeds it
// from the columns "rdfentry_" and "rdfslot_".
auto printEntrySlot = [](
ULong64_t iEntry,
unsigned int slot) {
std::cout << "Entry: " << iEntry << " Slot: " << slot << std::endl;
};
d.Foreach(printEntrySlot, {
"rdfentry_",
"rdfslot_"});
return 0;
}
unsigned long long ULong64_t
ROOT's RDataFrame offers a high-level interface for analyses of data stored in TTree, ...
1-D histogram with a float per channel (see TH1 documentation)
RVec< T > Filter(const RVec< T > &v, F &&f)
Create a new collection with the elements passing the filter expressed by the predicate.
static uint64_t sum(uint64_t i)
2 entries passed all filters
5 entries passed the string filter
The mean is always included between the min and the max: 1 <= 2 <= 3
Selected b1 entries
0 1 2 3 4
The type of b1Vec is vector<double>
Filled h 5 times, mean: 2
Filled h with 5 entries
Events passing cutb1: 5
Events passing cutb1b2: 2
Events passing both: 2
8
8
Entry: 0 Slot: 0
Entry: 1 Slot: 0
Entry: 2 Slot: 0
Entry: 3 Slot: 0
Entry: 4 Slot: 0
Entry: 5 Slot: 0
Entry: 6 Slot: 0
Entry: 7 Slot: 0
Entry: 8 Slot: 0
Entry: 9 Slot: 0
(int) 0