Logo ROOT   6.16/01
Reference Guide
df007_snapshot.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_dataframe
3/// \notebook -draw
4/// This tutorial shows how to write out datasets in ROOT format using the RDataFrame
5/// \macro_code
6///
7/// \date April 2017
8/// \author Danilo Piparo
9
10// A simple helper function to fill a test tree: this makes the example
11// stand-alone.
12void fill_tree(const char *treeName, const char *fileName)
13{
14 ROOT::RDataFrame d(10000);
15 int i(0);
16 d.Define("b1", [&i]() { return i; })
17 .Define("b2",
18 [&i]() {
19 float j = i * i;
20 ++i;
21 return j;
22 })
23 .Snapshot(treeName, fileName);
24}
25
27{
28 // We prepare an input tree to run on
29 auto fileName = "df007_snapshot.root";
30 auto outFileName = "df007_snapshot_output.root";
31 auto outFileNameAllColumns = "df007_snapshot_output_allColumns.root";
32 auto treeName = "myTree";
33 fill_tree(treeName, fileName);
34
35 // We read the tree from the file and create a RDataFrame.
36 ROOT::RDataFrame d(treeName, fileName);
37
38 // ## Select entries
39 // We now select some entries in the dataset
40 auto d_cut = d.Filter("b1 % 2 == 0");
41 // ## Enrich the dataset
42 // Build some temporary columns: we'll write them out
43 auto d2 = d_cut.Define("b1_square", "b1 * b1")
44 .Define("b2_vector",
45 [](float b2) {
46 std::vector<float> v;
47 for (int i = 0; i < 3; i++)
48 v.push_back(b2 * i);
49 return v;
50 },
51 {"b2"});
52
53 // ## Write it to disk in ROOT format
54 // We now write to disk a new dataset with one of the variables originally
55 // present in the tree and the new variables.
56 // The user can explicitly specify the types of the columns as template
57 // arguments of the Snapshot method, otherwise they will be automatically
58 // inferred.
59 d2.Snapshot(treeName, outFileName, {"b1", "b1_square", "b2_vector"});
60
61 // Open the new file and list the columns of the tree
62 TFile f1(outFileName);
63 TTree *t;
64 f1.GetObject(treeName, t);
65 std::cout << "These are the columns b1, b1_square and b2_vector:" << std::endl;
66 for (auto branch : *t->GetListOfBranches()) {
67 std::cout << "Branch: " << branch->GetName() << std::endl;
68 }
69 f1.Close();
70
71 // We are not forced to write the full set of column names. We can also
72 // specify a regular expression for that. In case nothing is specified, all
73 // columns are persistified.
74 d2.Snapshot(treeName, outFileNameAllColumns);
75
76 // Open the new file and list the columns of the tree
77 TFile f2(outFileNameAllColumns);
78 f2.GetObject(treeName, t);
79 std::cout << "These are all the columns available to this tdf:" << std::endl;
80 for (auto branch : *t->GetListOfBranches()) {
81 std::cout << "Branch: " << branch->GetName() << std::endl;
82 }
83 f2.Close();
84
85 // We can also get a fresh RDataFrame out of the snapshot and restart the
86 // analysis chain from it. The default columns are the ones selected.
87 // Notice also how we can decide to be more explicit with the types of the
88 // columns.
89 auto snapshot_tdf = d2.Snapshot<int>(treeName, outFileName, {"b1_square"});
90 auto h = snapshot_tdf->Histo1D();
91 auto c = new TCanvas();
92 h->DrawClone();
93
94 return 0;
95}
SVector< double, 2 > v
Definition: Dict.h:5
#define d(i)
Definition: RSha256.hxx:102
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:41
The Canvas class.
Definition: TCanvas.h:31
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
A TTree object has a header with a name and a title.
Definition: TTree.h:71
virtual TObjArray * GetListOfBranches()
Definition: TTree.h:427
TF1 * f1
Definition: legend1.C:11