Logo ROOT   6.10/09
Reference Guide
tdf007_snapshot.py
Go to the documentation of this file.
1 ## \file
2 ## \ingroup tutorial_tdataframe
3 ## \notebook
4 ## This tutorial shows how to write out datasets in ROOT format using the TDataFrame
5 ## \macro_code
6 ##
7 ## \date April 2017
8 ## \author Danilo Piparo
9 
10 import ROOT
11 
12 fill_tree_code ='''
13 void fill_tree(const char *filename, const char *treeName)
14 {
15  TFile f(filename, "RECREATE");
16  TTree t(treeName, treeName);
17  int b1;
18  float b2;
19  t.Branch("b1", &b1);
20  t.Branch("b2", &b2);
21  for (int i = 0; i < 10000; ++i) {
22  b1 = i;
23  b2 = i * i;
24  t.Fill();
25  }
26  t.Write();
27  f.Close();
28  return;
29 }
30 '''
31 
32 # We prepare an input tree to run on: the C++ helper fill_tree above is declared to the interpreter and fills 10000 entries with an int branch b1 = i and a float branch b2 = i * i
33 fileName = "tdf007_snapshot_py.root"
34 outFileName = "tdf007_snapshot_output_py.root"
35 outFileNameAllColumns = "tdf007_snapshot_output_allColumns_py.root"
36 treeName = "myTree"
37 ROOT.gInterpreter.Declare(fill_tree_code)
38 ROOT.fill_tree(fileName, treeName)
39 
40 # We read the tree named treeName from the file we just wrote and create a TDataFrame on top of it.
41 TDF = ROOT.ROOT.Experimental.TDataFrame
42 d = TDF(treeName, fileName)
43 
44 # ## Select entries
45 # We now select some entries in the dataset: only those with an even b1 pass the filter
46 d_cut = d.Filter("b1 % 2 == 0")
47 # ## Enrich the dataset
48 # Build some temporary columns (b1_square from an expression, b2_vector from the C++ helper getVector declared below): we'll write them out
49 
50 getVector_code ='''
51 std::vector<float> getVector (float b2)
52 {
53  std::vector<float> v;
54  for (int i = 0; i < 3; i++) v.push_back(b2*i);
55  return v;
56 }
57 '''
58 ROOT.gInterpreter.Declare(getVector_code)
59 
60 d2 = d_cut.Define("b1_square", "b1 * b1") \
61  .Define("b2_vector", "getVector( b2 )")
62 
63 # ## Write it to disk in ROOT format
64 # We now write to disk a new dataset with one of the variables originally
65 # present in the tree (b1) and the new variables (b1_square and b2_vector).
66 # The user can explicitly specify the types of the columns as template
67 # arguments of the Snapshot method; otherwise they will be automatically
68 # inferred. Here the column names are passed as a std::vector<std::string>.
69 branchList = ROOT.vector('string')()
70 for branchName in ["b1", "b1_square", "b2_vector"]:
71  branchList.push_back(branchName)
72 d2.Snapshot(treeName, outFileName, branchList)
73 
74 # Open the new file and list the columns of the tree
75 f1 = ROOT.TFile(outFileName)
76 t = f1.myTree  # the attribute name is the literal value of treeName ("myTree")
77 print("These are the columns b1, b1_square and b2_vector:")
78 for branch in t.GetListOfBranches():
79  print("Branch: %s" %branch.GetName())
80 
81 f1.Close()
82 
83 # We are not forced to write the full set of column names. We can also
84 # specify a regular expression for that. In case nothing is specified (as
85 # in the call below), all columns are written out.
86 d2.Snapshot(treeName, outFileNameAllColumns)
87 
88 # Open the new file and list the columns of the tree
89 f2 = ROOT.TFile(outFileNameAllColumns)
90 t = f2.myTree  # the attribute name is the literal value of treeName ("myTree")
91 print("These are all the columns available to this tdf:")
92 for branch in t.GetListOfBranches():
93  print("Branch: %s" %branch.GetName())
94 
95 f2.Close()
96 
97 # We can also get a fresh TDataFrame out of the snapshot and restart the
98 # analysis chain from it. Only the b1_square column is written this time.
99 
100 branchList.clear()
101 branchList.push_back("b1_square")
102 snapshot_tdf = d2.Snapshot(treeName, outFileName, branchList);  # NOTE(review): the trailing ';' is a no-op in Python; this reuses outFileName from the first Snapshot, which presumably replaces that file -- confirm
103 h = snapshot_tdf.Histo1D("b1_square")
104 c = ROOT.TCanvas()
105 h.Draw()
106