Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df015_LazyDataSource.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_dataframe
3/// \notebook -draw
4/// Use the lazy RDataFrame data source to concatenate computation graphs.
5///
6/// This tutorial illustrates how to use a *lazy data source* to create a
7/// data frame from columns of one or more parent data frames, deferring the
8/// creation of those columns until the daughter data frame is actually
9/// used.
10/// Dataset Reference:
11/// McCauley, T. (2014). Dimuon event information derived from the Run2010B
12/// public Mu dataset. CERN Open Data Portal.
13/// DOI: [10.7483/OPENDATA.CMS.CB8H.MFFA](http://opendata.cern.ch/record/700).
14/// From the ROOT website: https://root.cern/files/tutorials/tdf014_CsvDataSource_MuRun2010B.csv
15///
16/// \macro_code
17/// \macro_image
18///
19/// \date February 2018
20/// \author Danilo Piparo (CERN)
21
22int df015_LazyDataSource()
23{
24 using namespace ROOT::RDF;
25
26 // Let's first create a RDF that will read from the CSV file.
27 // See the tutorial (https://root.cern/doc/master/df014__CSVDataSource_8C.html) on CSV data sources for more details!
28 auto fileNameUrl = "http://root.cern/files/tutorials/df014_CsvDataSource_MuRun2010B.csv";
29 auto fileName = "df015_CsvDataSource_MuRun2010B.csv";
30 if(gSystem->AccessPathName(fileName))
31 TFile::Cp(fileNameUrl, fileName);
32
33 auto csv_rdf = FromCSV(fileName);
34
35 // Now we take out two columns: px and py of the first muon in the muon pair
36 std::string px1Name = "px1";
37 auto px1 = csv_rdf.Take<double>(px1Name);
38 std::string py1Name = "py1";
39 auto py1 = csv_rdf.Take<double>(py1Name);
40
41 // Now we create a new dataframe built on top of the columns above. Note that up to now, no event loop
42 // has been carried out!
43 auto df = MakeLazyDataFrame(std::make_pair(px1Name, px1), std::make_pair(py1Name, py1));
44
45 // We build a histogram of the transverse momentum of the muons.
46 auto ptFormula = [](double px, double py) { return sqrt(px * px + py * py); };
47 auto pt_h = df.Define("pt", ptFormula, {"px1", "py1"})
48 .Histo1D<double>({"pt", "Muon p_{T};p_{T} [GeV/c];", 128, 0, 128}, "pt");
49
50 auto can = new TCanvas();
51 can->SetLogy();
52 pt_h->DrawCopy();
53
54 return 0;
55}
R__EXTERN TSystem * gSystem
Definition TSystem.h:555
The Canvas class.
Definition TCanvas.h:23
virtual Bool_t Cp(const char *dst, Bool_t progressbar=kTRUE, UInt_t buffersize=1000000)
Allows to copy this file to the dst URL.
Definition TFile.cxx:4995
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition TSystem.cxx:1296
VecExpr< UnaryOp< Sqrt< T >, VecExpr< A, T, D >, T >, T, D > sqrt(const VecExpr< A, T, D > &rhs)
RDataFrame MakeLazyDataFrame(std::pair< std::string, RResultPtr< std::vector< ColumnTypes > > > &&... colNameProxyPairs)
Factory method to create a Lazy RDataFrame.
Definition RLazyDS.hxx:29
RDataFrame FromCSV(std::string_view fileName, bool readHeaders=true, char delimiter=',', Long64_t linesChunkSize=-1LL, std::unordered_map< std::string, char > &&colTypes={})
Factory method to create a CSV RDataFrame.
Definition RCsvDS.cxx:558