Logo ROOT  
Reference Guide
df014_CSVDataSource.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_dataframe
3/// \notebook -draw
4/// Process a CSV file with RDataFrame and the CSV data source.
5///
6/// This tutorial illustrates how use the RDataFrame in combination with a
7/// RDataSource. In this case we use a RCsvDS. This data source allows to read
8/// a CSV file from a RDataFrame.
9/// As a result of running this tutorial, we will produce plots of the dimuon
10/// spectrum starting from a subset of the CMS collision events of Run2010B.
11/// Dataset Reference:
12/// McCauley, T. (2014). Dimuon event information derived from the Run2010B
13/// public Mu dataset. CERN Open Data Portal.
14/// DOI: [10.7483/OPENDATA.CMS.CB8H.MFFA](http://opendata.cern.ch/record/700).
15///
16/// \macro_code
17/// \macro_image
18///
19/// \date October 2017
20/// \author Enric Tejedor (CERN)
21
23{
24 // Let's first create a RDF that will read from the CSV file.
25 // The types of the columns will be automatically inferred.
26 auto fileNameUrl = "http://root.cern.ch/files/tutorials/df014_CsvDataSource_MuRun2010B.csv";
27 auto fileName = "df014_CsvDataSource_MuRun2010B_cpp.csv";
28 if(gSystem->AccessPathName(fileName))
29 TFile::Cp(fileNameUrl, fileName);
30 auto df = ROOT::RDF::MakeCsvDataFrame(fileName);
31
32 // Now we will apply a first filter based on two columns of the CSV,
33 // and we will define a new column that will contain the invariant mass.
34 // Note how the new invariant mass column is defined from several other
35 // columns that already existed in the CSV file.
36 auto filteredEvents =
37 df.Filter("Q1 * Q2 == -1")
38 .Define("m", "sqrt(pow(E1 + E2, 2) - (pow(px1 + px2, 2) + pow(py1 + py2, 2) + pow(pz1 + pz2, 2)))");
39
40 // Next we create a histogram to hold the invariant mass values and we draw it.
41 auto invMass =
42 filteredEvents.Histo1D({"invMass", "CMS Opendata: #mu#mu mass;#mu#mu mass [GeV];Events", 512, 2, 110}, "m");
43
44 auto c = new TCanvas();
45 c->SetLogx();
46 c->SetLogy();
47 invMass->DrawClone();
48
49 // We will now produce a plot also for the J/Psi particle. We will plot
50 // on the same canvas the full spectrum and the zoom in on the J/psi particle.
51 // First we will create the full spectrum histogram from the invariant mass
52 // column, using a different histogram model than before.
53 auto fullSpectrum =
54 filteredEvents.Histo1D({"Spectrum", "Subset of CMS Run 2010B;#mu#mu mass [GeV];Events", 1024, 2, 110}, "m");
55
56 // Next we will create the histogram for the J/psi particle, applying first
57 // the corresponding cut.
58 double jpsiLow = 2.95;
59 double jpsiHigh = 3.25;
60 auto jpsiCut = [jpsiLow, jpsiHigh](double m) { return m < jpsiHigh && m > jpsiLow; };
61 auto jpsi =
62 filteredEvents.Filter(jpsiCut, {"m"})
63 .Histo1D({"jpsi", "Subset of CMS Run 2010B: J/#psi window;#mu#mu mass [GeV];Events", 128, jpsiLow, jpsiHigh},
64 "m");
65
66 // Finally we draw the two histograms side by side.
67 auto dualCanvas = new TCanvas("DualCanvas", "DualCanvas", 800, 512);
68 dualCanvas->Divide(2, 1);
69 auto leftPad = dualCanvas->cd(1);
70 leftPad->SetLogx();
71 leftPad->SetLogy();
72 fullSpectrum->DrawClone("Hist");
73 dualCanvas->cd(2);
74 jpsi->DrawClone("HistP");
75
76 return 0;
77}
#define c(i)
Definition: RSha256.hxx:101
R__EXTERN TSystem * gSystem
Definition: TSystem.h:559
The Canvas class.
Definition: TCanvas.h:23
virtual Bool_t Cp(const char *dst, Bool_t progressbar=kTRUE, UInt_t buffersize=1000000)
Allows to copy this file to the dst URL.
Definition: TFile.cxx:4916
virtual Bool_t AccessPathName(const char *path, EAccessMode mode=kFileExists)
Returns FALSE if one can access a file using the specified access mode.
Definition: TSystem.cxx:1296
RDataFrame MakeCsvDataFrame(std::string_view fileName, bool readHeaders=true, char delimiter=',', Long64_t linesChunkSize=-1LL, std::unordered_map< std::string, char > &&colTypes={})
Factory method to create a CSV RDataFrame.
Definition: RCsvDS.cxx:558
TMarker m
Definition: textangle.C:8