Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df106_HiggsToFourLeptons.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_dataframe
3/// \notebook -draw
4/// The Higgs to four lepton analysis from the ATLAS Open Data release of 2020, with RDataFrame.
5///
6/// This tutorial is the Higgs to four lepton analysis from the ATLAS Open Data release in 2020
7/// (http://opendata.atlas.cern/release/2020/documentation/). The data was taken with the ATLAS detector
8/// during 2016 at a center-of-mass energy of 13 TeV. The decay of the Standard Model Higgs boson
9/// to two Z bosons and subsequently to four leptons is called the "golden channel". The selection leads
10/// to a narrow invariant mass peak on top a relatively smooth and small background, revealing
11/// the Higgs at 125 GeV.
12/// The analysis is translated to an RDataFrame workflow processing about 300 MB of simulated events and data.
13///
14/// Lepton selection efficiency corrections ("scale factors") are applied to simulated samples to correct for the
15/// differences in the trigger, reconstruction, and identification efficiencies in simulation compared to real data.
16/// Systematic uncertainties for those scale factors are evaluated and the Vary function of RDataFrame is used to
17/// propagate the variations to the final four leptons mass distribution.
18///
19/// \macro_code
20/// \macro_image
21///
22/// \date March 2020, August 2022, August 2023
23/// \authors Stefan Wunsch (KIT, CERN), Julia Mathe (CERN), Marta Czurylo (CERN)
24
25#include <Math/Vector4D.h>
26#include <ROOT/RDFHelpers.hxx>
27#include <ROOT/RDataFrame.hxx>
28#include <ROOT/RVec.hxx>
29#include <TCanvas.h>
30#include <TGraph.h>
31#include <TH1D.h>
32#include <THStack.h>
33#include <TLatex.h>
34#include <TLegend.h>
35#include <TProfile.h>
36#include <TStyle.h>
37
38using namespace ROOT::VecOps;
40using ROOT::RVecF;
42using namespace ROOT::RDF::Experimental;
43
44// Define functions needed in the analysis
45// Select events for the analysis
46bool GoodElectronsAndMuons(const ROOT::RVecI &type, const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e,
47 const RVecF &trackd0pv, const RVecF &tracksigd0pv, const RVecF &z0)
48{
49 for (size_t i = 0; i < type.size(); i++) {
50 PtEtaPhiEVectorF p(0.001 * pt[i], eta[i], phi[i], 0.001 * e[i]);
51 if (type[i] == 11) {
52 if (pt[i] < 7000 || abs(eta[i]) > 2.47 || abs(trackd0pv[i] / tracksigd0pv[i]) > 5 ||
53 abs(z0[i] * sin(p.Theta())) > 0.5)
54 return false;
55 } else {
56 if (abs(trackd0pv[i] / tracksigd0pv[i]) > 5 || abs(z0[i] * sin(p.Theta())) > 0.5)
57 return false;
58 }
59 }
60 return true;
61}
62
63// Compute the invariant mass of a four-lepton-system.
64float ComputeInvariantMass(const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e)
65{
66 PtEtaPhiEVectorF p1(pt[0], eta[0], phi[0], e[0]);
67 PtEtaPhiEVectorF p2(pt[1], eta[1], phi[1], e[1]);
68 PtEtaPhiEVectorF p3(pt[2], eta[2], phi[2], e[2]);
69 PtEtaPhiEVectorF p4(pt[3], eta[3], phi[3], e[3]);
70 return 0.001 * (p1 + p2 + p3 + p4).M();
71}
72
74{
75
76 // Enable Multithreading
78
79 // Create the RDataFrame from the spec json file. The df106_HiggsToFourLeptons_spec.json is provided in the same
80 // folder as this tutorial
81 std::string dataset_spec = gROOT->GetTutorialsDir() + std::string("/dataframe/df106_HiggsToFourLeptons_spec.json");
83
84 // Add the ProgressBar feature
86
87 // Perform the analysis
88 // Access metadata information that is stored in the JSON config file of the RDataFrame
89 // The metadata contained in the JSON file is accessible within a `DefinePerSample` call, through the `RDFSampleInfo`
90 // class
91 auto df_analysis =
92 df.DefinePerSample("xsecs", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("xsecs"); })
93 .DefinePerSample("lumi", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("lumi"); })
94 .DefinePerSample("sumws", [](unsigned int slot, const RSampleInfo &id) { return id.GetD("sumws"); })
95 .DefinePerSample("sample_category",
96 [](unsigned int slot, const RSampleInfo &id) { return id.GetS("sample_category"); })
97 // Apply an MC correction for the ZZ decay due to missing gg->ZZ process
98 .DefinePerSample("scale",
99 [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
100 return id.Contains("mc_363490.llll.4lep.root") ? 1.3f : 1.0f;
101 })
102 // Select electron or muon trigger
103 .Filter("trigE || trigM")
104 // Select events with exactly four good leptons conserving charge and lepton numbers
105 // Note that all collections are RVecs and good_lep is the mask for the good leptons.
106 // The lepton types are PDG numbers and set to 11 or 13 for an electron or muon
107 // irrespective of the charge.
108 .Define("good_lep",
109 "abs(lep_eta) < 2.5 && lep_pt > 5000 && lep_ptcone30 / lep_pt < 0.3 && lep_etcone20 / lep_pt < 0.3")
110 .Filter("Sum(good_lep) == 4")
111 .Filter("Sum(lep_charge[good_lep]) == 0")
112 .Define("goodlep_sumtypes", "Sum(lep_type[good_lep])")
113 .Filter("goodlep_sumtypes == 44 || goodlep_sumtypes == 52 || goodlep_sumtypes == 48")
114 // Apply additional cuts depending on lepton flavour
115 .Filter(
116 "GoodElectronsAndMuons(lep_type[good_lep], lep_pt[good_lep], lep_eta[good_lep], lep_phi[good_lep], "
117 "lep_E[good_lep], lep_trackd0pvunbiased[good_lep], lep_tracksigd0pvunbiased[good_lep], lep_z0[good_lep])")
118 // Create new columns with the kinematics of good leptons
119 .Define("goodlep_pt", "lep_pt[good_lep]")
120 .Define("goodlep_eta", "lep_eta[good_lep]")
121 .Define("goodlep_phi", "lep_phi[good_lep]")
122 .Define("goodlep_E", "lep_E[good_lep]")
123 .Define("goodlep_type", "lep_type[good_lep]")
124 // Select leptons with high transverse momentum
125 .Filter("goodlep_pt[0] > 25000 && goodlep_pt[1] > 15000 && goodlep_pt[2] > 10000")
126 // Compute invariant mass
127 .Define("m4l", "ComputeInvariantMass(goodlep_pt, goodlep_eta, goodlep_phi, goodlep_E)")
128 // Reweighting of the samples is different for "data" and "MC"
129 .DefinePerSample("reweighting", [](unsigned int slot, const RSampleInfo &id) { return id.Contains("mc"); });
130
131 // Define the weight column (scale factor) for the MC samples
132 auto df_mc = df_analysis.Filter("reweighting == true")
133 .Define("weight", ("scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * "
134 "scaleFactor_PILEUP * mcWeight * scale * xsecs / sumws * lumi"));
135
136 // Book histograms for individual MC samples
137 auto df_higgs = df_mc.Filter(R"(sample_category == "higgs")")
138 .Histo1D<float>(ROOT::RDF::TH1DModel("higgs", "m4l", 24, 80, 170), "m4l", "weight");
139 auto df_zz = df_mc.Filter("sample_category == \"zz\"")
140 .Histo1D<float>(ROOT::RDF::TH1DModel("zz", "m4l", 24, 80, 170), "m4l", "weight");
141 auto df_other = df_mc.Filter("sample_category == \"other\"")
142 .Histo1D<float>(ROOT::RDF::TH1DModel("other", "m4l", 24, 80, 170), "m4l", "weight");
143
144 // Book the invariant mass histogram for the data
145 auto df_h_mass_data = df_analysis.Filter("reweighting == false")
146 .Filter("sample_category == \"data\"")
147 .Define("weight_", []() { return 1; })
148 .Histo1D<float>(ROOT::RDF::TH1DModel("data", "m4l", 24, 80, 170), "m4l", "weight_");
149
150 // Evaluate the systematic uncertainty
151
152 // The systematic uncertainty in this analysis is the MC scale factor uncertainty that depends on lepton
153 // kinematics such as pT or pseudorapidity.
154 // Muons uncertainties are negligible, as stated in https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/MUON
155 // Electrons uncertainties are evaluated based on the plots available in https://doi.org/10.48550/arXiv.1908.0
156 // The uncertainties are linearly interpolated, using the `TGraph::Eval()` method, to cover a range of pT values
157 // covered by the analysis.
158 const std::vector<double> x{5.50e3, 5.52e3, 12.54e3, 17.43e3, 22.40e3, 27.48e3, 30e3, 10000e3};
159 const std::vector<double> y{0.06628, 0.06395, 0.06396, 0.03372, 0.02441, 0.01403, 0, 0};
160 TGraph graph(x.size(), x.data(), y.data());
161
162 // Use the Vary method to add the systematic variations to the total MC scale factor ("weight") of the analysis
163 // The input consists of the input column to be varied and the lambda function to compute the systematic variations.
164 // The new output columns contain the varied values of the input column.
165 auto df_with_variations_mc =
166 df_mc
167 .Vary("weight",
168 [&graph](double x, const RVecF &pt, const RVec<unsigned int> &type) {
169 const auto v = Mean(Map(pt[type == 11], [&graph](auto p) { return graph.Eval(p); }));
170 return RVec<double>{(1 + v) * x, (1 - v) * x};
171 },
172 {"weight", "goodlep_pt", "goodlep_type"}, {"up", "down"})
173 .Histo1D<float>(ROOT::RDF::TH1DModel("Invariant Mass", "m4l", 24, 80, 170), "m4l", "weight");
174
175 // Create the total MC scale factor histograms: "nominal", "weight:up" and "weight:down".
176 auto histos_mc = VariationsFor(df_with_variations_mc);
177
178 // Evaluate the total MC uncertainty based on the variations. Note, in this case the uncertainties are symmetric.
179 for (unsigned int i = 0; i < histos_mc["nominal"].GetXaxis()->GetNbins(); i++) {
180 histos_mc["nominal"].SetBinError(
181 i, (histos_mc["weight:up"].GetBinContent(i) - histos_mc["nominal"].GetBinContent(i)));
182 }
183
184 // Make the plot of the data, individual MC contributions and the total MC scale factor systematic variations.
185 gROOT->SetStyle("ATLAS");
186
187 // Create canvas with pad
188 auto c = new TCanvas("c", " ", 600, 600);
189 auto pad = new TPad("upper_pad", "", 0, 0, 1, 1);
190 pad->SetTickx(0);
191 pad->SetTicky(0);
192 pad->Draw();
193 pad->cd();
194
195 // Draw stack with MC contributions
196 auto stack = new THStack("stack", "");
197 auto h_other = df_other.GetPtr();
198 h_other->SetFillColor(kViolet - 9);
199 stack->Add(h_other);
200 auto h_zz = df_zz.GetPtr();
201 h_zz->SetFillColor(kAzure - 9);
202 stack->Add(h_zz);
203 auto h_higgs = df_higgs.GetPtr();
204 h_higgs->SetFillColor(kRed + 2);
205 stack->Add(h_higgs);
206
207 stack->Draw("HIST");
208 stack->GetHistogram()->SetTitle("");
209 stack->GetHistogram()->GetXaxis()->SetLabelSize(0.035);
210 stack->GetHistogram()->GetXaxis()->SetTitleSize(0.045);
211 stack->GetHistogram()->GetXaxis()->SetTitleOffset(1.3);
212 stack->GetHistogram()->GetXaxis()->SetTitle("m_{4l}^{H#rightarrow ZZ} [GeV]");
213 stack->GetHistogram()->GetYaxis()->SetLabelSize(0.035);
214 stack->GetHistogram()->GetYaxis()->SetTitleSize(0.045);
215 stack->GetHistogram()->GetYaxis()->SetTitle("Events");
216 stack->SetMaximum(35);
217 stack->GetHistogram()->GetYaxis()->ChangeLabel(1, -1, 0);
218 stack->DrawClone(
219 "HIST"); // DrawClone() method is necessary to draw a TObject in case the original object goes out of scope
220
221 // Draw MC scale factor and variations
222 histos_mc["nominal"].SetStats(false);
223 histos_mc["nominal"].SetFillColor(kBlack);
224 histos_mc["nominal"].SetFillStyle(3254);
225 histos_mc["nominal"].DrawClone("E2 sames");
226 histos_mc["weight:up"].SetLineColor(kGreen + 2);
227 histos_mc["weight:up"].SetStats(false);
228 histos_mc["weight:up"].DrawClone("HIST sames");
229 histos_mc["weight:down"].SetLineColor(kBlue + 2);
230 histos_mc["weight:down"].SetStats(false);
231 histos_mc["weight:down"].DrawClone("HIST sames");
232
233 // Draw data histogram
234 auto h_data = df_h_mass_data.GetPtr();
235 h_data->SetMarkerStyle(20);
236 h_data->SetMarkerSize(1.);
237 h_data->SetLineWidth(2);
238 h_data->SetLineColor(kBlack);
239 h_data->SetStats(false);
240 h_data->DrawClone("E sames");
241
242 // Add legend
243 TLegend legend(0.57, 0.65, 0.94, 0.94);
244 legend.SetTextFont(42);
245 legend.SetFillStyle(0);
246 legend.SetBorderSize(0);
247 legend.SetTextSize(0.025);
248 legend.SetTextAlign(32);
249 legend.AddEntry(h_data, "Data", "lep");
250 legend.AddEntry(h_higgs, "Higgs MC", "f");
251 legend.AddEntry(h_zz, "ZZ MC", "f");
252 legend.AddEntry(h_other, "Other MC", "f");
253 legend.AddEntry(&(histos_mc["weight:down"]), "Total MC Variations Down", "l");
254 legend.AddEntry(&(histos_mc["weight:up"]), "Total MC Variations Up", "l");
255 legend.AddEntry(&(histos_mc["nominal"]), "Total MC Uncertainty", "f");
256 legend.DrawClone("Same");
257
258 // Add ATLAS label
259 TLatex atlas_label;
260 atlas_label.SetTextFont(70);
261 atlas_label.SetTextSize(0.04);
262 atlas_label.DrawLatexNDC(0.19, 0.85, "ATLAS");
263 TLatex data_label;
264 data_label.SetTextFont(42);
265 data_label.DrawLatexNDC(0.19 + 0.13, 0.85, "Open Data");
266 TLatex header;
267 data_label.SetTextFont(42);
268 header.SetTextSize(0.035);
269 header.DrawLatexNDC(0.21, 0.8, "#sqrt{s} = 13 TeV, 10 fb^{-1}");
270
271 // Save the plot.
272 c->SaveAs("df106_HiggsToFourLeptons_cpp.png");
273 std::cout << "Saved figure to df106_HiggsToFourLeptons_cpp.png" << std::endl;
274}
275
276int main()
277{
279}
int main()
Definition Prototype.cxx:12
#define c(i)
Definition RSha256.hxx:101
#define e(i)
Definition RSha256.hxx:103
@ kRed
Definition Rtypes.h:66
@ kBlack
Definition Rtypes.h:65
@ kGreen
Definition Rtypes.h:66
@ kBlue
Definition Rtypes.h:66
@ kAzure
Definition Rtypes.h:67
@ kViolet
Definition Rtypes.h:67
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
#define gROOT
Definition TROOT.h:407
Class describing a generic LorentzVector in the 4D space-time, using the specified coordinate system ...
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
virtual void SetTextFont(Font_t tfont=62)
Set the text font.
Definition TAttText.h:46
virtual void SetTextSize(Float_t tsize=1)
Set the text size.
Definition TAttText.h:47
The Canvas class.
Definition TCanvas.h:23
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
The Histogram stack class.
Definition THStack.h:38
To draw Mathematical Formula.
Definition TLatex.h:18
TLatex * DrawLatexNDC(Double_t x, Double_t y, const char *text)
Draw this TLatex with new coordinates in NDC.
Definition TLatex.cxx:1942
This class displays a legend box (TPaveText) containing several legend entries.
Definition TLegend.h:23
virtual void SaveAs(const char *filename="", Option_t *option="") const
Save this object in the file specified by filename.
Definition TObject.cxx:686
The most important graphics class in the ROOT system.
Definition TPad.h:28
TPaveText * pt
RVec< PromoteType< T > > abs(const RVec< T > &v)
Definition RVec.hxx:1795
auto Map(Args &&... args)
Create new collection applying a callable to the elements of the input collection.
Definition RVec.hxx:2113
RVec< PromoteType< T > > sin(const RVec< T > &v)
Definition RVec.hxx:1814
RVec< T > Filter(const RVec< T > &v, F &&f)
Create a new collection with the elements passing the filter expressed by the predicate.
Definition RVec.hxx:2145
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
ROOT::RDataFrame FromSpec(const std::string &jsonFile)
Factory method to create an RDataFrame from a JSON specification file.
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
void AddProgressBar(ROOT::RDF::RNode df)
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:539
Double_t Mean(Long64_t n, const T *a, const Double_t *w=nullptr)
Returns the weighted mean of an array a with length n.
Definition TMath.h:1089
Definition graph.py:1
A struct which stores the parameters of a TH1D.