Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df106_HiggsToFourLeptons.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_dataframe
3## \notebook -draw
4## The Higgs to four lepton analysis from the ATLAS Open Data release of 2020, with RDataFrame.
5##
6## This tutorial is the Higgs to four lepton analysis from the ATLAS Open Data release in 2020
7## (http://opendata.atlas.cern/release/2020/documentation/). The data was taken with the ATLAS detector
8## during 2016 at a center-of-mass energy of 13 TeV. The decay of the Standard Model Higgs boson
9## to two Z bosons and subsequently to four leptons is called the "golden channel". The selection leads
10## to a narrow invariant mass peak on top a relatively smooth and small background, revealing
11## the Higgs at 125 GeV.
12## Systematic errors for the MC scale factors are computed and the Vary function of RDataFrame is used for plotting.
13## The analysis is translated to an RDataFrame workflow processing about 300 MB of simulated events and data.
14##
15## See the [corresponding spec json file](https://github.com/root-project/root/blob/master/tutorials/analysis/dataframe/df106_HiggsToFourLeptons_spec.json).
16##
17## \macro_image
18## \macro_code
19## \macro_output
20##
21## \date March 2020, August 2022, August 2023
22## \authors Stefan Wunsch (KIT, CERN), Julia Mathe (CERN), Marta Czurylo (CERN)
23
24import ROOT
25import os
26
27# Enable Multi-threaded mode
29
30# Create the RDataFrame from the spec json file. The df106_HiggsToFourLeptons_spec.json is provided in the same folder as this tutorial
31dataset_spec = os.path.join(ROOT.gROOT.GetTutorialsDir(), "analysis", "dataframe", "df106_HiggsToFourLeptons_spec.json")
32df = ROOT.RDF.Experimental.FromSpec(dataset_spec) # Creates a single dataframe for all the samples
33
34# Add the ProgressBar feature
36
37# Access metadata information that is stored in the JSON config file of the RDataFrame.
38# The metadata contained in the JSON file is accessible within a `DefinePerSample` call, through the `RSampleInfo` class.
39df = df.DefinePerSample("xsecs", 'rdfsampleinfo_.GetD("xsecs")')
40df = df.DefinePerSample("lumi", 'rdfsampleinfo_.GetD("lumi")')
41df = df.DefinePerSample("sumws", 'rdfsampleinfo_.GetD("sumws")')
42df = df.DefinePerSample("sample_category", 'rdfsampleinfo_.GetS("sample_category")')
43
44# We must further apply an MC correction for the ZZ decay due to missing gg->ZZ processes.
45ROOT.gInterpreter.Declare(
46 """
47float scale(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
48 return id.Contains("mc_363490.llll.4lep.root") ? 1.3f : 1.0f;
49}
50"""
51)
52
53df = df.DefinePerSample("scale", "scale(rdfslot_, rdfsampleinfo_)")
54
55# Select events for the analysis
56ROOT.gInterpreter.Declare(
57 """
58using ROOT::RVecF;
59using ROOT::RVecI;
60bool GoodElectronsAndMuons(const RVecI &type, const RVecF &pt, const RVecF &eta, const RVecF &phi, const RVecF &e, const RVecF &trackd0pv, const RVecF &tracksigd0pv, const RVecF &z0)
61{
62 for (size_t i = 0; i < type.size(); i++) {
63 ROOT::Math::PtEtaPhiEVector p(0.001*pt[i], eta[i], phi[i], 0.001*e[i]);
64 if (type[i] == 11) {
65 if (pt[i] < 7000 || abs(eta[i]) > 2.47 || abs(trackd0pv[i] / tracksigd0pv[i]) > 5 || abs(z0[i] * sin(p.Theta())) > 0.5) return false;
66 } else {
67 if (abs(trackd0pv[i] / tracksigd0pv[i]) > 5 || abs(z0[i] * sin(p.Theta())) > 0.5) return false;
68 }
69 }
70 return true;
71}
72"""
73)
74
75# Select electron or muon trigger
76df = df.Filter("trigE || trigM")
77
78# Select events with exactly four good leptons conserving charge and lepton numbers
79# Note that all collections are RVecs and good_lep is the mask for the good leptons.
80# The lepton types are PDG numbers and set to 11 or 13 for an electron or muon
81# irrespective of the charge.
82
83df = (
84 df.Define(
85 "good_lep",
86 "abs(lep_eta) < 2.5 && lep_pt > 5000 && lep_ptcone30 / lep_pt < 0.3 && lep_etcone20 / lep_pt < 0.3",
87 )
88 .Filter("Sum(good_lep) == 4")
89 .Filter("Sum(lep_charge[good_lep]) == 0")
90 .Define("goodlep_sumtypes", "Sum(lep_type[good_lep])")
91 .Filter("goodlep_sumtypes == 44 || goodlep_sumtypes == 52 || goodlep_sumtypes == 48")
92)
93
94# Apply additional cuts depending on lepton flavour
95df = df.Filter(
96 "GoodElectronsAndMuons(lep_type[good_lep], lep_pt[good_lep], lep_eta[good_lep], lep_phi[good_lep], lep_E[good_lep], lep_trackd0pvunbiased[good_lep], lep_tracksigd0pvunbiased[good_lep], lep_z0[good_lep])"
97)
98
99# Create new columns with the kinematics of good leptons
100df = (
101 df.Define("goodlep_pt", "lep_pt[good_lep]")
102 .Define("goodlep_eta", "lep_eta[good_lep]")
103 .Define("goodlep_phi", "lep_phi[good_lep]")
104 .Define("goodlep_E", "lep_E[good_lep]")
105 .Define("goodlep_type", "lep_type[good_lep]")
106)
107
108# Select leptons with high transverse momentum
109df = df.Filter("goodlep_pt[0] > 25000 && goodlep_pt[1] > 15000 && goodlep_pt[2] > 10000")
110
111# Reweighting of the samples is different for "data" and "MC". This is the function to add reweighting for MC samples
112ROOT.gInterpreter.Declare(
113 """
114double weights(float scaleFactor_1, float scaleFactor_2, float scaleFactor_3, float scaleFactor_4, float scale, float mcWeight, double xsecs, double sumws, double lumi)
115{
116 return scaleFactor_1 * scaleFactor_2 * scaleFactor_3 * scaleFactor_4 * scale * mcWeight * xsecs / sumws * lumi;
117}
118"""
119)
120
121# Use DefinePerSample to define which samples are MC and hence need reweighting
122df = df.DefinePerSample("isMC", 'rdfsampleinfo_.Contains("mc")')
123df = df.Define(
124 "weight",
125 "double x; return isMC ? weights(scaleFactor_ELE, scaleFactor_MUON, scaleFactor_LepTRIGGER, scaleFactor_PILEUP, scale, mcWeight, xsecs, sumws, lumi) : 1.;",
126)
127
128# Compute invariant mass of the four lepton system
129ROOT.gInterpreter.Declare(
130 """
131float ComputeInvariantMass(RVecF pt, RVecF eta, RVecF phi, RVecF e)
132{
133 ROOT::Math::PtEtaPhiEVector p1{pt[0], eta[0], phi[0], e[0]};
134 ROOT::Math::PtEtaPhiEVector p2{pt[1], eta[1], phi[1], e[1]};
135 ROOT::Math::PtEtaPhiEVector p3{pt[2], eta[2], phi[2], e[2]};
136 ROOT::Math::PtEtaPhiEVector p4{pt[3], eta[3], phi[3], e[3]};
137 return 0.001 * (p1 + p2 + p3 + p4).M();
138}
139"""
140)
141
142df = df.Define("m4l", "ComputeInvariantMass(goodlep_pt, goodlep_eta, goodlep_phi, goodlep_E)")
143
144# Save data for statistical analysis tutorial (rf618_mixture_models.py)
145df.Snapshot("tree", ROOT.gROOT.GetTutorialDir().Data() + "/analysis/dataframe/df106_HiggsToFourLeptons.root", ["m4l", "sample_category", "weight"])
146
147# Book histograms for the four different samples: data, higgs, zz and other (this is specific to this particular analysis)
148histos = []
149for sample_category in ["data", "higgs", "zz", "other"]:
150 histos.append(
151 df.Filter(f'sample_category == "{sample_category}"').Histo1D(
152 ROOT.RDF.TH1DModel(f"{sample_category}", "m4l", 24, 80, 170),
153 "m4l",
154 "weight",
155 )
156 )
157
158# Evaluate the systematic uncertainty
159
160# The systematic uncertainty in this analysis is the MC scale factor uncertainty that depends on lepton
161# kinematics such as pT or pseudorapidity.
162# Muons uncertainties are negligible, as stated in https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/MUON-2018-03/.
163# Electrons uncertainties are evaluated based on the plots available in https://doi.org/10.48550/arXiv.1908.00005.
164# The uncertainties are linearly interpolated, using the `TGraph::Eval()` method, to cover a range of pT values covered by the analysis.
165
166# Create a VaryHelper to interpolate the available data.
167ROOT.gInterpreter.Declare(
168 """
169using namespace ROOT::VecOps;
170
171class VaryHelper
172{
173 const std::vector<double> x{5.50e3, 5.52e3, 12.54e3, 17.43e3, 22.40e3, 27.48e3, 30e3, 10000e3};
174 const std::vector<double> y{0.06628, 0.06395, 0.06396, 0.03372, 0.02441, 0.01403, 0, 0};
175 TGraph graph;
176
177public:
178 VaryHelper() : graph(x.size(), x.data(), y.data()) {}
179 RVec<double> operator()(const double &w, const RVecF &pt, const RVec<unsigned int> &type)
180 {
181 const auto v = Mean(Map(pt[type == 11], [this](auto p)
182 {return this->graph.Eval(p); })
183 );
184 return RVec<double>{(1 + v) * w, (1 - v) * w};
185 }
186};
187
188VaryHelper variationsFactory;
189"""
190)
191
192# Use the Vary method to add the systematic variations to the total MC scale factor ("weight") of the analysis.
193df_variations_mc = (
194 df.Filter("isMC == true")
195 .Vary("weight", "variationsFactory(weight, goodlep_pt, goodlep_type)", ["up", "down"])
196 .Histo1D(ROOT.RDF.TH1DModel("Invariant Mass", "m4l", 24, 80, 170), "m4l", "weight")
197)
198histos_mc = ROOT.RDF.Experimental.VariationsFor(df_variations_mc)
199
200# We reached the end of the analysis part. We now evaluate the total MC uncertainty based on the variations.
201# No computation graph was triggered yet, we trigger the computation graph for all histograms at once now,
202# by calling 'histos_mc["nominal"].GetXaxis()'.
203# Note, in this case the uncertainties are symmetric.
204for i in range(0, histos_mc["nominal"].GetXaxis().GetNbins()):
205 (
206 histos_mc["nominal"].SetBinError(
207 i, (histos_mc["weight:up"].GetBinContent(i) - histos_mc["nominal"].GetBinContent(i))
208 )
209 )
210
211# Make the plot of the data, individual MC contributions and the total MC scale factor systematic variations.
212
213# Set styles
214ROOT.gROOT.SetStyle("ATLAS")
215
216# Create canvas with pad
217c1 = ROOT.TCanvas("c", "", 600, 600)
218pad = ROOT.TPad("upper_pad", "", 0, 0, 1, 1)
219pad.SetTickx(False)
220pad.SetTicky(False)
221pad.Draw()
222pad.cd()
223
224# Draw stack with MC contributions
225stack = ROOT.THStack()
226
227# Retrieve values of the data and MC histograms in order to plot them.
228# Draw cloned histograms to preserve graphics when original objects goes out of scope
229# Note: GetValue() action operation is performed after all lazy actions of the RDF were defined first.
230h_data = histos[0].GetValue().Clone()
231h_higgs = histos[1].GetValue().Clone()
232h_zz = histos[2].GetValue().Clone()
233h_other = histos[3].GetValue().Clone()
234
235for h, color in zip([h_other, h_zz, h_higgs], [ROOT.kViolet - 9, ROOT.kAzure - 9, ROOT.kRed + 2]):
236 h.SetLineWidth(1)
237 h.SetLineColor(1)
238 h.SetFillColor(color)
239 stack.Add(h)
240
241stack.Draw("HIST")
242stack.GetXaxis().SetLabelSize(0.04)
243stack.GetXaxis().SetTitleSize(0.045)
244stack.GetXaxis().SetTitleOffset(1.3)
245stack.GetXaxis().SetTitle("m_{4l}^{H#rightarrow ZZ} [GeV]")
246stack.GetYaxis().SetLabelSize(0.04)
247stack.GetYaxis().SetTitleSize(0.045)
248stack.GetYaxis().SetTitle("Events")
249stack.SetMaximum(35)
250stack.GetYaxis().ChangeLabel(1, -1, 0)
251
252# Draw MC scale factor and variations
253histos_mc["nominal"].SetFillColor(ROOT.kBlack)
254histos_mc["nominal"].SetFillStyle(3254)
255h_nominal = histos_mc["nominal"].DrawClone("E2 same")
256histos_mc["weight:up"].SetLineColor(ROOT.kGreen + 2)
257h_weight_up = histos_mc["weight:up"].DrawClone("HIST SAME")
258histos_mc["weight:down"].SetLineColor(ROOT.kBlue + 2)
259h_weight_down = histos_mc["weight:down"].DrawClone("HIST SAME")
260
261# Draw data histogram
262h_data.SetMarkerStyle(20)
263h_data.SetMarkerSize(1.2)
264h_data.SetLineWidth(2)
265h_data.SetLineColor(ROOT.kBlack)
266h_data.Draw("E SAME") # Draw raw data with errorbars
267
268# Add legend
269legend = ROOT.TLegend(0.57, 0.65, 0.94, 0.94)
270legend.SetTextFont(42)
271legend.SetFillStyle(0)
272legend.SetBorderSize(0)
273legend.SetTextSize(0.025)
274legend.SetTextAlign(32)
275legend.AddEntry(h_data, "Data", "lep")
276legend.AddEntry(h_higgs, "Higgs MC", "f")
277legend.AddEntry(h_zz, "ZZ MC", "f")
278legend.AddEntry(h_other, "Other MC", "f")
279legend.AddEntry(h_weight_down, "Total MC Variations Down", "l")
280legend.AddEntry(h_weight_up, "Total MC Variations Up", "l")
281legend.AddEntry(h_nominal, "Total MC Uncertainty", "f")
282legend.Draw()
283
284text = ROOT.TLatex()
285text.SetTextFont(72)
286text.SetTextSize(0.04)
287text.DrawLatexNDC(0.19, 0.85, "ATLAS")
288text.SetTextFont(42)
289text.DrawLatexNDC(0.19 + 0.15, 0.85, "Open Data")
290text.SetTextSize(0.035)
291text.DrawLatexNDC(0.21, 0.80, "#sqrt{s} = 13 TeV, 10 fb^{-1}")
292
293c1.Update()
294
295# Save the plot
296c1.SaveAs("df106_HiggsToFourLeptons_python.png")
297print("Saved figure to df106_HiggsToFourLeptons_python.png")
Option_t Option_t SetFillStyle
Option_t Option_t SetLineColor
Option_t Option_t SetFillColor
ROOT::RDataFrame FromSpec(const std::string &jsonFile)
Factory method to create an RDataFrame from a JSON specification file.
RResultMap< T > VariationsFor(RResultPtr< T > resPtr)
Produce all required systematic variations for the given result.
void AddProgressBar(ROOT::RDF::RNode df)
Add ProgressBar to a ROOT::RDF::RNode.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:539
A struct which stores the parameters of a TH1D.