Logo ROOT  
Reference Guide
df105_WBosonAnalysis.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_dataframe
3## \notebook -draw
4## This tutorial is the analysis of the W boson mass taken from the ATLAS Open Data release in 2020
5## (http://opendata.atlas.cern/release/2020/documentation/). The data was recorded with the ATLAS detector
6## during 2016 at a center-of-mass energy of 13 TeV. W bosons are produced frequently at the LHC and
7## are an important background to studies of Standard Model processes, for example the Higgs boson analyses.
8##
9## The analysis is translated to a RDataFrame workflow processing a subset of 60 GB of simulated events and data.
10## The original analysis is reduced by a factor given as the command line argument --lumi-scale.
11##
12## \macro_image
13## \macro_code
14## \macro_output
15##
16## \date March 2020
17## \author Stefan Wunsch (KIT, CERN)
18
19import ROOT
20import json
21import argparse
22import os
23
24# Argument parsing
25parser = argparse.ArgumentParser()
26parser.add_argument("--lumi-scale", default=0.01, help="Scale of the overall lumi of 10 fb^-1")
27parser.add_argument("-b", action="store_true", default=False, help="Use ROOT batch mode")
28args = parser.parse_args()
29if args.b: ROOT.gROOT.SetBatch(True)
30
31# Create a ROOT dataframe for each dataset
32# Note that we load the filenames from an external json file.
33path = "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22"
34files = json.load(open(os.path.join(os.environ["ROOTSYS"], "tutorials/dataframe", "df105_WBosonAnalysis.json")))
35processes = files.keys()
36df = {}
37xsecs = {}
38sumws = {}
39samples = []
40for p in processes:
41 for d in files[p]:
42 # Construct the dataframes
43 folder = d[0] # Folder name
44 sample = d[1] # Sample name
45 xsecs[sample] = d[2] # Cross-section
46 sumws[sample] = d[3] # Sum of weights
47 num_events = d[4] # Number of events
48 samples.append(sample)
49 df[sample] = ROOT.RDataFrame("mini", "{}/1lep/{}/{}.1lep.root".format(path, folder, sample))
50
51 # Scale down the datasets
52 df[sample] = df[sample].Range(int(num_events * args.lumi_scale))
53
54# Select events for the analysis
55ROOT.gInterpreter.Declare("""
56bool GoodElectronOrMuon(int type, float pt, float eta, float phi, float e, float trackd0pv, float tracksigd0pv, float z0)
57{
58 ROOT::Math::PtEtaPhiEVector p(pt / 1000.0, eta, phi, e / 1000.0);
59 if (abs(z0 * sin(p.theta())) > 0.5) return false;
60 if (type == 11 && abs(eta) < 2.46 && (abs(eta) < 1.37 || abs(eta) > 1.52)) {
61 if (abs(trackd0pv / tracksigd0pv) > 5) return false;
62 return true;
63 }
64 if (type == 13 && abs(eta) < 2.5) {
65 if (abs(trackd0pv / tracksigd0pv) > 3) return false;
66 return true;
67 }
68 return false;
69}
70""")
71
72for s in samples:
73 # Require missing transverse energy larger than 30 GeV
74 df[s] = df[s].Filter("met_et > 30000")
75 # Select electron or muon trigger
76 df[s] = df[s].Filter("trigE || trigM")
77 # Select events with exactly one good lepton
78 df[s] = df[s].Define("good_lep", "lep_isTightID && lep_pt > 35000 && lep_ptcone30 / lep_pt < 0.1 && lep_etcone20 / lep_pt < 0.1")\
79 .Filter("Sum(good_lep) == 1")
80 # Apply additional cuts in case the lepton is an electron or muon
81 df[s] = df[s].Define("idx", "ROOT::VecOps::ArgMax(good_lep)")\
82 .Filter("GoodElectronOrMuon(lep_type[idx], lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx], lep_trackd0pvunbiased[idx], lep_tracksigd0pvunbiased[idx], lep_z0[idx])")
83
84# Apply luminosity, scale factors and MC weights for simulated events
85lumi = 10064.0
86for s in samples:
87 if "data" in s:
88 df[s] = df[s].Define("weight", "1.0")
89 else:
90 df[s] = df[s].Define("weight", "scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * scaleFactor_PILEUP * mcWeight * {} / {} * {}".format(xsecs[s], sumws[s], lumi))
91
92# Compute transverse mass of the W boson using the lepton and the missing transverse energy and make a histogram
93ROOT.gInterpreter.Declare("""
94float ComputeTransverseMass(float met_et, float met_phi, float lep_pt, float lep_eta, float lep_phi, float lep_e)
95{
96 ROOT::Math::PtEtaPhiEVector met(met_et, 0, met_phi, met_et);
97 ROOT::Math::PtEtaPhiEVector lep(lep_pt, lep_eta, lep_phi, lep_e);
98 return (met + lep).Mt() / 1000.0;
99}
100""")
101
102histos = {}
103for s in samples:
104 df[s] = df[s].Define("mt_w", "ComputeTransverseMass(met_et, met_phi, lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx])")
105 histos[s] = df[s].Histo1D(ROOT.RDF.TH1DModel(s, "mt_w", 40, 60, 180), "mt_w", "weight")
106
107# Run the event loop and merge histograms of the respective processes
108
109def merge_histos(label):
110 h = None
111 for i, d in enumerate(files[label]):
112 t = histos[d[1]].GetValue()
113 if i == 0: h = t.Clone()
114 else: h.Add(t)
115 h.SetNameTitle(label, label)
116 return h
117
118data = merge_histos("data")
119wjets = merge_histos("wjets")
120zjets = merge_histos("zjets")
121ttbar = merge_histos("ttbar")
122diboson = merge_histos("diboson")
123singletop = merge_histos("singletop")
124
125# Create the plot
126
127# Set styles
128ROOT.gROOT.SetStyle("ATLAS")
129
130# Create canvas with pad
131c = ROOT.TCanvas("c", "", 600, 600)
132pad = ROOT.TPad("upper_pad", "", 0, 0, 1, 1)
133pad.SetTickx(False)
134pad.SetTicky(False)
135pad.SetLogy()
136pad.Draw()
137pad.cd()
138
139# Draw stack with MC contributions
140stack = ROOT.THStack()
141for h, color in zip(
142 [singletop, diboson, ttbar, zjets, wjets],
143 [(208, 240, 193), (195, 138, 145), (155, 152, 204), (248, 206, 104), (222, 90, 106)]):
144 h.SetLineWidth(1)
145 h.SetLineColor(1)
146 h.SetFillColor(ROOT.TColor.GetColor(*color))
147 stack.Add(h)
148stack.Draw("HIST")
149stack.GetXaxis().SetLabelSize(0.04)
150stack.GetXaxis().SetTitleSize(0.045)
151stack.GetXaxis().SetTitleOffset(1.3)
152stack.GetXaxis().SetTitle("m_{T}^{W#rightarrow l#nu} [GeV]")
153stack.GetYaxis().SetTitle("Events")
154stack.GetYaxis().SetLabelSize(0.04)
155stack.GetYaxis().SetTitleSize(0.045)
156stack.SetMaximum(1e10 * args.lumi_scale)
157stack.SetMinimum(1e1)
158
159# Draw data
160data.SetMarkerStyle(20)
161data.SetMarkerSize(1.2)
162data.SetLineWidth(2)
163data.SetLineColor(ROOT.kBlack)
164data.Draw("E SAME")
165
166# Add legend
167legend = ROOT.TLegend(0.60, 0.65, 0.92, 0.92)
168legend.SetTextFont(42)
169legend.SetFillStyle(0)
170legend.SetBorderSize(0)
171legend.SetTextSize(0.04)
172legend.SetTextAlign(32)
173legend.AddEntry(data, "Data" ,"lep")
174legend.AddEntry(wjets, "W+jets", "f")
175legend.AddEntry(zjets, "Z+jets", "f")
176legend.AddEntry(ttbar, "t#bar{t}", "f")
177legend.AddEntry(diboson, "Diboson", "f")
178legend.AddEntry(singletop, "Single top", "f")
179legend.Draw("SAME")
180
181# Add ATLAS label
182text = ROOT.TLatex()
183text.SetNDC()
184text.SetTextFont(72)
185text.SetTextSize(0.045)
186text.DrawLatex(0.21, 0.86, "ATLAS")
187text.SetTextFont(42)
188text.DrawLatex(0.21 + 0.16, 0.86, "Open Data")
189text.SetTextSize(0.04)
190text.DrawLatex(0.21, 0.80, "#sqrt{{s}} = 13 TeV, {:.1f} fb^{{-1}}".format(lumi * args.lumi_scale / 1000.0))
191
192# Save the plot
193c.SaveAs("WBosonAnalysis.pdf")
double sin(double)
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
RVec< T > Filter(const RVec< T > &v, F &&f)
Create a new collection with the elements passing the filter expressed by the predicate.
Definition: RVec.hxx:939
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
Ta Range(0, 0, 1, 1)