Logo ROOT  
Reference Guide
df105.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_rcanvas
3## The W boson mass analysis from the ATLAS Open Data release of 2020, with RDataFrame.
4##
5## This tutorial is the analysis of the W boson mass taken from the ATLAS Open Data release in 2020
6## (http://opendata.atlas.cern/release/2020/documentation/). The data was recorded with the ATLAS detector
7## during 2016 at a center-of-mass energy of 13 TeV. W bosons are produced frequently at the LHC and
8## are an important background to studies of Standard Model processes, for example the Higgs boson analyses.
9##
10## The analysis is translated to a RDataFrame workflow processing up to 60 GB of simulated events and data.
11## By default the analysis runs on a preskimmed dataset to reduce the runtime. The full dataset can be used with
12## the --full-dataset argument and you can also run only on a fraction of the original dataset using the argument --lumi-scale.
13##
14## This macro is replica of tutorials/dataframe/df105_WBosonAnalysis.py, but with usage of ROOT7 graphics
15## Run macro with python3 -i df105.py command to get interactive canvas
16##
17## \macro_image (rcanvas_js)
18## \macro_code
19##
20## \date March 2020
21## \authors Stefan Wunsch (KIT, CERN) Sergey Linev (GSI)
22
23import ROOT
24import json
25import argparse
26import os
27from ROOT.Experimental import RCanvas, RText, RAttrText, RAttrFont, RPadPos, TObjectDrawable
28
29# Argument parsing
30parser = argparse.ArgumentParser()
31parser.add_argument("--lumi-scale", type=float, default=0.001,
32 help="Run only on a fraction of the total available 10 fb^-1 (only usable together with --full-dataset)")
33parser.add_argument("--full-dataset", action="store_true", default=False,
34 help="Use the full dataset (use --lumi-scale to run only on a fraction of it)")
35parser.add_argument("-b", action="store_true", default=False, help="Use ROOT batch mode")
36parser.add_argument("-t", action="store_true", default=False, help="Use implicit multi threading (for the full dataset only possible with --lumi-scale 1.0)")
37args = parser.parse_args()
38
39if args.b: ROOT.gROOT.SetWebDisplay("batch")
40if args.t: ROOT.EnableImplicitMT()
41
42if not args.full_dataset: lumi_scale = 0.001 # The preskimmed dataset contains only 0.01 fb^-1
43else: lumi_scale = args.lumi_scale
44lumi = 10064.0
45print('Run on data corresponding to {:.2f} fb^-1 ...'.format(lumi * lumi_scale / 1000.0))
46
47if args.full_dataset: dataset_path = "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22"
48else: dataset_path = "root://eospublic.cern.ch//eos/root-eos/reduced_atlas_opendata/w"
49
50# Create a ROOT dataframe for each dataset
51# Note that we load the filenames from the external json file placed in the same folder than this script.
52files = json.load(open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "df105.json")))
53processes = files.keys()
54df = {}
55xsecs = {}
56sumws = {}
57samples = []
58for p in processes:
59 for d in files[p]:
60 # Construct the dataframes
61 folder = d[0] # Folder name
62 sample = d[1] # Sample name
63 xsecs[sample] = d[2] # Cross-section
64 sumws[sample] = d[3] # Sum of weights
65 num_events = d[4] # Number of events
66 samples.append(sample)
67 df[sample] = ROOT.RDataFrame("mini", "{}/1lep/{}/{}.1lep.root".format(dataset_path, folder, sample))
68
69 # Scale down the datasets if requested
70 if args.full_dataset and lumi_scale < 1.0:
71 df[sample] = df[sample].Range(int(num_events * lumi_scale))
72
73# Select events for the analysis
74
75# Just-in-time compile custom helper function performing complex computations
76ROOT.gInterpreter.Declare("""
77bool GoodElectronOrMuon(int type, float pt, float eta, float phi, float e, float trackd0pv, float tracksigd0pv, float z0)
78{
79 ROOT::Math::PtEtaPhiEVector p(pt / 1000.0, eta, phi, e / 1000.0);
80 if (abs(z0 * sin(p.theta())) > 0.5) return false;
81 if (type == 11 && abs(eta) < 2.46 && (abs(eta) < 1.37 || abs(eta) > 1.52)) {
82 if (abs(trackd0pv / tracksigd0pv) > 5) return false;
83 return true;
84 }
85 if (type == 13 && abs(eta) < 2.5) {
86 if (abs(trackd0pv / tracksigd0pv) > 3) return false;
87 return true;
88 }
89 return false;
90}
91""")
92
93for s in samples:
94 # Select events with a muon or electron trigger and with a missing transverse energy larger than 30 GeV
95 df[s] = df[s].Filter("trigE || trigM")\
96 .Filter("met_et > 30000")
97
98 # Find events with exactly one good lepton
99 df[s] = df[s].Define("good_lep", "lep_isTightID && lep_pt > 35000 && lep_ptcone30 / lep_pt < 0.1 && lep_etcone20 / lep_pt < 0.1")\
100 .Filter("ROOT::VecOps::Sum(good_lep) == 1")
101
102 # Apply additional cuts in case the lepton is an electron or muon
103 df[s] = df[s].Define("idx", "ROOT::VecOps::ArgMax(good_lep)")\
104 .Filter("GoodElectronOrMuon(lep_type[idx], lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx], lep_trackd0pvunbiased[idx], lep_tracksigd0pvunbiased[idx], lep_z0[idx])")
105
106# Apply luminosity, scale factors and MC weights for simulated events
107for s in samples:
108 if "data" in s:
109 df[s] = df[s].Define("weight", "1.0")
110 else:
111 df[s] = df[s].Define("weight", "scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * scaleFactor_PILEUP * mcWeight * {} / {} * {}".format(xsecs[s], sumws[s], lumi))
112
113# Compute transverse mass of the W boson using the lepton and the missing transverse energy and make a histogram
114ROOT.gInterpreter.Declare("""
115float ComputeTransverseMass(float met_et, float met_phi, float lep_pt, float lep_eta, float lep_phi, float lep_e)
116{
117 ROOT::Math::PtEtaPhiEVector met(met_et, 0, met_phi, met_et);
118 ROOT::Math::PtEtaPhiEVector lep(lep_pt, lep_eta, lep_phi, lep_e);
119 return (met + lep).Mt() / 1000.0;
120}
121""")
122
123histos = {}
124for s in samples:
125 df[s] = df[s].Define("mt_w", "ComputeTransverseMass(met_et, met_phi, lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx])")
126 histos[s] = df[s].Histo1D(ROOT.RDF.TH1DModel(s, "mt_w", 24, 60, 180), "mt_w", "weight")
127
128# Run the event loop and merge histograms of the respective processes
129
130# RunGraphs allows to run the event loops of the separate RDataFrame graphs
131# concurrently. This results in an improved usage of the available resources
132# if each separate RDataFrame can not utilize all available resources, e.g.,
133# because not enough data is available.
134ROOT.RDF.RunGraphs([histos[s] for s in samples])
135
136def merge_histos(label):
137 h = None
138 for i, d in enumerate(files[label]):
139 t = histos[d[1]].GetValue()
140 if i == 0: h = t.Clone()
141 else: h.Add(t)
142 h.SetNameTitle(label, label)
143 return h
144
145data = merge_histos("data")
146wjets = merge_histos("wjets")
147zjets = merge_histos("zjets")
148ttbar = merge_histos("ttbar")
149diboson = merge_histos("diboson")
150singletop = merge_histos("singletop")
151
152# Create the plot
153
154# Set styles
155ROOT.gROOT.SetStyle("ATLAS")
156
157# Create canvas and configure frame with axis attributes
158c = RCanvas.Create("df105_WBosonAnalysis")
159frame = c.AddFrame()
160frame.margins.top = 0.05
161frame.margins.right = 0.05
162frame.margins.left = 0.16
163frame.margins.bottom = 0.16
164# c.SetTickx(0)
165# c.SetTicky(0)
166
167frame.x.min = 60
168frame.x.max = 180
169frame.x.title.value = "m_{T}^{W#rightarrow l#nu} [GeV]"
170frame.x.title.size = 0.045
171frame.x.title.offset = 0.01
172frame.x.labels.size = 0.04
173
174frame.y.min = 1
175frame.y.max = 1e10*args.lumi_scale
176frame.y.log = 10
177frame.y.title.value = "Events"
178frame.y.title.size = 0.045
179frame.y.labels.size = 0.04
180
181# instruct RFrame to draw axes
182frame.drawAxes = True
183
184# Draw stack with MC contributions
185stack = ROOT.THStack()
186for h, color in zip(
187 [singletop, diboson, ttbar, zjets, wjets],
188 [(208, 240, 193), (195, 138, 145), (155, 152, 204), (248, 206, 104), (222, 90, 106)]):
189 h.SetLineWidth(1)
190 h.SetLineColor(1)
191 h.SetFillColor(ROOT.TColor.GetColor(*color))
192 stack.Add(h)
193c.Add[TObjectDrawable]().Set(stack, "HIST SAME")
194
195# Draw data
196data.SetMarkerStyle(20)
197data.SetMarkerSize(1.2)
198data.SetLineWidth(2)
199data.SetLineColor(ROOT.kBlack)
200c.Add[TObjectDrawable]().Set(data, "E SAME")
201
202# Add TLegend while histograms packed in the THStack
203legend = ROOT.TLegend(0.60, 0.65, 0.92, 0.92)
204legend.SetTextFont(42)
205legend.SetFillStyle(0)
206legend.SetBorderSize(0)
207legend.SetTextSize(0.04)
208legend.SetTextAlign(32)
209legend.AddEntry(data, "Data" ,"lep")
210legend.AddEntry(wjets, "W+jets", "f")
211legend.AddEntry(zjets, "Z+jets", "f")
212legend.AddEntry(ttbar, "t#bar{t}", "f")
213legend.AddEntry(diboson, "Diboson", "f")
214legend.AddEntry(singletop, "Single top", "f")
215c.Add[TObjectDrawable]().Set(legend)
216
217# Add ATLAS label
218lbl1 = c.Add[RText](RPadPos(0.05, 0.88), "ATLAS")
219lbl1.onFrame = True
220lbl1.text.font = RAttrFont.kArialBoldOblique
221lbl1.text.size = 0.04
222lbl1.text.align = RAttrText.kLeftBottom
223lbl2 = c.Add[RText](RPadPos(0.05 + 0.20, 0.88), "Open Data")
224lbl2.onFrame = True
225lbl2.text.font = RAttrFont.kArial
226lbl2.text.size = 0.04
227lbl2.text.align = RAttrText.kLeftBottom
228lbl3 = c.Add[RText](RPadPos(0.05, 0.82), "#sqrt{{s}} = 13 TeV, {:.2f} fb^{{-1}}".format(lumi * args.lumi_scale / 1000.0))
229lbl3.onFrame = True
230lbl3.text.font = RAttrFont.kArial
231lbl3.text.size = 0.03
232lbl3.text.align = RAttrText.kLeftBottom
233
234# show canvas finally
235c.SetSize(600, 600)
236c.Show()
237
238# Save the plot
239c.SaveAs("df105.png")
240print("Saved figure to df105.png")
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree,...
Definition: RDataFrame.hxx:40
RVec< T > Filter(const RVec< T > &v, F &&f)
Create a new collection with the elements passing the filter expressed by the predicate.
Definition: RVec.hxx:2052
void RunGraphs(std::vector< RResultHandle > handles)
Trigger the event loop of multiple RDataFrames concurrently.
Definition: RDFHelpers.cxx:23
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:527
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
Ta Range(0, 0, 1, 1)