Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df107_SingleTopAnalysis.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_dataframe
3## \notebook -draw
4## A single top analysis using the ATLAS Open Data release of 2020, with RDataFrame.
5##
6## This tutorial is the analysis of single top production adapted from the ATLAS Open Data release in 2020
7## (http://opendata.atlas.cern/release/2020/documentation/). The data was recorded with the ATLAS detector
8## during 2016 at a center-of-mass energy of 13 TeV. Top quarks with a mass of about 172 GeV are mostly
9## produced in pairs but also appear alone, dominantly from the decays of a W boson in association with a light jet.
10##
11## The analysis is translated to a RDataFrame workflow processing up to 60 GB of simulated events and data.
12## By default the analysis runs on a preskimmed dataset to reduce the runtime. The full dataset can be used with
13## the --full-dataset argument and you can also run only on a fraction of the original dataset using the argument --lumi-scale.
14##
15## See the [corresponding spec json file](https://github.com/root-project/root/blob/master/tutorials/dataframe/df107_SingleTopAnalysis.json).
16##
17## \macro_image
18## \macro_code
19## \macro_output
20##
21## \date July 2020
22## \author Stefan Wunsch (KIT, CERN)
23
24import ROOT
25import sys
26import json
27import argparse
28import os
29
30# Argument parsing
31parser = argparse.ArgumentParser()
32parser.add_argument("--lumi-scale", type=float, default=0.05,
33 help="Run only on a fraction of the total available 10 fb^-1 (only usable together with --full-dataset)")
34parser.add_argument("--full-dataset", action="store_true", default=False,
35 help="Use the full dataset (use --lumi-scale to run only on a fraction of it)")
36parser.add_argument("-b", action="store_true", default=False, help="Use ROOT batch mode")
37parser.add_argument("-t", action="store_true", default=False, help="Use implicit multi threading (for the full dataset only possible with --lumi-scale 1.0)")
38if 'df107_SingleTopAnalysis.py' in sys.argv[0]:
39 # Script
40 args = parser.parse_args()
41else:
42 # Notebook
43 args = parser.parse_args(args=[])
44
45if args.b: ROOT.gROOT.SetBatch(True)
46if args.t: ROOT.EnableImplicitMT()
47
48if not args.full_dataset: lumi_scale = 0.05 # The preskimmed dataset contains only 0.5 fb^-1
49else: lumi_scale = args.lumi_scale
50lumi = 10064.0
51print('Run on data corresponding to {:.1f} fb^-1 ...'.format(lumi * lumi_scale / 1000.0))
52
53if args.full_dataset: dataset_path = "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22"
54else: dataset_path = "root://eospublic.cern.ch//eos/root-eos/reduced_atlas_opendata/singletop"
55
56# Create a ROOT dataframe for each dataset
57# Note that we load the filenames from the external json file placed in the same folder than this script.
58files = json.load(open(os.path.join(ROOT.gROOT.GetTutorialsDir(), "dataframe/df107_SingleTopAnalysis.json")))
59processes = files.keys()
60df = {}
61xsecs = {}
62sumws = {}
63samples = []
64for p in processes:
65 for d in files[p]:
66 # Construct the dataframes
67 folder = d[0] # Folder name
68 sample = d[1] # Sample name
69 xsecs[sample] = d[2] # Cross-section
70 sumws[sample] = d[3] # Sum of weights
71 num_events = d[4] # Number of events
72 samples.append(sample)
73 df[sample] = ROOT.RDataFrame("mini", "{}/1lep/{}/{}.1lep.root".format(dataset_path, folder, sample))
74
75 # Scale down the datasets if requested
76 if args.full_dataset and lumi_scale < 1.0:
77 df[sample] = df[sample].Range(int(num_events * lumi_scale))
78
79# Select events for the analysis and make histograms of the top mass
80
81# Just-in-time compile custom helper function performing complex computations
82ROOT.gInterpreter.Declare("""
83using cRVecF = const ROOT::RVecF &;
84using cRVecI = const ROOT::RVecI &;
85int FindGoodLepton(cRVecI goodlep, cRVecI type, cRVecF lep_pt, cRVecF lep_eta, cRVecF lep_phi, cRVecF lep_e, cRVecF trackd0pv, cRVecF tracksigd0pv, cRVecF z0)
86{
87 int idx = -1; // Return -1 if no good lepton is found.
88 for(auto i = 0; i < type.size(); i++) {
89 if(!goodlep[i]) continue;
90 if (type[i] == 11 && abs(lep_eta[i]) < 2.47 && (abs(lep_eta[i]) < 1.37 || abs(lep_eta[i]) > 1.52) && abs(trackd0pv[i] / tracksigd0pv[i]) < 5) {
91 const ROOT::Math::PtEtaPhiEVector p(lep_pt[i], lep_eta[i], lep_phi[i], lep_e[i]);
92 if (abs(z0[i] * sin(p.Theta())) < 0.5) {
93 if (idx == -1) idx = i;
94 else return -1; // Accept only events with exactly one good lepton
95 }
96 }
97 if (type[i] == 13 && abs(lep_eta[i]) < 2.5 && abs(trackd0pv[i] / tracksigd0pv[i]) < 3) {
98 const ROOT::Math::PtEtaPhiEVector p(lep_pt[i], lep_eta[i], lep_phi[i], lep_e[i]);
99 if (abs(z0[i] * sin(p.Theta())) < 0.5) {
100 if (idx == -1) idx = i;
101 else return -1; // Accept only events with exactly one good lepton
102 }
103 }
104 }
105 return idx;
106}
107""")
108
109for s in samples:
110 # Select events with electron or muon trigger and with a missing transverse energy above 30 GeV
111 df[s] = df[s].Filter("trigE || trigM")\
112 .Filter("met_et > 30000")
113
114 # Perform preselection of highly isolated leptons
115 df[s] = df[s].Define("goodlep", "lep_isTightID && lep_pt > 35000 && lep_ptcone30 / lep_pt < 0.1 && lep_etcone20 / lep_pt < 0.1")\
116 .Filter("ROOT::VecOps::Sum(goodlep) > 0")
117
118 # Find a single good lepton, otherwise return -1 as index
119 df[s] = df[s].Define("idx_lep", "FindGoodLepton(goodlep, lep_type, lep_pt, lep_eta, lep_phi, lep_E, lep_trackd0pvunbiased, lep_tracksigd0pvunbiased, lep_z0)")\
120 .Filter("idx_lep != -1")
121
122 # Compute transverse mass of the W boson using the missing transverse energy and the good lepton
123 # Use only events with a transverse mass of the reconstructed W boson larger than 60 GeV
124 df[s] = df[s].Define("mtw", "sqrt(2 * lep_pt[idx_lep] * met_et * (1 - cos(lep_phi[idx_lep] - met_phi)))")\
125 .Filter("mtw > 60000")
126
127 # Perform preselection of jets
128 df[s] = df[s].Filter("ROOT::VecOps::Sum(jet_pt > 30000 && abs(jet_eta) < 2.5) > 0")
129
130 # Select events with two good jets and one b-jet and find the indices in the collections
131 df[s] = df[s].Define("goodjet", "jet_pt > 60000 || abs(jet_eta) > 2.4 || jet_jvt > 0.59")\
132 .Filter("ROOT::VecOps::Sum(goodjet) == 2")\
133 .Define("goodbjet", "goodjet && jet_MV2c10 > 0.8244273")\
134 .Filter("ROOT::VecOps::Sum(goodbjet) == 1")\
135 .Define("idx_tagged", "ROOT::VecOps::ArgMax(goodjet && goodbjet)")\
136 .Define("idx_untagged", "ROOT::VecOps::ArgMax(goodjet && !goodbjet)")
137
138 # Select events based on the jet kinematics and the scalar sum of the transverse momentum
139 # from the lepton, jets and met above 195 GeV
140 df[s] = df[s].Filter("abs(jet_eta[idx_untagged]) > 1.5 && abs(jet_eta[idx_tagged] - jet_eta[idx_untagged]) > 1.5")\
141 .Filter("lep_pt[idx_lep] + jet_pt[idx_tagged] + jet_pt[idx_untagged] + met_et > 195000")
142
143# Compute luminosity, scale factors and MC weights for simulated events
144for s in samples:
145 if "data" in s:
146 df[s] = df[s].Define("weight", "1.0")
147 else:
148 # The single top MC weights are either 1 or -1
149 if "single" in s: stop_norm = "mcWeight / abs(mcWeight)"
150 else: stop_norm = "mcWeight"
151 df[s] = df[s].Define("weight", "scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * scaleFactor_PILEUP * scaleFactor_BTAG * {} * {} / {} * {}".format(stop_norm, xsecs[s], sumws[s], lumi))
152
153# Reconstruct the top mass from the lepton, the missing transverse energy and the b-jet
154
155# Just-in-time compile the function to compute the top mass from the constituents
156ROOT.gInterpreter.Declare("""
157float ComputeTopMass(float lep_pt, float lep_eta, float lep_phi, float lep_e, float jet_pt, float jet_eta, float jet_phi, float jet_e, float met_et, float met_phi)
158{
159 const ROOT::Math::PtEtaPhiEVector lep(lep_pt / 1000.0, lep_eta, lep_phi, lep_e / 1000.0);
160 const ROOT::Math::PtEtaPhiEVector met(met_et / 1000.0, 0, met_phi, met_et / 1000.0);
161 const ROOT::Math::PtEtaPhiEVector bjet(jet_pt / 1000.0, jet_eta, jet_phi, jet_e / 1000.0);
162 // Please note that we treat here the missing transverse energy as the neutrino, even though the z component is missing!
163 return (lep + met + bjet).M();
164}
165""")
166
167histos = {}
168for s in samples:
169 df[s] = df[s].Define("top_mass", "ComputeTopMass(lep_pt[idx_lep], lep_eta[idx_lep], lep_phi[idx_lep], lep_E[idx_lep], jet_pt[idx_tagged], jet_eta[idx_tagged], jet_phi[idx_tagged], jet_E[idx_tagged], met_et, met_phi)")
170 histos[s] = df[s].Histo1D(ROOT.RDF.TH1DModel("top_mass", "", 10, 100, 400), "top_mass", "weight")
171
172# Run the event loop and merge histograms of the respective processes
173
174# RunGraphs allows to run the event loops of the separate RDataFrame graphs
175# concurrently. This results in an improved usage of the available resources
176# if each separate RDataFrame can not utilize all available resources, e.g.,
177# because not enough data is available.
178ROOT.RDF.RunGraphs([histos[s] for s in samples])
179
180def merge_histos(label):
181 h = None
182 for i, d in enumerate(files[label]):
183 t = histos[d[1]].GetValue()
184 if i == 0: h = t.Clone()
185 else: h.Add(t)
186 h.SetNameTitle(label, label)
187 return h
188
189data = merge_histos("data")
190twtb = merge_histos("twtb")
191singletop = merge_histos("singletop")
192wjets = merge_histos("wjets")
193
194# Create the plot
195
196# Set styles
197ROOT.gROOT.SetStyle("ATLAS")
198
199# Create canvas with pad
200c = ROOT.TCanvas("c", "", 600, 600)
201pad = ROOT.TPad("upper_pad", "", 0, 0, 1, 1)
202pad.SetTickx(False)
203pad.SetTicky(False)
204pad.Draw()
205pad.cd()
206
207# Draw stack with MC contributions
208stack = ROOT.THStack()
209wjets.Scale(1.1) # Corrected normalization derived from a validation region
210for h, color in zip(
211 [wjets, twtb, singletop],
212 [(222, 90, 106), (155, 152, 204), (208, 240, 193)]):
213 h.SetLineWidth(1)
214 h.SetLineColor(1)
215 h.SetFillColor(ROOT.TColor.GetColor(*color))
216 stack.Add(h)
217stack.Draw("HIST")
218stack.GetXaxis().SetTitle("m_{W(l#nu)+b} [GeV]")
219stack.GetYaxis().SetTitle("Events")
220stack.GetYaxis().SetLabelSize(0.04)
221stack.GetYaxis().SetTitleSize(0.045)
222stack.GetXaxis().SetLabelSize(0.04)
223stack.GetXaxis().SetTitleSize(0.045)
224stack.SetMinimum(0)
225stack.SetMaximum(5000 * lumi_scale)
226stack.GetYaxis().ChangeLabel(1, -1, 0)
227
228# Draw data
229data.SetMarkerStyle(20)
230data.SetMarkerSize(1.2)
231data.SetLineWidth(2)
232data.SetLineColor(ROOT.kBlack)
233data.Draw("E SAME")
234
235# Add legend
236legend = ROOT.TLegend(0.60, 0.65, 0.92, 0.92)
237legend.SetTextFont(42)
238legend.SetFillStyle(0)
239legend.SetBorderSize(0)
240legend.SetTextSize(0.035)
241legend.SetTextAlign(32)
242legend.AddEntry(data, "Data" ,"lep")
243legend.AddEntry(singletop, "Single top + jet", "f")
244legend.AddEntry(twtb, "t#bar{t},Wt,t#bar{b}", "f")
245legend.AddEntry(wjets, "W+jets", "f")
246legend.Draw("SAME")
247
248# Add ATLAS label
249text = ROOT.TLatex()
250text.SetNDC()
251text.SetTextFont(72)
252text.SetTextSize(0.045)
253text.DrawLatex(0.21, 0.86, "ATLAS")
254text.SetTextFont(42)
255text.DrawLatex(0.21 + 0.16, 0.86, "Open Data")
256text.SetTextSize(0.04)
257text.DrawLatex(0.21, 0.80, "#sqrt{{s}} = 13 TeV, {:.1f} fb^{{-1}}".format(lumi * lumi_scale / 1000.0))
258
259# Save the plot
260c.SaveAs("df107_SingleTopAnalysis.png")
261print("Saved figure to df107_SingleTopAnalysis.png")
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
unsigned int RunGraphs(std::vector< RResultHandle > handles)
Trigger the event loop of multiple RDataFrames concurrently.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:539
A struct which stores the parameters of a TH1D.
Ta Range(0, 0, 1, 1)