This tutorial is a simplified but yet complex example of an analysis reconstructing the Higgs boson decaying to two Z bosons from events with four leptons. The data and simulated events are taken from CERN OpenData representing a subset of the data recorded in 2012 with the CMS detector at the LHC. The tutorials follows the Higgs to four leptons analysis published on CERN Open Data portal (10.7483/OPENDATA.CMS.JKB8.RR42). The resulting plots show the invariant mass of the selected four lepton systems in different decay modes (four muons, four electrons and two of each kind) and in a combined plot indicating the decay of the Higgs boson with a mass of about 125 GeV.
The following steps are performed for each sample with data and simulated events in order to reconstruct the Higgs boson from the selected muons and electrons:
Another aim of this version of the tutorial is to show a way to blend C++ and Python code. All the functions that make computations on data to define new columns or filter existing ones in a precise way, better suited to be written in C++, have been moved to a header that is then declared to the ROOT C++ interpreter. The functions that instead create nodes of the computational graph (e.g. Filter, Define) remain inside the main Python script.
The tutorial has the fast mode enabled by default, which reads the data from already skimmed datasets with a total size of only 51MB. If the fast mode is disabled, the tutorial runs over the full dataset with a size of 12GB.
import ROOT
import os
ROOT.ROOT.EnableImplicitMT()
higgs_header_path = os.path.join(os.sep, str(ROOT.gROOT.GetTutorialDir()) + os.sep, "dataframe" + os.sep,
"df103_NanoAODHiggsAnalysis_python.h")
ROOT.gInterpreter.Declare(
'#include "{}"'.
format(higgs_header_path))
def reco_higgs_to_2el2mu(df):
"""Reconstruct Higgs from two electrons and two muons"""
df_base = selection_2el2mu(df)
df_z_mass = df_base.Define("Z_mass", "compute_z_masses_2el2mu(Electron_pt, Electron_eta, Electron_phi,"
" Electron_mass, Muon_pt, Muon_eta, Muon_phi, Muon_mass)")
df_z_cut = filter_z_candidates(df_z_mass)
df_h_mass = df_z_cut.Define("H_mass", "compute_higgs_mass_2el2mu(Electron_pt, Electron_eta, Electron_phi,"
" Electron_mass, Muon_pt, Muon_eta, Muon_phi, Muon_mass)")
return df_h_mass
def selection_2el2mu(df):
"""Select interesting events with two electrons and two muons"""
df_ge2el2mu = df.Filter("nElectron>=2 && nMuon>=2", "At least two electrons and two muons")
df_eta = df_ge2el2mu.Filter("All(abs(Electron_eta)<2.5) && All(abs(Muon_eta)<2.4)", "Eta cuts")
df_pt = df_eta.Filter("pt_cuts(Muon_pt, Electron_pt)", "Pt cuts")
df_dr = df_pt.Filter("dr_cuts(Muon_eta, Muon_phi, Electron_eta, Electron_phi)", "Dr cuts")
df_iso = df_dr.Filter("All(abs(Electron_pfRelIso03_all)<0.40) && All(abs(Muon_pfRelIso04_all)<0.40)",
"Require good isolation")
df_el_ip3d = df_iso.Define("Electron_ip3d_el", "sqrt(Electron_dxy*Electron_dxy + Electron_dz*Electron_dz)")
df_el_sip3d = df_el_ip3d.Define("Electron_sip3d_el",
"Electron_ip3d_el/sqrt(Electron_dxyErr*Electron_dxyErr + "
"Electron_dzErr*Electron_dzErr)")
df_el_track = df_el_sip3d.Filter("All(Electron_sip3d_el<4) && All(abs(Electron_dxy)<0.5) &&"
" All(abs(Electron_dz)<1.0)",
"Electron track close to primary vertex with small uncertainty")
df_mu_ip3d = df_el_track.Define("Muon_ip3d_mu", "sqrt(Muon_dxy*Muon_dxy + Muon_dz*Muon_dz)")
df_mu_sip3d = df_mu_ip3d.Define("Muon_sip3d_mu",
"Muon_ip3d_mu/sqrt(Muon_dxyErr*Muon_dxyErr + Muon_dzErr*Muon_dzErr)")
df_mu_track = df_mu_sip3d.Filter("All(Muon_sip3d_mu<4) && All(abs(Muon_dxy)<0.5) && All(abs(Muon_dz)<1.0)",
"Muon track close to primary vertex with small uncertainty")
df_2p2n = df_mu_track.Filter("Sum(Electron_charge)==0 && Sum(Muon_charge)==0",
"Two opposite charged electron and muon pairs")
return df_2p2n
def reco_higgs_to_4mu(df):
"""Reconstruct Higgs from four muons"""
df_base = selection_4mu(df)
df_z_idx = df_base.Define("Z_idx", "reco_zz_to_4l(Muon_pt, Muon_eta, Muon_phi, Muon_mass, Muon_charge)")
df_z_dr = df_z_idx.Filter("filter_z_dr(Z_idx, Muon_eta, Muon_phi)", "Delta R separation of muons building Z system")
df_z_mass = df_z_dr.Define("Z_mass", "compute_z_masses_4l(Z_idx, Muon_pt, Muon_eta, Muon_phi, Muon_mass)")
df_z_cut = filter_z_candidates(df_z_mass)
df_h_mass = df_z_cut.Define("H_mass", "compute_higgs_mass_4l(Z_idx, Muon_pt, Muon_eta, Muon_phi, Muon_mass)")
return df_h_mass
def selection_4mu(df):
"""Select interesting events with four muons"""
df_ge4m = df.Filter("nMuon>=4", "At least four muons")
df_iso = df_ge4m.Filter("All(abs(Muon_pfRelIso04_all)<0.40)", "Require good isolation")
df_kin = df_iso.Filter("All(Muon_pt>5) && All(abs(Muon_eta)<2.4)", "Good muon kinematics")
df_ip3d = df_kin.Define("Muon_ip3d", "sqrt(Muon_dxy*Muon_dxy + Muon_dz*Muon_dz)")
df_sip3d = df_ip3d.Define("Muon_sip3d", "Muon_ip3d/sqrt(Muon_dxyErr*Muon_dxyErr + Muon_dzErr*Muon_dzErr)")
df_pv = df_sip3d.Filter("All(Muon_sip3d<4) && All(abs(Muon_dxy)<0.5) && All(abs(Muon_dz)<1.0)",
"Track close to primary vertex with small uncertainty")
df_2p2n = df_pv.Filter("nMuon==4 && Sum(Muon_charge==1)==2 && Sum(Muon_charge==-1)==2",
"Two positive and two negative muons")
return df_2p2n
def filter_z_candidates(df):
"""Apply selection on reconstructed Z candidates"""
df_z1_cut = df.Filter("Z_mass[0] > 40 && Z_mass[0] < 120", "Mass of first Z candidate in [40, 120]")
df_z2_cut = df_z1_cut.Filter("Z_mass[1] > 12 && Z_mass[1] < 120", "Mass of second Z candidate in [12, 120]")
return df_z2_cut
def reco_higgs_to_4el(df):
"""Reconstruct Higgs from four electrons"""
df_base = selection_4el(df)
df_z_idx = df_base.Define("Z_idx",
"reco_zz_to_4l(Electron_pt, Electron_eta, Electron_phi, Electron_mass, Electron_charge)")
df_z_dr = df_z_idx.Filter("filter_z_dr(Z_idx, Electron_eta, Electron_phi)",
"Delta R separation of Electrons building Z system")
df_z_mass = df_z_dr.Define("Z_mass",
"compute_z_masses_4l(Z_idx, Electron_pt, Electron_eta, Electron_phi, Electron_mass)")
df_z_cut = filter_z_candidates(df_z_mass)
df_h_mass = df_z_cut.Define("H_mass",
"compute_higgs_mass_4l(Z_idx, Electron_pt, Electron_eta, Electron_phi, Electron_mass)")
return df_h_mass
def selection_4el(df):
"""Select interesting events with four electrons"""
df_ge4el = df.Filter("nElectron>=4", "At least our electrons")
df_iso = df_ge4el.Filter("All(abs(Electron_pfRelIso03_all)<0.40)", "Require good isolation")
df_kin = df_iso.Filter("All(Electron_pt>7) && All(abs(Electron_eta)<2.5)", "Good Electron kinematics")
df_ip3d = df_kin.Define("Electron_ip3d", "sqrt(Electron_dxy*Electron_dxy + Electron_dz*Electron_dz)")
df_sip3d = df_ip3d.Define("Electron_sip3d",
"Electron_ip3d/sqrt(Electron_dxyErr*Electron_dxyErr + Electron_dzErr*Electron_dzErr)")
df_pv = df_sip3d.Filter("All(Electron_sip3d<4) && All(abs(Electron_dxy)<0.5) && All(abs(Electron_dz)<1.0)",
"Track close to primary vertex with small uncertainty")
df_2p2n = df_pv.Filter("nElectron==4 && Sum(Electron_charge==1)==2 && Sum(Electron_charge==-1)==2",
"Two positive and two negative electrons")
return df_2p2n
def plot(sig, bkg, data, x_label, filename):
"""
Plot invariant mass for signal and background processes from simulated
events overlay the measured data.
"""
ROOT.gStyle.SetOptStat(0)
ROOT.gStyle.SetTextFont(42)
d = ROOT.TCanvas("", "", 800, 700)
ROOT.SetOwnership(d, False)
d.SetLeftMargin(0.15)
h_bkg = bkg
h_cmb = sig.Clone()
h_cmb.Add(h_bkg)
h_cmb.SetTitle("")
h_cmb.GetXaxis().SetTitle(x_label)
h_cmb.GetXaxis().SetTitleSize(0.04)
h_cmb.GetYaxis().SetTitle("N_{Events}")
h_cmb.GetYaxis().SetTitleSize(0.04)
h_cmb.SetLineColor(ROOT.kRed)
h_cmb.SetLineWidth(2)
h_cmb.SetMaximum(18)
h_bkg.SetLineWidth(2)
h_bkg.SetFillStyle(1001)
h_bkg.SetLineColor(ROOT.kBlack)
h_bkg.SetFillColor(ROOT.kAzure - 9)
h_data = data
h_data.SetLineWidth(1)
h_data.SetMarkerStyle(20)
h_data.SetMarkerSize(1.0)
h_data.SetMarkerColor(ROOT.kBlack)
h_data.SetLineColor(ROOT.kBlack)
h_cmb.DrawCopy("HIST")
h_bkg.DrawCopy("HIST SAME")
h_data.DrawCopy("PE1 SAME")
legend = ROOT.TLegend(0.62, 0.70, 0.82, 0.88)
legend.SetFillColor(0)
legend.SetBorderSize(0)
legend.SetTextSize(0.03)
legend.AddEntry(h_data, "Data", "PE1")
legend.AddEntry(h_bkg, "ZZ", "f")
legend.AddEntry(h_cmb, "m_{H} = 125 GeV", "f")
legend.Draw()
cms_label = ROOT.TLatex()
cms_label.SetTextSize(0.04)
cms_label.DrawLatexNDC(0.16, 0.92, "#bf{CMS Open Data}")
header = ROOT.TLatex()
header.SetTextSize(0.03)
header.DrawLatexNDC(0.63, 0.92, "#sqrt{s} = 8 TeV, L_{int} = 11.6 fb^{-1}")
d.SaveAs(filename)
path = "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/"
run_fast = True
if run_fast: path = "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod_skimmed/"
df_data_doublemu =
ROOT.RDataFrame(
"Events", (path + f
for f
in [
"Run2012B_DoubleMuParked.root",
"Run2012C_DoubleMuParked.root"]))
df_data_doubleel =
ROOT.RDataFrame(
"Events", (path + f
for f
in [
"Run2012B_DoubleElectron.root",
"Run2012C_DoubleElectron.root"]))
nbins = 36
luminosity = 11580.0
xsec_ZZTo4mu = 0.077
nevt_ZZTo4mu = 1499064.0
xsec_ZZTo4el = 0.077
nevt_ZZTo4el = 1499093.0
xsec_ZZTo2el2mu = 0.18
nevt_ZZTo2el2mu = 1497445.0
xsec_SMHiggsToZZTo4L = 0.0065
nevt_SMHiggsToZZTo4L = 299973.0
scale_ZZTo4l = 1.386
weight_sig_4mu = luminosity * xsec_SMHiggsToZZTo4L / nevt_SMHiggsToZZTo4L
weight_bkg_4mu = luminosity * xsec_ZZTo4mu * scale_ZZTo4l / nevt_ZZTo4mu
weight_sig_4el = luminosity * xsec_SMHiggsToZZTo4L / nevt_SMHiggsToZZTo4L
weight_bkg_4el = luminosity * xsec_ZZTo4el * scale_ZZTo4l / nevt_ZZTo4el
weight_sig_2el2mu = luminosity * xsec_SMHiggsToZZTo4L / nevt_SMHiggsToZZTo4L
weight_bkg_2el2mu = luminosity * xsec_ZZTo2el2mu * scale_ZZTo4l / nevt_ZZTo2el2mu
df_sig_4mu_reco = reco_higgs_to_4mu(df_sig_4l)
df_h_sig_4mu = df_sig_4mu_reco.Define(
"weight",
"{}".
format(weight_sig_4mu))\
.Histo1D(("h_sig_4mu", "", nbins, 70, 180), "H_mass", "weight")
df_bkg_4mu_reco = reco_higgs_to_4mu(df_bkg_4mu)
df_h_bkg_4mu = df_bkg_4mu_reco.Define(
"weight",
"{}".
format(weight_bkg_4mu))\
.Histo1D(("h_bkg_4mu", "", nbins, 70, 180), "H_mass", "weight")
df_data_4mu_reco = reco_higgs_to_4mu(df_data_doublemu)
df_h_data_4mu = df_data_4mu_reco.Define("weight", "1.0")\
.Histo1D(("h_data_4mu", "", nbins, 70, 180), "H_mass", "weight")
df_sig_4el_reco = reco_higgs_to_4el(df_sig_4l)
df_h_sig_4el = df_sig_4el_reco.Define(
"weight",
"{}".
format(weight_sig_4el))\
.Histo1D(("h_sig_4el", "", nbins, 70, 180), "H_mass", "weight")
df_bkg_4el_reco = reco_higgs_to_4el(df_bkg_4el)
df_h_bkg_4el = df_bkg_4el_reco.Define(
"weight",
"{}".
format(weight_bkg_4el))\
.Histo1D(("h_bkg_4el", "", nbins, 70, 180), "H_mass", "weight")
df_data_4el_reco = reco_higgs_to_4el(df_data_doubleel)
df_h_data_4el = df_data_4el_reco.Define("weight", "1.0")\
.Histo1D(("h_data_4el", "", nbins, 70, 180), "H_mass", "weight")
df_sig_2el2mu_reco = reco_higgs_to_2el2mu(df_sig_4l)
df_h_sig_2el2mu = df_sig_2el2mu_reco.Define(
"weight",
"{}".
format(weight_sig_2el2mu))\
.Histo1D(("h_sig_2el2mu", "", nbins, 70, 180), "H_mass", "weight")
df_bkg_2el2mu_reco = reco_higgs_to_2el2mu(df_bkg_2el2mu)
df_h_bkg_2el2mu = df_bkg_2el2mu_reco.Define(
"weight",
"{}".
format(weight_bkg_2el2mu))\
.Histo1D(("h_bkg_2el2mu", "", nbins, 70, 180), "H_mass", "weight")
df_data_2el2mu_reco = reco_higgs_to_2el2mu(df_data_doublemu)
df_h_data_2el2mu = df_data_2el2mu_reco.Define("weight", "1.0")\
.Histo1D(("h_data_2el2mu_doublemu", "", nbins, 70, 180), "H_mass", "weight")
df_h_sig_4el, df_h_bkg_4el, df_h_data_4el,
df_h_sig_2el2mu, df_h_bkg_2el2mu, df_h_data_2el2mu])
signal_4mu = df_h_sig_4mu.GetValue()
background_4mu = df_h_bkg_4mu.GetValue()
data_4mu = df_h_data_4mu.GetValue()
signal_4el = df_h_sig_4el.GetValue()
background_4el = df_h_bkg_4el.GetValue()
data_4el = df_h_data_4el.GetValue()
signal_2el2mu = df_h_sig_2el2mu.GetValue()
background_2el2mu = df_h_bkg_2el2mu.GetValue()
data_2el2mu = df_h_data_2el2mu.GetValue()
plot(signal_4mu, background_4mu, data_4mu,
"m_{4#mu} (GeV)",
"higgs_4mu.pdf")
plot(signal_4el, background_4el, data_4el,
"m_{4e} (GeV)",
"higgs_4el.pdf")
plot(signal_2el2mu, background_2el2mu, data_2el2mu,
"m_{2e2#mu} (GeV)",
"higgs_2el2mu.pdf")
h_sig_4l = signal_4mu
h_sig_4l.Add(signal_4el)
h_sig_4l.Add(signal_2el2mu)
h_bkg_4l = background_4mu
h_bkg_4l.Add(background_4el)
h_bkg_4l.Add(background_2el2mu)
h_data_4l = data_4mu
h_data_4l.Add(data_4el)
h_data_4l.Add(data_2el2mu)
plot(h_sig_4l, h_bkg_4l, h_data_4l,
"m_{4l} (GeV)",
"higgs_4l.pdf")
if __name__ == "__main__":
winID h TVirtualViewer3D TVirtualGLPainter char TVirtualGLPainter plot
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t format
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
unsigned int RunGraphs(std::vector< RResultHandle > handles)
Trigger the event loop of multiple RDataFrames concurrently.