19 Reduce initial dataset to only events which shall be used for training
21 return df.Filter(
"nElectron>=2 && nMuon>=2",
"At least two electrons and two muons")
def define_variables(df):
    """
    Define the variables which shall be used for training.

    Adds four columns to the dataframe, each picking a single element out of
    the ``Muon_pt`` and ``Electron_pt`` collections:

    * ``Muon_pt_1``     <- ``Muon_pt[0]``
    * ``Muon_pt_2``     <- ``Muon_pt[1]``
    * ``Electron_pt_1`` <- ``Electron_pt[0]``
    * ``Electron_pt_2`` <- ``Electron_pt[1]``

    The expressions index elements 0 and 1, so the dataframe should already be
    restricted to events with at least two muons and two electrons.

    Args:
        df: Object providing ``Define(name, expression)`` that returns the
            next dataframe node (presumably a ROOT RDataFrame -- confirm
            against the caller).

    Returns:
        The dataframe node returned by the last ``Define`` call.
    """
    # (new column name, expression) pairs, applied in this order.
    column_definitions = (
        ("Muon_pt_1", "Muon_pt[0]"),
        ("Muon_pt_2", "Muon_pt[1]"),
        ("Electron_pt_1", "Electron_pt[0]"),
        ("Electron_pt_2", "Electron_pt[1]"),
    )
    for column_name, expression in column_definitions:
        # Each Define returns a new node; keep chaining on the latest one.
        df = df.Define(column_name, expression)
    return df
# Names of the columns used for training; these are the columns that are
# written to the output files.
variables = [
    "Muon_pt_1",
    "Muon_pt_2",
    "Electron_pt_1",
    "Electron_pt_2",
]
if __name__ == "__main__":
    # (input file name, sample label) pairs; the label is used to name the
    # output files (train_<label>.root / test_<label>.root).
    datasets = [
        ["SMHiggsToZZTo4L.root", "signal"],
        ["ZZTo2e2mu.root", "background"],
    ]

    for filename, label in datasets:
        # NOTE(review): the format arguments were missing from the listing;
        # (label, filename) is inferred from the sentence -- confirm.
        print(">>> Extract the training and testing events for {} from the {} dataset.".format(
            label, filename))

        # Load the dataset, keep only the required events and define the
        # training variables.
        filepath = "root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/" + filename
        # NOTE(review): the statement creating `df` was missing from the
        # listing; the tree name "Events" is assumed (it matches the tree
        # name written by Snapshot below) -- confirm.
        df = ROOT.RDataFrame("Events", filepath)
        df = filter_events(df)
        df = define_variables(df)

        # Split the dataset by event number: even events go to the training
        # file, odd events to the testing file.
        columns = ROOT.std.vector["string"](variables)

        training_events = df.Filter(
            "event % 2 == 0",
            "Select events with even event number for training")
        training_events.Snapshot("Events", "train_" + label + ".root", columns)

        # The cut label previously said "for training" here although these
        # events are written to the test_ file.
        testing_events = df.Filter(
            "event % 2 == 1",
            "Select events with odd event number for testing")
        testing_events.Snapshot("Events", "test_" + label + ".root", columns)
# ROOT's RDataFrame offers a high-level interface for analyses of data stored in TTree, ...