43df =
ROOT.RDataFrame(
"tree", ROOT.gROOT.GetTutorialDir().Data() +
"/analysis/dataframe/df106_HiggsToFourLeptons.root")
50data_dict = df.AsNumpy(columns=[
"m4l",
"sample_category",
"weight"])
54 name: data_dict[
"weight"][data_dict[
"sample_category"] == [name]].
sum()
for name
in (
"data",
"zz",
"other",
"higgs")
58for sample_category
in [
"data",
"higgs",
"zz",
"other"]:
60 weight_sum = weights_dict[sample_category]
62 mask = data_dict[
"sample_category"] == sample_category
64 weights = data_dict[
"weight"][mask]
66 weight_modified = weights / weight_sum
71 results[sample_category] = {
72 "weight_sum": weight_sum,
73 "weight_modified": weight_modified,
80higgs_data = data_dict[
"m4l"][data_dict[
"sample_category"] == [
"higgs"]]
81zz_data = data_dict[
"m4l"][data_dict[
"sample_category"] == [
"zz"]]
85sample_weight_higgs = np.array([results[
"higgs"][
"weight_modified"]]).flatten()
86sample_weight_zz = np.array([results[
"zz"][
"weight_modified"]]).flatten()
89sample_weight = np.concatenate([sample_weight_higgs, sample_weight_zz])
92sample_weight[sample_weight < 0] = 1e-6
95X = np.concatenate((higgs_data, zz_data), axis=0).reshape(-1, 1)
96y = np.concatenate([np.ones(
len(higgs_data)), np.zeros(
len(zz_data))])
99model_xgb = xgb.XGBClassifier(n_estimators=1000, max_depth=5, eta=0.2, min_child_weight=1e-6, nthread=1)
100model_xgb.fit(X, y, sample_weight=sample_weight)
104m4l = ROOT.RooRealVar(
"m4l",
"Four Lepton Invariant Mass", 0.0)
108def calculate_likelihood_xgb(m4l_arr: np.ndarray) -> np.ndarray:
109 prob = model_xgb.predict_proba(m4l_arr.T)[:, 0]
110 return (1 - prob) / prob
113llh = ROOT.RooFit.bindFunction(f
"llh", calculate_likelihood_xgb, m4l)
116n_signal = results[
"higgs"][
"weight"].
sum()
117n_back = results[
"zz"][
"weight"].
sum()
122 return n_back / (n_back + mu * n_signal)
125def weight_signal(mu):
126 return 1 - weight_back(mu)
130def likelihood_ratio(llr: np.ndarray, mu: np.ndarray) -> np.ndarray:
134 w_0 = np.array([weight_back(0), weight_signal(0)])
135 w_1 = np.array([weight_back(mu[0]), weight_signal(mu[0])])
137 w = np.outer(w_1, 1.0 / w_0)
139 p = np.ones((m, m,
len(llr)))
143 p[j, i] = 1.0 / p[i, j]
145 return 1.0 / np.sum(1.0 / np.sum(np.expand_dims(w, axis=2) * p, axis=0), axis=0)
148mu_var = ROOT.RooRealVar(
"mu",
"mu", 0.1, 5)
150nll_ratio = ROOT.RooFit.bindFunction(f
"nll", likelihood_ratio, llh, mu_var)
151pdf_learned = ROOT.RooWrapperPdf(
"learned_pdf",
"learned_pdf", nll_ratio, selfNormalized=
True)
154frame1 = m4l.frame(Title=
"Likelihood ratio r(m_{4l}|#mu=1);m_{4l};p(#mu=1)/p(#mu=0)", Range=(80, 170))
156nll_ratio.plotOn(frame1, ShiftToZero=
False, LineColor=
"kP6Blue")
158n_pred = ROOT.RooFormulaVar(
"n_pred", f
"{n_back} + mu * {n_signal}", [mu_var])
159pdf_learned_extended = ROOT.RooExtendPdf(
"final_pdf",
"final_pdf", pdf_learned, n_pred)
162data = ROOT.RooDataSet.from_numpy({
"m4l": data_dict[
"m4l"][data_dict[
"sample_category"] == [
"data"]]}, [m4l])
163nll = pdf_learned_extended.createNLL(data, Extended=
True)
166frame2 = mu_var.frame(Title=
"NLL sum;#mu (signal strength);#Delta NLL", Range=(0.5, 4))
167nll.plotOn(frame2, ShiftToZero=
True, LineColor=
"kP6Blue")
172c = ROOT.TCanvas(
"",
"", 1200
if single_canvas
else 600, 600)
176 ROOT.gPad.SetLeftMargin(0.15)
177 frame1.GetYaxis().SetTitleOffset(1.8)
182 ROOT.gPad.SetLeftMargin(0.15)
183 frame2.GetYaxis().SetTitleOffset(1.8)
185 c.SaveAs(
"rf618_plot_1.png")
186 c = ROOT.TCanvas(
"",
"", 600, 600)
191 c.SaveAs(
"rf618_plot_2.png")
194minimizer = ROOT.RooMinimizer(nll)
195minimizer.setErrorLevel(0.5)
196minimizer.setPrintLevel(-1)
197minimizer.minimize(
"Minuit2")
198result = minimizer.save()
199ROOT.SetOwnership(result,
True)
204del pdf_learned_extended
212del sys.modules[
"libROOTPythonizations"]
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
static uint64_t sum(uint64_t i)