Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
rf617_simulation_based_inference_multidimensional.py
Go to the documentation of this file.
1## \file
2## \ingroup tutorial_roofit_main
3## \notebook
4## Use Simulation Based Inference (SBI) in multiple dimensions in RooFit.
5##
6## This tutorial shows how to use SBI in higher dimension in ROOT.
7## This tutorial transfers the simple concepts of the 1D case introduced in
8## rf615_simulation_based_inference.py onto the higher dimensional case.
9##
10## Again as reference distribution we
11## choose a simple uniform distribution. The target distribution is chosen to
12## be Gaussian with different mean values.
13## The classifier is trained to discriminate between the reference and target
14## distribution.
15## We see how the neural networks generalize to unknown mean values.
16##
17## Furthermore, we compare SBI to the approach of moment morphing. In this case,
18## we can conclude that SBI is way more sample efficient when it comes to
19## estimating the negative log likelihood ratio.
20##
21## For an introductory background see rf615_simulation_based_inference.py
22##
23## \macro_image
24## \macro_code
25## \macro_output
26##
27## \date July 2024
28## \author Robin Syring
29
30import ROOT
31import numpy as np
32from sklearn.neural_network import MLPClassifier
33import itertools
34
# Kills warning messages
# NOTE(review): this statement was dropped by the HTML extraction (original
# line 36); restored from the upstream tutorial — verify against the original.
ROOT.RooMsgService.instance().setGlobalKillBelow(ROOT.RooFit.FATAL)

n_samples_morph = 10000  # Number of samples for morphing
n_bins = 4  # Number of 'sampled' Gaussians
n_samples_train = n_samples_morph * n_bins  # To have a fair comparison
42
# Morphing as baseline
def morphing(setting, n_dimensions):
    """Build a moment-morphing pdf from a grid of binned Gaussian templates.

    Reads the module-level objects ``x_vars``, ``mu_vars``, ``sigmas``,
    ``n_bins``, ``n_samples_morph`` and ``ws``; imports the wrapped morphing
    pdf (named "morph") into the workspace ``ws``.

    Parameters:
        setting: interpolation setting, e.g. ROOT.RooMomentMorphFuncND.Linear.
        n_dimensions: number of observable dimensions.
    """
    # Define binning for morphing
    binning = [ROOT.RooBinning(n_bins, 0.0, n_bins - 1.0) for dim in range(n_dimensions)]
    grid = ROOT.RooMomentMorphFuncND.Grid(*binning)

    # Set bins for each x variable
    # NOTE(review): the loop body was dropped by the extraction (original
    # line 52); restored from the upstream tutorial — verify.
    for x_var in x_vars:
        x_var.setBins(50)

    # Define mu values as input for morphing for each dimension
    mu_helps = [ROOT.RooRealVar(f"mu{i}", f"mu{i}", 0.0) for i in range(n_dimensions)]

    # Create a product of Gaussians for all dimensions
    gaussians = []
    for j in range(n_dimensions):
        gaussian = ROOT.RooGaussian(f"gdim{j}", f"gdim{j}", x_vars[j], mu_helps[j], sigmas[j])
        gaussians.append(gaussian)

    # Create a product PDF for the multidimensional Gaussian
    gauss_product = ROOT.RooProdPdf("gauss_product", "gauss_product", ROOT.RooArgList(*gaussians))

    templates = dict()

    # Iterate through each tuple of grid bin indices (one template per point)
    for idx, nd_idx in enumerate(itertools.product(range(n_bins), repeat=n_dimensions)):
        for i_dim in range(n_dimensions):
            mu_helps[i_dim].setVal(nd_idx[i_dim])

        # Fill the histograms
        hist = gauss_product.generateBinned(ROOT.RooArgSet(*x_vars), n_samples_morph)

        # Ensure that every bin is filled and there are no zero probabilities
        for i_bin in range(hist.numEntries()):
            hist.add(hist.get(i_bin), 1.0)

        templates[nd_idx] = ROOT.RooHistPdf(f"histpdf{idx}", f"histpdf{idx}", ROOT.RooArgSet(*x_vars), hist, 1)
        templates[nd_idx]._data_hist = hist  # To keep the underlying RooDataHist alive

        # Add the PDF to the grid
        grid.addPdf(templates[nd_idx], *nd_idx)

    # Create the morphing function and add it to the ws
    morph_func = ROOT.RooMomentMorphFuncND("morph_func", "morph_func", [*mu_vars], [*x_vars], grid, setting)
    # NOTE(review): dropped by the extraction (original line 88); the wrapper
    # below requires the function to be flagged as a pdf — verify.
    morph_func.setPdfMode()
    morph = ROOT.RooWrapperPdf("morph", "morph", morph_func, True)

    ws.Import(morph)

    # Uncomment to see input plots for the first dimension (you might need to increase the morphed samples)
    # f1 = x_vars[0].frame()
    # for i in range(n_bins):
    #     templates[(i, 0)].plotOn(f1)
    # ws["morph"].plotOn(f1, LineColor="r")
    # c0 = ROOT.TCanvas()
    # f1.Draw()
    # input()  # Wait for user input to proceed
102
# Define the observed mean values for the Gaussian distributions
mu_observed = [2.5, 2.0]
# Fixed standard deviations of the target Gaussians, one per dimension
sigmas = [1.5, 1.5]
106
107
# Class used in this case to demonstrate the use of SBI in Root
class SBI:
    """Simulation Based Inference helper.

    Generates target (Gaussian) and reference (uniform) samples from the
    workspace and trains an MLP classifier to discriminate between them,
    conditioned on the mean parameters mu.
    """

    # Initializing the class SBI
    def __init__(self, ws, n_vars):
        # Choose the hyperparameters for training the neural network
        self.classifier = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=1000, random_state=42)
        self.data_model = None  # target (Gaussian) samples, shape (n, n_vars)
        self.data_ref = None  # reference (uniform) samples, shape (n, n_vars)
        self.X_train = None  # training features: observables + conditioning mus
        self.y_train = None  # labels: 1 for target samples, 0 for reference
        self.ws = ws  # RooWorkspace holding the pdfs and variables
        self.n_vars = n_vars  # number of observable dimensions
        self._training_mus = None  # mu values paired with the target samples
        self._reference_mu = None  # mu values paired with the reference samples

    # Defining the target / training data for different values of mean value mu
    def model_data(self, model, x_vars, mu_vars, n_samples):
        ws = self.ws
        # Generate (x, mu) jointly so each x sample carries its own mu
        samples_gaussian = (
            ws[model].generate([ws[x] for x in x_vars] + [ws[mu] for mu in mu_vars], n_samples).to_numpy()
        )

        self._training_mus = np.array([samples_gaussian[mu] for mu in mu_vars]).T
        data_test_model = np.array([samples_gaussian[x] for x in x_vars]).T

        self.data_model = data_test_model.reshape(-1, self.n_vars)

    # Generating samples for the reference distribution
    def reference_data(self, model, x_vars, mu_vars, n_samples, help_model):
        ws = self.ws
        # Ensuring the normalization with generating as many reference data as target data
        samples_uniform = ws[model].generate([ws[x] for x in x_vars], n_samples)
        data_reference_model = np.array(
            [samples_uniform.get(i).getRealValue(x) for x in x_vars for i in range(samples_uniform.numEntries())]
        )

        self.data_ref = data_reference_model.reshape(-1, self.n_vars)

        # Draw the conditioning mu values for the reference class from help_model
        samples_mu = ws[help_model].generate([ws[mu] for mu in mu_vars], n_samples)
        mu_data = np.array(
            [samples_mu.get(i).getRealValue(mu) for mu in mu_vars for i in range(samples_mu.numEntries())]
        )

        self._reference_mu = mu_data.reshape(-1, self.n_vars)

    # Bringing the data in the right format for training
    def preprocessing(self):
        thetas = np.concatenate((self._training_mus, self._reference_mu))
        X = np.concatenate([self.data_model, self.data_ref])

        # Label 1 = target (Gaussian), 0 = reference (uniform)
        self.y_train = np.concatenate([np.ones(len(self.data_model)), np.zeros(len(self.data_ref))])
        # Features are the observables concatenated with the conditioning mus
        self.X_train = np.concatenate([X, thetas], axis=1)

    # Train the classifier
    def train_classifier(self):
        self.classifier.fit(self.X_train, self.y_train)
164
165
# Define the "observed" data in a workspace
def build_ws(mu_observed):
    """Create a RooWorkspace with the model pdfs and an "observed" dataset.

    Parameters:
        mu_observed: list of true mean values; its length sets the number of
            dimensions.

    Returns:
        RooWorkspace containing the observables x{i}, parameters mu{i}, the
        Gaussian product pdf "gauss", the uniform reference pdf "uniform",
        the uniform helper pdf "uniform_help" over the mus, and the dataset
        "obs_data" generated from "gauss".
    """
    n_vars = len(mu_observed)
    x_vars = [ROOT.RooRealVar(f"x{i}", f"x{i}", -5, 15) for i in range(n_vars)]
    mu_vars = [ROOT.RooRealVar(f"mu{i}", f"mu{i}", mu_observed[i], 0, 4) for i in range(n_vars)]
    gaussians = [ROOT.RooGaussian(f"gauss{i}", f"gauss{i}", x_vars[i], mu_vars[i], sigmas[i]) for i in range(n_vars)]
    uniforms = [ROOT.RooUniform(f"uniform{i}", f"uniform{i}", x_vars[i]) for i in range(n_vars)]
    uniforms_help = [ROOT.RooUniform(f"uniformh{i}", f"uniformh{i}", mu_vars[i]) for i in range(n_vars)]
    # Create multi-dimensional PDFs
    gauss = ROOT.RooProdPdf("gauss", "gauss", ROOT.RooArgList(*gaussians))
    uniform = ROOT.RooProdPdf("uniform", "uniform", ROOT.RooArgList(*uniforms))
    uniform_help = ROOT.RooProdPdf("uniform_help", "uniform_help", ROOT.RooArgList(*uniforms_help))
    obs_data = gauss.generate(ROOT.RooArgSet(*x_vars), n_samples_morph)
    obs_data.SetName("obs_data")

    # Create and return the workspace
    ws = ROOT.RooWorkspace()
    ws.Import(x_vars)
    ws.Import(mu_vars)
    ws.Import(gauss)
    ws.Import(uniform)
    ws.Import(uniform_help)
    ws.Import(obs_data)

    return ws
191
192
# Build the workspace and extract variables
ws = build_ws(mu_observed)


# Export the variables from ws
x_vars = [ws[f"x{i}"] for i in range(len(mu_observed))]
mu_vars = [ws[f"mu{i}"] for i in range(len(mu_observed))]

# Do the morphing
# NOTE(review): this call was dropped by the extraction (original line 202);
# restored from the upstream tutorial — verify the interpolation setting.
morphing(ROOT.RooMomentMorphFuncND.Linear, len(mu_observed))

# Calculate the nll for the morphed distribution
# TODO: Fix RooAddPdf::fixCoefNormalization(nset) warnings with new CPU backend
nll_morph = ws["morph"].createNLL(ws["obs_data"], EvalBackend="legacy")

# Initialize the SBI model
model = SBI(ws, len(mu_observed))

# Generate and preprocess training data
# NOTE(review): the reference_data/preprocessing/train_classifier calls were
# partially dropped by the extraction (original lines 213, 216, 219); restored.
model.model_data("gauss", [x.GetName() for x in x_vars], [mu.GetName() for mu in mu_vars], n_samples_train)
model.reference_data(
    "uniform", [x.GetName() for x in x_vars], [mu.GetName() for mu in mu_vars], n_samples_train, "uniform_help"
)
model.preprocessing()

# Train the neural network classifier
model.train_classifier()
sbi_model = model
221
222
# Function to compute the likelihood ratio using the trained classifier
def learned_likelihood_ratio(*args):
    """Evaluate the classifier-based likelihood ratio r = p / (1 - p).

    Each positional argument is an array of values for one variable in the
    order (x variables, mu variables); arrays are stacked column-wise into
    the classifier's feature matrix.
    """
    n = max(*(len(a) for a in args))
    X = np.zeros((n, len(args)))
    for i in range(len(args)):
        X[:, i] = args[i]
    # NOTE(review): this line was dropped by the extraction (original line
    # 229); restored from the upstream tutorial — class-1 probability.
    prob = sbi_model.classifier.predict_proba(X)[:, 1]
    return prob / (1.0 - prob)
231
232
# Create combined variable list for ROOT
combined_vars = ROOT.RooArgList()
for var in x_vars + mu_vars:
    # NOTE(review): loop body dropped by the extraction (original line 236);
    # restored — each variable is appended to the argument list.
    combined_vars.add(var)

# Create a custom likelihood ratio function using the trained classifier
lhr_learned = ROOT.RooFit.bindFunction("MyBinFunc", learned_likelihood_ratio, combined_vars)
240
# Calculate the 'analytical' likelihood ratio
lhr_calc = ROOT.RooFormulaVar("lhr_calc", "x[0] / x[1]", [ws["gauss"], ws["uniform"]])

# Define the 'analytical' negative logarithmic likelihood
nll_gauss = ws["gauss"].createNLL(ws["obs_data"])

# Create the learned pdf and NLL sum based on the learned likelihood ratio
pdf_learned = ROOT.RooWrapperPdf("learned_pdf", "learned_pdf", lhr_learned, True)

nllr_learned = pdf_learned.createNLL(ws["obs_data"])

# Plot the learned and analytical summed negative logarithmic likelihood
frame1 = mu_vars[0].frame(
    Title="NLL of SBI vs. Morphing;#mu_{1};NLL",
    Range=(mu_observed[0] - 1, mu_observed[0] + 1),
)
nll_gauss.plotOn(frame1, ShiftToZero=True, LineColor="kP6Yellow", Name="gauss")
ROOT.RooAbsReal.setEvalErrorLoggingMode("Ignore")  # Silence some warnings
nll_morph.plotOn(frame1, ShiftToZero=True, LineColor="kP6Red", Name="morph")
nllr_learned.plotOn(frame1, LineColor="kP6Blue", ShiftToZero=True, Name="learned")
262
263
# Declare a helper function in ROOT to dereference unique_ptr
# NOTE(review): the Declare( call line was dropped by the extraction
# (original line 265); restored.
ROOT.gInterpreter.Declare(
    """
RooAbsArg &my_deref(std::unique_ptr<RooAbsArg> const& ptr) { return *ptr; }
"""
)

# Choose normalization set for lhr_calc to plot over
norm_set = ROOT.RooArgSet(x_vars)
lhr_calc_final_ptr = ROOT.RooFit.Detail.compileForNormSet(lhr_calc, norm_set)
lhr_calc_final = ROOT.my_deref(lhr_calc_final_ptr)
# NOTE(review): one more source line was lost here in extraction (original
# line 275) — verify against the upstream tutorial.
lhr_calc_final.recursiveRedirectServers(norm_set)
# Plot the likelihood ratio functions
frame2 = x_vars[0].frame(Title="Likelihood ratio r(x_{1}|#mu_{1}=2.5);x_{1};p_{gauss}/p_{uniform}")
lhr_learned.plotOn(frame2, LineColor="kP6Blue", Name="learned_ratio")
lhr_calc_final.plotOn(frame2, LineColor="kP6Yellow", Name="exact")
281
# Write the plots into one canvas to show, or into separate canvases for saving.
single_canvas = True

c = ROOT.TCanvas("", "", 1200 if single_canvas else 600, 600)
if single_canvas:
    c.Divide(2)
    c.cd(1)
    # NOTE(review): margin/draw lines were dropped by the extraction
    # (original lines 289, 291); restored from the upstream tutorial.
    ROOT.gPad.SetLeftMargin(0.15)
    frame1.GetYaxis().SetTitleOffset(1.8)
frame1.Draw()

legend1 = ROOT.TLegend(0.43, 0.63, 0.8, 0.87)
legend1.SetFillColor("background")
legend1.SetLineColor("background")
legend1.SetTextSize(0.04)
legend1.AddEntry("learned", "learned (SBI)", "L")
legend1.AddEntry("gauss", "true NLL", "L")
# Bug fix: the morphing curve is registered as "morph" in plotOn above,
# so the legend must look it up under that name (was "morphed").
legend1.AddEntry("morph", "moment morphing", "L")
legend1.Draw()

if single_canvas:
    c.cd(2)
    ROOT.gPad.SetLeftMargin(0.15)
    frame2.GetYaxis().SetTitleOffset(1.8)
else:
    c.SaveAs("rf617_plot_1.png")
    c = ROOT.TCanvas("", "", 600, 600)

frame2.Draw()

legend2 = ROOT.TLegend(0.53, 0.73, 0.87, 0.87)
legend2.SetFillColor("background")
legend2.SetLineColor("background")
legend2.SetTextSize(0.04)
legend2.AddEntry("learned_ratio", "learned (SBI)", "L")
legend2.AddEntry("exact", "true ratio", "L")
legend2.Draw()

if not single_canvas:
    c.SaveAs("rf617_plot_2.png")
322
323
# Use ROOT's minimizer to compute the minimum and display the results
for nll in [nll_gauss, nllr_learned, nll_morph]:
    minimizer = ROOT.RooMinimizer(nll)
    # NOTE(review): the two configuration lines were dropped by the
    # extraction (original lines 327-328); restored from the upstream
    # tutorial — 0.5 is the 1-sigma error definition for an NLL.
    minimizer.setErrorLevel(0.5)
    minimizer.setPrintLevel(-1)
    minimizer.minimize("Minuit2")
    result = minimizer.save()
    result.Print()
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len