Use Simulation Based Inference (SBI) in multiple dimensions in RooFit.
This tutorial shows how to use SBI in higher dimensions in ROOT. It transfers the simple concepts of the 1D case introduced in rf615_simulation_based_inference.py to the higher-dimensional case.
Again, as the reference distribution we choose a simple uniform distribution. The target distribution is chosen to be Gaussian with different mean values. The classifier is trained to discriminate between the reference and target distributions. We see how the neural networks generalize to unknown mean values.
Furthermore, we compare SBI to the approach of moment morphing. In this case, we can conclude that SBI is far more sample-efficient when it comes to estimating the negative log-likelihood ratio.
For an introductory background see rf615_simulation_based_inference.py
import ROOT
import numpy as np
import itertools
n_samples_morph = 10000
n_bins = 4
n_samples_train = n_samples_morph * n_bins
for x_var in x_vars:
gaussians = []
for j
in range(n_dimensions):
gaussian =
ROOT.RooGaussian(f
"gdim{j}", f
"gdim{j}", x_vars[j], mu_helps[j], sigmas[j])
templates = dict()
for i_dim
in range(n_dimensions):
mu_helps[i_dim].setVal(nd_idx[i_dim])
mu_observed = [2.5, 2.0]
sigmas = [1.5, 1.5]
def __init__(self, ws, n_vars):
    """Create the classifier and initialise the per-instance state.

    Args:
        ws: Workspace object. It is stored as-is; other methods of this
            class look objects up in it by name (``ws[name]``).
        n_vars: Number of variables. It is stored as-is.
    """
    # Classifier hyperparameters: two hidden layers of 20 units each, at
    # most 1000 training iterations, and a fixed random_state.
    # NOTE(review): MLPClassifier is presumably scikit-learn's
    # sklearn.neural_network.MLPClassifier -- its import is not visible here.
    self.classifier = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=1000, random_state=42)

    # Data containers start empty; they are expected to be filled by other
    # methods before self.classifier.fit(self.X_train, self.y_train) runs.
    self.data_model = None
    self.data_ref = None
    self.X_train = None
    self.y_train = None

    # Constructor arguments, kept unchanged.
    self.ws = ws
    self.n_vars = n_vars

    # Parameter (mu) samples recorded alongside the generated data sets.
    self._training_mus = None
    self._reference_mu = None
def model_data(self, model, x_vars, mu_vars, n_samples):
ws = self.ws
samples_gaussian = (
ws[model].generate([ws[x]
for x
in x_vars] + [ws[mu]
for mu
in mu_vars], n_samples).
to_numpy()
)
self._training_mus =
np.array([samples_gaussian[mu]
for mu
in mu_vars]).T
data_test_model =
np.array([samples_gaussian[x]
for x
in x_vars]).T
def reference_data(self, model, x_vars, mu_vars, n_samples, help_model):
ws = self.ws
samples_uniform = ws[model].generate([ws[x] for x in x_vars], n_samples)
)
samples_mu = ws[help_model].generate([ws[mu] for mu in mu_vars], n_samples)
)
self.classifier.fit(self.X_train, self.y_train)
n_vars =
len(mu_observed)
gaussians = [
ROOT.RooGaussian(f
"gauss{i}", f
"gauss{i}", x_vars[i], mu_vars[i], sigmas[i])
for i
in range(n_vars)]
uniforms_help = [
ROOT.RooUniform(f
"uniformh{i}", f
"uniformh{i}", mu_vars[i])
for i
in range(n_vars)]
return ws
x_vars = [ws[f
"x{i}"]
for i
in range(
len(mu_observed))]
mu_vars = [ws[f
"mu{i}"]
for i
in range(
len(mu_observed))]
nll_morph = ws["morph"].createNLL(ws["obs_data"], EvalBackend="legacy")
model =
SBI(ws,
len(mu_observed))
"uniform", [
x.GetName()
for x
in x_vars], [
mu.GetName()
for mu
in mu_vars], n_samples_train,
"uniform_help"
)
sbi_model = model
n = max(*(
len(a)
for a
in args))
X[:, i] = args[i]
return prob / (1.0 - prob)
for var in x_vars + mu_vars:
nll_gauss = ws["gauss"].createNLL(ws["obs_data"])
frame1 = mu_vars[0].frame(
Title="NLL of SBI vs. Morphing;#mu_{1};NLL",
Range=(mu_observed[0] - 1, mu_observed[0] + 1),
)
"""
RooAbsArg &my_deref(std::unique_ptr<RooAbsArg> const& ptr) { return *ptr; }
"""
)
frame2 = x_vars[0].frame(Title="Likelihood ratio r(x_{1}|#mu_{1}=2.5);x_{1};p_{gauss}/p_{uniform}")
single_canvas = True
c =
ROOT.TCanvas(
"",
"", 1200
if single_canvas
else 600, 600)
if single_canvas:
if single_canvas:
else:
if not single_canvas:
for nll in [nll_gauss, nllr_learned, nll_morph]:
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
RooFitResult: minimized FCN value: 36418.9, estimated distance to minimum: 1.43276e-06
covariance matrix quality: Full, accurate covariance matrix
Status : MINIMIZE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
mu0 2.5116e+00 +/- 1.50e-02
mu1 2.0000e+00 +/- 1.50e-02
RooFitResult: minimized FCN value: -23474.8, estimated distance to minimum: 1.23305e-05
covariance matrix quality: Full, accurate covariance matrix
Status : MINIMIZE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
mu0 2.3674e+00 +/- 1.43e-02
mu1 2.0127e+00 +/- 1.41e-02
RooFitResult: minimized FCN value: 38093, estimated distance to minimum: 0.000220953
covariance matrix quality: Full, accurate covariance matrix
Status : MINIMIZE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
mu0 2.3835e+00 +/- 3.71e-03
mu1 1.8343e+00 +/- 1.78e-02
- Date
- July 2024
- Author
- Robin Syring
Definition in file rf617_simulation_based_inference_multidimensional.py.