This tutorial first shows how to export a RooDataSet to NumPy arrays or a Pandas DataFrame, and then it shows you how to create a RooDataSet from NumPy arrays.
import ROOT
import numpy as np
# Number of events, shared by the generated RooDataSet and the NumPy sample
# that is drawn further down in this script.
n_events = 10000

# Observable and the Gaussian model defined on it.
x = ROOT.RooRealVar("x", "x", -10, 10)
mean = ROOT.RooRealVar("mean", "mean of gaussian", 1, -10, 10)
sigma = ROOT.RooRealVar("sigma", "width of gaussian", 1, 0.1, 10)
gauss = ROOT.RooGaussian("gauss", "gaussian PDF", x, mean, sigma)

# Generate the toy dataset. Use n_events rather than repeating the literal so
# the generated sample and the NumPy sample always have the same size.
data = gauss.generate(ROOT.RooArgSet(x), n_events)

# Export to NumPy: the result is indexed by variable name.
arrays = data.to_numpy()
print("Mean of numpy array:", np.mean(arrays["x"]))
print("Standard deviation of numpy array:", np.std(arrays["x"]))

# Export the same dataset to pandas.
df = data.to_pandas()
# Best-effort histogram of the exported DataFrame. Any failure (matplotlib
# missing, no usable display backend, ...) is reported and the script goes on.
skip_message = 'Skipping `df.hist(column="x", bins=x.bins())` because matplotlib could not be imported or was not able to display the plot.'
try:
    # Imported here so that a missing matplotlib triggers the fallback below.
    import matplotlib.pyplot as plt

    df.hist(bins=x.bins(), column="x")
except Exception:
    print(skip_message)
# Drop the exported objects before building a dataset in the other direction.
del data, arrays, df

# Draw a NumPy sample centred at -1 with unit width.
x_arr = np.random.normal(loc=-1.0, scale=1.0, size=(n_events,))

# Import the sample into a RooDataSet: a dict of arrays keyed by variable
# name, plus the list of matching variables.
data = ROOT.RooDataSet.from_numpy({"x": x_arr}, [x])

# Fit the Gaussian to the imported data quietly and keep the fit result.
fit_result = gauss.fitTo(data, Save=True, PrintLevel=-1)
fit_result.Print()

# Overlay data and fitted pdf on one frame and save the canvas as a PNG.
xframe = x.frame(Title="Gaussian pdf")
data.plotOn(xframe)
gauss.plotOn(xframe)

canvas_name = "rf409_NumPyPandasToRooFit"
c = ROOT.TCanvas(canvas_name, canvas_name, 800, 400)
xframe.Draw()
c.SaveAs(canvas_name + ".png")
def print_histogram_output(histogram_output):
    """Print a histogram given as a ``(counts, bin_edges)`` pair.

    The counts are printed converted to integers, followed by the first
    entry of ``bin_edges`` (for ``np.histogramdd``-style output this is the
    edge array of the first dimension).
    """
    weights, edges_per_dim = histogram_output
    integer_weights = np.asarray(weights, dtype=int)
    print(integer_weights)
    print(edges_per_dim[0])
# Bin the unbinned dataset and export the resulting RooDataHist to NumPy.
datahist = data.binnedClone()
counts, bin_edges = datahist.to_numpy()

print("Counts and bin edges from RooDataHist.to_numpy:")
print_histogram_output((counts, bin_edges))

# Cross-check against NumPy's own histogramming of the same sample.
print("Counts and bin edges from np.histogram:")
print_histogram_output(np.histogramdd([x_arr], bins=[x.bins()]))

# Round trip 1: import the counts relying on the binning of x.
datahist_new_1 = ROOT.RooDataHist.from_numpy(counts, [x])
print("RooDataHist imported with default binning and exported back to numpy:")
print_histogram_output(datahist_new_1.to_numpy())

# Round trip 2: explicit bin edges, one array per dimension.
bins = [np.linspace(-10, 10, num=21)]
counts, _ = np.histogramdd([x_arr], bins=bins)
datahist_new_2 = ROOT.RooDataHist.from_numpy(counts, [x], bins=bins)
print("RooDataHist imported with linspace binning and exported back to numpy:")
print_histogram_output(datahist_new_2.to_numpy())

# Round trip 3: uniform binning given as a bin count plus a range.
ranges = [(-10, 10)]
bins = [20]
counts, _ = np.histogramdd([x_arr], bins=bins, range=ranges)
datahist_new_3 = ROOT.RooDataHist.from_numpy(counts, [x], bins=bins, ranges=ranges)
print("RooDataHist imported with uniform binning and exported back to numpy:")
print_histogram_output(datahist_new_3.to_numpy())
[#1] INFO:Fitting -- RooAbsPdf::fitTo(gauss_over_gauss_Int[x]) fixing normalization set for coefficient determination to observables in data
[#1] INFO:Fitting -- using generic CPU library compiled with no vectorizations
[#1] INFO:Fitting -- Creation of NLL object took 20.9986 ms
[#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_gauss_over_gauss_Int[x]_) Summation contains a RooNLLVar, using its error level
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: activating const optimization
[#1] INFO:Minimization -- [fitFCN] No discrete parameters, performing continuous minimization only
[#1] INFO:Minimization -- RooAbsMinimizerFcn::setOptimizeConst: deactivating const optimization
RooFitResult: minimized FCN value: 14132.3, estimated distance to minimum: 4.7497e-09
covariance matrix quality: Full, accurate covariance matrix
Status : MINIMIZE=0 HESSE=0
Floating Parameter FinalValue +/- Error
-------------------- --------------------------
mean -1.0201e+00 +/- 9.94e-03
sigma 9.9431e-01 +/- 7.03e-03
Mean of numpy array: 1.0066466535473986
Standard deviation of numpy array: 0.997349967781135
Counts and bin edges from RooDataHist.to_numpy:
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 1 3 6 6 15 17 29 51 91 152
185 294 360 428 531 660 683 756 814 805 718 678 657 509 430 364 258 183
118 66 68 20 23 10 6 5 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[-1.00000000e+01 -9.80000000e+00 -9.60000000e+00 -9.40000000e+00
-9.20000000e+00 -9.00000000e+00 -8.80000000e+00 -8.60000000e+00
-8.40000000e+00 -8.20000000e+00 -8.00000000e+00 -7.80000000e+00
-7.60000000e+00 -7.40000000e+00 -7.20000000e+00 -7.00000000e+00
-6.80000000e+00 -6.60000000e+00 -6.40000000e+00 -6.20000000e+00
-6.00000000e+00 -5.80000000e+00 -5.60000000e+00 -5.40000000e+00
-5.20000000e+00 -5.00000000e+00 -4.80000000e+00 -4.60000000e+00
-4.40000000e+00 -4.20000000e+00 -4.00000000e+00 -3.80000000e+00
-3.60000000e+00 -3.40000000e+00 -3.20000000e+00 -3.00000000e+00
-2.80000000e+00 -2.60000000e+00 -2.40000000e+00 -2.20000000e+00
-2.00000000e+00 -1.80000000e+00 -1.60000000e+00 -1.40000000e+00
-1.20000000e+00 -1.00000000e+00 -8.00000000e-01 -6.00000000e-01
-4.00000000e-01 -2.00000000e-01 5.55111512e-16 2.00000000e-01
4.00000000e-01 6.00000000e-01 8.00000000e-01 1.00000000e+00
1.20000000e+00 1.40000000e+00 1.60000000e+00 1.80000000e+00
2.00000000e+00 2.20000000e+00 2.40000000e+00 2.60000000e+00
2.80000000e+00 3.00000000e+00 3.20000000e+00 3.40000000e+00
3.60000000e+00 3.80000000e+00 4.00000000e+00 4.20000000e+00
4.40000000e+00 4.60000000e+00 4.80000000e+00 5.00000000e+00
5.20000000e+00 5.40000000e+00 5.60000000e+00 5.80000000e+00
6.00000000e+00 6.20000000e+00 6.40000000e+00 6.60000000e+00
6.80000000e+00 7.00000000e+00 7.20000000e+00 7.40000000e+00
7.60000000e+00 7.80000000e+00 8.00000000e+00 8.20000000e+00
8.40000000e+00 8.60000000e+00 8.80000000e+00 9.00000000e+00
9.20000000e+00 9.40000000e+00 9.60000000e+00 9.80000000e+00
1.00000000e+01]
Counts and bin edges from np.histogram:
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 1 3 6 6 15 17 29 51 91 152
185 294 360 428 531 660 683 756 814 805 718 678 657 509 430 364 258 183
118 66 68 20 23 10 6 5 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[-1.00000000e+01 -9.80000000e+00 -9.60000000e+00 -9.40000000e+00
-9.20000000e+00 -9.00000000e+00 -8.80000000e+00 -8.60000000e+00
-8.40000000e+00 -8.20000000e+00 -8.00000000e+00 -7.80000000e+00
-7.60000000e+00 -7.40000000e+00 -7.20000000e+00 -7.00000000e+00
-6.80000000e+00 -6.60000000e+00 -6.40000000e+00 -6.20000000e+00
-6.00000000e+00 -5.80000000e+00 -5.60000000e+00 -5.40000000e+00
-5.20000000e+00 -5.00000000e+00 -4.80000000e+00 -4.60000000e+00
-4.40000000e+00 -4.20000000e+00 -4.00000000e+00 -3.80000000e+00
-3.60000000e+00 -3.40000000e+00 -3.20000000e+00 -3.00000000e+00
-2.80000000e+00 -2.60000000e+00 -2.40000000e+00 -2.20000000e+00
-2.00000000e+00 -1.80000000e+00 -1.60000000e+00 -1.40000000e+00
-1.20000000e+00 -1.00000000e+00 -8.00000000e-01 -6.00000000e-01
-4.00000000e-01 -2.00000000e-01 5.55111512e-16 2.00000000e-01
4.00000000e-01 6.00000000e-01 8.00000000e-01 1.00000000e+00
1.20000000e+00 1.40000000e+00 1.60000000e+00 1.80000000e+00
2.00000000e+00 2.20000000e+00 2.40000000e+00 2.60000000e+00
2.80000000e+00 3.00000000e+00 3.20000000e+00 3.40000000e+00
3.60000000e+00 3.80000000e+00 4.00000000e+00 4.20000000e+00
4.40000000e+00 4.60000000e+00 4.80000000e+00 5.00000000e+00
5.20000000e+00 5.40000000e+00 5.60000000e+00 5.80000000e+00
6.00000000e+00 6.20000000e+00 6.40000000e+00 6.60000000e+00
6.80000000e+00 7.00000000e+00 7.20000000e+00 7.40000000e+00
7.60000000e+00 7.80000000e+00 8.00000000e+00 8.20000000e+00
8.40000000e+00 8.60000000e+00 8.80000000e+00 9.00000000e+00
9.20000000e+00 9.40000000e+00 9.60000000e+00 9.80000000e+00
1.00000000e+01]
RooDataHist imported with default binning and exported back to numpy:
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 1 3 6 6 15 17 29 51 91 152
185 294 360 428 531 660 683 756 814 805 718 678 657 509 430 364 258 183
118 66 68 20 23 10 6 5 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0]
[-1.00000000e+01 -9.80000000e+00 -9.60000000e+00 -9.40000000e+00
-9.20000000e+00 -9.00000000e+00 -8.80000000e+00 -8.60000000e+00
-8.40000000e+00 -8.20000000e+00 -8.00000000e+00 -7.80000000e+00
-7.60000000e+00 -7.40000000e+00 -7.20000000e+00 -7.00000000e+00
-6.80000000e+00 -6.60000000e+00 -6.40000000e+00 -6.20000000e+00
-6.00000000e+00 -5.80000000e+00 -5.60000000e+00 -5.40000000e+00
-5.20000000e+00 -5.00000000e+00 -4.80000000e+00 -4.60000000e+00
-4.40000000e+00 -4.20000000e+00 -4.00000000e+00 -3.80000000e+00
-3.60000000e+00 -3.40000000e+00 -3.20000000e+00 -3.00000000e+00
-2.80000000e+00 -2.60000000e+00 -2.40000000e+00 -2.20000000e+00
-2.00000000e+00 -1.80000000e+00 -1.60000000e+00 -1.40000000e+00
-1.20000000e+00 -1.00000000e+00 -8.00000000e-01 -6.00000000e-01
-4.00000000e-01 -2.00000000e-01 5.55111512e-16 2.00000000e-01
4.00000000e-01 6.00000000e-01 8.00000000e-01 1.00000000e+00
1.20000000e+00 1.40000000e+00 1.60000000e+00 1.80000000e+00
2.00000000e+00 2.20000000e+00 2.40000000e+00 2.60000000e+00
2.80000000e+00 3.00000000e+00 3.20000000e+00 3.40000000e+00
3.60000000e+00 3.80000000e+00 4.00000000e+00 4.20000000e+00
4.40000000e+00 4.60000000e+00 4.80000000e+00 5.00000000e+00
5.20000000e+00 5.40000000e+00 5.60000000e+00 5.80000000e+00
6.00000000e+00 6.20000000e+00 6.40000000e+00 6.60000000e+00
6.80000000e+00 7.00000000e+00 7.20000000e+00 7.40000000e+00
7.60000000e+00 7.80000000e+00 8.00000000e+00 8.20000000e+00
8.40000000e+00 8.60000000e+00 8.80000000e+00 9.00000000e+00
9.20000000e+00 9.40000000e+00 9.60000000e+00 9.80000000e+00
1.00000000e+01]
RooDataHist imported with linspace binning and exported back to numpy:
[ 0 0 0 0 0 16 203 1419 3444 3367 1353 187 11 0
0 0 0 0 0 0]
[-10. -9. -8. -7. -6. -5. -4. -3. -2. -1. 0. 1. 2. 3.
4. 5. 6. 7. 8. 9. 10.]
RooDataHist imported with uniform binning and exported back to numpy:
[ 0 0 0 0 0 16 203 1419 3444 3367 1353 187 11 0
0 0 0 0 0 0]
[-10. -9. -8. -7. -6. -5. -4. -3. -2. -1. 0. 1. 2. 3.
4. 5. 6. 7. 8. 9. 10.]