Logo ROOT   6.18/05
Reference Guide
PDF.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Asen Christov, Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Jan Therhaag, Eckhard von Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : PDF *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
17 * Eckhard von Toerne <evt@physik.uni-bonn.de> - U of Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
20 * *
21 * Copyright (c) 2005-2011: *
22 * CERN, Switzerland *
23 * U. of Victoria, Canada *
24 * MPI-K Heidelberg, Germany *
25 * Freiburg U., Germany *
26 * U. of Bonn, Germany *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (http://tmva.sourceforge.net/LICENSE) *
31 **********************************************************************************/
32
33/*! \class TMVA::PDF
34\ingroup TMVA
35PDF wrapper for histograms; uses user-defined spline interpolation.
36*/
37
38#include "TMVA/PDF.h"
39
40#include "TMVA/Configurable.h"
41#include "TMVA/KDEKernel.h"
42#include "TMVA/MsgLogger.h"
43#include "TMVA/Types.h"
44#include "TMVA/Tools.h"
45#include "TMVA/TSpline1.h"
46#include "TMVA/TSpline2.h"
47#include "TMVA/Version.h"
48
49#include "Riostream.h"
50#include "TF1.h"
51#include "TH1F.h"
52#include "TMath.h"
53#include "TVectorD.h"
54
55#include <cstdlib>
56#include <iomanip>
57
58// static configuration settings
61const Double_t TMVA::PDF::fgEpsilon = 1.0e-12;
62
64
65////////////////////////////////////////////////////////////////////////////////
66/// default constructor needed for ROOT I/O
67
69: Configurable (""),
70 fUseHistogram ( kFALSE ),
71 fPDFName ( name ),
72 fNsmooth ( 0 ),
73 fMinNsmooth (-1 ),
74 fMaxNsmooth (-1 ),
75 fNSmoothHist ( 0 ),
76 fInterpolMethod( PDF::kSpline2 ),
77 fSpline ( 0 ),
78 fPDFHist ( 0 ),
79 fHist ( 0 ),
80 fHistOriginal ( 0 ),
81 fGraph ( 0 ),
82 fIGetVal ( 0 ),
83 fHistAvgEvtPerBin ( 0 ),
84 fHistDefinedNBins ( 0 ),
85 fKDEtypeString ( 0 ),
86 fKDEiterString ( 0 ),
87 fBorderMethodString( 0 ),
88 fInterpolateString ( 0 ),
89 fKDEtype ( KDEKernel::kNone ),
90 fKDEiter ( KDEKernel::kNonadaptiveKDE ),
91 fKDEborder ( KDEKernel::kNoTreatment ),
92 fFineFactor ( 0. ),
93 fReadingVersion( 0 ),
94 fCheckHist ( kFALSE ),
95 fNormalize ( norm ),
96 fSuffix ( "" ),
97 fLogger ( 0 )
98{
99 fLogger = new MsgLogger(this);
100 GetThisPdfThreadLocal() = this;
101}
102
103////////////////////////////////////////////////////////////////////////////////
104/// constructor of spline based PDF:
105
107 const TH1 *hist,
109 Int_t minnsmooth,
110 Int_t maxnsmooth,
111 Bool_t checkHist,
112 Bool_t norm) :
113 Configurable (""),
114 fUseHistogram ( kFALSE ),
115 fPDFName ( name ),
116 fMinNsmooth ( minnsmooth ),
117 fMaxNsmooth ( maxnsmooth ),
118 fNSmoothHist ( 0 ),
119 fInterpolMethod( method ),
120 fSpline ( 0 ),
121 fPDFHist ( 0 ),
122 fHist ( 0 ),
123 fHistOriginal ( 0 ),
124 fGraph ( 0 ),
125 fIGetVal ( 0 ),
126 fHistAvgEvtPerBin ( 0 ),
127 fHistDefinedNBins ( 0 ),
128 fKDEtypeString ( 0 ),
129 fKDEiterString ( 0 ),
130 fBorderMethodString( 0 ),
131 fInterpolateString ( 0 ),
132 fKDEtype ( KDEKernel::kNone ),
133 fKDEiter ( KDEKernel::kNonadaptiveKDE ),
134 fKDEborder ( KDEKernel::kNoTreatment ),
135 fFineFactor ( 0. ),
136 fReadingVersion( 0 ),
137 fCheckHist ( checkHist ),
138 fNormalize ( norm ),
139 fSuffix ( "" ),
140 fLogger ( 0 )
141{
142 fLogger = new MsgLogger(this);
143 BuildPDF( hist );
144}
145
146////////////////////////////////////////////////////////////////////////////////
147/// constructor of kernel based PDF:
148
150 const TH1* hist,
154 Float_t FineFactor,
155 Bool_t norm) :
156 Configurable (""),
157 fUseHistogram ( kFALSE ),
158 fPDFName ( name ),
159 fNsmooth ( 0 ),
160 fMinNsmooth (-1 ),
161 fMaxNsmooth (-1 ),
162 fNSmoothHist ( 0 ),
163 fInterpolMethod( PDF::kKDE ),
164 fSpline ( 0 ),
165 fPDFHist ( 0 ),
166 fHist ( 0 ),
167 fHistOriginal ( 0 ),
168 fGraph ( 0 ),
169 fIGetVal ( 0 ),
170 fHistAvgEvtPerBin ( 0 ),
171 fHistDefinedNBins ( 0 ),
172 fKDEtypeString ( 0 ),
173 fKDEiterString ( 0 ),
174 fBorderMethodString( 0 ),
175 fInterpolateString ( 0 ),
176 fKDEtype ( ktype ),
177 fKDEiter ( kiter ),
178 fKDEborder ( kborder ),
179 fFineFactor ( FineFactor),
180 fReadingVersion( 0 ),
181 fCheckHist ( kFALSE ),
182 fNormalize ( norm ),
183 fSuffix ( "" ),
184 fLogger ( 0 )
185{
186 fLogger = new MsgLogger(this);
187 BuildPDF( hist );
188}
189
190////////////////////////////////////////////////////////////////////////////////
191
193 const TString& options,
194 const TString& suffix,
195 PDF* defaultPDF,
196 Bool_t norm) :
197 Configurable (options),
198 fUseHistogram ( kFALSE ),
199 fPDFName ( name ),
200 fNsmooth ( 0 ),
201 fMinNsmooth ( -1 ),
202 fMaxNsmooth ( -1 ),
203 fNSmoothHist ( 0 ),
204 fInterpolMethod( PDF::kSpline0 ),
205 fSpline ( 0 ),
206 fPDFHist ( 0 ),
207 fHist ( 0 ),
208 fHistOriginal ( 0 ),
209 fGraph ( 0 ),
210 fIGetVal ( 0 ),
211 fHistAvgEvtPerBin ( 50 ),
212 fHistDefinedNBins ( 0 ),
213 fKDEtypeString ( "Gauss" ),
214 fKDEiterString ( "Nonadaptive" ),
215 fBorderMethodString( "None" ),
216 fInterpolateString ( "Spline2" ),
217 fKDEtype ( KDEKernel::kNone ),
218 fKDEiter ( KDEKernel::kNonadaptiveKDE ),
219 fKDEborder ( KDEKernel::kNoTreatment ),
220 fFineFactor ( 1. ),
221 fReadingVersion( 0 ),
222 fCheckHist ( kFALSE ),
223 fNormalize ( norm ),
224 fSuffix ( suffix ),
225 fLogger ( 0 )
226{
227 fLogger = new MsgLogger(this);
228 if (defaultPDF != 0) {
229 fNsmooth = defaultPDF->fNsmooth;
230 fMinNsmooth = defaultPDF->fMinNsmooth;
231 fMaxNsmooth = defaultPDF->fMaxNsmooth;
235 fKDEtypeString = defaultPDF->fKDEtypeString;
236 fKDEiterString = defaultPDF->fKDEiterString;
237 fFineFactor = defaultPDF->fFineFactor;
239 fCheckHist = defaultPDF->fCheckHist;
241 }
242}
243
244//___________________fNSmoothHist____________________________________________________
246{
247 // destructor
248 if (fSpline != NULL) delete fSpline;
249 if (fHist != NULL) delete fHist;
250 if (fPDFHist != NULL) delete fPDFHist;
251 if (fHistOriginal != NULL) delete fHistOriginal;
252 if (fIGetVal != NULL) delete fIGetVal;
253 if (fGraph != NULL) delete fGraph;
254 delete fLogger;
255}
256
257////////////////////////////////////////////////////////////////////////////////
258
259void TMVA::PDF::BuildPDF( const TH1* hist )
260{
261 GetThisPdfThreadLocal() = this;
262 // sanity check
263 if (hist == NULL) Log() << kFATAL << "Called without valid histogram pointer!" << Endl;
264
265 // histogram should be non empty
266 if (hist->GetEntries() <= 0)
267 Log() << kFATAL << "Number of entries <= 0 (" << hist->GetEntries() << " in histogram: " << hist->GetTitle() << ")" << Endl;
268
269 if (fInterpolMethod == PDF::kKDE) {
270 Log()<< kDEBUG << "Create "
271 << ((fKDEiter == KDEKernel::kNonadaptiveKDE) ? "nonadaptive " :
272 (fKDEiter == KDEKernel::kAdaptiveKDE) ? "adaptive " : "??? ")
273 << ((fKDEtype == KDEKernel::kGauss) ? "Gauss " : "??? ")
274 << "type KDE kernel for histogram: \"" << hist->GetName() << "\""
275 << Endl;
276 }
277 else {
278 // another sanity check (nsmooth<0 indicated build with KDE)
279 if (fMinNsmooth<0)
280 Log() << kFATAL << "PDF construction called with minnsmooth<0" << Endl;
281 else if (fMaxNsmooth<=0)
282 fMaxNsmooth = fMinNsmooth;
283 else if (fMaxNsmooth<fMinNsmooth)
284 Log() << kFATAL << "PDF construction called with maxnsmooth<minnsmooth" << Endl;
285 }
286
287 fHistOriginal = (TH1F*)hist->Clone( TString(hist->GetName()) + "_original" );
288 fHist = (TH1F*)hist->Clone( TString(hist->GetName()) + "_smoothed" );
289 fHistOriginal->SetTitle( fHistOriginal->GetName() ); // reset to new title as well
290 fHist ->SetTitle( fHist->GetName() );
291
292 // do not store in current target file
293 fHistOriginal->SetDirectory(0);
294 fHist ->SetDirectory(0);
295 fUseHistogram = kFALSE;
296
297 if (fInterpolMethod == PDF::kKDE) BuildKDEPDF();
298 else BuildSplinePDF();
299}
300
301////////////////////////////////////////////////////////////////////////////////
302
304{
305 Int_t ResolutionFactor = (fInterpolMethod == PDF::kKDE) ? 5 : 1;
306 if (evtNum == 0 && fHistDefinedNBins == 0)
307 Log() << kFATAL << "No number of bins set for PDF" << Endl;
308 else if (fHistDefinedNBins > 0)
309 return fHistDefinedNBins * ResolutionFactor;
310 else if ( evtNum > 0 && fHistAvgEvtPerBin > 0 )
311 return evtNum / fHistAvgEvtPerBin * ResolutionFactor;
312 else
313 Log() << kFATAL << "No number of bins or average event per bin set for PDF" << fHistAvgEvtPerBin << Endl;
314 return 0;
315}
316
317////////////////////////////////////////////////////////////////////////////////
318/// build the PDF from the original histograms
319
321{
322 // (not useful for discrete distributions, or if no splines are requested)
323 if (fInterpolMethod != PDF::kSpline0 && fCheckHist) CheckHist();
324 // use ROOT TH1 smooth method
325 fNSmoothHist = 0;
326 if (fMaxNsmooth > 0 && fMinNsmooth >=0 ) SmoothHistogram();
327
328 // fill histogramm to graph
329 FillHistToGraph();
330
331 // fGraph->Print();
332 switch (fInterpolMethod) {
333
334 case kSpline0:
335 // use original histogram as reference
336 // this is useful, eg, for discrete variables
337 fUseHistogram = kTRUE;
338 break;
339
340 case kSpline1:
341 fSpline = new TMVA::TSpline1( "spline1", new TGraph(*fGraph) );
342 break;
343
344 case kSpline2:
345 fSpline = new TMVA::TSpline2( "spline2", new TGraph(*fGraph) );
346 break;
347
348 case kSpline3:
349 fSpline = new TSpline3( "spline3", new TGraph(*fGraph) );
350 break;
351
352 case kSpline5:
353 fSpline = new TSpline5( "spline5", new TGraph(*fGraph) );
354 break;
355
356 default:
357 Log() << kWARNING << "No valid interpolation method given! Use Spline2" << Endl;
358 fSpline = new TMVA::TSpline2( "spline2", new TGraph(*fGraph) );
359 Log() << kFATAL << " Well.. .thinking about it, I better quit so you notice you are forced to fix the mistake " << Endl;
360 std::exit(1);
361 }
362 // fill into histogram
363 FillSplineToHist();
364
365 if (!UseHistogram()) {
366 fSpline->SetTitle( (TString)fHist->GetTitle() + fSpline->GetTitle() );
367 fSpline->SetName ( (TString)fHist->GetName() + fSpline->GetName() );
368 }
369
370
371 // sanity check
372 Double_t integral = GetIntegral();
373 if (integral < 0) Log() << kFATAL << "Integral: " << integral << " <= 0" << Endl;
374
375 // normalize
376 if (fNormalize)
377 if (integral>0) fPDFHist->Scale( 1.0/integral );
378
379 fPDFHist->SetDirectory(0);
380}
381
382////////////////////////////////////////////////////////////////////////////////
383/// creates high-binned reference histogram to be used instead of the
384/// PDF for speed reasons
385
387{
388 fPDFHist = new TH1F( "", "", fgNbin_PdfHist, GetXmin(), GetXmax() );
389 fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_KDE" );
390 fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_KDE" );
391
392 // create the kernel object
393 Float_t histoLowEdge = fHist->GetBinLowEdge(1);
394 Float_t histoUpperEdge = fPDFHist->GetBinLowEdge(fPDFHist->GetNbinsX()) + fPDFHist->GetBinWidth(fPDFHist->GetNbinsX());
395 KDEKernel *kern = new TMVA::KDEKernel( fKDEiter,
396 fHist, histoLowEdge, histoUpperEdge,
397 fKDEborder, fFineFactor );
398 kern->SetKernelType(fKDEtype);
399
400 for (Int_t i=1;i<fHist->GetNbinsX();i++) {
401 // loop over the bins of the original histo
402 for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
403 // loop over the bins of the new histo and fill it
404 fPDFHist->AddBinContent(j,fHist->GetBinContent(i)*
405 kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
406 fPDFHist->GetBinLowEdge(j+1),
407 fHist->GetBinCenter(i),
408 i)
409 );
410 }
411 if (fKDEborder == 3) { // mirror the samples and fill them again
412 // in order to save time do the mirroring only for the first (the lower) 1/5 of the histo to the left;
413 // and the last (the higher) 1/5 of the histo to the right.
414 // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
415 if (i < fHist->GetNbinsX()/5 ) { // the first (the lower) 1/5 of the histo
416 for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
417 // loop over the bins of the PDF histo and fill it
418 fPDFHist->AddBinContent(j,fHist->GetBinContent(i)*
419 kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
420 fPDFHist->GetBinLowEdge(j+1),
421 2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
422 i)
423 );
424 }
425 }
426 if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
427 for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
428 // loop over the bins of the PDF histo and fill it
429 fPDFHist->AddBinContent( j,fHist->GetBinContent(i)*
430 kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
431 fPDFHist->GetBinLowEdge(j+1),
432 2*histoUpperEdge-fHist->GetBinCenter(i), i) );
433 }
434 }
435 }
436 }
437
438 fPDFHist->SetEntries(fHist->GetEntries());
439
440 delete kern;
441
442 // sanity check
443 Double_t integral = GetIntegral();
444 if (integral < 0) Log() << kFATAL << "Integral: " << integral << " <= 0" << Endl;
445
446 // normalize
447 if (fNormalize)
448 if (integral>0) fPDFHist->Scale( 1.0/integral );
449 fPDFHist->SetDirectory(0);
450}
451
452////////////////////////////////////////////////////////////////////////////////
453
455{
456 if(fHist->GetNbinsX()==1) return;
457 if (fMaxNsmooth == fMinNsmooth) {
458 fHist->Smooth( fMinNsmooth );
459 return;
460 }
461
462 //calculating Mean, RMS of the relative errors and using them to set
463 //the boundaries of the linear transformation
464 Float_t Err=0, ErrAvg=0, ErrRMS=0 ; Int_t num=0, smooth;
465 for (Int_t bin=0; bin<fHist->GetNbinsX(); bin++) {
466 if (fHist->GetBinContent(bin+1) <= fHist->GetBinError(bin+1)) continue;
467 Err = fHist->GetBinError(bin+1) / fHist->GetBinContent(bin+1);
468 ErrAvg += Err; ErrRMS += Err*Err; num++;
469 }
470 ErrAvg /= num;
471 ErrRMS = TMath::Sqrt(ErrRMS/num-ErrAvg*ErrAvg) ;
472
473 //linearly convent the histogram to a vector of smoothnum
474 Float_t MaxErr=ErrAvg+ErrRMS, MinErr=ErrAvg-ErrRMS;
475 fNSmoothHist = new TH1I("","",fHist->GetNbinsX(),0,fHist->GetNbinsX());
476 fNSmoothHist->SetTitle( (TString)fHist->GetTitle() + "_Nsmooth" );
477 fNSmoothHist->SetName ( (TString)fHist->GetName() + "_Nsmooth" );
478 for (Int_t bin=0; bin<fHist->GetNbinsX(); bin++) {
479 if (fHist->GetBinContent(bin+1) <= fHist->GetBinError(bin+1))
480 smooth=fMaxNsmooth;
481 else {
482 Err = fHist->GetBinError(bin+1) / fHist->GetBinContent(bin+1);
483 smooth=(Int_t)((Err-MinErr) /(MaxErr-MinErr) * (fMaxNsmooth-fMinNsmooth)) + fMinNsmooth;
484 }
485 smooth = TMath::Max(fMinNsmooth,TMath::Min(fMaxNsmooth,smooth));
486 fNSmoothHist->SetBinContent(bin+1,smooth);
487 }
488
489 //find regions of constant smoothnum, starting from the highest amount of
490 //smoothing. So the last iteration all the histogram will be smoothed as a whole
491 for (Int_t n=fMaxNsmooth; n>=0; n--) {
492 //all the histogram has to be smoothed at least fMinNsmooth
493 if (n <= fMinNsmooth) { fHist->Smooth(); continue; }
494 Int_t MinBin=-1,MaxBin =-1;
495 for (Int_t bin=0; bin < fHist->GetNbinsX(); bin++) {
496 if (fNSmoothHist->GetBinContent(bin+1) >= n) {
497 if (MinBin==-1) MinBin = bin;
498 else MaxBin=bin;
499 }
500 else if (MaxBin >= 0) {
501#if ROOT_VERSION_CODE > ROOT_VERSION(5,19,2)
502 fHist->Smooth(1,"R");
503#else
504 fHist->Smooth(1,MinBin+1,MaxBin+1);
505#endif
506 MinBin=MaxBin=-1;
507 }
508 else // can't smooth a single bin
509 MinBin = -1;
510 }
511 }
512}
513
514////////////////////////////////////////////////////////////////////////////////
515/// Simple conversion
516
518{
519 fGraph=new TGraph(fHist);
520}
521
522////////////////////////////////////////////////////////////////////////////////
523/// creates high-binned reference histogram to be used instead of the
524/// PDF for speed reasons
525
527{
528 if (UseHistogram()) {
529 // no spline given, use the original histogram
530 fPDFHist = (TH1*)fHist->Clone();
531 fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_spline0" );
532 fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_spline0" );
533 }
534 else {
535 // create new reference histogram
536 fPDFHist = new TH1F( "", "", fgNbin_PdfHist, GetXmin(), GetXmax() );
537 fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_" + fSpline->GetTitle() );
538 fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_" + fSpline->GetTitle() );
539
540 for (Int_t bin=1; bin <= fgNbin_PdfHist; bin++) {
541 Double_t x = fPDFHist->GetBinCenter( bin );
542 Double_t y = fSpline->Eval( x );
543 // sanity correction: in cases where strong slopes exist, accidentally, the
544 // splines can go to zero; in this case we set the corresponding bin content
545 // equal to the bin content of the original histogram
546 if (y <= fgEpsilon) y = fHist->GetBinContent( fHist->FindBin( x ) );
547 fPDFHist->SetBinContent( bin, TMath::Max(y, fgEpsilon) );
548 }
549 }
550 fPDFHist->SetDirectory(0);
551}
552
553////////////////////////////////////////////////////////////////////////////////
554/// sanity check: compare PDF with original histogram
555
557{
558 if (fHist == NULL) {
559 Log() << kFATAL << "<CheckHist> Called without valid histogram pointer!" << Endl;
560 }
561
562 Int_t nbins = fHist->GetNbinsX();
563
564 Int_t emptyBins=0;
565 // count number of empty bins
566 for (Int_t bin=1; bin<=nbins; bin++)
567 if (fHist->GetBinContent(bin) == 0) emptyBins++;
568
569 if (((Float_t)emptyBins/(Float_t)nbins) > 0.5) {
570 Log() << kWARNING << "More than 50% (" << (((Float_t)emptyBins/(Float_t)nbins)*100)
571 <<"%) of the bins in hist '"
572 << fHist->GetName() << "' are empty!" << Endl;
573 Log() << kWARNING << "X_min=" << GetXmin()
574 << " mean=" << fHist->GetMean() << " X_max= " << GetXmax() << Endl;
575 }
576}
577
578////////////////////////////////////////////////////////////////////////////////
579/// comparison of original histogram with reference PDF
580
581void TMVA::PDF::ValidatePDF( TH1* originalHist ) const
582{
583 // if no histogram is given, use the original one (the one the PDF was made from)
584 if (!originalHist) originalHist = fHistOriginal;
585
586 Int_t nbins = originalHist->GetNbinsX();
587
588 // treat errors properly
589 if (originalHist->GetSumw2()->GetSize() == 0) originalHist->Sumw2();
590
591 // ---- first validation: simple(st) possible chi2 test
592 // count number of empty bins
593 Double_t chi2 = 0;
594 Int_t ndof = 0;
595 Int_t nc1 = 0; // deviation counters
596 Int_t nc2 = 0; // deviation counters
597 Int_t nc3 = 0; // deviation counters
598 Int_t nc6 = 0; // deviation counters
599 for (Int_t bin=1; bin<=nbins; bin++) {
600 Double_t x = originalHist->GetBinCenter( bin );
601 Double_t y = originalHist->GetBinContent( bin );
602 Double_t ey = originalHist->GetBinError( bin );
603
604 Int_t binPdfHist = fPDFHist->FindBin( x );
605 if (binPdfHist<0) continue; // happens only if hist-dim>3
606
607 Double_t yref = GetVal( x );
608 Double_t rref = ( originalHist->GetSumOfWeights()/fPDFHist->GetSumOfWeights() *
609 originalHist->GetBinWidth( bin )/fPDFHist->GetBinWidth( binPdfHist ) );
610
611 if (y > 0) {
612 ndof++;
613 Double_t d = TMath::Abs( (y - yref*rref)/ey );
614 // std::cout << "bin: " << bin << " val: " << x << " data(err): " << y << "(" << ey << ") pdf: "
615 // << yref << " dev(chi2): " << d << "(" << chi2 << ") rref: " << rref << std::endl;
616 chi2 += d*d;
617 if (d > 1) { nc1++; if (d > 2) { nc2++; if (d > 3) { nc3++; if (d > 6) nc6++; } } }
618 }
619 }
620
621 Log() << kDEBUG << "Validation result for PDF \"" << originalHist->GetTitle() << "\"" << ": " << Endl;
622 Log() << kDEBUG << Form( " chi2/ndof(!=0) = %.1f/%i = %.2f (Prob = %.2f)",
623 chi2, ndof, chi2/ndof, TMath::Prob( chi2, ndof ) ) << Endl;
624 if ((1.0 - TMath::Prob( chi2, ndof )) > 0.9999994) {
625 Log() << kDEBUG << "Comparison of the original histogram \"" << originalHist->GetTitle() << "\"" << Endl;
626 Log() << kDEBUG << "with the corresponding PDF gave a chi2/ndof of " << chi2/ndof << "," << Endl;
627 Log() << kDEBUG << "which corresponds to a deviation of more than 5 sigma! Please check!" << Endl;
628 }
629 Log() << kDEBUG << Form( " #bins-found(#expected-bins) deviating > [1,2,3,6] sigmas: " \
630 "[%i(%i),%i(%i),%i(%i),%i(%i)]",
631 nc1, Int_t(TMath::Prob(1.0,1)*ndof), nc2, Int_t(TMath::Prob(4.0,1)*ndof),
632 nc3, Int_t(TMath::Prob(9.0,1)*ndof), nc6, Int_t(TMath::Prob(36.0,1)*ndof) ) << Endl;
633}
634
635////////////////////////////////////////////////////////////////////////////////
636/// computes normalisation
637
639{
640 Double_t integral = fPDFHist->GetSumOfWeights();
641 integral *= GetPdfHistBinWidth();
642
643 return integral;
644}
645
646////////////////////////////////////////////////////////////////////////////////
647/// static external auxiliary function (integrand)
648
650{
651 return ThisPDF()->GetVal( x[0] );
652}
653
654////////////////////////////////////////////////////////////////////////////////
655/// computes PDF integral within given ranges
656
658{
659 Double_t integral = 0;
660
661 if (fgManualIntegration) {
662
663 // compute integral by summing over bins
664 Int_t imin = fPDFHist->FindBin(xmin);
665 Int_t imax = fPDFHist->FindBin(xmax);
666 if (imin < 1) imin = 1;
667 if (imax > fPDFHist->GetNbinsX()) imax = fPDFHist->GetNbinsX();
668
669 for (Int_t bini = imin; bini <= imax; bini++) {
670 Float_t dx = fPDFHist->GetBinWidth(bini);
671 // correct for bin fractions in first and last bin
672 if (bini == imin) dx = fPDFHist->GetBinLowEdge(bini+1) - xmin;
673 else if (bini == imax) dx = xmax - fPDFHist->GetBinLowEdge(bini);
674 if (dx < 0 && dx > -1.0e-8) dx = 0; // protect against float->double numerical effects
675 if (dx<0) {
676 Log() << kWARNING
677 << "dx = " << dx << std::endl
678 << "bini = " << bini << std::endl
679 << "xmin = " << xmin << std::endl
680 << "xmax = " << xmax << std::endl
681 << "imin = " << imin << std::endl
682 << "imax = " << imax << std::endl
683 << "low edge of imin" << fPDFHist->GetBinLowEdge(imin) << std::endl
684 << "low edge of imin+1" << fPDFHist->GetBinLowEdge(imin+1) << Endl;
685 Log() << kFATAL << "<GetIntegral> dx = " << dx << " < 0" << Endl;
686 }
687 integral += fPDFHist->GetBinContent(bini)*dx;
688 }
689
690 }
691 else {
692
693 // compute via Gaussian quadrature (C++ version of CERNLIB function DGAUSS)
694 if (fIGetVal == 0) fIGetVal = new TF1( "IGetVal", PDF::IGetVal, GetXmin(), GetXmax(), 0 );
695 integral = fIGetVal->Integral( xmin, xmax );
696 }
697
698 return integral;
699}
700
701////////////////////////////////////////////////////////////////////////////////
702/// returns value PDF(x)
703
705{
706 // check which is filled
707 Int_t bin = fPDFHist->FindBin(x);
708 bin = TMath::Max(bin,1);
709 bin = TMath::Min(bin,fPDFHist->GetNbinsX());
710
711 Double_t retval = 0;
712
713 if (UseHistogram()) {
714 // use directly histogram bins (this is for discrete PDFs)
715 retval = fPDFHist->GetBinContent( bin );
716 }
717 else {
718 // linear interpolation
719 Int_t nextbin = bin;
720 if ((x > fPDFHist->GetBinCenter(bin) && bin != fPDFHist->GetNbinsX()) || bin == 1)
721 nextbin++;
722 else
723 nextbin--;
724
725 // linear interpolation between adjacent bins
726 Double_t dx = fPDFHist->GetBinCenter( bin ) - fPDFHist->GetBinCenter( nextbin );
727 Double_t dy = fPDFHist->GetBinContent( bin ) - fPDFHist->GetBinContent( nextbin );
728 retval = fPDFHist->GetBinContent( bin ) + (x - fPDFHist->GetBinCenter( bin ))*dy/dx;
729 }
730
731 return TMath::Max( retval, fgEpsilon );
732}
733
734////////////////////////////////////////////////////////////////////////////////
735/// returns value \f$ PDF^{-1}(y) \f$
736
737Double_t TMVA::PDF::GetValInverse( Double_t y, Bool_t isMonotonouslyIncreasingFunction ) const
738{
739 Int_t lowerBin=0, higherBin=0;
740 Double_t lowerBinValue=0, higherBinValue=0;
741 FindBinInverse(fPDFHist,lowerBin,higherBin,lowerBinValue,higherBinValue,y,isMonotonouslyIncreasingFunction);
742
743 Double_t xValueLowerBin =fPDFHist->GetBinCenter (lowerBin);
744 Double_t xValueHigherBin=fPDFHist->GetBinCenter (higherBin);
745
746 Double_t length =(higherBinValue-lowerBinValue);
747 Double_t fraction=lowerBinValue;
748 if (length>1.0e-10)
749 fraction=(y-lowerBinValue)/length;
750
751 Double_t lengthX =xValueHigherBin-xValueLowerBin;
752 Double_t x =xValueLowerBin+lengthX*fraction;
753
754 // comparison for test purposes
755 // std::cout << "lb " << lowerBin << " hb " << higherBin << " lbv " << lowerBinValue << " hbv " << higherBinValue << " frac " << fraction << std::endl;
756 // std::cout << "y " << y << " inv x " << x << " straight y " << GetVal(x) << std::endl;
757
758 return x;
759}
760
761////////////////////////////////////////////////////////////////////////////////
762/// find bin from value on ordinate
763
764void TMVA::PDF::FindBinInverse( const TH1* histogram, Int_t& lowerBin, Int_t& higherBin, Double_t& lowerBinValue, Double_t& higherBinValue,
765 Double_t y, Bool_t isMonotonouslyIncreasingFunction ) const
766{
767 if (isMonotonouslyIncreasingFunction) {
768 higherBin=histogram->GetNbinsX();
769 lowerBin =0;
770
771 Int_t bin=higherBin/2;
772
773 while (bin>lowerBin && bin<higherBin) {
774 Double_t binContent=histogram->GetBinContent(bin);
775
776 if (y<binContent) {
777 higherBin =bin;
778 higherBinValue=binContent;
779 }
780 else if (y>=binContent){
781 lowerBin =bin;
782 lowerBinValue =binContent;
783 }
784 bin=lowerBin+(higherBin-lowerBin)/2;
785 }
786 return;
787 }
788 // search sequentially
789 for (Int_t bin=0, binEnd=histogram->GetNbinsX(); bin<binEnd; ++bin) {
790 Double_t binContent=histogram->GetBinContent(bin);
791 if (binContent<y) {
792 lowerBin =bin;
793 higherBin=bin;
794 lowerBinValue =binContent;
795 higherBinValue=binContent;
796 }
797 else {
798 higherBin=bin;
799 higherBinValue=binContent;
800 break;
801 }
802 }
803}
804
805
806////////////////////////////////////////////////////////////////////////////////
807/// define the options (their key words) that can be set in the option string
808///
809/// know options:
810///
811/// PDFInterpol[ivar] <string> Spline0, Spline1, Spline2 <default>, Spline3, Spline5, KDE used to interpolate reference histograms
812/// if no variable index is given, it is valid for ALL the variables
813///
814/// - NSmooth <int> how often the input histos are smoothed
815/// - MinNSmooth <int> min number of smoothing iterations, for bins with most data
816/// - MaxNSmooth <int> max number of smoothing iterations, for bins with least data
817/// - NAvEvtPerBin <int> minimum average number of events per PDF bin
818/// - TransformOutput <bool> transform (often strongly peaked) likelihood output through sigmoid inversion
819/// - fKDEtype <KernelType> type of the Kernel to use (1 is Gaussian)
820/// - fKDEiter <KerneIter> number of iterations (1 --> "static KDE", 2 --> "adaptive KDE")
821/// - fBorderMethod <KernelBorder> the method to take care about "border" effects (1=no treatment , 2=kernel renormalization, 3=sample mirroring)
822
824{
825 DeclareOptionRef( fNsmooth, Form("NSmooth%s",fSuffix.Data()),
826 "Number of smoothing iterations for the input histograms" );
827 DeclareOptionRef( fMinNsmooth, Form("MinNSmooth%s",fSuffix.Data()),
828 "Min number of smoothing iterations, for bins with most data" );
829
830 DeclareOptionRef( fMaxNsmooth, Form("MaxNSmooth%s",fSuffix.Data()),
831 "Max number of smoothing iterations, for bins with least data" );
832
833 DeclareOptionRef( fHistAvgEvtPerBin, Form("NAvEvtPerBin%s",fSuffix.Data()),
834 "Average number of events per PDF bin" );
835
836 DeclareOptionRef( fHistDefinedNBins, Form("Nbins%s",fSuffix.Data()),
837 "Defined number of bins for the histogram from which the PDF is created" );
838
839 DeclareOptionRef( fCheckHist, Form("CheckHist%s",fSuffix.Data()),
840 "Whether or not to check the source histogram of the PDF" );
841
842 DeclareOptionRef( fInterpolateString, Form("PDFInterpol%s",fSuffix.Data()),
843 "Interpolation method for reference histograms (e.g. Spline2 or KDE)" );
844 AddPreDefVal(TString("Spline0")); // take histogram
845 AddPreDefVal(TString("Spline1")); // linear interpolation between bins
846 AddPreDefVal(TString("Spline2")); // quadratic interpolation
847 AddPreDefVal(TString("Spline3")); // cubic interpolation
848 AddPreDefVal(TString("Spline5")); // fifth order polynom interpolation
849 AddPreDefVal(TString("KDE")); // use kernel density estimator
850
851 DeclareOptionRef( fKDEtypeString, Form("KDEtype%s",fSuffix.Data()), "KDE kernel type (1=Gauss)" );
852 AddPreDefVal(TString("Gauss"));
853
854 DeclareOptionRef( fKDEiterString, Form("KDEiter%s",fSuffix.Data()), "Number of iterations (1=non-adaptive, 2=adaptive)" );
855 AddPreDefVal(TString("Nonadaptive"));
856 AddPreDefVal(TString("Adaptive"));
857
858 DeclareOptionRef( fFineFactor , Form("KDEFineFactor%s",fSuffix.Data()),
859 "Fine tuning factor for Adaptive KDE: Factor to multiply the width of the kernel");
860
861 DeclareOptionRef( fBorderMethodString, Form("KDEborder%s",fSuffix.Data()),
862 "Border effects treatment (1=no treatment , 2=kernel renormalization, 3=sample mirroring)" );
863 AddPreDefVal(TString("None"));
864 AddPreDefVal(TString("Renorm"));
865 AddPreDefVal(TString("Mirror"));
866
867 SetConfigName( GetName() );
868 SetConfigDescription( "Configuration options for the PDF class" );
869}
870
871////////////////////////////////////////////////////////////////////////////////
872
874{
875 if (fNsmooth < 0) fNsmooth = 0; // set back to default
876
877 if (fMaxNsmooth < 0 || fMinNsmooth < 0) { // use "Nsmooth" variable
878 fMinNsmooth = fMaxNsmooth = fNsmooth;
879 }
880
881 if (fMaxNsmooth < fMinNsmooth && fMinNsmooth >= 0) { // sanity check
882 Log() << kFATAL << "ERROR: MaxNsmooth = "
883 << fMaxNsmooth << " < MinNsmooth = " << fMinNsmooth << Endl;
884 }
885
886 if (fMaxNsmooth < 0 || fMinNsmooth < 0) {
887 Log() << kFATAL << "ERROR: MaxNsmooth = "
888 << fMaxNsmooth << " or MinNsmooth = " << fMinNsmooth << " smaller than zero" << Endl;
889 }
890
891 //processing the options
892 if (fInterpolateString == "Spline0") fInterpolMethod = TMVA::PDF::kSpline0;
893 else if (fInterpolateString == "Spline1") fInterpolMethod = TMVA::PDF::kSpline1;
894 else if (fInterpolateString == "Spline2") fInterpolMethod = TMVA::PDF::kSpline2;
895 else if (fInterpolateString == "Spline3") fInterpolMethod = TMVA::PDF::kSpline3;
896 else if (fInterpolateString == "Spline5") fInterpolMethod = TMVA::PDF::kSpline5;
897 else if (fInterpolateString == "KDE" ) fInterpolMethod = TMVA::PDF::kKDE;
898 else if (fInterpolateString != "" ) {
899 Log() << kFATAL << "unknown setting for option 'InterpolateMethod': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
900 }
901
902 // init KDE options
903 if (fKDEtypeString == "Gauss" ) fKDEtype = KDEKernel::kGauss;
904 else if (fKDEtypeString != "" )
905 Log() << kFATAL << "unknown setting for option 'KDEtype': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
906 if (fKDEiterString == "Nonadaptive") fKDEiter = KDEKernel::kNonadaptiveKDE;
907 else if (fKDEiterString == "Adaptive" ) fKDEiter = KDEKernel::kAdaptiveKDE;
908 else if (fKDEiterString != "" )// nothing more known
909 Log() << kFATAL << "unknown setting for option 'KDEiter': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
910
911 if ( fBorderMethodString == "None" ) fKDEborder= KDEKernel::kNoTreatment;
912 else if ( fBorderMethodString == "Renorm" ) fKDEborder= KDEKernel::kKernelRenorm;
913 else if ( fBorderMethodString == "Mirror" ) fKDEborder= KDEKernel::kSampleMirror;
914 else if ( fKDEiterString != "" ) { // nothing more known
915 Log() << kFATAL << "unknown setting for option 'KDEBorder': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
916 }
917}
918
919////////////////////////////////////////////////////////////////////////////////
920/// XML file writing
921
922void TMVA::PDF::AddXMLTo( void* parent )
923{
924 void* pdfxml = gTools().AddChild(parent, "PDF");
925 gTools().AddAttr(pdfxml, "Name", fPDFName );
926 gTools().AddAttr(pdfxml, "MinNSmooth", fMinNsmooth );
927 gTools().AddAttr(pdfxml, "MaxNSmooth", fMaxNsmooth );
928 gTools().AddAttr(pdfxml, "InterpolMethod", fInterpolMethod );
929 gTools().AddAttr(pdfxml, "KDE_type", fKDEtype );
930 gTools().AddAttr(pdfxml, "KDE_iter", fKDEiter );
931 gTools().AddAttr(pdfxml, "KDE_border", fKDEborder );
932 gTools().AddAttr(pdfxml, "KDE_finefactor", fFineFactor );
933 void* pdfhist = gTools().AddChild(pdfxml,"Histogram" );
934 TH1* histToWrite = GetOriginalHist();
935 Bool_t hasEquidistantBinning = gTools().HistoHasEquidistantBins(*histToWrite);
936 gTools().AddAttr(pdfhist, "Name", histToWrite->GetName() );
937 gTools().AddAttr(pdfhist, "NBins", histToWrite->GetNbinsX() );
938 gTools().AddAttr(pdfhist, "XMin", histToWrite->GetXaxis()->GetXmin() );
939 gTools().AddAttr(pdfhist, "XMax", histToWrite->GetXaxis()->GetXmax() );
940 gTools().AddAttr(pdfhist, "HasEquidistantBins", hasEquidistantBinning );
941
942 TString bincontent("");
943 for (Int_t i=0; i<histToWrite->GetNbinsX(); i++) {
944 bincontent += gTools().StringFromDouble(histToWrite->GetBinContent(i+1));
945 bincontent += " ";
946 }
947 gTools().AddRawLine(pdfhist, bincontent );
948
949 if (!hasEquidistantBinning) {
950 void* pdfhistbins = gTools().AddChild(pdfxml,"HistogramBinning" );
951 gTools().AddAttr(pdfhistbins, "NBins", histToWrite->GetNbinsX() );
952 TString binns("");
953 for (Int_t i=1; i<=histToWrite->GetNbinsX()+1; i++) {
954 binns += gTools().StringFromDouble(histToWrite->GetXaxis()->GetBinLowEdge(i));
955 binns += " ";
956 }
957 gTools().AddRawLine(pdfhistbins, binns );
958 }
959}
960
961////////////////////////////////////////////////////////////////////////////////
962/// XML file reading
963
964void TMVA::PDF::ReadXML( void* pdfnode )
965{
966 UInt_t enumint;
967
968 gTools().ReadAttr(pdfnode, "MinNSmooth", fMinNsmooth );
969 gTools().ReadAttr(pdfnode, "MaxNSmooth", fMaxNsmooth );
970 gTools().ReadAttr(pdfnode, "InterpolMethod", enumint ); fInterpolMethod = EInterpolateMethod(enumint);
971 gTools().ReadAttr(pdfnode, "KDE_type", enumint ); fKDEtype = KDEKernel::EKernelType(enumint);
972 gTools().ReadAttr(pdfnode, "KDE_iter", enumint ); fKDEiter = KDEKernel::EKernelIter(enumint);
973 gTools().ReadAttr(pdfnode, "KDE_border", enumint ); fKDEborder = KDEKernel::EKernelBorder(enumint);
974 gTools().ReadAttr(pdfnode, "KDE_finefactor", fFineFactor );
975
976 TString hname;
977 UInt_t nbins;
979 Bool_t hasEquidistantBinning;
980
981 void* histch = gTools().GetChild(pdfnode);
982 gTools().ReadAttr( histch, "Name", hname );
983 gTools().ReadAttr( histch, "NBins", nbins );
984 gTools().ReadAttr( histch, "XMin", xmin );
985 gTools().ReadAttr( histch, "XMax", xmax );
986 gTools().ReadAttr( histch, "HasEquidistantBins", hasEquidistantBinning );
987
988 // recreate the original hist
989 TH1* newhist = 0;
990 if (hasEquidistantBinning) {
991 newhist = new TH1F( hname, hname, nbins, xmin, xmax );
992 newhist->SetDirectory(0);
993 const char* content = gTools().GetContent(histch);
994 std::stringstream s(content);
995 Double_t val;
996 for (UInt_t i=0; i<nbins; i++) {
997 s >> val;
998 newhist->SetBinContent(i+1,val);
999 }
1000 }
1001 else {
1002 const char* content = gTools().GetContent(histch);
1003 std::stringstream s(content);
1004 Double_t val;
1005 void* binch = gTools().GetNextChild(histch);
1006 UInt_t nbinning;
1007 gTools().ReadAttr( binch, "NBins", nbinning );
1008 TVectorD binns(nbinning+1);
1009 if (nbinning != nbins) {
1010 Log() << kFATAL << "Number of bins in content and binning array differs"<<Endl;
1011 }
1012 const char* binString = gTools().GetContent(binch);
1013 std::stringstream sb(binString);
1014 for (UInt_t i=0; i<=nbins; i++) sb >> binns[i];
1015 newhist = new TH1F( hname, hname, nbins, binns.GetMatrixArray() );
1016 newhist->SetDirectory(0);
1017 for (UInt_t i=0; i<nbins; i++) {
1018 s >> val;
1019 newhist->SetBinContent(i+1,val);
1020 }
1021 }
1022
1023 TString hnameSmooth = hname;
1024 hnameSmooth.ReplaceAll( "_original", "_smoothed" );
1025
1026 if (fHistOriginal != 0) delete fHistOriginal;
1027 fHistOriginal = newhist;
1028 fHist = (TH1F*)fHistOriginal->Clone( hnameSmooth );
1029 fHist->SetTitle( hnameSmooth );
1030 fHist->SetDirectory(0);
1031
1032 if (fInterpolMethod == PDF::kKDE) BuildKDEPDF();
1033 else BuildSplinePDF();
1034}
1035
1036////////////////////////////////////////////////////////////////////////////////
1037/// write the pdf
1038
1039std::ostream& TMVA::operator<< ( std::ostream& os, const PDF& pdf )
1040{
1041 Int_t dp = os.precision();
1042 os << "MinNSmooth " << pdf.fMinNsmooth << std::endl;
1043 os << "MaxNSmooth " << pdf.fMaxNsmooth << std::endl;
1044 os << "InterpolMethod " << pdf.fInterpolMethod << std::endl;
1045 os << "KDE_type " << pdf.fKDEtype << std::endl;
1046 os << "KDE_iter " << pdf.fKDEiter << std::endl;
1047 os << "KDE_border " << pdf.fKDEborder << std::endl;
1048 os << "KDE_finefactor " << pdf.fFineFactor << std::endl;
1049
1050 TH1* histToWrite = pdf.GetOriginalHist();
1051
1052 const Int_t nBins = histToWrite->GetNbinsX();
1053
1054 // note: this is a schema change introduced for v3.7.3
1055 os << "Histogram "
1056 << histToWrite->GetName()
1057 << " " << nBins // nbins
1058 << " " << std::setprecision(12) << histToWrite->GetXaxis()->GetXmin() // x_min
1059 << " " << std::setprecision(12) << histToWrite->GetXaxis()->GetXmax() // x_max
1060 << std::endl;
1061
1062 // write the smoothed hist
1063 os << "Weights " << std::endl;
1064 os << std::setprecision(8);
1065 for (Int_t i=0; i<nBins; i++) {
1066 os << std::setw(15) << std::left << histToWrite->GetBinContent(i+1) << std::right << " ";
1067 if ((i+1)%5==0) os << std::endl;
1068 }
1069
1070 os << std::setprecision(dp);
1071 return os; // Return the output stream.
1072}
1073
1074////////////////////////////////////////////////////////////////////////////////
1075/// read the tree from an std::istream
1076
1077std::istream& TMVA::operator>> ( std::istream& istr, PDF& pdf )
1078{
1079 TString devnullS;
1080 Int_t valI;
1081 Int_t nbins=-1; // default binning will cause an exit
1082 Float_t xmin=-1., xmax=-1.;
1083 TString hname="_original";
1084 Bool_t doneReading = kFALSE;
1085 while (!doneReading) {
1086 istr >> devnullS;
1087 if (devnullS=="NSmooth")
1088 {istr >> pdf.fMinNsmooth; pdf.fMaxNsmooth=pdf.fMinNsmooth;}
1089 else if (devnullS=="MinNSmooth") istr >> pdf.fMinNsmooth;
1090 else if (devnullS=="MaxNSmooth") istr >> pdf.fMaxNsmooth;
1091 // have to do this with strings to be more stable with developing code
1092 else if (devnullS == "InterpolMethod") { istr >> valI; pdf.fInterpolMethod = PDF::EInterpolateMethod(valI);}
1093 else if (devnullS == "KDE_type") { istr >> valI; pdf.fKDEtype = KDEKernel::EKernelType(valI); }
1094 else if (devnullS == "KDE_iter") { istr >> valI; pdf.fKDEiter = KDEKernel::EKernelIter(valI);}
1095 else if (devnullS == "KDE_border") { istr >> valI; pdf.fKDEborder = KDEKernel::EKernelBorder(valI);}
1096 else if (devnullS == "KDE_finefactor") {
1097 istr >> pdf.fFineFactor;
1098 if (pdf.GetReadingVersion() != 0 && pdf.GetReadingVersion() < TMVA_VERSION(3,7,3)) { // here we expect the histogram limits if the version is below 3.7.3. When version == 0, the newest TMVA version is assumed.
1099 // coverity[tainted_data_argument]
1100 istr >> nbins >> xmin >> xmax;
1101 doneReading = kTRUE;
1102 }
1103 }
1104 else if (devnullS == "Histogram") { istr >> hname >> nbins >> xmin >> xmax; }
1105 else if (devnullS == "Weights") { doneReading = kTRUE; }
1106 }
1107
1108 TString hnameSmooth = hname;
1109 hnameSmooth.ReplaceAll( "_original", "_smoothed" );
1110
1111 // recreate the original hist
1112 if (nbins==-1) {
1113 std::cout << "PDF, trying to create a histogram without defined binning"<< std::endl;
1114 std::exit(1);
1115 }
1116 TH1* newhist = new TH1F( hname,hname, nbins, xmin, xmax );
1117 newhist->SetDirectory(0);
1118 Float_t val;
1119 for (Int_t i=0; i<nbins; i++) {
1120 istr >> val;
1121 newhist->SetBinContent(i+1,val);
1122 }
1123
1124 if (pdf.fHistOriginal != 0) delete pdf.fHistOriginal;
1125 pdf.fHistOriginal = newhist;
1126 pdf.fHist = (TH1F*)pdf.fHistOriginal->Clone( hnameSmooth );
1127 pdf.fHist->SetTitle( hnameSmooth );
1128 pdf.fHist->SetDirectory(0);
1129
1130 if (pdf.fMinNsmooth>=0) pdf.BuildSplinePDF();
1131 else {
1132 pdf.fInterpolMethod = PDF::kKDE;
1133 pdf.BuildKDEPDF();
1134 }
1135
1136 return istr;
1137}
1138
1140{
1141 // return global "this" pointer of PDF
1142 return GetThisPdfThreadLocal();
1143}
const Handle_t kNone
Definition: GuiTypes.h:87
#define d(i)
Definition: RSha256.hxx:102
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
float Float_t
Definition: RtypesCore.h:53
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
char name[80]
Definition: TGX11.cxx:109
float xmin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
char * Form(const char *fmt,...)
#define TMVA_VERSION(a, b, c)
Definition: Version.h:48
Int_t GetSize() const
Definition: TArray.h:47
Double_t GetXmax() const
Definition: TAxis.h:134
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition: TAxis.cxx:504
Double_t GetXmin() const
Definition: TAxis.h:133
1-Dim function class
Definition: TF1.h:211
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
1-D histogram with an int per channel (see TH1 documentation)}
Definition: TH1.h:530
The TH1 histogram class.
Definition: TH1.h:56
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8351
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8554
virtual Double_t GetBinError(Int_t bin) const
Return value of error associated to bin number bin.
Definition: TH1.cxx:8476
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:316
TObject * Clone(const char *newname=0) const
Make a complete copy of the underlying object.
Definition: TH1.cxx:2664
virtual Int_t GetNbinsX() const
Definition: TH1.h:292
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8635
virtual Double_t GetEntries() const
Return the current number of entries.
Definition: TH1.cxx:4277
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4882
virtual TArrayD * GetSumw2()
Definition: TH1.h:308
virtual Double_t GetBinWidth(Int_t bin) const
Return bin width for 1D histogram.
Definition: TH1.cxx:8576
virtual Double_t GetSumOfWeights() const
Return the sum of weights excluding under/overflows.
Definition: TH1.cxx:7389
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition: TH1.cxx:8433
KDE Kernel for "smoothing" the PDFs.
Definition: KDEKernel.h:50
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:120
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:225
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
static const Bool_t fgManualIntegration
Definition: PDF.h:162
KDEKernel::EKernelType fKDEtype
Definition: PDF.h:188
void FillHistToGraph()
Simple conversion.
Definition: PDF.cxx:517
void BuildPDF(const TH1 *theHist)
Definition: PDF.cxx:259
KDEKernel::EKernelBorder fKDEborder
Definition: PDF.h:190
static Double_t IGetVal(Double_t *, Double_t *)
static external auxiliary function (integrand)
Definition: PDF.cxx:649
TString fBorderMethodString
Definition: PDF.h:185
Int_t fMaxNsmooth
Definition: PDF.h:169
TMVA::PDF::EInterpolateMethod fInterpolMethod
Definition: PDF.h:172
void SmoothHistogram()
Definition: PDF.cxx:454
Double_t GetValInverse(Double_t y, Bool_t isMonotonouslyIncreasingFunction=kFALSE) const
returns value
Definition: PDF.cxx:737
void CheckHist() const
sanity check: compare PDF with original histogram
Definition: PDF.cxx:556
void FindBinInverse(const TH1 *histogram, Int_t &lowerBin, Int_t &higherBin, Double_t &lowerBinValue, Double_t &higherBinValue, Double_t y, Bool_t isMonotonouslyIncreasingFunction=kFALSE) const
find bin from value on ordinate
Definition: PDF.cxx:764
Int_t fHistDefinedNBins
Definition: PDF.h:181
TString fKDEiterString
Definition: PDF.h:184
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:964
TString fInterpolateString
Definition: PDF.h:186
Int_t fNsmooth
Definition: PDF.h:167
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: PDF.cxx:823
void AddXMLTo(void *parent)
XML file writing.
Definition: PDF.cxx:922
void BuildSplinePDF()
build the PDF from the original histograms
Definition: PDF.cxx:320
void FillSplineToHist()
creates high-binned reference histogram to be used instead of the PDF for speed reasons
Definition: PDF.cxx:526
void BuildKDEPDF()
creates high-binned reference histogram to be used instead of the PDF for speed reasons
Definition: PDF.cxx:386
Int_t fMinNsmooth
Definition: PDF.h:168
Int_t GetHistNBins(Int_t evtNum=0)
Definition: PDF.cxx:303
static const Double_t fgEpsilon
Definition: PDF.h:163
TH1 * GetOriginalHist() const
Definition: PDF.h:94
Float_t fFineFactor
Definition: PDF.h:191
static PDF *& GetThisPdfThreadLocal()
Definition: PDF.h:205
virtual ~PDF()
Definition: PDF.cxx:245
void ValidatePDF(TH1 *original=0) const
comparison of original histogram with reference PDF
Definition: PDF.cxx:581
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:704
Int_t fHistAvgEvtPerBin
Definition: PDF.h:180
Double_t GetIntegral() const
computes normalisation
Definition: PDF.cxx:638
Bool_t fCheckHist
Definition: PDF.h:195
MsgLogger * fLogger
the suffix for options
Definition: PDF.h:199
TString fKDEtypeString
Definition: PDF.h:183
void ProcessOptions()
Definition: PDF.cxx:873
EInterpolateMethod
Definition: PDF.h:70
@ kKDE
Definition: PDF.h:70
@ kSpline1
Definition: PDF.h:70
@ kSpline0
Definition: PDF.h:70
@ kSpline3
Definition: PDF.h:70
@ kSpline2
Definition: PDF.h:70
@ kSpline5
Definition: PDF.h:70
static const Int_t fgNbin_PdfHist
Definition: PDF.h:161
PDF(const TString &name, Bool_t norm=kTRUE)
default constructor needed for ROOT I/O
Definition: PDF.cxx:68
KDEKernel::EKernelIter fKDEiter
Definition: PDF.h:189
static PDF * ThisPDF(void)
Definition: PDF.cxx:1139
Linear interpolation of TGraph.
Definition: TSpline1.h:43
Quadratic interpolation of TGraph.
Definition: TSpline2.h:43
Bool_t HistoHasEquidistantBins(const TH1 &h)
Definition: Tools.cxx:1499
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1136
Bool_t AddRawLine(void *node, const char *raw)
XML helpers.
Definition: Tools.cxx:1202
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1245
const char * GetContent(void *node)
XML helpers.
Definition: Tools.cxx:1186
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:337
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:355
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:48
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Class to create third splines to interpolate knots Arbitrary conditions can be introduced for first a...
Definition: TSpline.h:192
Class to create quintic natural splines to interpolate knots Arbitrary conditions can be introduced f...
Definition: TSpline.h:248
Basic string class.
Definition: TString.h:131
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
Element * GetMatrixArray()
Definition: TVectorT.h:78
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
Double_t ey[n]
Definition: legend1.C:17
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:146
static constexpr double s
std::istream & operator>>(std::istream &istr, BinaryTree &tree)
Tools & gTools()
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
Double_t Prob(Double_t chi2, Int_t ndf)
Computation of the probability for a certain Chi-squared (chi2) and number of degrees of freedom (ndf...
Definition: TMath.cxx:621
Double_t Log(Double_t x)
Definition: TMath.h:748
Double_t Sqrt(Double_t x)
Definition: TMath.h:679
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180
Short_t Abs(Short_t d)
Definition: TMathBase.h:120