ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
PDF.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Asen Christov, Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : PDF *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
17  * Eckhard von Toerne <evt@physik.uni-bonn.de> - U of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * Freiburg U., Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #include "TMVA/PDF.h"
34 
35 #include "TMVA/KDEKernel.h"
36 #include "TMVA/MsgLogger.h"
37 #include "TMVA/Types.h"
38 #include "TMVA/Tools.h"
39 #include "TMVA/TSpline1.h"
40 #include "TMVA/TSpline2.h"
41 #include "TMVA/Version.h"
42 
43 #include "TMath.h"
44 #include "TF1.h"
45 #include "TH1F.h"
46 #include "TVectorD.h"
47 #include "Riostream.h"
48 
49 #include <iomanip>
50 #include <cstdlib>
51 
52 // static configuration settings
53 const Int_t TMVA::PDF::fgNbin_PdfHist = 10000;
55 const Double_t TMVA::PDF::fgEpsilon = 1.0e-12;
56 
58 
59 ////////////////////////////////////////////////////////////////////////////////
60 /// default constructor needed for ROOT I/O
61 
62 TMVA::PDF::PDF( const TString& name, Bool_t norm )
63  : Configurable (""),
64  fUseHistogram ( kFALSE ),
65  fPDFName ( name ),
66  fNsmooth ( 0 ),
67  fMinNsmooth (-1 ),
68  fMaxNsmooth (-1 ),
69  fNSmoothHist ( 0 ),
70  fInterpolMethod( PDF::kSpline2 ),
71  fSpline ( 0 ),
72  fPDFHist ( 0 ),
73  fHist ( 0 ),
74  fHistOriginal ( 0 ),
75  fGraph ( 0 ),
76  fIGetVal ( 0 ),
77  fHistAvgEvtPerBin ( 0 ),
78  fHistDefinedNBins ( 0 ),
79  fKDEtypeString ( 0 ),
80  fKDEiterString ( 0 ),
81  fBorderMethodString( 0 ),
82  fInterpolateString ( 0 ),
83  fKDEtype ( KDEKernel::kNone ),
84  fKDEiter ( KDEKernel::kNonadaptiveKDE ),
85  fKDEborder ( KDEKernel::kNoTreatment ),
86  fFineFactor ( 0. ),
87  fReadingVersion( 0 ),
88  fCheckHist ( kFALSE ),
89  fNormalize ( norm ),
90  fSuffix ( "" ),
91  fLogger ( 0 )
92 {
93  fLogger = new MsgLogger(this);
94  GetThisPdfThreadLocal() = this;
95 }
96 
97 ////////////////////////////////////////////////////////////////////////////////
98 /// constructor of spline based PDF:
99 
101  const TH1 *hist,
103  Int_t minnsmooth,
104  Int_t maxnsmooth,
105  Bool_t checkHist,
106  Bool_t norm) :
107  Configurable (""),
108  fUseHistogram ( kFALSE ),
109  fPDFName ( name ),
110  fMinNsmooth ( minnsmooth ),
111  fMaxNsmooth ( maxnsmooth ),
112  fNSmoothHist ( 0 ),
113  fInterpolMethod( method ),
114  fSpline ( 0 ),
115  fPDFHist ( 0 ),
116  fHist ( 0 ),
117  fHistOriginal ( 0 ),
118  fGraph ( 0 ),
119  fIGetVal ( 0 ),
120  fHistAvgEvtPerBin ( 0 ),
121  fHistDefinedNBins ( 0 ),
122  fKDEtypeString ( 0 ),
123  fKDEiterString ( 0 ),
124  fBorderMethodString( 0 ),
125  fInterpolateString ( 0 ),
126  fKDEtype ( KDEKernel::kNone ),
127  fKDEiter ( KDEKernel::kNonadaptiveKDE ),
128  fKDEborder ( KDEKernel::kNoTreatment ),
129  fFineFactor ( 0. ),
130  fReadingVersion( 0 ),
131  fCheckHist ( checkHist ),
132  fNormalize ( norm ),
133  fSuffix ( "" ),
134  fLogger ( 0 )
135 {
136  fLogger = new MsgLogger(this);
137  BuildPDF( hist );
138 }
139 
140 ////////////////////////////////////////////////////////////////////////////////
141 /// constructor of kernel based PDF:
142 
144  const TH1* hist,
147  KDEKernel::EKernelBorder kborder,
148  Float_t FineFactor,
149  Bool_t norm) :
150  Configurable (""),
151  fUseHistogram ( kFALSE ),
152  fPDFName ( name ),
153  fNsmooth ( 0 ),
154  fMinNsmooth (-1 ),
155  fMaxNsmooth (-1 ),
156  fNSmoothHist ( 0 ),
157  fInterpolMethod( PDF::kKDE ),
158  fSpline ( 0 ),
159  fPDFHist ( 0 ),
160  fHist ( 0 ),
161  fHistOriginal ( 0 ),
162  fGraph ( 0 ),
163  fIGetVal ( 0 ),
164  fHistAvgEvtPerBin ( 0 ),
165  fHistDefinedNBins ( 0 ),
166  fKDEtypeString ( 0 ),
167  fKDEiterString ( 0 ),
168  fBorderMethodString( 0 ),
169  fInterpolateString ( 0 ),
170  fKDEtype ( ktype ),
171  fKDEiter ( kiter ),
172  fKDEborder ( kborder ),
173  fFineFactor ( FineFactor),
174  fReadingVersion( 0 ),
175  fCheckHist ( kFALSE ),
176  fNormalize ( norm ),
177  fSuffix ( "" ),
178  fLogger ( 0 )
179 {
180  fLogger = new MsgLogger(this);
181  BuildPDF( hist );
182 }
183 
184 ////////////////////////////////////////////////////////////////////////////////
185 
187  const TString& options,
188  const TString& suffix,
189  PDF* defaultPDF,
190  Bool_t norm) :
191  Configurable (options),
192  fUseHistogram ( kFALSE ),
193  fPDFName ( name ),
194  fNsmooth ( 0 ),
195  fMinNsmooth ( -1 ),
196  fMaxNsmooth ( -1 ),
197  fNSmoothHist ( 0 ),
198  fInterpolMethod( PDF::kSpline0 ),
199  fSpline ( 0 ),
200  fPDFHist ( 0 ),
201  fHist ( 0 ),
202  fHistOriginal ( 0 ),
203  fGraph ( 0 ),
204  fIGetVal ( 0 ),
205  fHistAvgEvtPerBin ( 50 ),
206  fHistDefinedNBins ( 0 ),
207  fKDEtypeString ( "Gauss" ),
208  fKDEiterString ( "Nonadaptive" ),
209  fBorderMethodString( "None" ),
210  fInterpolateString ( "Spline2" ),
211  fKDEtype ( KDEKernel::kNone ),
212  fKDEiter ( KDEKernel::kNonadaptiveKDE ),
213  fKDEborder ( KDEKernel::kNoTreatment ),
214  fFineFactor ( 1. ),
215  fReadingVersion( 0 ),
216  fCheckHist ( kFALSE ),
217  fNormalize ( norm ),
218  fSuffix ( suffix ),
219  fLogger ( 0 )
220 {
221  fLogger = new MsgLogger(this);
222  if (defaultPDF != 0) {
223  fNsmooth = defaultPDF->fNsmooth;
224  fMinNsmooth = defaultPDF->fMinNsmooth;
225  fMaxNsmooth = defaultPDF->fMaxNsmooth;
226  fHistAvgEvtPerBin = defaultPDF->fHistAvgEvtPerBin;
227  fHistAvgEvtPerBin = defaultPDF->fHistAvgEvtPerBin;
229  fKDEtypeString = defaultPDF->fKDEtypeString;
230  fKDEiterString = defaultPDF->fKDEiterString;
231  fFineFactor = defaultPDF->fFineFactor;
233  fCheckHist = defaultPDF->fCheckHist;
234  fHistDefinedNBins = defaultPDF->fHistDefinedNBins;
235  }
236 }
237 
238 //___________________fNSmoothHist____________________________________________________
240 {
241  // destructor
242  if (fSpline != NULL) delete fSpline;
243  if (fHist != NULL) delete fHist;
244  if (fPDFHist != NULL) delete fPDFHist;
245  if (fHistOriginal != NULL) delete fHistOriginal;
246  if (fIGetVal != NULL) delete fIGetVal;
247  if (fGraph != NULL) delete fGraph;
248  delete fLogger;
249 }
250 
251 ////////////////////////////////////////////////////////////////////////////////
252 
253 void TMVA::PDF::BuildPDF( const TH1* hist )
254 {
255  GetThisPdfThreadLocal() = this;
256  // sanity check
257  if (hist == NULL) Log() << kFATAL << "Called without valid histogram pointer!" << Endl;
258 
259  // histogram should be non empty
260  if (hist->GetEntries() <= 0)
261  Log() << kFATAL << "Number of entries <= 0 (" << hist->GetEntries() << " in histogram: " << hist->GetTitle() << Endl;
262 
263  if (fInterpolMethod == PDF::kKDE) {
264  Log() << "Create "
265  << ((fKDEiter == KDEKernel::kNonadaptiveKDE) ? "nonadaptive " :
266  (fKDEiter == KDEKernel::kAdaptiveKDE) ? "adaptive " : "??? ")
267  << ((fKDEtype == KDEKernel::kGauss) ? "Gauss " : "??? ")
268  << "type KDE kernel for histogram: \"" << hist->GetName() << "\""
269  << Endl;
270  }
271  else {
272  // another sanity check (nsmooth<0 indicated build with KDE)
273  if (fMinNsmooth<0)
274  Log() << kFATAL << "PDF construction called with minnsmooth<0" << Endl;
275  else if (fMaxNsmooth<=0)
276  fMaxNsmooth = fMinNsmooth;
277  else if (fMaxNsmooth<fMinNsmooth)
278  Log() << kFATAL << "PDF construction called with maxnsmooth<minnsmooth" << Endl;
279  }
280 
281  fHistOriginal = (TH1F*)hist->Clone( TString(hist->GetName()) + "_original" );
282  fHist = (TH1F*)hist->Clone( TString(hist->GetName()) + "_smoothed" );
283  fHistOriginal->SetTitle( fHistOriginal->GetName() ); // reset to new title as well
284  fHist ->SetTitle( fHist->GetName() );
285 
286  // do not store in current target file
287  fHistOriginal->SetDirectory(0);
288  fHist ->SetDirectory(0);
289  fUseHistogram = kFALSE;
290 
291  if (fInterpolMethod == PDF::kKDE) BuildKDEPDF();
292  else BuildSplinePDF();
293 }
294 
295 ////////////////////////////////////////////////////////////////////////////////
296 
298 {
299  Int_t ResolutionFactor = (fInterpolMethod == PDF::kKDE) ? 5 : 1;
300  if (evtNum == 0 && fHistDefinedNBins == 0)
301  Log() << kFATAL << "No number of bins set for PDF" << Endl;
302  else if (fHistDefinedNBins > 0)
303  return fHistDefinedNBins * ResolutionFactor;
304  else if ( evtNum > 0 && fHistAvgEvtPerBin > 0 )
305  return evtNum / fHistAvgEvtPerBin * ResolutionFactor;
306  else
307  Log() << kFATAL << "No number of bins or average event per bin set for PDF" << fHistAvgEvtPerBin << Endl;
308  return 0;
309 }
310 
311 ////////////////////////////////////////////////////////////////////////////////
312 /// build the PDF from the original histograms
313 
315 {
316  // (not useful for discrete distributions, or if no splines are requested)
317  if (fInterpolMethod != PDF::kSpline0 && fCheckHist) CheckHist();
318  // use ROOT TH1 smooth method
319  fNSmoothHist = 0;
320  if (fMaxNsmooth > 0 && fMinNsmooth >=0 ) SmoothHistogram();
321 
322  // fill histogramm to graph
323  FillHistToGraph();
324 
325  // fGraph->Print();
326  switch (fInterpolMethod) {
327 
328  case kSpline0:
329  // use original histogram as reference
330  // this is useful, eg, for discrete variables
331  fUseHistogram = kTRUE;
332  break;
333 
334  case kSpline1:
335  fSpline = new TMVA::TSpline1( "spline1", new TGraph(*fGraph) );
336  break;
337 
338  case kSpline2:
339  fSpline = new TMVA::TSpline2( "spline2", new TGraph(*fGraph) );
340  break;
341 
342  case kSpline3:
343  fSpline = new TSpline3( "spline3", new TGraph(*fGraph) );
344  break;
345 
346  case kSpline5:
347  fSpline = new TSpline5( "spline5", new TGraph(*fGraph) );
348  break;
349 
350  default:
351  Log() << kWARNING << "No valid interpolation method given! Use Spline2" << Endl;
352  fSpline = new TMVA::TSpline2( "spline2", new TGraph(*fGraph) );
353  Log() << kFATAL << " Well.. .thinking about it, I better quit so you notice you are forced to fix the mistake " << Endl;
354  std::exit(1);
355  }
356  // fill into histogram
357  FillSplineToHist();
358 
359  if (!UseHistogram()) {
360  fSpline->SetTitle( (TString)fHist->GetTitle() + fSpline->GetTitle() );
361  fSpline->SetName ( (TString)fHist->GetName() + fSpline->GetName() );
362  }
363 
364 
365  // sanity check
366  Double_t integral = GetIntegral();
367  if (integral < 0) Log() << kFATAL << "Integral: " << integral << " <= 0" << Endl;
368 
369  // normalize
370  if (fNormalize)
371  if (integral>0) fPDFHist->Scale( 1.0/integral );
372 
373  fPDFHist->SetDirectory(0);
374 }
375 
376 ////////////////////////////////////////////////////////////////////////////////
377 /// creates high-binned reference histogram to be used instead of the
378 /// PDF for speed reasons
379 
381 {
382  fPDFHist = new TH1F( "", "", fgNbin_PdfHist, GetXmin(), GetXmax() );
383  fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_KDE" );
384  fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_KDE" );
385 
386  // create the kernel object
387  Float_t histoLowEdge = fHist->GetBinLowEdge(1);
388  Float_t histoUpperEdge = fPDFHist->GetBinLowEdge(fPDFHist->GetNbinsX()) + fPDFHist->GetBinWidth(fPDFHist->GetNbinsX());
389  KDEKernel *kern = new TMVA::KDEKernel( fKDEiter,
390  fHist, histoLowEdge, histoUpperEdge,
391  fKDEborder, fFineFactor );
392  kern->SetKernelType(fKDEtype);
393 
394  for (Int_t i=1;i<fHist->GetNbinsX();i++) {
395  // loop over the bins of the original histo
396  for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
397  // loop over the bins of the new histo and fill it
398  fPDFHist->AddBinContent(j,fHist->GetBinContent(i)*
399  kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
400  fPDFHist->GetBinLowEdge(j+1),
401  fHist->GetBinCenter(i),
402  i)
403  );
404  }
405  if (fKDEborder == 3) { // mirror the saples and fill them again
406  // in order to save time do the mirroring only for the first (the lowwer) 1/5 of the histo to the left;
407  // and the last (the higher) 1/5 of the histo to the right.
408  // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
409  if (i < fHist->GetNbinsX()/5 ) { // the first (the lowwer) 1/5 of the histo
410  for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
411  // loop over the bins of the PDF histo and fill it
412  fPDFHist->AddBinContent(j,fHist->GetBinContent(i)*
413  kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
414  fPDFHist->GetBinLowEdge(j+1),
415  2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
416  i)
417  );
418  }
419  }
420  if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
421  for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
422  // loop over the bins of the PDF histo and fill it
423  fPDFHist->AddBinContent( j,fHist->GetBinContent(i)*
424  kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
425  fPDFHist->GetBinLowEdge(j+1),
426  2*histoUpperEdge-fHist->GetBinCenter(i), i) );
427  }
428  }
429  }
430  }
431 
432  fPDFHist->SetEntries(fHist->GetEntries());
433 
434  delete kern;
435 
436  // sanity check
437  Double_t integral = GetIntegral();
438  if (integral < 0) Log() << kFATAL << "Integral: " << integral << " <= 0" << Endl;
439 
440  // normalize
441  if (fNormalize)
442  if (integral>0) fPDFHist->Scale( 1.0/integral );
443  fPDFHist->SetDirectory(0);
444 }
445 
446 ////////////////////////////////////////////////////////////////////////////////
447 
449 {
450  if(fHist->GetNbinsX()==1) return;
451  if (fMaxNsmooth == fMinNsmooth) {
452  fHist->Smooth( fMinNsmooth );
453  return;
454  }
455 
456  //calculating Mean, RMS of the relative errors and using them to set
457  //the bounderies of the liniar transformation
458  Float_t Err=0, ErrAvg=0, ErrRMS=0 ; Int_t num=0, smooth;
459  for (Int_t bin=0; bin<fHist->GetNbinsX(); bin++) {
460  if (fHist->GetBinContent(bin+1) <= fHist->GetBinError(bin+1)) continue;
461  Err = fHist->GetBinError(bin+1) / fHist->GetBinContent(bin+1);
462  ErrAvg += Err; ErrRMS += Err*Err; num++;
463  }
464  ErrAvg /= num;
465  ErrRMS = TMath::Sqrt(ErrRMS/num-ErrAvg*ErrAvg) ;
466 
467  //liniarly convent the histogram to a vector of smoothnum
468  Float_t MaxErr=ErrAvg+ErrRMS, MinErr=ErrAvg-ErrRMS;
469  fNSmoothHist = new TH1I("","",fHist->GetNbinsX(),0,fHist->GetNbinsX());
470  fNSmoothHist->SetTitle( (TString)fHist->GetTitle() + "_Nsmooth" );
471  fNSmoothHist->SetName ( (TString)fHist->GetName() + "_Nsmooth" );
472  for (Int_t bin=0; bin<fHist->GetNbinsX(); bin++) {
473  if (fHist->GetBinContent(bin+1) <= fHist->GetBinError(bin+1))
474  smooth=fMaxNsmooth;
475  else {
476  Err = fHist->GetBinError(bin+1) / fHist->GetBinContent(bin+1);
477  smooth=(Int_t)((Err-MinErr) /(MaxErr-MinErr) * (fMaxNsmooth-fMinNsmooth)) + fMinNsmooth;
478  }
479  smooth = TMath::Max(fMinNsmooth,TMath::Min(fMaxNsmooth,smooth));
480  fNSmoothHist->SetBinContent(bin+1,smooth);
481  }
482 
483  //find regions of constant smoothnum, starting from the highest amount of
484  //smoothing. So the last iteration all the histogram will be smoothed as a whole
485  for (Int_t n=fMaxNsmooth; n>=0; n--) {
486  //all the histogram has to be smoothed at least fMinNsmooth
487  if (n <= fMinNsmooth) { fHist->Smooth(); continue; }
488  Int_t MinBin=-1,MaxBin =-1;
489  for (Int_t bin=0; bin < fHist->GetNbinsX(); bin++) {
490  if (fNSmoothHist->GetBinContent(bin+1) >= n) {
491  if (MinBin==-1) MinBin = bin;
492  else MaxBin=bin;
493  }
494  else if (MaxBin >= 0) {
495 #if ROOT_VERSION_CODE > ROOT_VERSION(5,19,2)
496  fHist->Smooth(1,"R");
497 #else
498  fHist->Smooth(1,MinBin+1,MaxBin+1);
499 #endif
500  MinBin=MaxBin=-1;
501  }
502  else // can't smooth a single bin
503  MinBin = -1;
504  }
505  }
506 }
507 
508 ////////////////////////////////////////////////////////////////////////////////
509 /// Simple conversion
510 
512 {
513  fGraph=new TGraph(fHist);
514 }
515 
516 ////////////////////////////////////////////////////////////////////////////////
517 /// creates high-binned reference histogram to be used instead of the
518 /// PDF for speed reasons
519 
521 {
522  if (UseHistogram()) {
523  // no spline given, use the original histogram
524  fPDFHist = (TH1*)fHist->Clone();
525  fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_spline0" );
526  fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_spline0" );
527  }
528  else {
529  // create new reference histogram
530  fPDFHist = new TH1F( "", "", fgNbin_PdfHist, GetXmin(), GetXmax() );
531  fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_" + fSpline->GetTitle() );
532  fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_" + fSpline->GetTitle() );
533 
534  for (Int_t bin=1; bin <= fgNbin_PdfHist; bin++) {
535  Double_t x = fPDFHist->GetBinCenter( bin );
536  Double_t y = fSpline->Eval( x );
537  // sanity correction: in cases where strong slopes exist, accidentally, the
538  // splines can go to zero; in this case we set the corresponding bin content
539  // equal to the bin content of the original histogram
540  if (y <= fgEpsilon) y = fHist->GetBinContent( fHist->FindBin( x ) );
541  fPDFHist->SetBinContent( bin, TMath::Max(y, fgEpsilon) );
542  }
543  }
544  fPDFHist->SetDirectory(0);
545 }
546 
547 ////////////////////////////////////////////////////////////////////////////////
548 /// sanity check: compare PDF with original histogram
549 
551 {
552  if (fHist == NULL) {
553  Log() << kFATAL << "<CheckHist> Called without valid histogram pointer!" << Endl;
554  }
555 
556  Int_t nbins = fHist->GetNbinsX();
557 
558  Int_t emptyBins=0;
559  // count number of empty bins
560  for (Int_t bin=1; bin<=nbins; bin++)
561  if (fHist->GetBinContent(bin) == 0) emptyBins++;
562 
563  if (((Float_t)emptyBins/(Float_t)nbins) > 0.5) {
564  Log() << kWARNING << "More than 50% (" << (((Float_t)emptyBins/(Float_t)nbins)*100)
565  <<"%) of the bins in hist '"
566  << fHist->GetName() << "' are empty!" << Endl;
567  Log() << kWARNING << "X_min=" << GetXmin()
568  << " mean=" << fHist->GetMean() << " X_max= " << GetXmax() << Endl;
569  }
570 }
571 
572 ////////////////////////////////////////////////////////////////////////////////
573 /// comparison of original histogram with reference PDF
574 
575 void TMVA::PDF::ValidatePDF( TH1* originalHist ) const
576 {
577  // if no histogram is given, use the original one (the one the PDF was made from)
578  if (!originalHist) originalHist = fHistOriginal;
579 
580  Int_t nbins = originalHist->GetNbinsX();
581 
582  // treat errors properly
583  if (originalHist->GetSumw2()->GetSize() == 0) originalHist->Sumw2();
584 
585  // ---- first validation: simple(st) possible chi2 test
586  // count number of empty bins
587  Double_t chi2 = 0;
588  Int_t ndof = 0;
589  Int_t nc1 = 0; // deviation counters
590  Int_t nc2 = 0; // deviation counters
591  Int_t nc3 = 0; // deviation counters
592  Int_t nc6 = 0; // deviation counters
593  for (Int_t bin=1; bin<=nbins; bin++) {
594  Double_t x = originalHist->GetBinCenter( bin );
595  Double_t y = originalHist->GetBinContent( bin );
596  Double_t ey = originalHist->GetBinError( bin );
597 
598  Int_t binPdfHist = fPDFHist->FindBin( x );
599  if (binPdfHist<0) continue; // happens only if hist-dim>3
600 
601  Double_t yref = GetVal( x );
602  Double_t rref = ( originalHist->GetSumOfWeights()/fPDFHist->GetSumOfWeights() *
603  originalHist->GetBinWidth( bin )/fPDFHist->GetBinWidth( binPdfHist ) );
604 
605  if (y > 0) {
606  ndof++;
607  Double_t d = TMath::Abs( (y - yref*rref)/ey );
608  // std::cout << "bin: " << bin << " val: " << x << " data(err): " << y << "(" << ey << ") pdf: "
609  // << yref << " dev(chi2): " << d << "(" << chi2 << ") rref: " << rref << std::endl;
610  chi2 += d*d;
611  if (d > 1) { nc1++; if (d > 2) { nc2++; if (d > 3) { nc3++; if (d > 6) nc6++; } } }
612  }
613  }
614 
615  Log() << "Validation result for PDF \"" << originalHist->GetTitle() << "\"" << ": " << Endl;
616  Log() << Form( " chi2/ndof(!=0) = %.1f/%i = %.2f (Prob = %.2f)",
617  chi2, ndof, chi2/ndof, TMath::Prob( chi2, ndof ) ) << Endl;
618  if ((1.0 - TMath::Prob( chi2, ndof )) > 0.9999994) {
619  Log() << kWARNING << "Comparison of the original histogram \"" << originalHist->GetTitle() << "\"" << Endl;
620  Log() << kWARNING << "with the corresponding PDF gave a chi2/ndof of " << chi2/ndof << "," << Endl;
621  Log() << kWARNING << "which corresponds to a deviation of more than 5 sigma! Please check!" << Endl;
622  }
623  Log() << Form( " #bins-found(#expected-bins) deviating > [1,2,3,6] sigmas: " \
624  "[%i(%i),%i(%i),%i(%i),%i(%i)]",
625  nc1, Int_t(TMath::Prob(1.0,1)*ndof), nc2, Int_t(TMath::Prob(4.0,1)*ndof),
626  nc3, Int_t(TMath::Prob(9.0,1)*ndof), nc6, Int_t(TMath::Prob(36.0,1)*ndof) ) << Endl;
627 }
628 
629 ////////////////////////////////////////////////////////////////////////////////
630 /// computes normalisation
631 
633 {
634  Double_t integral = fPDFHist->GetSumOfWeights();
635  integral *= GetPdfHistBinWidth();
636 
637  return integral;
638 }
639 
640 ////////////////////////////////////////////////////////////////////////////////
641 /// static external auxiliary function (integrand)
642 
644 {
645  return ThisPDF()->GetVal( x[0] );
646 }
647 
648 ////////////////////////////////////////////////////////////////////////////////
649 /// computes PDF integral within given ranges
650 
652 {
653  Double_t integral = 0;
654 
655  if (fgManualIntegration) {
656 
657  // compute integral by summing over bins
658  Int_t imin = fPDFHist->FindBin(xmin);
659  Int_t imax = fPDFHist->FindBin(xmax);
660  if (imin < 1) imin = 1;
661  if (imax > fPDFHist->GetNbinsX()) imax = fPDFHist->GetNbinsX();
662 
663  for (Int_t bini = imin; bini <= imax; bini++) {
664  Float_t dx = fPDFHist->GetBinWidth(bini);
665  // correct for bin fractions in first and last bin
666  if (bini == imin) dx = fPDFHist->GetBinLowEdge(bini+1) - xmin;
667  else if (bini == imax) dx = xmax - fPDFHist->GetBinLowEdge(bini);
668  if (dx < 0 && dx > -1.0e-8) dx = 0; // protect against float->double numerical effects
669  if (dx<0) {
670  Log() << kWARNING
671  << "dx = " << dx << std::endl
672  << "bini = " << bini << std::endl
673  << "xmin = " << xmin << std::endl
674  << "xmax = " << xmax << std::endl
675  << "imin = " << imin << std::endl
676  << "imax = " << imax << std::endl
677  << "low edge of imin" << fPDFHist->GetBinLowEdge(imin) << std::endl
678  << "low edge of imin+1" << fPDFHist->GetBinLowEdge(imin+1) << Endl;
679  Log() << kFATAL << "<GetIntegral> dx = " << dx << " < 0" << Endl;
680  }
681  integral += fPDFHist->GetBinContent(bini)*dx;
682  }
683 
684  }
685  else {
686 
687  // compute via Gaussian quadrature (C++ version of CERNLIB function DGAUSS)
688  if (fIGetVal == 0) fIGetVal = new TF1( "IGetVal", PDF::IGetVal, GetXmin(), GetXmax(), 0 );
689  integral = fIGetVal->Integral( xmin, xmax );
690  }
691 
692  return integral;
693 }
694 
695 ////////////////////////////////////////////////////////////////////////////////
696 /// returns value PDF(x)
697 
699 {
700  // check which is filled
701  Int_t bin = fPDFHist->FindBin(x);
702  bin = TMath::Max(bin,1);
703  bin = TMath::Min(bin,fPDFHist->GetNbinsX());
704 
705  Double_t retval = 0;
706 
707  if (UseHistogram()) {
708  // use directly histogram bins (this is for discrete PDFs)
709  retval = fPDFHist->GetBinContent( bin );
710  }
711  else {
712  // linear interpolation
713  Int_t nextbin = bin;
714  if ((x > fPDFHist->GetBinCenter(bin) && bin != fPDFHist->GetNbinsX()) || bin == 1)
715  nextbin++;
716  else
717  nextbin--;
718 
719  // linear interpolation between adjacent bins
720  Double_t dx = fPDFHist->GetBinCenter( bin ) - fPDFHist->GetBinCenter( nextbin );
721  Double_t dy = fPDFHist->GetBinContent( bin ) - fPDFHist->GetBinContent( nextbin );
722  retval = fPDFHist->GetBinContent( bin ) + (x - fPDFHist->GetBinCenter( bin ))*dy/dx;
723  }
724 
725  return TMath::Max( retval, fgEpsilon );
726 }
727 
728 ////////////////////////////////////////////////////////////////////////////////
729 /// returns value PDF^{-1}(y)
730 
731 Double_t TMVA::PDF::GetValInverse( Double_t y, Bool_t isMonotonouslyIncreasingFunction ) const
732 {
733  Int_t lowerBin=0, higherBin=0;
734  Double_t lowerBinValue=0, higherBinValue=0;
735  FindBinInverse(fPDFHist,lowerBin,higherBin,lowerBinValue,higherBinValue,y,isMonotonouslyIncreasingFunction);
736 
737  Double_t xValueLowerBin =fPDFHist->GetBinCenter (lowerBin);
738  Double_t xValueHigherBin=fPDFHist->GetBinCenter (higherBin);
739 
740  Double_t length =(higherBinValue-lowerBinValue);
741  Double_t fraction=lowerBinValue;
742  if (length>1.0e-10)
743  fraction=(y-lowerBinValue)/length;
744 
745  Double_t lengthX =xValueHigherBin-xValueLowerBin;
746  Double_t x =xValueLowerBin+lengthX*fraction;
747 
748  // comparison for test purposes
749 // std::cout << "lb " << lowerBin << " hb " << higherBin << " lbv " << lowerBinValue << " hbv " << higherBinValue << " frac " << fraction << std::endl;
750 // std::cout << "y " << y << " inv x " << x << " straight y " << GetVal(x) << std::endl;
751 
752  return x;
753 }
754 
755 ////////////////////////////////////////////////////////////////////////////////
756 /// find bin from value on ordinate
757 
758 void TMVA::PDF::FindBinInverse( const TH1* histogram, Int_t& lowerBin, Int_t& higherBin, Double_t& lowerBinValue, Double_t& higherBinValue,
759  Double_t y, Bool_t isMonotonouslyIncreasingFunction ) const
760 {
761  if (isMonotonouslyIncreasingFunction) {
762  higherBin=histogram->GetNbinsX();
763  lowerBin =0;
764 
765  Int_t bin=higherBin/2;
766 
767  while (bin>lowerBin && bin<higherBin) {
768  Double_t binContent=histogram->GetBinContent(bin);
769 
770  if (y<binContent) {
771  higherBin =bin;
772  higherBinValue=binContent;
773  }
774  else if (y>=binContent){
775  lowerBin =bin;
776  lowerBinValue =binContent;
777  }
778  bin=lowerBin+(higherBin-lowerBin)/2;
779  }
780  return;
781  }
782  // search sequentially
783  for (Int_t bin=0, binEnd=histogram->GetNbinsX(); bin<binEnd; ++bin) {
784  Double_t binContent=histogram->GetBinContent(bin);
785  if (binContent<y) {
786  lowerBin =bin;
787  higherBin=bin;
788  lowerBinValue =binContent;
789  higherBinValue=binContent;
790  }
791  else {
792  higherBin=bin;
793  higherBinValue=binContent;
794  break;
795  }
796  }
797 }
798 
799 
800 ////////////////////////////////////////////////////////////////////////////////
801 /// define the options (their key words) that can be set in the option string
802 /// know options:
803 /// PDFInterpol[ivar] <string> Spline0, Spline1, Spline2 <default>, Spline3, Spline5, KDE used to interpolate reference histograms
804 /// if no variable index is given, it is valid for ALL the variables
805 ///
806 /// NSmooth <int> how often the input histos are smoothed
807 /// MinNSmooth <int> min number of smoothing iterations, for bins with most data
808 /// MaxNSmooth <int> max number of smoothing iterations, for bins with least data
809 /// NAvEvtPerBin <int> minimum average number of events per PDF bin
810 /// TransformOutput <bool> transform (often strongly peaked) likelihood output through sigmoid inversion
811 /// fKDEtype <KernelType> type of the Kernel to use (1 is Gaussian)
812 /// fKDEiter <KerneIter> number of iterations (1 --> "static KDE", 2 --> "adaptive KDE")
813 /// fBorderMethod <KernelBorder> the method to take care about "border" effects (1=no treatment , 2=kernel renormalization, 3=sample mirroring)
814 
816 {
817  DeclareOptionRef( fNsmooth, Form("NSmooth%s",fSuffix.Data()),
818  "Number of smoothing iterations for the input histograms" );
819  DeclareOptionRef( fMinNsmooth, Form("MinNSmooth%s",fSuffix.Data()),
820  "Min number of smoothing iterations, for bins with most data" );
821 
822  DeclareOptionRef( fMaxNsmooth, Form("MaxNSmooth%s",fSuffix.Data()),
823  "Max number of smoothing iterations, for bins with least data" );
824 
825  DeclareOptionRef( fHistAvgEvtPerBin, Form("NAvEvtPerBin%s",fSuffix.Data()),
826  "Average number of events per PDF bin" );
827 
828  DeclareOptionRef( fHistDefinedNBins, Form("Nbins%s",fSuffix.Data()),
829  "Defined number of bins for the histogram from which the PDF is created" );
830 
831  DeclareOptionRef( fCheckHist, Form("CheckHist%s",fSuffix.Data()),
832  "Whether or not to check the source histogram of the PDF" );
833 
834  DeclareOptionRef( fInterpolateString, Form("PDFInterpol%s",fSuffix.Data()),
835  "Interpolation method for reference histograms (e.g. Spline2 or KDE)" );
836  AddPreDefVal(TString("Spline0")); // take histogram
837  AddPreDefVal(TString("Spline1")); // linear interpolation between bins
838  AddPreDefVal(TString("Spline2")); // quadratic interpolation
839  AddPreDefVal(TString("Spline3")); // cubic interpolation
840  AddPreDefVal(TString("Spline5")); // fifth order polynome interpolation
841  AddPreDefVal(TString("KDE")); // use kernel density estimator
842 
843  DeclareOptionRef( fKDEtypeString, Form("KDEtype%s",fSuffix.Data()), "KDE kernel type (1=Gauss)" );
844  AddPreDefVal(TString("Gauss"));
845 
846  DeclareOptionRef( fKDEiterString, Form("KDEiter%s",fSuffix.Data()), "Number of iterations (1=non-adaptive, 2=adaptive)" );
847  AddPreDefVal(TString("Nonadaptive"));
848  AddPreDefVal(TString("Adaptive"));
849 
850  DeclareOptionRef( fFineFactor , Form("KDEFineFactor%s",fSuffix.Data()),
851  "Fine tuning factor for Adaptive KDE: Factor to multyply the width of the kernel");
852 
853  DeclareOptionRef( fBorderMethodString, Form("KDEborder%s",fSuffix.Data()),
854  "Border effects treatment (1=no treatment , 2=kernel renormalization, 3=sample mirroring)" );
855  AddPreDefVal(TString("None"));
856  AddPreDefVal(TString("Renorm"));
857  AddPreDefVal(TString("Mirror"));
858 
859  SetConfigName( GetName() );
860  SetConfigDescription( "Configuration options for the PDF class" );
861 }
862 
863 ////////////////////////////////////////////////////////////////////////////////
864 
866 {
867  if (fNsmooth < 0) fNsmooth = 0; // set back to default
868 
869  if (fMaxNsmooth < 0 || fMinNsmooth < 0) { // use "Nsmooth" variable
870  fMinNsmooth = fMaxNsmooth = fNsmooth;
871  }
872 
873  if (fMaxNsmooth < fMinNsmooth && fMinNsmooth >= 0) { // sanity check
874  Log() << kFATAL << "ERROR: MaxNsmooth = "
875  << fMaxNsmooth << " < MinNsmooth = " << fMinNsmooth << Endl;
876  }
877 
878  if (fMaxNsmooth < 0 || fMinNsmooth < 0) {
879  Log() << kFATAL << "ERROR: MaxNsmooth = "
880  << fMaxNsmooth << " or MinNsmooth = " << fMinNsmooth << " smaller than zero" << Endl;
881  }
882 
883  //processing the options
884  if (fInterpolateString == "Spline0") fInterpolMethod = TMVA::PDF::kSpline0;
885  else if (fInterpolateString == "Spline1") fInterpolMethod = TMVA::PDF::kSpline1;
886  else if (fInterpolateString == "Spline2") fInterpolMethod = TMVA::PDF::kSpline2;
887  else if (fInterpolateString == "Spline3") fInterpolMethod = TMVA::PDF::kSpline3;
888  else if (fInterpolateString == "Spline5") fInterpolMethod = TMVA::PDF::kSpline5;
889  else if (fInterpolateString == "KDE" ) fInterpolMethod = TMVA::PDF::kKDE;
890  else if (fInterpolateString != "" ) {
891  Log() << kFATAL << "unknown setting for option 'InterpolateMethod': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
892  }
893 
894  // init KDE options
895  if (fKDEtypeString == "Gauss" ) fKDEtype = KDEKernel::kGauss;
896  else if (fKDEtypeString != "" )
897  Log() << kFATAL << "unknown setting for option 'KDEtype': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
898  if (fKDEiterString == "Nonadaptive") fKDEiter = KDEKernel::kNonadaptiveKDE;
899  else if (fKDEiterString == "Adaptive" ) fKDEiter = KDEKernel::kAdaptiveKDE;
900  else if (fKDEiterString != "" )// nothing more known
901  Log() << kFATAL << "unknown setting for option 'KDEiter': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
902 
903  if ( fBorderMethodString == "None" ) fKDEborder= KDEKernel::kNoTreatment;
904  else if ( fBorderMethodString == "Renorm" ) fKDEborder= KDEKernel::kKernelRenorm;
905  else if ( fBorderMethodString == "Mirror" ) fKDEborder= KDEKernel::kSampleMirror;
906  else if ( fKDEiterString != "" ) { // nothing more known
907  Log() << kFATAL << "unknown setting for option 'KDEBorder': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
908  }
909 }
910 
911 ////////////////////////////////////////////////////////////////////////////////
912 /// XML file writing
913 
914 void TMVA::PDF::AddXMLTo( void* parent )
915 {
916  void* pdfxml = gTools().AddChild(parent, "PDF");
917  gTools().AddAttr(pdfxml, "Name", fPDFName );
918  gTools().AddAttr(pdfxml, "MinNSmooth", fMinNsmooth );
919  gTools().AddAttr(pdfxml, "MaxNSmooth", fMaxNsmooth );
920  gTools().AddAttr(pdfxml, "InterpolMethod", fInterpolMethod );
921  gTools().AddAttr(pdfxml, "KDE_type", fKDEtype );
922  gTools().AddAttr(pdfxml, "KDE_iter", fKDEiter );
923  gTools().AddAttr(pdfxml, "KDE_border", fKDEborder );
924  gTools().AddAttr(pdfxml, "KDE_finefactor", fFineFactor );
925  void* pdfhist = gTools().AddChild(pdfxml,"Histogram" );
926  TH1* histToWrite = GetOriginalHist();
927  Bool_t hasEquidistantBinning = gTools().HistoHasEquidistantBins(*histToWrite);
928  gTools().AddAttr(pdfhist, "Name", histToWrite->GetName() );
929  gTools().AddAttr(pdfhist, "NBins", histToWrite->GetNbinsX() );
930  gTools().AddAttr(pdfhist, "XMin", histToWrite->GetXaxis()->GetXmin() );
931  gTools().AddAttr(pdfhist, "XMax", histToWrite->GetXaxis()->GetXmax() );
932  gTools().AddAttr(pdfhist, "HasEquidistantBins", hasEquidistantBinning );
933 
934  TString bincontent("");
935  for (Int_t i=0; i<histToWrite->GetNbinsX(); i++) {
936  bincontent += gTools().StringFromDouble(histToWrite->GetBinContent(i+1));
937  bincontent += " ";
938  }
939  gTools().AddRawLine(pdfhist, bincontent );
940 
941  if (!hasEquidistantBinning) {
942  void* pdfhistbins = gTools().AddChild(pdfxml,"HistogramBinning" );
943  gTools().AddAttr(pdfhistbins, "NBins", histToWrite->GetNbinsX() );
944  TString binns("");
945  for (Int_t i=1; i<=histToWrite->GetNbinsX()+1; i++) {
946  binns += gTools().StringFromDouble(histToWrite->GetXaxis()->GetBinLowEdge(i));
947  binns += " ";
948  }
949  gTools().AddRawLine(pdfhistbins, binns );
950  }
951 }
952 
953 ////////////////////////////////////////////////////////////////////////////////
954 /// XML file reading
955 
956 void TMVA::PDF::ReadXML( void* pdfnode )
957 {
958  UInt_t enumint;
959 
960  gTools().ReadAttr(pdfnode, "MinNSmooth", fMinNsmooth );
961  gTools().ReadAttr(pdfnode, "MaxNSmooth", fMaxNsmooth );
962  gTools().ReadAttr(pdfnode, "InterpolMethod", enumint ); fInterpolMethod = EInterpolateMethod(enumint);
963  gTools().ReadAttr(pdfnode, "KDE_type", enumint ); fKDEtype = KDEKernel::EKernelType(enumint);
964  gTools().ReadAttr(pdfnode, "KDE_iter", enumint ); fKDEiter = KDEKernel::EKernelIter(enumint);
965  gTools().ReadAttr(pdfnode, "KDE_border", enumint ); fKDEborder = KDEKernel::EKernelBorder(enumint);
966  gTools().ReadAttr(pdfnode, "KDE_finefactor", fFineFactor );
967 
968  TString hname;
969  UInt_t nbins;
970  Double_t xmin, xmax;
971  Bool_t hasEquidistantBinning;
972 
973  void* histch = gTools().GetChild(pdfnode);
974  gTools().ReadAttr( histch, "Name", hname );
975  gTools().ReadAttr( histch, "NBins", nbins );
976  gTools().ReadAttr( histch, "XMin", xmin );
977  gTools().ReadAttr( histch, "XMax", xmax );
978  gTools().ReadAttr( histch, "HasEquidistantBins", hasEquidistantBinning );
979 
980  // recreate the original hist
981  TH1* newhist = 0;
982  if (hasEquidistantBinning) {
983  newhist = new TH1F( hname, hname, nbins, xmin, xmax );
984  newhist->SetDirectory(0);
985  const char* content = gTools().GetContent(histch);
986  std::stringstream s(content);
987  Double_t val;
988  for (UInt_t i=0; i<nbins; i++) {
989  s >> val;
990  newhist->SetBinContent(i+1,val);
991  }
992  }
993  else {
994  const char* content = gTools().GetContent(histch);
995  std::stringstream s(content);
996  Double_t val;
997  void* binch = gTools().GetNextChild(histch);
998  UInt_t nbinning;
999  gTools().ReadAttr( binch, "NBins", nbinning );
1000  TVectorD binns(nbinning+1);
1001  if (nbinning != nbins) {
1002  Log() << kFATAL << "Number of bins in content and binning array differs"<<Endl;
1003  }
1004  const char* binString = gTools().GetContent(binch);
1005  std::stringstream sb(binString);
1006  for (UInt_t i=0; i<=nbins; i++) sb >> binns[i];
1007  newhist = new TH1F( hname, hname, nbins, binns.GetMatrixArray() );
1008  newhist->SetDirectory(0);
1009  for (UInt_t i=0; i<nbins; i++) {
1010  s >> val;
1011  newhist->SetBinContent(i+1,val);
1012  }
1013  }
1014 
1015  TString hnameSmooth = hname;
1016  hnameSmooth.ReplaceAll( "_original", "_smoothed" );
1017 
1018  if (fHistOriginal != 0) delete fHistOriginal;
1019  fHistOriginal = newhist;
1020  fHist = (TH1F*)fHistOriginal->Clone( hnameSmooth );
1021  fHist->SetTitle( hnameSmooth );
1022  fHist->SetDirectory(0);
1023 
1024  if (fInterpolMethod == PDF::kKDE) BuildKDEPDF();
1025  else BuildSplinePDF();
1026 }
1027 
1028 ////////////////////////////////////////////////////////////////////////////////
1029 /// write the pdf
1030 
1031 std::ostream& TMVA::operator<< ( std::ostream& os, const PDF& pdf )
1032 {
1033  Int_t dp = os.precision();
1034  os << "MinNSmooth " << pdf.fMinNsmooth << std::endl;
1035  os << "MaxNSmooth " << pdf.fMaxNsmooth << std::endl;
1036  os << "InterpolMethod " << pdf.fInterpolMethod << std::endl;
1037  os << "KDE_type " << pdf.fKDEtype << std::endl;
1038  os << "KDE_iter " << pdf.fKDEiter << std::endl;
1039  os << "KDE_border " << pdf.fKDEborder << std::endl;
1040  os << "KDE_finefactor " << pdf.fFineFactor << std::endl;
1041 
1042  TH1* histToWrite = pdf.GetOriginalHist();
1043 
1044  const Int_t nBins = histToWrite->GetNbinsX();
1045 
1046  // note: this is a schema change introduced for v3.7.3
1047  os << "Histogram "
1048  << histToWrite->GetName()
1049  << " " << nBins // nbins
1050  << " " << std::setprecision(12) << histToWrite->GetXaxis()->GetXmin() // x_min
1051  << " " << std::setprecision(12) << histToWrite->GetXaxis()->GetXmax() // x_max
1052  << std::endl;
1053 
1054  // write the smoothed hist
1055  os << "Weights " << std::endl;
1056  os << std::setprecision(8);
1057  for (Int_t i=0; i<nBins; i++) {
1058  os << std::setw(15) << std::left << histToWrite->GetBinContent(i+1) << std::right << " ";
1059  if ((i+1)%5==0) os << std::endl;
1060  }
1061 
1062  os << std::setprecision(dp);
1063  return os; // Return the output stream.
1064 }
1065 
1066 ////////////////////////////////////////////////////////////////////////////////
1067 /// read the tree from an std::istream
1068 
1069 std::istream& TMVA::operator>> ( std::istream& istr, PDF& pdf )
1070 {
1071  TString devnullS;
1072  Int_t valI;
1073  Int_t nbins=-1; // default binning will cause an exit
1074  Float_t xmin=-1., xmax=-1.;
1075  TString hname="_original";
1076  Bool_t doneReading = kFALSE;
1077  while (!doneReading) {
1078  istr >> devnullS;
1079  if (devnullS=="NSmooth")
1080  {istr >> pdf.fMinNsmooth; pdf.fMaxNsmooth=pdf.fMinNsmooth;}
1081  else if (devnullS=="MinNSmooth") istr >> pdf.fMinNsmooth;
1082  else if (devnullS=="MaxNSmooth") istr >> pdf.fMaxNsmooth;
1083  // have to do this with strings to be more stable with developing code
1084  else if (devnullS == "InterpolMethod") { istr >> valI; pdf.fInterpolMethod = PDF::EInterpolateMethod(valI);}
1085  else if (devnullS == "KDE_type") { istr >> valI; pdf.fKDEtype = KDEKernel::EKernelType(valI); }
1086  else if (devnullS == "KDE_iter") { istr >> valI; pdf.fKDEiter = KDEKernel::EKernelIter(valI);}
1087  else if (devnullS == "KDE_border") { istr >> valI; pdf.fKDEborder = KDEKernel::EKernelBorder(valI);}
1088  else if (devnullS == "KDE_finefactor") {
1089  istr >> pdf.fFineFactor;
1090  if (pdf.GetReadingVersion() != 0 && pdf.GetReadingVersion() < TMVA_VERSION(3,7,3)) { // here we expect the histogram limits if the version is below 3.7.3. When version == 0, the newest TMVA version is assumed.
1091  // coverity[tainted_data_argument]
1092  istr >> nbins >> xmin >> xmax;
1093  doneReading = kTRUE;
1094  }
1095  }
1096  else if (devnullS == "Histogram") { istr >> hname >> nbins >> xmin >> xmax; }
1097  else if (devnullS == "Weights") { doneReading = kTRUE; }
1098  }
1099 
1100  TString hnameSmooth = hname;
1101  hnameSmooth.ReplaceAll( "_original", "_smoothed" );
1102 
1103  // recreate the original hist
1104  if (nbins==-1) {
1105  std::cout << "PDF, trying to create a histogram without defined binning"<< std::endl;
1106  std::exit(1);
1107  }
1108  TH1* newhist = new TH1F( hname,hname, nbins, xmin, xmax );
1109  newhist->SetDirectory(0);
1110  Float_t val;
1111  for (Int_t i=0; i<nbins; i++) {
1112  istr >> val;
1113  newhist->SetBinContent(i+1,val);
1114  }
1115 
1116  if (pdf.fHistOriginal != 0) delete pdf.fHistOriginal;
1117  pdf.fHistOriginal = newhist;
1118  pdf.fHist = (TH1F*)pdf.fHistOriginal->Clone( hnameSmooth );
1119  pdf.fHist->SetTitle( hnameSmooth );
1120  pdf.fHist->SetDirectory(0);
1121 
1122  if (pdf.fMinNsmooth>=0) pdf.BuildSplinePDF();
1123  else {
1124  pdf.fInterpolMethod = PDF::kKDE;
1125  pdf.BuildKDEPDF();
1126  }
1127 
1128  return istr;
1129 }
1130 
1132 {
1133  // return global "this" pointer of PDF
1134  return GetThisPdfThreadLocal();
1135 }
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:52
Int_t fMaxNsmooth
Definition: PDF.h:177
TString fBorderMethodString
Definition: PDF.h:193
float xmin
Definition: THbookFile.cxx:93
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:956
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4629
Int_t fHistAvgEvtPerBin
Definition: PDF.h:188
static const Double_t fgEpsilon
Definition: PDF.h:171
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8266
ClassImp(TMVA::PDF) TMVA
default constructor needed for ROOT I/O
Definition: PDF.cxx:57
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
void SmoothHistogram()
Definition: PDF.cxx:448
void BuildPDF(const TH1 *theHist)
Definition: PDF.cxx:253
Double_t GetIntegral() const
computes normalisation
Definition: PDF.cxx:632
Float_t fFineFactor
Definition: PDF.h:199
void FindBinInverse(const TH1 *histogram, Int_t &lowerBin, Int_t &higherBin, Double_t &lowerBinValue, Double_t &higherBinValue, Double_t y, Bool_t isMonotonouslyIncreasingFunction=kFALSE) const
find bin from value on ordinate
Definition: PDF.cxx:758
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition: TAxis.cxx:489
static const Int_t fgNbin_PdfHist
Definition: PDF.h:169
Basic string class.
Definition: TString.h:137
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:570
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
TString fKDEtypeString
Definition: PDF.h:191
int nbins[3]
PDF(const TString &name, Bool_t norm=kTRUE)
virtual Int_t GetNbinsX() const
Definition: TH1.h:296
virtual Double_t GetEntries() const
return the current number of entries
Definition: TH1.cxx:4051
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:308
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
Double_t Prob(Double_t chi2, Int_t ndf)
Computation of the probability for a certain Chi-squared (chi2) and number of degrees of freedom (ndf...
Definition: TMath.cxx:619
TObject * Clone(const char *newname=0) const
Make a complete copy of the underlying object.
Definition: TH1.cxx:2565
virtual Double_t GetBinWidth(Int_t bin) const
return bin width for 1D historam Better to use h1.GetXaxis().GetBinWidth(bin)
Definition: TH1.cxx:8492
TMVA::PDF::EInterpolateMethod fInterpolMethod
Definition: PDF.h:180
Tools & gTools()
Definition: Tools.cxx:79
Double_t x[n]
Definition: legend1.C:17
int d
Definition: tornado.py:11
static PDF * ThisPDF(void)
Definition: PDF.cxx:1131
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
Bool_t fCheckHist
Definition: PDF.h:203
virtual TArrayD * GetSumw2()
Definition: TH1.h:312
Definition: PDF.h:71
TString fKDEiterString
Definition: PDF.h:192
static const Bool_t fgManualIntegration
Definition: PDF.h:170
void ValidatePDF(TH1 *original=0) const
comparison of original histogram with reference PDF
Definition: PDF.cxx:575
Double_t GetXmin() const
Definition: TAxis.h:137
Bool_t AddRawLine(void *node, const char *raw)
XML helpers.
Definition: Tools.cxx:1198
void BuildSplinePDF()
build the PDF from the original histograms
Definition: PDF.cxx:314
Int_t GetHistNBins(Int_t evtNum=0)
Definition: PDF.cxx:297
virtual Double_t GetBinCenter(Int_t bin) const
return bin center for 1D historam Better to use h1.GetXaxis().GetBinCenter(bin)
Definition: TH1.cxx:8470
TH1 * fHistOriginal
Definition: PDF.h:184
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1241
Double_t length(const TVector2 &v)
Definition: CsgOps.cxx:347
TH1 * GetOriginalHist() const
Definition: PDF.h:102
1-D histogram with a int per channel (see TH1 documentation)}
Definition: TH1.h:529
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8543
Bool_t HistoHasEquidistantBins(const TH1 &h)
Definition: Tools.cxx:1494
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
const Handle_t kNone
Definition: GuiTypes.h:89
virtual Double_t GetSumOfWeights() const
Return the sum of weights excluding under/overflows.
Definition: TH1.cxx:7354
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
const char * GetContent(void *node)
XML helpers.
Definition: Tools.cxx:1182
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:295
float xmax
Definition: THbookFile.cxx:93
UInt_t GetReadingVersion() const
Definition: PDF.h:128
Double_t GetValInverse(Double_t y, Bool_t isMonotonouslyIncreasingFunction=kFALSE) const
returns value PDF^{-1}(y)
Definition: PDF.cxx:731
Int_t fNsmooth
Definition: PDF.h:175
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
print the tree recursinvely using the << operator
Definition: BinaryTree.cxx:157
std::istream & operator>>(std::istream &istr, BinaryTree &tree)
read the tree from an std::istream
Definition: BinaryTree.cxx:203
Int_t fMinNsmooth
Definition: PDF.h:176
MsgLogger * fLogger
the suffix for options
Definition: PDF.h:207
Int_t GetSize() const
Definition: TArray.h:49
void DeclareOptions()
define the options (their key words) that can be set in the option string know options: PDFInterpol[i...
Definition: PDF.cxx:815
#define TMVA_VERSION(a, b, c)
Definition: Version.h:48
void CheckHist() const
sanity check: compare PDF with original histogram
Definition: PDF.cxx:550
void BuildKDEPDF()
creates high-binned reference histogram to be used instead of the PDF for speed reasons ...
Definition: PDF.cxx:380
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:218
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:698
double Double_t
Definition: RtypesCore.h:55
Int_t fHistDefinedNBins
Definition: PDF.h:189
Double_t GetXmax() const
Definition: TAxis.h:138
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
Double_t y[n]
Definition: legend1.C:17
Double_t ey[n]
Definition: legend1.C:17
The TH1 histogram class.
Definition: TH1.h:80
void AddXMLTo(void *parent)
XML file writing.
Definition: PDF.cxx:914
#define name(a, b)
Definition: linkTestLib0.cpp:5
TH1 * fHist
Definition: PDF.h:183
void Err(int level, const char *msg, int size)
virtual ~PDF()
Definition: PDF.cxx:239
void ProcessOptions()
Definition: PDF.cxx:865
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
1-Dim function class
Definition: TF1.h:149
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition: TH1.cxx:8350
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
static Double_t IGetVal(Double_t *, Double_t *)
static external auxiliary function (integrand)
Definition: PDF.cxx:643
#define NULL
Definition: Rtypes.h:82
KDEKernel::EKernelIter fKDEiter
Definition: PDF.h:197
TString()
TString default ctor.
Definition: TString.cxx:87
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:113
virtual void SetTitle(const char *title)
Change (i.e.
Definition: TH1.cxx:6268
TString fInterpolateString
Definition: PDF.h:194
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
virtual Double_t GetBinError(Int_t bin) const
Return value of error associated to bin number bin.
Definition: TH1.cxx:8395
const Bool_t kTRUE
Definition: Rtypes.h:91
void FillSplineToHist()
creates high-binned reference histogram to be used instead of the PDF for speed reasons ...
Definition: PDF.cxx:520
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
EInterpolateMethod
Definition: PDF.h:78
KDEKernel::EKernelBorder fKDEborder
Definition: PDF.h:198
KDEKernel::EKernelType fKDEtype
Definition: PDF.h:196
const Int_t n
Definition: legend1.C:16
void FillHistToGraph()
Simple conversion.
Definition: PDF.cxx:511
Definition: math.cpp:60
TAxis * GetXaxis()
Definition: TH1.h:319