ROOT  6.06/09
Reference Guide
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
PDF.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Asen Christov, Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : PDF *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
17  * Eckhard von Toerne <evt@physik.uni-bonn.de> - U of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * Freiburg U., Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #include <iomanip>
34 #include <cstdlib>
35 
36 #include "TMath.h"
37 #include "TF1.h"
38 #include "TH1F.h"
39 #include "TVectorD.h"
40 #include "Riostream.h"
41 
42 #include "TMVA/Tools.h"
43 #include "TMVA/PDF.h"
44 #include "TMVA/TSpline1.h"
45 #include "TMVA/TSpline2.h"
46 #include "TMVA/Version.h"
47 
48 // static configuration settings
49 const Int_t TMVA::PDF::fgNbin_PdfHist = 10000;
51 const Double_t TMVA::PDF::fgEpsilon = 1.0e-12;
52 
54 
55 ////////////////////////////////////////////////////////////////////////////////
56 /// default constructor needed for ROOT I/O
57 
58 TMVA::PDF::PDF( const TString& name, Bool_t norm )
59  : Configurable (""),
60  fUseHistogram ( kFALSE ),
61  fPDFName ( name ),
62  fNsmooth ( 0 ),
63  fMinNsmooth (-1 ),
64  fMaxNsmooth (-1 ),
65  fNSmoothHist ( 0 ),
66  fInterpolMethod( PDF::kSpline2 ),
67  fSpline ( 0 ),
68  fPDFHist ( 0 ),
69  fHist ( 0 ),
70  fHistOriginal ( 0 ),
71  fGraph ( 0 ),
72  fIGetVal ( 0 ),
73  fHistAvgEvtPerBin ( 0 ),
74  fHistDefinedNBins ( 0 ),
75  fKDEtypeString ( 0 ),
76  fKDEiterString ( 0 ),
77  fBorderMethodString( 0 ),
78  fInterpolateString ( 0 ),
79  fKDEtype ( KDEKernel::kNone ),
80  fKDEiter ( KDEKernel::kNonadaptiveKDE ),
81  fKDEborder ( KDEKernel::kNoTreatment ),
82  fFineFactor ( 0. ),
83  fReadingVersion( 0 ),
84  fCheckHist ( kFALSE ),
85  fNormalize ( norm ),
86  fSuffix ( "" ),
87  fLogger ( 0 )
88 {
89  fLogger = new MsgLogger(this);
90  GetThisPdfThreadLocal() = this;
91 }
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// constructor of spline based PDF:
95 
97  const TH1 *hist,
99  Int_t minnsmooth,
100  Int_t maxnsmooth,
101  Bool_t checkHist,
102  Bool_t norm) :
103  Configurable (""),
104  fUseHistogram ( kFALSE ),
105  fPDFName ( name ),
106  fMinNsmooth ( minnsmooth ),
107  fMaxNsmooth ( maxnsmooth ),
108  fNSmoothHist ( 0 ),
109  fInterpolMethod( method ),
110  fSpline ( 0 ),
111  fPDFHist ( 0 ),
112  fHist ( 0 ),
113  fHistOriginal ( 0 ),
114  fGraph ( 0 ),
115  fIGetVal ( 0 ),
116  fHistAvgEvtPerBin ( 0 ),
117  fHistDefinedNBins ( 0 ),
118  fKDEtypeString ( 0 ),
119  fKDEiterString ( 0 ),
120  fBorderMethodString( 0 ),
121  fInterpolateString ( 0 ),
122  fKDEtype ( KDEKernel::kNone ),
123  fKDEiter ( KDEKernel::kNonadaptiveKDE ),
124  fKDEborder ( KDEKernel::kNoTreatment ),
125  fFineFactor ( 0. ),
126  fReadingVersion( 0 ),
127  fCheckHist ( checkHist ),
128  fNormalize ( norm ),
129  fSuffix ( "" ),
130  fLogger ( 0 )
131 {
132  fLogger = new MsgLogger(this);
133  BuildPDF( hist );
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// constructor of kernel based PDF:
138 
140  const TH1* hist,
143  KDEKernel::EKernelBorder kborder,
144  Float_t FineFactor,
145  Bool_t norm) :
146  Configurable (""),
147  fUseHistogram ( kFALSE ),
148  fPDFName ( name ),
149  fNsmooth ( 0 ),
150  fMinNsmooth (-1 ),
151  fMaxNsmooth (-1 ),
152  fNSmoothHist ( 0 ),
153  fInterpolMethod( PDF::kKDE ),
154  fSpline ( 0 ),
155  fPDFHist ( 0 ),
156  fHist ( 0 ),
157  fHistOriginal ( 0 ),
158  fGraph ( 0 ),
159  fIGetVal ( 0 ),
160  fHistAvgEvtPerBin ( 0 ),
161  fHistDefinedNBins ( 0 ),
162  fKDEtypeString ( 0 ),
163  fKDEiterString ( 0 ),
164  fBorderMethodString( 0 ),
165  fInterpolateString ( 0 ),
166  fKDEtype ( ktype ),
167  fKDEiter ( kiter ),
168  fKDEborder ( kborder ),
169  fFineFactor ( FineFactor),
170  fReadingVersion( 0 ),
171  fCheckHist ( kFALSE ),
172  fNormalize ( norm ),
173  fSuffix ( "" ),
174  fLogger ( 0 )
175 {
176  fLogger = new MsgLogger(this);
177  BuildPDF( hist );
178 }
179 
180 ////////////////////////////////////////////////////////////////////////////////
181 
183  const TString& options,
184  const TString& suffix,
185  PDF* defaultPDF,
186  Bool_t norm) :
187  Configurable (options),
188  fUseHistogram ( kFALSE ),
189  fPDFName ( name ),
190  fNsmooth ( 0 ),
191  fMinNsmooth ( -1 ),
192  fMaxNsmooth ( -1 ),
193  fNSmoothHist ( 0 ),
194  fInterpolMethod( PDF::kSpline0 ),
195  fSpline ( 0 ),
196  fPDFHist ( 0 ),
197  fHist ( 0 ),
198  fHistOriginal ( 0 ),
199  fGraph ( 0 ),
200  fIGetVal ( 0 ),
201  fHistAvgEvtPerBin ( 50 ),
202  fHistDefinedNBins ( 0 ),
203  fKDEtypeString ( "Gauss" ),
204  fKDEiterString ( "Nonadaptive" ),
205  fBorderMethodString( "None" ),
206  fInterpolateString ( "Spline2" ),
207  fKDEtype ( KDEKernel::kNone ),
208  fKDEiter ( KDEKernel::kNonadaptiveKDE ),
209  fKDEborder ( KDEKernel::kNoTreatment ),
210  fFineFactor ( 1. ),
211  fReadingVersion( 0 ),
212  fCheckHist ( kFALSE ),
213  fNormalize ( norm ),
214  fSuffix ( suffix ),
215  fLogger ( 0 )
216 {
217  fLogger = new MsgLogger(this);
218  if (defaultPDF != 0) {
219  fNsmooth = defaultPDF->fNsmooth;
220  fMinNsmooth = defaultPDF->fMinNsmooth;
221  fMaxNsmooth = defaultPDF->fMaxNsmooth;
222  fHistAvgEvtPerBin = defaultPDF->fHistAvgEvtPerBin;
223  fHistAvgEvtPerBin = defaultPDF->fHistAvgEvtPerBin;
225  fKDEtypeString = defaultPDF->fKDEtypeString;
226  fKDEiterString = defaultPDF->fKDEiterString;
227  fFineFactor = defaultPDF->fFineFactor;
229  fCheckHist = defaultPDF->fCheckHist;
230  fHistDefinedNBins = defaultPDF->fHistDefinedNBins;
231  }
232 }
233 
234 //___________________fNSmoothHist____________________________________________________
236 {
237  // destructor
238  if (fSpline != NULL) delete fSpline;
239  if (fHist != NULL) delete fHist;
240  if (fPDFHist != NULL) delete fPDFHist;
241  if (fHistOriginal != NULL) delete fHistOriginal;
242  if (fIGetVal != NULL) delete fIGetVal;
243  if (fGraph != NULL) delete fGraph;
244  delete fLogger;
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 
249 void TMVA::PDF::BuildPDF( const TH1* hist )
250 {
251  GetThisPdfThreadLocal() = this;
252  // sanity check
253  if (hist == NULL) Log() << kFATAL << "Called without valid histogram pointer!" << Endl;
254 
255  // histogram should be non empty
256  if (hist->GetEntries() <= 0)
257  Log() << kFATAL << "Number of entries <= 0 (" << hist->GetEntries() << " in histogram: " << hist->GetTitle() << Endl;
258 
259  if (fInterpolMethod == PDF::kKDE) {
260  Log() << "Create "
261  << ((fKDEiter == KDEKernel::kNonadaptiveKDE) ? "nonadaptive " :
262  (fKDEiter == KDEKernel::kAdaptiveKDE) ? "adaptive " : "??? ")
263  << ((fKDEtype == KDEKernel::kGauss) ? "Gauss " : "??? ")
264  << "type KDE kernel for histogram: \"" << hist->GetName() << "\""
265  << Endl;
266  }
267  else {
268  // another sanity check (nsmooth<0 indicated build with KDE)
269  if (fMinNsmooth<0)
270  Log() << kFATAL << "PDF construction called with minnsmooth<0" << Endl;
271  else if (fMaxNsmooth<=0)
272  fMaxNsmooth = fMinNsmooth;
273  else if (fMaxNsmooth<fMinNsmooth)
274  Log() << kFATAL << "PDF construction called with maxnsmooth<minnsmooth" << Endl;
275  }
276 
277  fHistOriginal = (TH1F*)hist->Clone( TString(hist->GetName()) + "_original" );
278  fHist = (TH1F*)hist->Clone( TString(hist->GetName()) + "_smoothed" );
279  fHistOriginal->SetTitle( fHistOriginal->GetName() ); // reset to new title as well
280  fHist ->SetTitle( fHist->GetName() );
281 
282  // do not store in current target file
283  fHistOriginal->SetDirectory(0);
284  fHist ->SetDirectory(0);
285  fUseHistogram = kFALSE;
286 
287  if (fInterpolMethod == PDF::kKDE) BuildKDEPDF();
288  else BuildSplinePDF();
289 }
290 
291 ////////////////////////////////////////////////////////////////////////////////
292 
294 {
295  Int_t ResolutionFactor = (fInterpolMethod == PDF::kKDE) ? 5 : 1;
296  if (evtNum == 0 && fHistDefinedNBins == 0)
297  Log() << kFATAL << "No number of bins set for PDF" << Endl;
298  else if (fHistDefinedNBins > 0)
299  return fHistDefinedNBins * ResolutionFactor;
300  else if ( evtNum > 0 && fHistAvgEvtPerBin > 0 )
301  return evtNum / fHistAvgEvtPerBin * ResolutionFactor;
302  else
303  Log() << kFATAL << "No number of bins or average event per bin set for PDF" << fHistAvgEvtPerBin << Endl;
304  return 0;
305 }
306 
307 ////////////////////////////////////////////////////////////////////////////////
308 /// build the PDF from the original histograms
309 
311 {
312  // (not useful for discrete distributions, or if no splines are requested)
313  if (fInterpolMethod != PDF::kSpline0 && fCheckHist) CheckHist();
314  // use ROOT TH1 smooth method
315  fNSmoothHist = 0;
316  if (fMaxNsmooth > 0 && fMinNsmooth >=0 ) SmoothHistogram();
317 
318  // fill histogramm to graph
319  FillHistToGraph();
320 
321  // fGraph->Print();
322  switch (fInterpolMethod) {
323 
324  case kSpline0:
325  // use original histogram as reference
326  // this is useful, eg, for discrete variables
327  fUseHistogram = kTRUE;
328  break;
329 
330  case kSpline1:
331  fSpline = new TMVA::TSpline1( "spline1", new TGraph(*fGraph) );
332  break;
333 
334  case kSpline2:
335  fSpline = new TMVA::TSpline2( "spline2", new TGraph(*fGraph) );
336  break;
337 
338  case kSpline3:
339  fSpline = new TSpline3( "spline3", new TGraph(*fGraph) );
340  break;
341 
342  case kSpline5:
343  fSpline = new TSpline5( "spline5", new TGraph(*fGraph) );
344  break;
345 
346  default:
347  Log() << kWARNING << "No valid interpolation method given! Use Spline2" << Endl;
348  fSpline = new TMVA::TSpline2( "spline2", new TGraph(*fGraph) );
349  Log() << kFATAL << " Well.. .thinking about it, I better quit so you notice you are forced to fix the mistake " << Endl;
350  std::exit(1);
351  }
352  // fill into histogram
353  FillSplineToHist();
354 
355  if (!UseHistogram()) {
356  fSpline->SetTitle( (TString)fHist->GetTitle() + fSpline->GetTitle() );
357  fSpline->SetName ( (TString)fHist->GetName() + fSpline->GetName() );
358  }
359 
360 
361  // sanity check
362  Double_t integral = GetIntegral();
363  if (integral < 0) Log() << kFATAL << "Integral: " << integral << " <= 0" << Endl;
364 
365  // normalize
366  if (fNormalize)
367  if (integral>0) fPDFHist->Scale( 1.0/integral );
368 
369  fPDFHist->SetDirectory(0);
370 }
371 
372 ////////////////////////////////////////////////////////////////////////////////
373 /// creates high-binned reference histogram to be used instead of the
374 /// PDF for speed reasons
375 
377 {
378  fPDFHist = new TH1F( "", "", fgNbin_PdfHist, GetXmin(), GetXmax() );
379  fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_KDE" );
380  fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_KDE" );
381 
382  // create the kernel object
383  Float_t histoLowEdge = fHist->GetBinLowEdge(1);
384  Float_t histoUpperEdge = fPDFHist->GetBinLowEdge(fPDFHist->GetNbinsX()) + fPDFHist->GetBinWidth(fPDFHist->GetNbinsX());
385  KDEKernel *kern = new TMVA::KDEKernel( fKDEiter,
386  fHist, histoLowEdge, histoUpperEdge,
387  fKDEborder, fFineFactor );
388  kern->SetKernelType(fKDEtype);
389 
390  for (Int_t i=1;i<fHist->GetNbinsX();i++) {
391  // loop over the bins of the original histo
392  for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
393  // loop over the bins of the new histo and fill it
394  fPDFHist->AddBinContent(j,fHist->GetBinContent(i)*
395  kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
396  fPDFHist->GetBinLowEdge(j+1),
397  fHist->GetBinCenter(i),
398  i)
399  );
400  }
401  if (fKDEborder == 3) { // mirror the saples and fill them again
402  // in order to save time do the mirroring only for the first (the lowwer) 1/5 of the histo to the left;
403  // and the last (the higher) 1/5 of the histo to the right.
404  // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
405  if (i < fHist->GetNbinsX()/5 ) { // the first (the lowwer) 1/5 of the histo
406  for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
407  // loop over the bins of the PDF histo and fill it
408  fPDFHist->AddBinContent(j,fHist->GetBinContent(i)*
409  kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
410  fPDFHist->GetBinLowEdge(j+1),
411  2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
412  i)
413  );
414  }
415  }
416  if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
417  for (Int_t j=1;j<fPDFHist->GetNbinsX();j++) {
418  // loop over the bins of the PDF histo and fill it
419  fPDFHist->AddBinContent( j,fHist->GetBinContent(i)*
420  kern->GetBinKernelIntegral(fPDFHist->GetBinLowEdge(j),
421  fPDFHist->GetBinLowEdge(j+1),
422  2*histoUpperEdge-fHist->GetBinCenter(i), i) );
423  }
424  }
425  }
426  }
427 
428  fPDFHist->SetEntries(fHist->GetEntries());
429 
430  delete kern;
431 
432  // sanity check
433  Double_t integral = GetIntegral();
434  if (integral < 0) Log() << kFATAL << "Integral: " << integral << " <= 0" << Endl;
435 
436  // normalize
437  if (fNormalize)
438  if (integral>0) fPDFHist->Scale( 1.0/integral );
439  fPDFHist->SetDirectory(0);
440 }
441 
442 ////////////////////////////////////////////////////////////////////////////////
443 
445 {
446  if(fHist->GetNbinsX()==1) return;
447  if (fMaxNsmooth == fMinNsmooth) {
448  fHist->Smooth( fMinNsmooth );
449  return;
450  }
451 
452  //calculating Mean, RMS of the relative errors and using them to set
453  //the bounderies of the liniar transformation
454  Float_t Err=0, ErrAvg=0, ErrRMS=0 ; Int_t num=0, smooth;
455  for (Int_t bin=0; bin<fHist->GetNbinsX(); bin++) {
456  if (fHist->GetBinContent(bin+1) <= fHist->GetBinError(bin+1)) continue;
457  Err = fHist->GetBinError(bin+1) / fHist->GetBinContent(bin+1);
458  ErrAvg += Err; ErrRMS += Err*Err; num++;
459  }
460  ErrAvg /= num;
461  ErrRMS = TMath::Sqrt(ErrRMS/num-ErrAvg*ErrAvg) ;
462 
463  //liniarly convent the histogram to a vector of smoothnum
464  Float_t MaxErr=ErrAvg+ErrRMS, MinErr=ErrAvg-ErrRMS;
465  fNSmoothHist = new TH1I("","",fHist->GetNbinsX(),0,fHist->GetNbinsX());
466  fNSmoothHist->SetTitle( (TString)fHist->GetTitle() + "_Nsmooth" );
467  fNSmoothHist->SetName ( (TString)fHist->GetName() + "_Nsmooth" );
468  for (Int_t bin=0; bin<fHist->GetNbinsX(); bin++) {
469  if (fHist->GetBinContent(bin+1) <= fHist->GetBinError(bin+1))
470  smooth=fMaxNsmooth;
471  else {
472  Err = fHist->GetBinError(bin+1) / fHist->GetBinContent(bin+1);
473  smooth=(Int_t)((Err-MinErr) /(MaxErr-MinErr) * (fMaxNsmooth-fMinNsmooth)) + fMinNsmooth;
474  }
475  smooth = TMath::Max(fMinNsmooth,TMath::Min(fMaxNsmooth,smooth));
476  fNSmoothHist->SetBinContent(bin+1,smooth);
477  }
478 
479  //find regions of constant smoothnum, starting from the highest amount of
480  //smoothing. So the last iteration all the histogram will be smoothed as a whole
481  for (Int_t n=fMaxNsmooth; n>=0; n--) {
482  //all the histogram has to be smoothed at least fMinNsmooth
483  if (n <= fMinNsmooth) { fHist->Smooth(); continue; }
484  Int_t MinBin=-1,MaxBin =-1;
485  for (Int_t bin=0; bin < fHist->GetNbinsX(); bin++) {
486  if (fNSmoothHist->GetBinContent(bin+1) >= n) {
487  if (MinBin==-1) MinBin = bin;
488  else MaxBin=bin;
489  }
490  else if (MaxBin >= 0) {
491 #if ROOT_VERSION_CODE > ROOT_VERSION(5,19,2)
492  fHist->Smooth(1,"R");
493 #else
494  fHist->Smooth(1,MinBin+1,MaxBin+1);
495 #endif
496  MinBin=MaxBin=-1;
497  }
498  else // can't smooth a single bin
499  MinBin = -1;
500  }
501  }
502 }
503 
504 ////////////////////////////////////////////////////////////////////////////////
505 /// Simple conversion
506 
508 {
509  fGraph=new TGraph(fHist);
510 }
511 
512 ////////////////////////////////////////////////////////////////////////////////
513 /// creates high-binned reference histogram to be used instead of the
514 /// PDF for speed reasons
515 
517 {
518  if (UseHistogram()) {
519  // no spline given, use the original histogram
520  fPDFHist = (TH1*)fHist->Clone();
521  fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_spline0" );
522  fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_spline0" );
523  }
524  else {
525  // create new reference histogram
526  fPDFHist = new TH1F( "", "", fgNbin_PdfHist, GetXmin(), GetXmax() );
527  fPDFHist->SetTitle( (TString)fHist->GetTitle() + "_hist from_" + fSpline->GetTitle() );
528  fPDFHist->SetName ( (TString)fHist->GetName() + "_hist_from_" + fSpline->GetTitle() );
529 
530  for (Int_t bin=1; bin <= fgNbin_PdfHist; bin++) {
531  Double_t x = fPDFHist->GetBinCenter( bin );
532  Double_t y = fSpline->Eval( x );
533  // sanity correction: in cases where strong slopes exist, accidentally, the
534  // splines can go to zero; in this case we set the corresponding bin content
535  // equal to the bin content of the original histogram
536  if (y <= fgEpsilon) y = fHist->GetBinContent( fHist->FindBin( x ) );
537  fPDFHist->SetBinContent( bin, TMath::Max(y, fgEpsilon) );
538  }
539  }
540  fPDFHist->SetDirectory(0);
541 }
542 
543 ////////////////////////////////////////////////////////////////////////////////
544 /// sanity check: compare PDF with original histogram
545 
547 {
548  if (fHist == NULL) {
549  Log() << kFATAL << "<CheckHist> Called without valid histogram pointer!" << Endl;
550  }
551 
552  Int_t nbins = fHist->GetNbinsX();
553 
554  Int_t emptyBins=0;
555  // count number of empty bins
556  for (Int_t bin=1; bin<=nbins; bin++)
557  if (fHist->GetBinContent(bin) == 0) emptyBins++;
558 
559  if (((Float_t)emptyBins/(Float_t)nbins) > 0.5) {
560  Log() << kWARNING << "More than 50% (" << (((Float_t)emptyBins/(Float_t)nbins)*100)
561  <<"%) of the bins in hist '"
562  << fHist->GetName() << "' are empty!" << Endl;
563  Log() << kWARNING << "X_min=" << GetXmin()
564  << " mean=" << fHist->GetMean() << " X_max= " << GetXmax() << Endl;
565  }
566 }
567 
568 ////////////////////////////////////////////////////////////////////////////////
569 /// comparison of original histogram with reference PDF
570 
571 void TMVA::PDF::ValidatePDF( TH1* originalHist ) const
572 {
573  // if no histogram is given, use the original one (the one the PDF was made from)
574  if (!originalHist) originalHist = fHistOriginal;
575 
576  Int_t nbins = originalHist->GetNbinsX();
577 
578  // treat errors properly
579  if (originalHist->GetSumw2()->GetSize() == 0) originalHist->Sumw2();
580 
581  // ---- first validation: simple(st) possible chi2 test
582  // count number of empty bins
583  Double_t chi2 = 0;
584  Int_t ndof = 0;
585  Int_t nc1 = 0; // deviation counters
586  Int_t nc2 = 0; // deviation counters
587  Int_t nc3 = 0; // deviation counters
588  Int_t nc6 = 0; // deviation counters
589  for (Int_t bin=1; bin<=nbins; bin++) {
590  Double_t x = originalHist->GetBinCenter( bin );
591  Double_t y = originalHist->GetBinContent( bin );
592  Double_t ey = originalHist->GetBinError( bin );
593 
594  Int_t binPdfHist = fPDFHist->FindBin( x );
595  if (binPdfHist<0) continue; // happens only if hist-dim>3
596 
597  Double_t yref = GetVal( x );
598  Double_t rref = ( originalHist->GetSumOfWeights()/fPDFHist->GetSumOfWeights() *
599  originalHist->GetBinWidth( bin )/fPDFHist->GetBinWidth( binPdfHist ) );
600 
601  if (y > 0) {
602  ndof++;
603  Double_t d = TMath::Abs( (y - yref*rref)/ey );
604  // std::cout << "bin: " << bin << " val: " << x << " data(err): " << y << "(" << ey << ") pdf: "
605  // << yref << " dev(chi2): " << d << "(" << chi2 << ") rref: " << rref << std::endl;
606  chi2 += d*d;
607  if (d > 1) { nc1++; if (d > 2) { nc2++; if (d > 3) { nc3++; if (d > 6) nc6++; } } }
608  }
609  }
610 
611  Log() << "Validation result for PDF \"" << originalHist->GetTitle() << "\"" << ": " << Endl;
612  Log() << Form( " chi2/ndof(!=0) = %.1f/%i = %.2f (Prob = %.2f)",
613  chi2, ndof, chi2/ndof, TMath::Prob( chi2, ndof ) ) << Endl;
614  if ((1.0 - TMath::Prob( chi2, ndof )) > 0.9999994) {
615  Log() << kWARNING << "Comparison of the original histogram \"" << originalHist->GetTitle() << "\"" << Endl;
616  Log() << kWARNING << "with the corresponding PDF gave a chi2/ndof of " << chi2/ndof << "," << Endl;
617  Log() << kWARNING << "which corresponds to a deviation of more than 5 sigma! Please check!" << Endl;
618  }
619  Log() << Form( " #bins-found(#expected-bins) deviating > [1,2,3,6] sigmas: " \
620  "[%i(%i),%i(%i),%i(%i),%i(%i)]",
621  nc1, Int_t(TMath::Prob(1.0,1)*ndof), nc2, Int_t(TMath::Prob(4.0,1)*ndof),
622  nc3, Int_t(TMath::Prob(9.0,1)*ndof), nc6, Int_t(TMath::Prob(36.0,1)*ndof) ) << Endl;
623 }
624 
625 ////////////////////////////////////////////////////////////////////////////////
626 /// computes normalisation
627 
629 {
630  Double_t integral = fPDFHist->GetSumOfWeights();
631  integral *= GetPdfHistBinWidth();
632 
633  return integral;
634 }
635 
636 ////////////////////////////////////////////////////////////////////////////////
637 /// static external auxiliary function (integrand)
638 
640 {
641  return ThisPDF()->GetVal( x[0] );
642 }
643 
644 ////////////////////////////////////////////////////////////////////////////////
645 /// computes PDF integral within given ranges
646 
648 {
649  Double_t integral = 0;
650 
651  if (fgManualIntegration) {
652 
653  // compute integral by summing over bins
654  Int_t imin = fPDFHist->FindBin(xmin);
655  Int_t imax = fPDFHist->FindBin(xmax);
656  if (imin < 1) imin = 1;
657  if (imax > fPDFHist->GetNbinsX()) imax = fPDFHist->GetNbinsX();
658 
659  for (Int_t bini = imin; bini <= imax; bini++) {
660  Float_t dx = fPDFHist->GetBinWidth(bini);
661  // correct for bin fractions in first and last bin
662  if (bini == imin) dx = fPDFHist->GetBinLowEdge(bini+1) - xmin;
663  else if (bini == imax) dx = xmax - fPDFHist->GetBinLowEdge(bini);
664  if (dx < 0 && dx > -1.0e-8) dx = 0; // protect against float->double numerical effects
665  if (dx<0) {
666  Log() << kWARNING
667  << "dx = " << dx << std::endl
668  << "bini = " << bini << std::endl
669  << "xmin = " << xmin << std::endl
670  << "xmax = " << xmax << std::endl
671  << "imin = " << imin << std::endl
672  << "imax = " << imax << std::endl
673  << "low edge of imin" << fPDFHist->GetBinLowEdge(imin) << std::endl
674  << "low edge of imin+1" << fPDFHist->GetBinLowEdge(imin+1) << Endl;
675  Log() << kFATAL << "<GetIntegral> dx = " << dx << " < 0" << Endl;
676  }
677  integral += fPDFHist->GetBinContent(bini)*dx;
678  }
679 
680  }
681  else {
682 
683  // compute via Gaussian quadrature (C++ version of CERNLIB function DGAUSS)
684  if (fIGetVal == 0) fIGetVal = new TF1( "IGetVal", PDF::IGetVal, GetXmin(), GetXmax(), 0 );
685  integral = fIGetVal->Integral( xmin, xmax );
686  }
687 
688  return integral;
689 }
690 
691 ////////////////////////////////////////////////////////////////////////////////
692 /// returns value PDF(x)
693 
695 {
696  // check which is filled
697  Int_t bin = fPDFHist->FindBin(x);
698  bin = TMath::Max(bin,1);
699  bin = TMath::Min(bin,fPDFHist->GetNbinsX());
700 
701  Double_t retval = 0;
702 
703  if (UseHistogram()) {
704  // use directly histogram bins (this is for discrete PDFs)
705  retval = fPDFHist->GetBinContent( bin );
706  }
707  else {
708  // linear interpolation
709  Int_t nextbin = bin;
710  if ((x > fPDFHist->GetBinCenter(bin) && bin != fPDFHist->GetNbinsX()) || bin == 1)
711  nextbin++;
712  else
713  nextbin--;
714 
715  // linear interpolation between adjacent bins
716  Double_t dx = fPDFHist->GetBinCenter( bin ) - fPDFHist->GetBinCenter( nextbin );
717  Double_t dy = fPDFHist->GetBinContent( bin ) - fPDFHist->GetBinContent( nextbin );
718  retval = fPDFHist->GetBinContent( bin ) + (x - fPDFHist->GetBinCenter( bin ))*dy/dx;
719  }
720 
721  return TMath::Max( retval, fgEpsilon );
722 }
723 
724 ////////////////////////////////////////////////////////////////////////////////
725 /// returns value PDF^{-1}(y)
726 
727 Double_t TMVA::PDF::GetValInverse( Double_t y, Bool_t isMonotonouslyIncreasingFunction ) const
728 {
729  Int_t lowerBin=0, higherBin=0;
730  Double_t lowerBinValue=0, higherBinValue=0;
731  FindBinInverse(fPDFHist,lowerBin,higherBin,lowerBinValue,higherBinValue,y,isMonotonouslyIncreasingFunction);
732 
733  Double_t xValueLowerBin =fPDFHist->GetBinCenter (lowerBin);
734  Double_t xValueHigherBin=fPDFHist->GetBinCenter (higherBin);
735 
736  Double_t length =(higherBinValue-lowerBinValue);
737  Double_t fraction=lowerBinValue;
738  if (length>1.0e-10)
739  fraction=(y-lowerBinValue)/length;
740 
741  Double_t lengthX =xValueHigherBin-xValueLowerBin;
742  Double_t x =xValueLowerBin+lengthX*fraction;
743 
744  // comparison for test purposes
745 // std::cout << "lb " << lowerBin << " hb " << higherBin << " lbv " << lowerBinValue << " hbv " << higherBinValue << " frac " << fraction << std::endl;
746 // std::cout << "y " << y << " inv x " << x << " straight y " << GetVal(x) << std::endl;
747 
748  return x;
749 }
750 
751 ////////////////////////////////////////////////////////////////////////////////
752 /// find bin from value on ordinate
753 
754 void TMVA::PDF::FindBinInverse( const TH1* histogram, Int_t& lowerBin, Int_t& higherBin, Double_t& lowerBinValue, Double_t& higherBinValue,
755  Double_t y, Bool_t isMonotonouslyIncreasingFunction ) const
756 {
757  if (isMonotonouslyIncreasingFunction) {
758  higherBin=histogram->GetNbinsX();
759  lowerBin =0;
760 
761  Int_t bin=higherBin/2;
762 
763  while (bin>lowerBin && bin<higherBin) {
764  Double_t binContent=histogram->GetBinContent(bin);
765 
766  if (y<binContent) {
767  higherBin =bin;
768  higherBinValue=binContent;
769  }
770  else if (y>=binContent){
771  lowerBin =bin;
772  lowerBinValue =binContent;
773  }
774  bin=lowerBin+(higherBin-lowerBin)/2;
775  }
776  return;
777  }
778  // search sequentially
779  for (Int_t bin=0, binEnd=histogram->GetNbinsX(); bin<binEnd; ++bin) {
780  Double_t binContent=histogram->GetBinContent(bin);
781  if (binContent<y) {
782  lowerBin =bin;
783  higherBin=bin;
784  lowerBinValue =binContent;
785  higherBinValue=binContent;
786  }
787  else {
788  higherBin=bin;
789  higherBinValue=binContent;
790  break;
791  }
792  }
793 }
794 
795 
796 ////////////////////////////////////////////////////////////////////////////////
797 /// define the options (their key words) that can be set in the option string
798 /// know options:
799 /// PDFInterpol[ivar] <string> Spline0, Spline1, Spline2 <default>, Spline3, Spline5, KDE used to interpolate reference histograms
800 /// if no variable index is given, it is valid for ALL the variables
801 ///
802 /// NSmooth <int> how often the input histos are smoothed
803 /// MinNSmooth <int> min number of smoothing iterations, for bins with most data
804 /// MaxNSmooth <int> max number of smoothing iterations, for bins with least data
805 /// NAvEvtPerBin <int> minimum average number of events per PDF bin
806 /// TransformOutput <bool> transform (often strongly peaked) likelihood output through sigmoid inversion
807 /// fKDEtype <KernelType> type of the Kernel to use (1 is Gaussian)
808 /// fKDEiter <KerneIter> number of iterations (1 --> "static KDE", 2 --> "adaptive KDE")
809 /// fBorderMethod <KernelBorder> the method to take care about "border" effects (1=no treatment , 2=kernel renormalization, 3=sample mirroring)
810 
812 {
813  DeclareOptionRef( fNsmooth, Form("NSmooth%s",fSuffix.Data()),
814  "Number of smoothing iterations for the input histograms" );
815  DeclareOptionRef( fMinNsmooth, Form("MinNSmooth%s",fSuffix.Data()),
816  "Min number of smoothing iterations, for bins with most data" );
817 
818  DeclareOptionRef( fMaxNsmooth, Form("MaxNSmooth%s",fSuffix.Data()),
819  "Max number of smoothing iterations, for bins with least data" );
820 
821  DeclareOptionRef( fHistAvgEvtPerBin, Form("NAvEvtPerBin%s",fSuffix.Data()),
822  "Average number of events per PDF bin" );
823 
824  DeclareOptionRef( fHistDefinedNBins, Form("Nbins%s",fSuffix.Data()),
825  "Defined number of bins for the histogram from which the PDF is created" );
826 
827  DeclareOptionRef( fCheckHist, Form("CheckHist%s",fSuffix.Data()),
828  "Whether or not to check the source histogram of the PDF" );
829 
830  DeclareOptionRef( fInterpolateString, Form("PDFInterpol%s",fSuffix.Data()),
831  "Interpolation method for reference histograms (e.g. Spline2 or KDE)" );
832  AddPreDefVal(TString("Spline0")); // take histogram
833  AddPreDefVal(TString("Spline1")); // linear interpolation between bins
834  AddPreDefVal(TString("Spline2")); // quadratic interpolation
835  AddPreDefVal(TString("Spline3")); // cubic interpolation
836  AddPreDefVal(TString("Spline5")); // fifth order polynome interpolation
837  AddPreDefVal(TString("KDE")); // use kernel density estimator
838 
839  DeclareOptionRef( fKDEtypeString, Form("KDEtype%s",fSuffix.Data()), "KDE kernel type (1=Gauss)" );
840  AddPreDefVal(TString("Gauss"));
841 
842  DeclareOptionRef( fKDEiterString, Form("KDEiter%s",fSuffix.Data()), "Number of iterations (1=non-adaptive, 2=adaptive)" );
843  AddPreDefVal(TString("Nonadaptive"));
844  AddPreDefVal(TString("Adaptive"));
845 
846  DeclareOptionRef( fFineFactor , Form("KDEFineFactor%s",fSuffix.Data()),
847  "Fine tuning factor for Adaptive KDE: Factor to multyply the width of the kernel");
848 
849  DeclareOptionRef( fBorderMethodString, Form("KDEborder%s",fSuffix.Data()),
850  "Border effects treatment (1=no treatment , 2=kernel renormalization, 3=sample mirroring)" );
851  AddPreDefVal(TString("None"));
852  AddPreDefVal(TString("Renorm"));
853  AddPreDefVal(TString("Mirror"));
854 
855  SetConfigName( GetName() );
856  SetConfigDescription( "Configuration options for the PDF class" );
857 }
858 
859 ////////////////////////////////////////////////////////////////////////////////
860 
862 {
863  if (fNsmooth < 0) fNsmooth = 0; // set back to default
864 
865  if (fMaxNsmooth < 0 || fMinNsmooth < 0) { // use "Nsmooth" variable
866  fMinNsmooth = fMaxNsmooth = fNsmooth;
867  }
868 
869  if (fMaxNsmooth < fMinNsmooth && fMinNsmooth >= 0) { // sanity check
870  Log() << kFATAL << "ERROR: MaxNsmooth = "
871  << fMaxNsmooth << " < MinNsmooth = " << fMinNsmooth << Endl;
872  }
873 
874  if (fMaxNsmooth < 0 || fMinNsmooth < 0) {
875  Log() << kFATAL << "ERROR: MaxNsmooth = "
876  << fMaxNsmooth << " or MinNsmooth = " << fMinNsmooth << " smaller than zero" << Endl;
877  }
878 
879  //processing the options
880  if (fInterpolateString == "Spline0") fInterpolMethod = TMVA::PDF::kSpline0;
881  else if (fInterpolateString == "Spline1") fInterpolMethod = TMVA::PDF::kSpline1;
882  else if (fInterpolateString == "Spline2") fInterpolMethod = TMVA::PDF::kSpline2;
883  else if (fInterpolateString == "Spline3") fInterpolMethod = TMVA::PDF::kSpline3;
884  else if (fInterpolateString == "Spline5") fInterpolMethod = TMVA::PDF::kSpline5;
885  else if (fInterpolateString == "KDE" ) fInterpolMethod = TMVA::PDF::kKDE;
886  else if (fInterpolateString != "" ) {
887  Log() << kFATAL << "unknown setting for option 'InterpolateMethod': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
888  }
889 
890  // init KDE options
891  if (fKDEtypeString == "Gauss" ) fKDEtype = KDEKernel::kGauss;
892  else if (fKDEtypeString != "" )
893  Log() << kFATAL << "unknown setting for option 'KDEtype': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
894  if (fKDEiterString == "Nonadaptive") fKDEiter = KDEKernel::kNonadaptiveKDE;
895  else if (fKDEiterString == "Adaptive" ) fKDEiter = KDEKernel::kAdaptiveKDE;
896  else if (fKDEiterString != "" )// nothing more known
897  Log() << kFATAL << "unknown setting for option 'KDEiter': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
898 
899  if ( fBorderMethodString == "None" ) fKDEborder= KDEKernel::kNoTreatment;
900  else if ( fBorderMethodString == "Renorm" ) fKDEborder= KDEKernel::kKernelRenorm;
901  else if ( fBorderMethodString == "Mirror" ) fKDEborder= KDEKernel::kSampleMirror;
902  else if ( fKDEiterString != "" ) { // nothing more known
903  Log() << kFATAL << "unknown setting for option 'KDEBorder': " << fKDEtypeString << ((fSuffix=="")?"":Form(" for pdf with suffix %s",fSuffix.Data())) << Endl;
904  }
905 }
906 
907 ////////////////////////////////////////////////////////////////////////////////
908 /// XML file writing
909 
910 void TMVA::PDF::AddXMLTo( void* parent )
911 {
912  void* pdfxml = gTools().AddChild(parent, "PDF");
913  gTools().AddAttr(pdfxml, "Name", fPDFName );
914  gTools().AddAttr(pdfxml, "MinNSmooth", fMinNsmooth );
915  gTools().AddAttr(pdfxml, "MaxNSmooth", fMaxNsmooth );
916  gTools().AddAttr(pdfxml, "InterpolMethod", fInterpolMethod );
917  gTools().AddAttr(pdfxml, "KDE_type", fKDEtype );
918  gTools().AddAttr(pdfxml, "KDE_iter", fKDEiter );
919  gTools().AddAttr(pdfxml, "KDE_border", fKDEborder );
920  gTools().AddAttr(pdfxml, "KDE_finefactor", fFineFactor );
921  void* pdfhist = gTools().AddChild(pdfxml,"Histogram" );
922  TH1* histToWrite = GetOriginalHist();
923  Bool_t hasEquidistantBinning = gTools().HistoHasEquidistantBins(*histToWrite);
924  gTools().AddAttr(pdfhist, "Name", histToWrite->GetName() );
925  gTools().AddAttr(pdfhist, "NBins", histToWrite->GetNbinsX() );
926  gTools().AddAttr(pdfhist, "XMin", histToWrite->GetXaxis()->GetXmin() );
927  gTools().AddAttr(pdfhist, "XMax", histToWrite->GetXaxis()->GetXmax() );
928  gTools().AddAttr(pdfhist, "HasEquidistantBins", hasEquidistantBinning );
929 
930  TString bincontent("");
931  for (Int_t i=0; i<histToWrite->GetNbinsX(); i++) {
932  bincontent += gTools().StringFromDouble(histToWrite->GetBinContent(i+1));
933  bincontent += " ";
934  }
935  gTools().AddRawLine(pdfhist, bincontent );
936 
937  if (!hasEquidistantBinning) {
938  void* pdfhistbins = gTools().AddChild(pdfxml,"HistogramBinning" );
939  gTools().AddAttr(pdfhistbins, "NBins", histToWrite->GetNbinsX() );
940  TString binns("");
941  for (Int_t i=1; i<=histToWrite->GetNbinsX()+1; i++) {
942  binns += gTools().StringFromDouble(histToWrite->GetXaxis()->GetBinLowEdge(i));
943  binns += " ";
944  }
945  gTools().AddRawLine(pdfhistbins, binns );
946  }
947 }
948 
949 ////////////////////////////////////////////////////////////////////////////////
950 /// XML file reading
951 
952 void TMVA::PDF::ReadXML( void* pdfnode )
953 {
954  UInt_t enumint;
955 
956  gTools().ReadAttr(pdfnode, "MinNSmooth", fMinNsmooth );
957  gTools().ReadAttr(pdfnode, "MaxNSmooth", fMaxNsmooth );
958  gTools().ReadAttr(pdfnode, "InterpolMethod", enumint ); fInterpolMethod = EInterpolateMethod(enumint);
959  gTools().ReadAttr(pdfnode, "KDE_type", enumint ); fKDEtype = KDEKernel::EKernelType(enumint);
960  gTools().ReadAttr(pdfnode, "KDE_iter", enumint ); fKDEiter = KDEKernel::EKernelIter(enumint);
961  gTools().ReadAttr(pdfnode, "KDE_border", enumint ); fKDEborder = KDEKernel::EKernelBorder(enumint);
962  gTools().ReadAttr(pdfnode, "KDE_finefactor", fFineFactor );
963 
964  TString hname;
965  UInt_t nbins;
966  Double_t xmin, xmax;
967  Bool_t hasEquidistantBinning;
968 
969  void* histch = gTools().GetChild(pdfnode);
970  gTools().ReadAttr( histch, "Name", hname );
971  gTools().ReadAttr( histch, "NBins", nbins );
972  gTools().ReadAttr( histch, "XMin", xmin );
973  gTools().ReadAttr( histch, "XMax", xmax );
974  gTools().ReadAttr( histch, "HasEquidistantBins", hasEquidistantBinning );
975 
976  // recreate the original hist
977  TH1* newhist = 0;
978  if (hasEquidistantBinning) {
979  newhist = new TH1F( hname, hname, nbins, xmin, xmax );
980  newhist->SetDirectory(0);
981  const char* content = gTools().GetContent(histch);
982  std::stringstream s(content);
983  Double_t val;
984  for (UInt_t i=0; i<nbins; i++) {
985  s >> val;
986  newhist->SetBinContent(i+1,val);
987  }
988  }
989  else {
990  const char* content = gTools().GetContent(histch);
991  std::stringstream s(content);
992  Double_t val;
993  void* binch = gTools().GetNextChild(histch);
994  UInt_t nbinning;
995  gTools().ReadAttr( binch, "NBins", nbinning );
996  TVectorD binns(nbinning+1);
997  if (nbinning != nbins) {
998  Log() << kFATAL << "Number of bins in content and binning array differs"<<Endl;
999  }
1000  const char* binString = gTools().GetContent(binch);
1001  std::stringstream sb(binString);
1002  for (UInt_t i=0; i<=nbins; i++) sb >> binns[i];
1003  newhist = new TH1F( hname, hname, nbins, binns.GetMatrixArray() );
1004  newhist->SetDirectory(0);
1005  for (UInt_t i=0; i<nbins; i++) {
1006  s >> val;
1007  newhist->SetBinContent(i+1,val);
1008  }
1009  }
1010 
1011  TString hnameSmooth = hname;
1012  hnameSmooth.ReplaceAll( "_original", "_smoothed" );
1013 
1014  if (fHistOriginal != 0) delete fHistOriginal;
1015  fHistOriginal = newhist;
1016  fHist = (TH1F*)fHistOriginal->Clone( hnameSmooth );
1017  fHist->SetTitle( hnameSmooth );
1018  fHist->SetDirectory(0);
1019 
1020  if (fInterpolMethod == PDF::kKDE) BuildKDEPDF();
1021  else BuildSplinePDF();
1022 }
1023 
1024 ////////////////////////////////////////////////////////////////////////////////
1025 /// write the pdf
1026 
1027 std::ostream& TMVA::operator<< ( std::ostream& os, const PDF& pdf )
1028 {
1029  Int_t dp = os.precision();
1030  os << "MinNSmooth " << pdf.fMinNsmooth << std::endl;
1031  os << "MaxNSmooth " << pdf.fMaxNsmooth << std::endl;
1032  os << "InterpolMethod " << pdf.fInterpolMethod << std::endl;
1033  os << "KDE_type " << pdf.fKDEtype << std::endl;
1034  os << "KDE_iter " << pdf.fKDEiter << std::endl;
1035  os << "KDE_border " << pdf.fKDEborder << std::endl;
1036  os << "KDE_finefactor " << pdf.fFineFactor << std::endl;
1037 
1038  TH1* histToWrite = pdf.GetOriginalHist();
1039 
1040  const Int_t nBins = histToWrite->GetNbinsX();
1041 
1042  // note: this is a schema change introduced for v3.7.3
1043  os << "Histogram "
1044  << histToWrite->GetName()
1045  << " " << nBins // nbins
1046  << " " << std::setprecision(12) << histToWrite->GetXaxis()->GetXmin() // x_min
1047  << " " << std::setprecision(12) << histToWrite->GetXaxis()->GetXmax() // x_max
1048  << std::endl;
1049 
1050  // write the smoothed hist
1051  os << "Weights " << std::endl;
1052  os << std::setprecision(8);
1053  for (Int_t i=0; i<nBins; i++) {
1054  os << std::setw(15) << std::left << histToWrite->GetBinContent(i+1) << std::right << " ";
1055  if ((i+1)%5==0) os << std::endl;
1056  }
1057 
1058  os << std::setprecision(dp);
1059  return os; // Return the output stream.
1060 }
1061 
1062 ////////////////////////////////////////////////////////////////////////////////
1063 /// read the tree from an std::istream
1064 
1065 std::istream& TMVA::operator>> ( std::istream& istr, PDF& pdf )
1066 {
1067  TString devnullS;
1068  Int_t valI;
1069  Int_t nbins=-1; // default binning will cause an exit
1070  Float_t xmin=-1., xmax=-1.;
1071  TString hname="_original";
1072  Bool_t doneReading = kFALSE;
1073  while (!doneReading) {
1074  istr >> devnullS;
1075  if (devnullS=="NSmooth")
1076  {istr >> pdf.fMinNsmooth; pdf.fMaxNsmooth=pdf.fMinNsmooth;}
1077  else if (devnullS=="MinNSmooth") istr >> pdf.fMinNsmooth;
1078  else if (devnullS=="MaxNSmooth") istr >> pdf.fMaxNsmooth;
1079  // have to do this with strings to be more stable with developing code
1080  else if (devnullS == "InterpolMethod") { istr >> valI; pdf.fInterpolMethod = PDF::EInterpolateMethod(valI);}
1081  else if (devnullS == "KDE_type") { istr >> valI; pdf.fKDEtype = KDEKernel::EKernelType(valI); }
1082  else if (devnullS == "KDE_iter") { istr >> valI; pdf.fKDEiter = KDEKernel::EKernelIter(valI);}
1083  else if (devnullS == "KDE_border") { istr >> valI; pdf.fKDEborder = KDEKernel::EKernelBorder(valI);}
1084  else if (devnullS == "KDE_finefactor") {
1085  istr >> pdf.fFineFactor;
1086  if (pdf.GetReadingVersion() != 0 && pdf.GetReadingVersion() < TMVA_VERSION(3,7,3)) { // here we expect the histogram limits if the version is below 3.7.3. When version == 0, the newest TMVA version is assumed.
1087  // coverity[tainted_data_argument]
1088  istr >> nbins >> xmin >> xmax;
1089  doneReading = kTRUE;
1090  }
1091  }
1092  else if (devnullS == "Histogram") { istr >> hname >> nbins >> xmin >> xmax; }
1093  else if (devnullS == "Weights") { doneReading = kTRUE; }
1094  }
1095 
1096  TString hnameSmooth = hname;
1097  hnameSmooth.ReplaceAll( "_original", "_smoothed" );
1098 
1099  // recreate the original hist
1100  if (nbins==-1) {
1101  std::cout << "PDF, trying to create a histogram without defined binning"<< std::endl;
1102  std::exit(1);
1103  }
1104  TH1* newhist = new TH1F( hname,hname, nbins, xmin, xmax );
1105  newhist->SetDirectory(0);
1106  Float_t val;
1107  for (Int_t i=0; i<nbins; i++) {
1108  istr >> val;
1109  newhist->SetBinContent(i+1,val);
1110  }
1111 
1112  if (pdf.fHistOriginal != 0) delete pdf.fHistOriginal;
1113  pdf.fHistOriginal = newhist;
1114  pdf.fHist = (TH1F*)pdf.fHistOriginal->Clone( hnameSmooth );
1115  pdf.fHist->SetTitle( hnameSmooth );
1116  pdf.fHist->SetDirectory(0);
1117 
1118  if (pdf.fMinNsmooth>=0) pdf.BuildSplinePDF();
1119  else {
1120  pdf.fInterpolMethod = PDF::kKDE;
1121  pdf.BuildKDEPDF();
1122  }
1123 
1124  return istr;
1125 }
1126 
1128 {
1129  // return global "this" pointer of PDF
1130  return GetThisPdfThreadLocal();
1131 }
virtual const char * GetTitle() const
Returns title of object.
Definition: TNamed.h:52
Int_t fMaxNsmooth
Definition: PDF.h:177
TString fBorderMethodString
Definition: PDF.h:193
float xmin
Definition: THbookFile.cxx:93
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:952
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4629
Int_t fHistAvgEvtPerBin
Definition: PDF.h:188
static const Double_t fgEpsilon
Definition: PDF.h:171
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8266
ClassImp(TMVA::PDF) TMVA
default constructor needed for ROOT I/O
Definition: PDF.cxx:53
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
void SmoothHistogram()
Definition: PDF.cxx:444
void BuildPDF(const TH1 *theHist)
Definition: PDF.cxx:249
Double_t GetIntegral() const
computes normalisation
Definition: PDF.cxx:628
Float_t fFineFactor
Definition: PDF.h:199
void FindBinInverse(const TH1 *histogram, Int_t &lowerBin, Int_t &higherBin, Double_t &lowerBinValue, Double_t &higherBinValue, Double_t y, Bool_t isMonotonouslyIncreasingFunction=kFALSE) const
find bin from value on ordinate
Definition: PDF.cxx:754
THist< 1, float > TH1F
Definition: THist.h:315
virtual Double_t GetBinLowEdge(Int_t bin) const
Return low edge of bin.
Definition: TAxis.cxx:489
static const Int_t fgNbin_PdfHist
Definition: PDF.h:169
THist< 1, int > TH1I
Definition: THist.h:317
Basic string class.
Definition: TString.h:137
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:570
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
TString fKDEtypeString
Definition: PDF.h:191
int nbins[3]
PDF(const TString &name, Bool_t norm=kTRUE)
virtual Int_t GetNbinsX() const
Definition: TH1.h:296
virtual Double_t GetEntries() const
return the current number of entries
Definition: TH1.cxx:4051
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:308
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
Double_t Prob(Double_t chi2, Int_t ndf)
Computation of the probability for a certain Chi-squared (chi2) and number of degrees of freedom (ndf...
Definition: TMath.cxx:619
TObject * Clone(const char *newname=0) const
Make a complete copy of the underlying object.
Definition: TH1.cxx:2565
virtual Double_t GetBinWidth(Int_t bin) const
return bin width for 1D historam Better to use h1.GetXaxis().GetBinWidth(bin)
Definition: TH1.cxx:8492
TMVA::PDF::EInterpolateMethod fInterpolMethod
Definition: PDF.h:180
Tools & gTools()
Definition: Tools.cxx:79
Double_t x[n]
Definition: legend1.C:17
static PDF * ThisPDF(void)
Definition: PDF.cxx:1127
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
Bool_t fCheckHist
Definition: PDF.h:203
virtual TArrayD * GetSumw2()
Definition: TH1.h:312
Definition: PDF.h:71
TString fKDEiterString
Definition: PDF.h:192
static const Bool_t fgManualIntegration
Definition: PDF.h:170
void ValidatePDF(TH1 *original=0) const
comparison of original histogram with reference PDF
Definition: PDF.cxx:571
Double_t GetXmin() const
Definition: TAxis.h:137
Bool_t AddRawLine(void *node, const char *raw)
XML helpers.
Definition: Tools.cxx:1198
void BuildSplinePDF()
build the PDF from the original histograms
Definition: PDF.cxx:310
Int_t GetHistNBins(Int_t evtNum=0)
Definition: PDF.cxx:293
virtual Double_t GetBinCenter(Int_t bin) const
return bin center for 1D historam Better to use h1.GetXaxis().GetBinCenter(bin)
Definition: TH1.cxx:8470
TH1 * fHistOriginal
Definition: PDF.h:184
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1241
Double_t length(const TVector2 &v)
Definition: CsgOps.cxx:347
TH1 * GetOriginalHist() const
Definition: PDF.h:102
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8543
Bool_t HistoHasEquidistantBins(const TH1 &h)
Definition: Tools.cxx:1494
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
const Handle_t kNone
Definition: GuiTypes.h:89
virtual Double_t GetSumOfWeights() const
Return the sum of weights excluding under/overflows.
Definition: TH1.cxx:7354
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
const char * GetContent(void *node)
XML helpers.
Definition: Tools.cxx:1182
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:295
float xmax
Definition: THbookFile.cxx:93
UInt_t GetReadingVersion() const
Definition: PDF.h:128
Double_t GetValInverse(Double_t y, Bool_t isMonotonouslyIncreasingFunction=kFALSE) const
returns value PDF^{-1}(y)
Definition: PDF.cxx:727
Int_t fNsmooth
Definition: PDF.h:175
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
print the tree recursinvely using the << operator
Definition: BinaryTree.cxx:155
std::istream & operator>>(std::istream &istr, BinaryTree &tree)
read the tree from an std::istream
Definition: BinaryTree.cxx:201
Int_t fMinNsmooth
Definition: PDF.h:176
MsgLogger * fLogger
the suffix for options
Definition: PDF.h:207
Int_t GetSize() const
Definition: TArray.h:49
void DeclareOptions()
define the options (their key words) that can be set in the option string know options: PDFInterpol[i...
Definition: PDF.cxx:811
#define TMVA_VERSION(a, b, c)
Definition: Version.h:48
void CheckHist() const
sanity check: compare PDF with original histogram
Definition: PDF.cxx:546
void BuildKDEPDF()
creates high-binned reference histogram to be used instead of the PDF for speed reasons ...
Definition: PDF.cxx:376
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition: KDEKernel.cxx:217
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:694
double Double_t
Definition: RtypesCore.h:55
Int_t fHistDefinedNBins
Definition: PDF.h:189
Double_t GetXmax() const
Definition: TAxis.h:138
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
Double_t y[n]
Definition: legend1.C:17
Double_t ey[n]
Definition: legend1.C:17
The TH1 histogram class.
Definition: TH1.h:80
void AddXMLTo(void *parent)
XML file writing.
Definition: PDF.cxx:910
#define name(a, b)
Definition: linkTestLib0.cpp:5
Abstract ClassifierFactory template that handles arbitrary types.
TH1 * fHist
Definition: PDF.h:183
void Err(int level, const char *msg, int size)
virtual ~PDF()
Definition: PDF.cxx:235
void ProcessOptions()
Definition: PDF.cxx:861
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
1-Dim function class
Definition: TF1.h:149
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition: TH1.cxx:8350
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
static Double_t IGetVal(Double_t *, Double_t *)
static external auxiliary function (integrand)
Definition: PDF.cxx:639
#define NULL
Definition: Rtypes.h:82
KDEKernel::EKernelIter fKDEiter
Definition: PDF.h:197
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE fIter == 2 —> adaptive KDE
Definition: KDEKernel.cxx:112
virtual void SetTitle(const char *title)
Change (i.e.
Definition: TH1.cxx:6268
TString fInterpolateString
Definition: PDF.h:194
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
virtual Double_t GetBinError(Int_t bin) const
Return value of error associated to bin number bin.
Definition: TH1.cxx:8395
const Bool_t kTRUE
Definition: Rtypes.h:91
void FillSplineToHist()
creates high-binned reference histogram to be used instead of the PDF for speed reasons ...
Definition: PDF.cxx:516
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
EInterpolateMethod
Definition: PDF.h:78
KDEKernel::EKernelBorder fKDEborder
Definition: PDF.h:198
KDEKernel::EKernelType fKDEtype
Definition: PDF.h:196
const Int_t n
Definition: legend1.C:16
void FillHistToGraph()
Simple conversion.
Definition: PDF.cxx:507
Definition: math.cpp:60
TAxis * GetXaxis()
Definition: TH1.h:319