// Source listing extracted from doc/v630/KDEKernel_8cxx_source.html

// @(#)root/tmva $Id$

// Author: Asen Christov


/**********************************************************************************

 * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis       *

 * Package: TMVA                                                                  *

 * Class  : TMVA::KDEKernel                                                       *

 * Web    : http://tmva.sourceforge.net                                           *

 *                                                                                *

 * Description:                                                                   *

 *      Implementation (see header for description)                               *

 *                                                                                *

 * Authors (alphabetical):                                                        *

 *      Asen Christov   <christov@physik.uni-freiburg.de> - Freiburg U., Germany  *

 *                                                                                *

 * Copyright (c) 2005:                                                            *

 *      CERN, Switzerland                                                         *

 *      MPI-K Heidelberg, Germany                                                 *

 *      Freiburg U., Germany                                                      *

 *                                                                                *

 * Redistribution and use in source and binary forms, with or without             *

 * modification, are permitted according to the terms listed in LICENSE           *

 * (http://tmva.sourceforge.net/LICENSE)                                          *

 **********************************************************************************/


/*! \class TMVA::KDEKernel

\ingroup TMVA


KDE Kernel for "smoothing" the PDFs.


*/


#include "TH1.h"

#include "TH1F.h"

#include "TF1.h"


#include "TMath.h"


#include "TMVA/KDEKernel.h"

#include "TMVA/MsgLogger.h"

#include "TMVA/Types.h"


// ClassImp is a ROOT macro (declared in Rtypes.h); it is invoked here for TMVA::KDEKernel.
ClassImp(TMVA::KDEKernel);


////////////////////////////////////////////////////////////////////////////////
/// Constructor: stores the configuration and takes private copies of the
/// input histogram.
///
/// \param kiter       iteration mode, stored in fIter
/// \param hist        input histogram; a kFATAL message is logged if it is null
/// \param lower_edge  stored in fLowerEdge
/// \param upper_edge  stored in fUpperEdge
/// \param kborder     border treatment, stored in fKDEborder
/// \param FineFactor  stored in fFineFactor
///
/// Three clones of \a hist are created: fHist keeps the contents, while
/// fFirstIterHist and fSigmaHist are reset right away so that they only
/// inherit the binning. The kernel function itself (fKernel_integ) is not
/// created here; it starts out as a null pointer.

TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
                            EKernelBorder kborder, Float_t FineFactor )
   : fSigma( 1. ),
     fIter( kiter ),
     fLowerEdge( lower_edge ),
     fUpperEdge( upper_edge ),
     fFineFactor( FineFactor ),
     fKernel_integ( 0 ),
     fKDEborder( kborder ),
     fLogger( new MsgLogger("KDEKernel") )
{
   // sanity check on the input histogram
   if (!hist) {
      Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
   }

   // copy of the input histogram, contents included
   fHist = static_cast<TH1F*>( hist->Clone() );

   // empty histogram with the binning of the input; filled in the hidden iteration
   fFirstIterHist = static_cast<TH1F*>( hist->Clone() );
   fFirstIterHist->Reset();

   // empty histogram with the binning of the input; receives the per-bin sigmas
   fSigmaHist = static_cast<TH1F*>( hist->Clone() );
   fSigmaHist->Reset();

   fHiddenIteration = false;
}


////////////////////////////////////////////////////////////////////////////////
/// Destructor: releases the three histogram copies, the kernel function and
/// the logger owned by this object.

TMVA::KDEKernel::~KDEKernel()
{
   // delete on a null pointer does nothing, so no explicit null checks are required
   delete fHist;
   delete fFirstIterHist;
   delete fSigmaHist;
   delete fKernel_integ;
   delete fLogger;
}


////////////////////////////////////////////////////////////////////////////////
/// Erf-based integral used as kernel function when the kernel is Gaussian;
/// faster than integrating the kernel numerically.
///
/// \param x    x[0] is the lower and x[1] the upper limit of the interval
/// \param par  par[0] is the centre and par[1] the width of the kernel
/// \return     0.5*(Erf(zHigh) - Erf(zLow)) with z = (limit - par[0])/par[1],
///             evaluated branch by branch on the signs of zLow and zHigh;
///             -1 if par[1] <= 0, if x[0] > x[1], or if no branch applies.
///
/// The scaled limits are deliberately kept in single precision (Float_t).

Double_t GaussIntegral(Double_t *x, Double_t *par)
{
   // reject a non-positive width or an inverted interval
   if ( (par[1]<=0) || (x[0]>x[1]) ) return -1.;

   // interval limits relative to the centre, in units of the width
   const Float_t zLow  = (x[0]-par[0])/par[1];
   const Float_t zHigh = (x[1]-par[0])/par[1];

   // interval starts exactly at the centre
   if (zLow==0) {
      if (zHigh==0) return 0.;
      if (zHigh>0 ) return 0.5*TMath::Erf(zHigh);
   }

   // interval ends exactly at the centre
   if (zHigh==0) return 0.5*TMath::Erf(TMath::Abs(zLow));

   // interval lies entirely above the centre
   if (zLow>0) return 0.5*(TMath::Erf(zHigh)-TMath::Erf(zLow));

   // interval starts below the centre: it either straddles it or lies entirely below
   if (zLow<0) {
      const Double_t erfLow = TMath::Erf(TMath::Abs(zLow));
      return (zHigh>0) ? 0.5*(TMath::Erf(zHigh)+erfLow)
                       : 0.5*(erfLow-TMath::Erf(TMath::Abs(zHigh)));
   }

   return -1.;
}


////////////////////////////////////////////////////////////////////////////////
/// Set up the kernel function and, for adaptive KDE, run the hidden first
/// iteration that provides the per-bin kernel widths.
///
///  - fIter == kNonadaptiveKDE ---> only the kernel function and the global
///    width fSigma are set up
///  - fIter == kAdaptiveKDE    ---> in addition fFirstIterHist is filled with a
///    nonadaptive estimate, normalised, and used to fill fSigmaHist
///
/// \param ktype  kernel type; only kGauss creates a kernel function
///
/// A kFATAL message is logged if no kernel function is available after the
/// setup step, or if a computed sigma is <= 0. The availability check is done
/// *before* the adaptive iteration, because that iteration already evaluates
/// the kernel through GetBinKernelIntegral().
///
/// The method may be called more than once: the previous kernel function is
/// released and fFirstIterHist is emptied before it is refilled.

void TMVA::KDEKernel::SetKernelType( EKernelType ktype )
{
   if (ktype == kGauss) {

      // Gauss kernel: needed for both nonadaptive and adaptive KDE.
      // For nonadaptive KDE this is the only (= final) thing to do, for
      // adaptive KDE it is used in the first (hidden) iteration.

      // release a kernel left over from an earlier call so that it is not leaked
      delete fKernel_integ;
      fKernel_integ = nullptr;
      fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);

      // This formula for sigma is valid for a Gaussian kernel function (nonadaptive KDE).
      // Formula found in:
      // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
      fSigma = ( TMath::Sqrt(2.0)
                 *TMath::Power(4./3., 0.2)
                 *fHist->GetRMS()
                 *TMath::Power(fHist->Integral(), -0.2) );

      if (fSigma <= 0 ) {
         Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
      }
   }

   // Without a kernel function nothing below can work: the adaptive iteration
   // dereferences fKernel_integ via GetBinKernelIntegral().
   if (fKernel_integ == nullptr) {
      Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
      return; // presumably never reached (kFATAL is expected to abort); guards the code below otherwise
   }

   // everything that follows is needed for adaptive KDE only
   if (fIter != kAdaptiveKDE) return;

   // Fill a temporary histogram using nonadaptive KDE; this histogram is
   // identical with the final output when using only nonadaptive KDE.
   fHiddenIteration=true;

   // start from an empty histogram so that a repeated call does not add on top of an earlier result
   fFirstIterHist->Reset();

   const Int_t nSrcBins = fHist->GetNbinsX();
   const Int_t nPdfBins = fFirstIterHist->GetNbinsX();

   const Float_t histoLowEdge   = fHist->GetBinLowEdge(1);
   const Float_t histoUpperEdge = fHist->GetBinLowEdge(nSrcBins+1);

   // Adds the content of source bin `srcBin`, smeared with a kernel centred at
   // `center`, to the bins of the PDF histogram.
   auto spreadSample = [&]( Int_t srcBin, Float_t center ) {
      const Double_t weight = fHist->GetBinContent(srcBin);
      for (Int_t j=1;j<nPdfBins;j++) {
         fFirstIterHist->AddBinContent(j,weight*
                                       this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
                                                                  fFirstIterHist->GetBinLowEdge(j+1),
                                                                  center,
                                                                  srcBin)
                                       );
      }
   };

   // NOTE(review): all bin loops in this method run up to, but not including,
   // bin GetNbinsX(); kept unchanged -- confirm that skipping the last bin is intended.
   for (Int_t i=1;i<nSrcBins;i++) {
      // loop over the bins of the original histo
      spreadSample( i, fHist->GetBinCenter(i) );

      // NOTE(review): 3 presumably corresponds to the sample-mirroring border treatment -- confirm against EKernelBorder
      if (fKDEborder == 3) { // mirror the samples and fill them again
         // In order to save time do the mirroring only for the first (the lower) 1/5 of the histo to the left
         // and the last (the higher) 1/5 of the histo to the right.
         // The middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
         if (i < nSrcBins/5  ) {  // the first (the lower) 1/5 of the histo
            spreadSample( i, 2*histoLowEdge-fHist->GetBinCenter(i) ); // mirroring to the left
         }
         if (i > 4*nSrcBins/5) { // the last (the higher) 1/5 of the histo
            spreadSample( i, 2*histoUpperEdge-fHist->GetBinCenter(i) ); // mirroring to the right
         }
      }
   }

   fFirstIterHist->SetEntries(fHist->GetEntries()); // set the number of entries to be the same as the original histo

   // do "function like" integration = sum of (bin_width*bin_content):
   Float_t integ=0;
   for (Int_t j=1;j<nPdfBins;j++)
      integ+=fFirstIterHist->GetBinContent(j)*fFirstIterHist->GetBinWidth(j);
   fFirstIterHist->Scale(1./integ);

   fHiddenIteration=false;

   // OK, now we have the first iteration.
   // Next: calculate the sigmas (widths) for the second (adaptive) iteration
   // based on the output of the first iteration; they are stored in fSigmaHist.
   for (Int_t j=1;j<nPdfBins;j++) {
      // loop over the bins of the PDF histo and fill fSigmaHist
      if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
         Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
      }

      fSigmaHist->SetBinContent(j,fFineFactor*fSigma/TMath::Sqrt(fFirstIterHist->GetBinContent(j)));
   }
}


////////////////////////////////////////////////////////////////////////////////
/// Calculates the integral of the kernel over one bin.
///
/// \param lowr    lower limit handed to the kernel function
/// \param highr   upper limit handed to the kernel function
/// \param mean    centre of the kernel (first kernel parameter)
/// \param binnum  bin of fSigmaHist whose content is used as kernel width in
///                the adaptive iteration
/// \return        fKernel_integ->Eval(lowr,highr); if fKDEborder == 2 the value
///                is multiplied by 1/Eval(fLowerEdge,fUpperEdge). This
///                renormalisation takes care of the "border" effects, i.e. it
///                sets the integral to one inside the histogram borders.

Float_t TMVA::KDEKernel::GetBinKernelIntegral( Float_t lowr, Float_t highr, Float_t mean, Int_t binnum )
{
   // nonadaptive KDE and the hidden first iteration both use the global width fSigma
   const Bool_t useGlobalSigma = fHiddenIteration || (fIter == kNonadaptiveKDE);

   if (useGlobalSigma) {
      fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
   }
   else if (fIter == kAdaptiveKDE) {
      // fHiddenIteration is known to be false on this path
      fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
   }

   // no renormalisation requested: plain kernel integral
   if ( fKDEborder != 2 ) return fKernel_integ->Eval(lowr,highr);

   // renormalisation of the kernel function
   const Float_t renormFactor = 1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
   return renormFactor*fKernel_integ->Eval(lowr,highr);
}


GaussIntegral
Double_t GaussIntegral(Double_t *x, Double_t *par)
when using Gaussian as Kernel function this is faster way to calculate the integrals
Definition KDEKernel.cxx:88

KDEKernel.h

MsgLogger.h

Float_t
float Float_t
Definition RtypesCore.h:57

Double_t
double Double_t
Definition RtypesCore.h:59

ClassImp
#define ClassImp(name)
Definition Rtypes.h:377

TF1.h

TH1F.h

TH1.h

TMath.h

TF1
1-Dim function class
Definition TF1.h:214

TH1F
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:577

TH1F::Reset
void Reset(Option_t *option="") override
Reset.
Definition TH1.cxx:10030

TH1
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:58

TH1::Clone
TObject * Clone(const char *newname="") const override
Make a complete copy of the underlying object.
Definition TH1.cxx:2734

TMVA::KDEKernel
KDE Kernel for "smoothing" the PDFs.
Definition KDEKernel.h:50

TMVA::KDEKernel::fHist
TH1F * fHist
copy of input histogram
Definition KDEKernel.h:82

TMVA::KDEKernel::EKernelBorder
EKernelBorder
Definition KDEKernel.h:56

TMVA::KDEKernel::~KDEKernel
virtual ~KDEKernel(void)
destructor
Definition KDEKernel.cxx:76

TMVA::KDEKernel::fSigmaHist
TH1F * fSigmaHist
contains the Sigmas Widths for adaptive KDE
Definition KDEKernel.h:84

TMVA::KDEKernel::fFirstIterHist
TH1F * fFirstIterHist
histogram to be filled in the hidden iteration
Definition KDEKernel.h:83

TMVA::KDEKernel::EKernelIter
EKernelIter
Definition KDEKernel.h:55

TMVA::KDEKernel::EKernelType
EKernelType
Definition KDEKernel.h:54

TMVA::KDEKernel::SetKernelType
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 —> nonadaptive KDE; fIter == 2 —> adaptive KDE
Definition KDEKernel.cxx:112

TMVA::KDEKernel::KDEKernel
KDEKernel(EKernelIter kiter=kNonadaptiveKDE, const TH1 *hist=nullptr, Float_t lower_edge=0., Float_t upper_edge=1., EKernelBorder kborder=kNoTreatment, Float_t FineFactor=1.)
constructor sanity check
Definition KDEKernel.cxx:49

TMVA::KDEKernel::Log
MsgLogger & Log() const
Definition KDEKernel.h:89

TMVA::KDEKernel::fHiddenIteration
Bool_t fHiddenIteration
Defines whether what is currently running is the hidden (first) iteration.
Definition KDEKernel.h:85

TMVA::KDEKernel::GetBinKernelIntegral
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
Definition KDEKernel.cxx:217

TMVA::MsgLogger
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57

int

x
Double_t x[n]
Definition legend1.C:17

TMVA::DNN::EActivationFunction::kGauss
@ kGauss

TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148

TMath::Erf
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition TMath.cxx:190

TMath::Sqrt
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:662

TMath::Power
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Definition TMath.h:721

TMath::Abs
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:123

Types.h