//Begin_Html
/*
Likelihood analysis ("non-parametric approach")
<p>
Also implemented is a "diagonalized likelihood approach", which
improves upon the uncorrelated likelihood ansatz by linearly
transforming the input variables into a decorrelated ("diagonal")
space, using the square-root of the covariance matrix.
<p>
The method of maximum likelihood is the most straightforward, and
certainly among the most elegant, multivariate analysis approaches.
We define the likelihood ratio, <i>R<sub>L</sub></i>, for event
<i>i</i>, by:<br>
<center>
<img vspace=6 src="gif/tmva_likratio.gif" align="bottom" >
</center>
Here the signal and background likelihoods, <i>L<sub>S</sub></i>,
<i>L<sub>B</sub></i>, are products of the corresponding probability
densities, <i>p<sub>S</sub></i>, <i>p<sub>B</sub></i>, of the
<i>N</i><sub>var</sub> discriminating variables used in the MVA: <br>
<center>
<img vspace=6 src="gif/tmva_lik.gif" align="bottom" >
</center>
and accordingly for <i>L<sub>B</sub></i>.
In practice, TMVA uses polynomial splines to estimate the probability
density functions (PDFs) obtained from the distributions of the
training variables.<br>
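<p>
As an illustration, a minimal sketch (a hypothetical helper, not the
TMVA implementation itself) of how the per-variable PDF values are
combined into the likelihood ratio; this mirrors the product and ratio
computed in GetMvaValue below:
<pre>
// Combine per-variable signal and background PDF values p_S(x_i), p_B(x_i)
// into the likelihood ratio R_L = L_S / (L_S + L_B).
Double_t LikelihoodRatio( Int_t nvar, const Double_t* pSig, const Double_t* pBgd )
{
   Double_t lS = 1, lB = 1;
   for (Int_t i = 0; i &lt; nvar; i++) {
      lS *= pSig[i];   // product of signal PDF values
      lB *= pBgd[i];   // product of background PDF values
   }
   return lS / (lS + lB);
}
</pre>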
<p>
Note that in TMVA the output of the likelihood ratio is transformed
by<br>
<center>
<img vspace=6 src="gif/tmva_likratio_trans.gif" align="bottom"/>
</center>
to avoid the occurrence of heavy peaks at <i>R<sub>L</sub></i>=0 and 1.
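<p>
The transformation applied when the TransformOutput option is set is the
inverse of the sigmoid function; a minimal sketch, using the same
&tau;=15 as in GetMvaValue below:
<pre>
// Inverse-sigmoid transform of the likelihood ratio r in (0,1);
// spreads out the sharp peaks at r=0 and r=1.
Double_t TransformOutput( Double_t r, Double_t tau = 15.0 )
{
   return -TMath::Log( 1.0/r - 1.0 ) / tau;
}
</pre>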
<b>Decorrelated (or "diagonalized") Likelihood</b>
<p>
The biggest drawback of the likelihood approach is that it assumes
that the discriminating variables are uncorrelated. If this were the case,
it can be proven that the discrimination obtained by the above likelihood
ratio is optimal, i.e., no other method can beat it. However, in most
practical applications of MVAs correlations are present.
<p>
Linear correlations, measured from the training sample, can be taken
into account in a straightforward manner through the square-root
of the covariance matrix. The square-root of a matrix
<i>C</i> is the matrix <i>C′</i> that, multiplied with itself,
yields <i>C</i>: <i>C</i>=<i>C′C′</i>. We compute the
square-root matrix (SQM) by diagonalising (<i>D</i>) the
covariance matrix: <br>
<center>
<img vspace=6 src="gif/tmva_sqm.gif" align="bottom" >
</center>
The transformation from the linearly correlated variables into the
uncorrelated variable space is then given by multiplying the measured
variable tuple by the inverse of the SQM. Note that this transformation
is performed separately for signal and background, since the
correlation pattern is not the same in the two samples.
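<p>
A minimal sketch of the SQM computation with ROOT's linear-algebra
classes (an illustration of the stated decomposition, not the code used
internally by TMVA): with <i>C</i>=<i>SDS</i><sup>T</sup>, the
square-root is <i>C′</i>=<i>SD</i><sup>1/2</sup><i>S</i><sup>T</sup>.
<pre>
#include "TMatrixDSym.h"
#include "TMatrixDSymEigen.h"
#include "TVectorD.h"
#include "TMath.h"

TMatrixD SqrtMatrix( const TMatrixDSym&amp; cov )
{
   TMatrixDSymEigen eigen( cov );
   TMatrixD s = eigen.GetEigenVectors();              // S: columns are eigenvectors
   TVectorD d = eigen.GetEigenValues();               // eigenvalues of C
   TMatrixD sqrtD( cov.GetNrows(), cov.GetNcols() );  // zero-initialised
   for (Int_t i = 0; i &lt; cov.GetNrows(); i++) sqrtD(i,i) = TMath::Sqrt( d(i) );
   TMatrixD sT( TMatrixD::kTransposed, s );
   return s * sqrtD * sT;                             // C' with C = C'C'
}
</pre>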
<p>
The above diagonalisation is complete only for linearly correlated,
Gaussian-distributed variables. In real-world applications this is
often not the case, so that the diagonalisation procedure may recover
only little additional information. In such cases, non-linear methods
must be applied.
*/
//End_Html
#include "TMatrixD.h"
#include "TVector.h"
#include "TObjString.h"
#include "TFile.h"
#include "TKey.h"
#ifndef ROOT_TMVA_MethodLikelihood
#include "TMVA/MethodLikelihood.h"
#endif
#ifndef ROOT_TMVA_Tools
#include "TMVA/Tools.h"
#endif
ClassImp(TMVA::MethodLikelihood);
//_______________________________________________________________________
TMVA::MethodLikelihood::MethodLikelihood( TString jobName, TString methodTitle, DataSet& theData,
                                          TString theOption, TDirectory* theTargetDir )
   : TMVA::MethodBase( jobName, methodTitle, theData, theOption, theTargetDir )
{
   // standard constructor: initialise the method and process the option string
   InitLik();

   DeclareOptions();
   ParseOptions();
   ProcessOptions();

   if (HasTrainingTree()) {
      // the likelihood training is most stable with balanced samples
      Int_t nsig = Data().GetNEvtSigTrain();
      Int_t nbgd = Data().GetNEvtBkgdTrain();
      fLogger << kVERBOSE << "number of training events (signal, background): ("
              << nsig << ", " << nbgd << ")" << Endl;

      if (nsig != nbgd) {
         fLogger << kWARNING << "--------------------------------------------------" << Endl;
         fLogger << kWARNING << GetName() << ": different number of signal and background events;"
                 << " likelihood training will not be optimal" << Endl;
         fLogger << kWARNING << "--------------------------------------------------" << Endl;
      }
   }
}
//_______________________________________________________________________
TMVA::MethodLikelihood::MethodLikelihood( DataSet& theData,
                                          TString theWeightFile,
                                          TDirectory* theTargetDir )
   : TMVA::MethodBase( theData, theWeightFile, theTargetDir )
{
   // constructor used when applying a trained classifier from a weight file
   DeclareOptions();
   InitLik();
}
//_______________________________________________________________________
void TMVA::MethodLikelihood::InitLik( void )
{
   // default initialisation called by all constructors
   fHistSig        = NULL;
   fHistBgd        = NULL;
   fHistSig_smooth = NULL;
   fHistBgd_smooth = NULL;
   fPDFSig         = NULL;
   fPDFBgd         = NULL;

   SetMethodName( "Likelihood" );
   SetMethodType( TMVA::Types::kLikelihood );
   SetTestvarName();

   fEpsilon = 1e-5;   // lower bound for PDF values, protects against zero likelihood

   fBgdPDFHist = new TList();
   fSigPDFHist = new TList();

   fHistSig        = new vector<TH1*>      ( GetNvar() );
   fHistBgd        = new vector<TH1*>      ( GetNvar() );
   fHistSig_smooth = new vector<TH1*>      ( GetNvar() );
   fHistBgd_smooth = new vector<TH1*>      ( GetNvar() );
   fPDFSig         = new vector<TMVA::PDF*>( GetNvar() );
   fPDFBgd         = new vector<TMVA::PDF*>( GetNvar() );
   fIndexSig       = new vector<UInt_t>    ( GetNvar() );
   fIndexBgd       = new vector<UInt_t>    ( GetNvar() );
}
//_______________________________________________________________________
void TMVA::MethodLikelihood::DeclareOptions()
{
   // define the options (their key words) that can be set in the option string
   DeclareOptionRef( fSpline = 2, "Spline", "spline order used to interpolate reference histograms" );
   AddPreDefVal(0);
   AddPreDefVal(1);
   AddPreDefVal(2);
   AddPreDefVal(3);
   AddPreDefVal(5);

   DeclareOptionRef( fNsmooth = 0, "NSmooth",
                     "number of smoothing iterations applied to the input histograms" );
   DeclareOptionRef( fAverageEvtPerBin = 25, "NAvEvtPerBin",
                     "average number of events per PDF bin (determines the binning)" );
   DeclareOptionRef( fTransformLikelihoodOutput = kFALSE, "TransformOutput",
                     "transform the (often strongly peaked) likelihood output through an inverse sigmoid" );
}
//_______________________________________________________________________
void TMVA::MethodLikelihood::ProcessOptions()
{
   // process the options set in the option string
   MethodBase::ProcessOptions();

   // map the spline order onto the corresponding PDF smoothing method
   if      (fSpline == 0) fSmoothMethod = TMVA::PDF::kSpline0;
   else if (fSpline == 1) fSmoothMethod = TMVA::PDF::kSpline1;
   else if (fSpline == 2) fSmoothMethod = TMVA::PDF::kSpline2;
   else if (fSpline == 3) fSmoothMethod = TMVA::PDF::kSpline3;
   else if (fSpline == 5) fSmoothMethod = TMVA::PDF::kSpline5;
   else {
      fLogger << kWARNING << "unknown spline order; using Spline2 as default" << Endl;
      fSmoothMethod = TMVA::PDF::kSpline2;
   }

   if      (GetPreprocessingMethod() == Types::kDecorrelated)
      fLogger << kINFO << "using decorrelated variable set" << Endl;
   else if (GetPreprocessingMethod() == Types::kPCA)
      fLogger << kINFO << "using principal-component-transformed variable set" << Endl;
}
//_______________________________________________________________________
TMVA::MethodLikelihood::~MethodLikelihood( void )
{
   // destructor: delete all owned histogram, PDF and index containers
   if (NULL != fHistSig)        delete fHistSig;
   if (NULL != fHistBgd)        delete fHistBgd;
   if (NULL != fHistSig_smooth) delete fHistSig_smooth;
   if (NULL != fHistBgd_smooth) delete fHistBgd_smooth;
   if (NULL != fPDFSig)         delete fPDFSig;
   if (NULL != fPDFBgd)         delete fPDFBgd;
   if (NULL != fIndexSig)       delete fIndexSig;
   if (NULL != fIndexBgd)       delete fIndexBgd;
   if (NULL != fFin) { fFin->Close(); delete fFin; }
   delete fBgdPDFHist;
   delete fSigPDFHist;
}
//_______________________________________________________________________
void TMVA::MethodLikelihood::Train( void )
{
   // create reference histograms from the training sample and fit the PDFs
   if (!CheckSanity()) fLogger << kFATAL << "sanity check failed" << Endl;

   // choose the binning such that each bin contains, on average,
   // fAverageEvtPerBin events of the smaller training sample
   UInt_t nbins = (UInt_t)(TMath::Min( Data().GetNEvtSigTrain(), Data().GetNEvtBkgdTrain() )/fAverageEvtPerBin);

   // book one signal and one background reference histogram per input variable
   TString histTitle, histName;
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      histTitle = (*fInputVars)[ivar] + " signal training";
      histName  = (*fInputVars)[ivar] + "_sig";
      (*fHistSig)[ivar] = new TH1F( histName, histTitle, nbins,
                                    GetXmin( ivar, GetPreprocessingMethod() ),
                                    GetXmax( ivar, GetPreprocessingMethod() ) );
      histTitle = (*fInputVars)[ivar] + " background training";
      histName  = (*fInputVars)[ivar] + "_bgd";
      (*fHistBgd)[ivar] = new TH1F( histName, histTitle, nbins,
                                    GetXmin( ivar, GetPreprocessingMethod() ),
                                    GetXmax( ivar, GetPreprocessingMethod() ) );
   }

   // fill the reference histograms with the (weighted) training events
   for (Int_t ievt=0; ievt<Data().GetNEvtTrain(); ievt++) {
      ReadTrainingEvent( ievt, Types::kTrueType );
      Float_t weight = GetEventWeight();
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         Float_t value = GetEventVal(ivar);
         if (Data().Event().IsSignal()) (*fHistSig)[ivar]->Fill( value, weight );
         else                           (*fHistBgd)[ivar]->Fill( value, weight );
      }
   }

   // smooth the histograms (if requested) and fit the PDFs
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      for (UInt_t itype=0; itype < 2; itype++) {   // itype==0: signal, itype==1: background
         TH1F* htmp = 0;
         if (itype == 0) {
            htmp      = (TH1F*)(*fHistSig)[ivar]->Clone();
            histTitle = (*fInputVars)[ivar] + " signal training smoothed ";
            histName  = (*fInputVars)[ivar] + "_sig_smooth";
         }
         else {
            htmp      = (TH1F*)(*fHistBgd)[ivar]->Clone();
            histTitle = (*fInputVars)[ivar] + " background training smoothed ";
            histName  = (*fInputVars)[ivar] + "_bgd_smooth";
         }
         histTitle += fNsmooth;
         histTitle += " times";
         htmp->SetName ( histName );
         htmp->SetTitle( histTitle );

         TMVA::PDF* ptmp;
         if (Data().VarTypeOriginal(ivar) == 'I') {
            // discrete (integer) variables are neither smoothed nor interpolated
            ptmp = new TMVA::PDF( htmp, PDF::kSpline0 );
         }
         else {
            if (htmp->GetNbinsX() > 2 && fNsmooth > 0) htmp->Smooth( fNsmooth );
            else if (htmp->GetNbinsX() <= 2)
               fLogger << kWARNING << "histogram " << htmp->GetName()
                       << " has too few (" << htmp->GetNbinsX()
                       << ") bins for smoothing" << Endl;
            ptmp = new TMVA::PDF( htmp, fSmoothMethod );
         }

         if (itype == 0) {
            (*fHistSig_smooth)[ivar] = htmp;
            (*fPDFSig)[ivar]         = ptmp;
            (*fIndexSig)[ivar]       = ivar;
         }
         else {
            (*fHistBgd_smooth)[ivar] = htmp;
            (*fPDFBgd)[ivar]         = ptmp;
            (*fIndexBgd)[ivar]       = ivar;
         }
      }
   }
}
//_______________________________________________________________________
Double_t TMVA::MethodLikelihood::GetMvaValue()
{
   // compute the likelihood ratio for the current event
   Int_t ivar;
   TVector vs( GetNvar() );
   TVector vb( GetNvar() );

   // apply the preprocessing transformations: the signal-trained (kTRUE)
   // transformation fills vs, the background-trained (kFALSE) one fills vb
   Event eventBackup( Data().Event() );
   Data().ApplyTransformation( GetPreprocessingMethod(), kTRUE );
   for (ivar=0; ivar<GetNvar(); ivar++) vs(ivar) = GetEventVal(ivar);
   Data().Event().CopyVarValues( eventBackup );
   Data().ApplyTransformation( GetPreprocessingMethod(), kFALSE );
   for (ivar=0; ivar<GetNvar(); ivar++) vb(ivar) = GetEventVal(ivar);

   // multiply the per-variable PDF values into the signal and background likelihoods
   Double_t ps = 1;
   Double_t pb = 1;
   for (ivar=0; ivar<GetNvar(); ivar++) {
      Double_t x[2] = { vs(ivar), vb(ivar) };
      for (UInt_t itype=0; itype < 2; itype++) {   // itype==0: signal, itype==1: background
         TH1F* hist = 0;
         if (itype == 0) hist = (TH1F*)fSigPDFHist->At( (*fIndexSig)[ivar] );
         else            hist = (TH1F*)fBgdPDFHist->At( (*fIndexBgd)[ivar] );
         Int_t bin = hist->FindBin( x[itype] );

         Double_t p;
         if (fSmoothMethod == TMVA::PDF::kSpline0) {
            // step function: take the bin content directly
            p = TMath::Max( hist->GetBinContent(bin), fEpsilon );
         }
         else {
            // interpolate linearly between the current bin and its neighbour
            Int_t nextbin = bin;
            if ((x[itype] > hist->GetBinCenter(bin) && bin != hist->GetNbinsX()) || bin == 1)
               nextbin++;
            else
               nextbin--;
            Double_t dx   = hist->GetBinCenter (bin) - hist->GetBinCenter (nextbin);
            Double_t dy   = hist->GetBinContent(bin) - hist->GetBinContent(nextbin);
            Double_t like = hist->GetBinContent(bin) + (x[itype] - hist->GetBinCenter(bin)) * dy/dx;
            p = TMath::Max( like, fEpsilon );   // fEpsilon protects against zero likelihood
         }
         if (itype == 0) ps *= p;
         else            pb *= p;
      }
   }

   // the likelihood ratio, optionally transformed through an inverse sigmoid
   Double_t myMVA = 0;
   if (fTransformLikelihoodOutput) {
      Double_t r = ps/(pb+ps);
      if      (r <= 0.0) r = fEpsilon;
      else if (r >= 1.0) r = 1.0 - fEpsilon;
      Double_t tau = 15.0;
      myMVA = -TMath::Log( 1.0/r - 1.0 )/tau;
   }
   else myMVA = ps/(pb+ps);

   return myMVA;
}
//_______________________________________________________________________
void TMVA::MethodLikelihood::WriteWeightsToStream( ostream& o ) const
{
   // the PDF histograms are stored in a separate ROOT file;
   // the text stream only records that file's name
   TString fname = GetWeightFileName() + ".root";
   fLogger << kINFO << "creating weight file: " << fname << Endl;
   TFile *fout = new TFile( fname, "RECREATE" );
   o << "# weights stored in ROOT i/o file: " << fname << endl;

   // save the variable names and their ranges
   TList lvar;
   TVectorD vmin( GetNvar() ), vmax( GetNvar() );
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      lvar.Add( new TNamed( (*fInputVars)[ivar], TString() ) );
      vmin[ivar] = this->GetXmin( ivar );
      vmax[ivar] = this->GetXmax( ivar );
   }
   lvar.Write();
   vmin.Write( "vmin" );
   vmax.Write( "vmax" );
   lvar.Delete();

   // save the likelihood options
   TVectorD likelOptions( 4 );
   likelOptions(0) = (Double_t)fSmoothMethod;
   likelOptions(1) = (Double_t)fNsmooth;
   likelOptions(2) = (Double_t)fAverageEvtPerBin;
   likelOptions(3) = (Double_t)(GetPreprocessingMethod() == Types::kNone ? 0. : 1.);
   likelOptions.Write( "LikelihoodOptions" );

   // save the PDF histograms themselves
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      (*fPDFSig)[ivar]->GetPDFHist()->Write();
      (*fPDFBgd)[ivar]->GetPDFHist()->Write();
   }
   fout->Close();
   delete fout;
}
//_______________________________________________________________________
void TMVA::MethodLikelihood::ReadWeightsFromStream( istream& istr )
{
   // the PDF histograms are read back from the associated ROOT file
   if (istr.eof()) {} // istr itself is not used: the weights live in the ROOT file

   TString fname = GetWeightFileName();
   if (!fname.EndsWith( ".root" )) fname += ".root";
   fLogger << kINFO << "reading weight file: " << fname << Endl;
   fFin = new TFile( fname );

   // check that the stored variable names match the expected ones
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      TNamed t;
      t.Read( (*fInputVars)[ivar] );
      if (t.GetName() != (*fInputVars)[ivar]) {
         fLogger << kFATAL << "error while reading weight file; "
                 << "unknown variable: " << t.GetName() << " at position: " << ivar << ". "
                 << "Expected variable: " << (*fInputVars)[ivar] << Endl;
      }
   }

   // restore the variable ranges
   TVectorD vmin( GetNvar() ), vmax( GetNvar() );
   TVectorD *tmp = (TVectorD*)fFin->Get( "vmin" );
   if (tmp == 0) fLogger << kFATAL << "weight file misses vector \"vmin\"" << Endl;
   vmin = *tmp;
   tmp  = (TVectorD*)fFin->Get( "vmax" );
   if (tmp == 0) fLogger << kFATAL << "weight file misses vector \"vmax\"" << Endl;
   vmax = *tmp;
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      Data().SetXmin( ivar, vmin[ivar] );
      Data().SetXmax( ivar, vmax[ivar] );
   }

   // collect the PDF histograms and remember their positions in the lists
   fSigPDFHist = new TList();
   fBgdPDFHist = new TList();
   TIter next( fFin->GetListOfKeys() );
   TKey *key;
   while ((key = (TKey*)next())) {
      TClass *cl = gROOT->GetClass( key->GetClassName() );
      if (!cl->InheritsFrom("TH1F")) continue;
      TH1F *h = (TH1F*)key->ReadObj();
      TString hname = h->GetName();
      if      (hname.Contains("_sig_")) fSigPDFHist->Add(h);
      else if (hname.Contains("_bgd_")) fBgdPDFHist->Add(h);
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         if (hname.Contains( (*fInputVars)[ivar] )) {
            if      (hname.Contains("_sig_")) (*fIndexSig)[ivar] = fSigPDFHist->GetSize()-1;
            else if (hname.Contains("_bgd_")) (*fIndexBgd)[ivar] = fBgdPDFHist->GetSize()-1;
         }
      }
   }
}
//_______________________________________________________________________
void TMVA::MethodLikelihood::WriteMonitoringHistosToFile( void ) const
{
   // write the reference histograms and PDFs to the monitoring file
   fLogger << kINFO << "write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
   BaseDir()->cd();
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      (*fHistSig)[ivar]->Write();
      (*fHistBgd)[ivar]->Write();
      (*fHistSig_smooth)[ivar]->Write();
      (*fHistBgd_smooth)[ivar]->Write();
      (*fPDFSig)[ivar]->GetPDFHist()->Write();
      (*fPDFBgd)[ivar]->GetPDFHist()->Write();

      // add a finely binned scan of the signal PDF as an additional cross-check
      Float_t xmin = ((*fPDFSig)[ivar]->GetPDFHist()->GetXaxis())->GetXmin();
      Float_t xmax = ((*fPDFSig)[ivar]->GetPDFHist()->GetXaxis())->GetXmax();
      TH1F* mm = new TH1F( (*fInputVars)[ivar]+"_additional_check",
                           (*fInputVars)[ivar]+"_additional_check", 15000, xmin, xmax );
      Double_t intBin = (xmax-xmin)/15000;
      for (Int_t bin=0; bin < 15000; bin++) {
         Double_t x = (bin + 0.5)*intBin + xmin;
         mm->SetBinContent( bin+1, (*fPDFSig)[ivar]->GetVal(x) );
      }
      mm->Write();
   }
}