// Begin_Html
/*
This is a generalization of the above Likelihood methods to <i>N</i><sub>var</sub>
dimensions, where <i>N</i><sub>var</sub> is the number of input variables
used in the MVA. If the multi-dimensional probability density functions
(PDFs) for signal and background were known, this method would contain the
entire physical information and would therefore be optimal. In practice,
kernel estimation methods are used to approximate the PDFs from the events
in the training sample. <br><p></p>
A very simple probability density estimator (PDE) has been suggested
in <a href="http://arxiv.org/abs/hep-ex/0211019">hep-ex/0211019</a>. The
PDE for a given test event is obtained by counting the (normalized)
numbers of signal and background (training) events that occur in the
"vicinity" of the test event. The volume that defines this "vicinity" is
user-defined. A <a href="http://arxiv.org/abs/hep-ex/0211019">search
method based on binary trees</a> is used to reduce the selection time
of the range search. Three different volume definitions
are available: <br><p></p>
<ul>
<li><u>MinMax:</u>
the volume is defined in each dimension with respect
to the full variable range found in the training sample. </li>
<li><u>RMS:</u>
the volume is defined in each dimension with respect
to the RMS estimated from the training sample. </li>
<li><u>Adaptive:</u>
a volume element is defined in each dimension with
respect to the RMS estimated from the training sample. The overall
scale of the volume element is then adjusted for each test event so
that the total number of events confined in the volume lies within
a user-defined range.</li>
</ul><p></p>
The adaptive range search is used by default.
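<p></p>
For a given test event, the implementation below effectively returns the normalized ratio
<br>
&nbsp;&nbsp;&nbsp;<i>y</i> = (<i>n<sub>S</sub></i>/<i>N<sub>S</sub></i>) / (<i>n<sub>S</sub></i>/<i>N<sub>S</sub></i> + <i>n<sub>B</sub></i>/<i>N<sub>B</sub></i>),
<br>
where <i>n<sub>S</sub></i> (<i>n<sub>B</sub></i>) is the number of signal (background) training events
counted in the volume around the test event, and <i>N<sub>S</sub></i> (<i>N<sub>B</sub></i>) the total
number of signal (background) training events. <br><p></p>
For illustration only, and assuming the usual colon-separated option format interpreted by
TMVA::Tools::ParseFormatLine in the constructor below, typical option strings could read:
<pre>
   "MinMax:0.2"                  deltaFrac = 0.2 (illustrative value)
   "RMS:3.0"                     deltaFrac = 3.0 (the default)
   "Adaptive:100:200:50:0.99"    nEventsMin, nEventsMax, maxVIterations, initialScale (the defaults)
</pre>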
*/
// End_Html
#include "TMVA/MethodPDERS.h"
#include "TMVA/Tools.h"
#include "TMVA/RootFinder.h"
#include "TFile.h"
#include "TObjString.h"
#include "TMath.h"
#include <stdexcept>
namespace TMVA {
const Bool_t MethodPDERS_UseFindRoot       = kTRUE;
const Bool_t MethodPDERS_UseKernelEstimate = kFALSE;
}
using std::vector;
ClassImp(TMVA::MethodPDERS)
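//_______________________________________________________________________
// standard constructor: interpret the option string (volume-range mode
// and its parameters) after default initialisation of the data members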
TMVA::MethodPDERS::MethodPDERS( TString jobName, vector<TString>* theVariables,
TTree* theTree, TString theOption, TDirectory* theTargetDir )
: TMVA::MethodBase( jobName, theVariables, theTree, theOption, theTargetDir )
{
InitPDERS();
TList* list = TMVA::Tools::ParseFormatLine( fOptions );
if (list->GetSize() > 0) {
TString s = ((TObjString*)list->At(0))->GetString();
s.ToLower();
if (s.Contains("minmax") ) fVRangeMode = TMVA::MethodPDERS::kMinMax;
else if (s.Contains("rms") ) fVRangeMode = TMVA::MethodPDERS::kRMS;
else if (s.Contains("adaptive")) fVRangeMode = TMVA::MethodPDERS::kAdaptive;
else {
cout << "--- " << GetName() << ": Fatal error unknown vRangeType type: "
<< s << " in first option" << endl;
throw std::invalid_argument( "Abort" );
}
}
if (list->GetSize() > 1 && (fVRangeMode == kMinMax || fVRangeMode == kRMS)) {
fDeltaFrac = atof( ((TObjString*)list->At(1))->GetString() );
}
else if (fVRangeMode == kAdaptive) {
if (list->GetSize() > 1) fNEventsMin = atoi( ((TObjString*)list->At(1))->GetString() );
if (list->GetSize() > 2) fNEventsMax = atoi( ((TObjString*)list->At(2))->GetString() );
if (list->GetSize() > 3) fMaxVIterations = atoi( ((TObjString*)list->At(3))->GetString() );
if (list->GetSize() > 4) fInitialScale = atof( ((TObjString*)list->At(4))->GetString() );
}
if (Verbose()) {
cout << "--- " << GetName()
<< " <verbose>: interpreted option string: vRangeMethod: '"
<< (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
(fVRangeMode == kRMS ) ? "RMS" : "Adaptive")
<< "'" << endl;
if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
cout << "--- " << GetName() << ": deltaFrac: " << fDeltaFrac << endl;
else
cout << "--- " << GetName()
<< " <verbose>: nEventsMin/Max, maxVIterations, initialScale: "
<< fNEventsMin << " " << fNEventsMax
<< " " << fMaxVIterations << " " << fInitialScale << endl;
}
}
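//_______________________________________________________________________
// constructor used when reading the classifier back from a weight file;
// the actual options are restored later in ReadWeightsFromFile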
TMVA::MethodPDERS::MethodPDERS( vector<TString> *theVariables,
TString theWeightFile,
TDirectory* theTargetDir )
: TMVA::MethodBase( theVariables, theWeightFile, theTargetDir )
{
InitPDERS();
}
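//_______________________________________________________________________
// default initialisation of the PDERS data members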
void TMVA::MethodPDERS::InitPDERS( void )
{
fMethodName = "PDERS";
fMethod = TMVA::Types::PDERS;
fTestvar = fTestvarPrefix+GetMethodName();
fFin = NULL;
fBinaryTreeS = fBinaryTreeB = NULL;
fgThisPDERS = this;
fDeltaFrac = 3.0;
fVRangeMode = kAdaptive;
fNEventsMin = 100;
fNEventsMax = 200;
fMaxVIterations = 50;
fInitialScale = 0.99;
fInitializedVolumeEle = kFALSE;
}
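//_______________________________________________________________________
// destructor: delete the binary search trees and close an open weight file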
TMVA::MethodPDERS::~MethodPDERS( void )
{
if (NULL != fBinaryTreeS) delete fBinaryTreeS;
if (NULL != fBinaryTreeB) delete fBinaryTreeB;
if (NULL != fFin) { fFin->Close(); fFin->Delete(); }
}
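//_______________________________________________________________________
// PDERS has no explicit training phase: perform the sanity check and
// write the training events to the weight file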
void TMVA::MethodPDERS::Train( void )
{
if (!CheckSanity()) {
cout << "--- " << GetName() << ": Error: sanity check failed" << endl;
throw std::invalid_argument( "Abort" );
}
WriteWeightsToFile();
}
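//_______________________________________________________________________
// on the first call, set up the volume element and fill the signal and
// background binary search trees from the training tree; then return the
// PDERS estimator for the given event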
Double_t TMVA::MethodPDERS::GetMvaValue( TMVA::Event *e )
{
if (fInitializedVolumeEle == kFALSE) {
fInitializedVolumeEle = kTRUE;
SetVolumeElement();
Int_t nS = 0, nB = 0;
fBinaryTreeS = new TMVA::BinarySearchTree();
nS = fBinaryTreeS->Fill( fTrainingTree, fInputVars, 1 );
fBinaryTreeB = new TMVA::BinarySearchTree();
nB = fBinaryTreeB->Fill( fTrainingTree, fInputVars, 0 );
if (NULL == fBinaryTreeS || NULL == fBinaryTreeB) {
cout << "--- " << GetName() << ": Error in ::Train: Create(BinaryTree) returned zero "
<< "binaryTree pointer(s): " << fBinaryTreeS << " " << fBinaryTreeB << endl;
throw std::invalid_argument( "Abort" );
}
fScaleS = 1.0/Float_t(nS);
fScaleB = 1.0/Float_t(nB);
if (Verbose()) cout << "--- " << GetName() << " <verbose>: signal and background scales: "
<< fScaleS << " " << fScaleB << endl;
}
return this->RScalc( e );
}
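//_______________________________________________________________________
// define the size of the volume element in each input variable according
// to the chosen range mode (MinMax, RMS or Adaptive)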
void TMVA::MethodPDERS::SetVolumeElement( void )
{
fDelta = (fNvar > 0) ? new vector<Float_t>( fNvar ) : 0;
fShift = (fNvar > 0) ? new vector<Float_t>( fNvar ) : 0;
if (fDelta != 0) {
for (Int_t ivar=0; ivar<fNvar; ivar++) {
switch (fVRangeMode) {
case kRMS:
case kAdaptive:
Double_t meanS, meanB, rmsS, rmsB, xmin, xmax;
TMVA::Tools::ComputeStat( fTrainingTree, (*fInputVars)[ivar],
meanS, meanB, rmsS, rmsB, xmin, xmax );
(*fDelta)[ivar] = (rmsS + rmsB)*0.5*fDeltaFrac;
if (Verbose())
cout << "--- " << GetName() << " <verbose>: delta of var[" << (*fInputVars)[ivar]
<< "\t]: " << (rmsS + rmsB)*0.5
<< "\t | comp with d|norm|: " << (GetXmaxNorm( ivar ) - GetXminNorm( ivar ))
<< endl;
break;
case kMinMax:
(*fDelta)[ivar] = (GetXmaxNorm( ivar ) - GetXminNorm( ivar ))*fDeltaFrac;
break;
default:
cout << "--- " << GetName()
<< ": Error in ::SetVolumeElement: unknown range-set mode: "
<< fVRangeMode << endl;
throw std::invalid_argument( "Abort" );
}
(*fShift)[ivar] = 0.5;
}
}
else {
cout << "--- " << GetName() << ": Error: fNvar <= 0: " << fNvar << endl;
throw std::invalid_argument("");
}
}
TMVA::MethodPDERS* TMVA::MethodPDERS::fgThisPDERS = NULL;
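//_______________________________________________________________________
// static interface used by TMVA::RootFinder: returns the summed signal and
// background event count inside the help volume scaled by 'scale'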
Double_t TMVA::MethodPDERS::IGetVolumeContentForRoot( Double_t scale )
{
return ThisPDERS()->GetVolumeContentForRoot( scale );
}
Double_t TMVA::MethodPDERS::GetVolumeContentForRoot( Double_t scale )
{
Volume v( *fHelpVolume );
v.ScaleInterval( scale );
Double_t cS = GetBinaryTreeSig()->SearchVolume( &v );
Double_t cB = GetBinaryTreeBkg()->SearchVolume( &v );
v.Delete();
return cS + cB;
}
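//_______________________________________________________________________
// core of PDERS: build the box around the test event, adjust its size in
// adaptive mode, count the signal and background training events inside
// it, and return the normalised ratio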
Float_t TMVA::MethodPDERS::RScalc( TMVA::Event *e )
{
vector<Double_t> *lb = new vector<Double_t>( fNvar );
for (Int_t ivar=0; ivar<fNvar; ivar++)
(*lb)[ivar] = e->GetData(ivar);
vector<Double_t> *ub = new vector<Double_t>( *lb );
for (Int_t ivar=0; ivar<fNvar; ivar++) {
(*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
(*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
}
TMVA::Volume* volume = new TMVA::Volume( lb, ub );
Float_t countS = 0;
Float_t countB = 0;
#ifdef TMVA_MethodPDERS__countByHand__Debug__
countS = fBinaryTreeS->SearchVolume( volume );
countB = fBinaryTreeB->SearchVolume( volume );
Int_t iS = 0, iB = 0;
for (Int_t ievt_=0; ievt_<fTrainingTree->GetEntries(); ievt_++) {
Bool_t inV = kFALSE;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
Float_t x = TMVA::Tools::GetValue( fTrainingTree, ievt_, (*fInputVars)[ivar] );
inV = (x > (*volume->fLower)[ivar] && x <= (*volume->fUpper)[ivar]);
if (!inV) break;
}
if (inV) {
if ((Int_t)TMVA::Tools::GetValue( fTrainingTree, ievt_, "type" ) == 1)
iS++;
else
iB++;
}
}
cout << "--- " << GetName() << ": debug: my test: S/B: "
<< iS << " " << iB << endl;
cout << "--- " << GetName() << ": debug: binTree: S/B: "
<< countS << " " << countB << endl << endl;
#endif
if (fVRangeMode == kAdaptive) {
if (TMVA::MethodPDERS_UseFindRoot) {
fHelpVolume = new TMVA::Volume( *volume );
TMVA::RootFinder rootFinder( &IGetVolumeContentForRoot, 0.01, 50, 50, 10 );
Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
TMVA::Volume v( *volume );
v.ScaleInterval( scale );
if (TMVA::MethodPDERS_UseKernelEstimate) {
std::vector<TMVA::Event*> eventsS;
std::vector<TMVA::Event*> eventsB;
fBinaryTreeS->SearchVolume( &v, &eventsS );
fBinaryTreeB->SearchVolume( &v, &eventsB );
countS = KernelEstimate( *e, eventsS, v );
countB = KernelEstimate( *e, eventsB, v );
}
else {
countS = fBinaryTreeS->SearchVolume( &v );
countB = fBinaryTreeB->SearchVolume( &v );
}
v.Delete();
fHelpVolume->Delete();
delete fHelpVolume; fHelpVolume = NULL;
}
else {
countS = fBinaryTreeS->SearchVolume( volume );
countB = fBinaryTreeB->SearchVolume( volume );
Float_t nEventsO = countS + countB;
Int_t i_=0;
while (nEventsO < fNEventsMin && i_ < 100) { // cap the pre-adjustment to avoid an endless loop on very small samples
volume->ScaleInterval( 1.15 );
countS = fBinaryTreeS->SearchVolume( volume );
countB = fBinaryTreeB->SearchVolume( volume );
nEventsO = countS + countB;
i_++;
}
if (i_ > 50) cout << "--- " << GetName() << ": Warning in event: " << e
<< ": adaptive volume pre-adjustment reached "
<< ">50 iterations in while loop (" << i_ << ")" << endl;
Float_t nEventsN = nEventsO;
Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
Float_t scaleO = 1.0;
Float_t scaleN = fInitialScale;
Float_t scale = scaleN;
Float_t cS = countS;
Float_t cB = countB;
for (Int_t ic=1; ic<fMaxVIterations; ic++) {
if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
TMVA::Volume* v = new TMVA::Volume( *volume );
v->ScaleInterval( scale );
cS = fBinaryTreeS->SearchVolume( v );
cB = fBinaryTreeB->SearchVolume( v );
nEventsN = cS + cB;
// secant-like step: move the scale towards the target event count nEventsE
if (nEventsN > 1 && nEventsN - nEventsO != 0) {
if (scaleN - scaleO != 0)
scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
else
scale += (-0.01);
}
else
scale += 0.5;
scaleN = scale;
if (TMath::Abs(cS + cB - nEventsE) < TMath::Abs(countS + countB - nEventsE) &&
(cS + cB >= fNEventsMin || countS + countB < cS + cB)) {
countS = cS; countB = cB;
}
v->Delete();
delete v;
}
else break;
}
nEventsN = countS + countB;
if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
cout << "--- " << GetName() << ": Warning in event " << e
<< ": adaptive volume adjustment reached "
<< "max. #iterations (" << fMaxVIterations << ")"
<< "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
<< endl;
}
}
volume->Delete();
delete volume;
if (countS < 1e-20 && countB < 1e-20) return 0.5;
if (countB < 1e-20) return 1.0;
if (countS < 1e-20) return 0.0;
Float_t r = countB*fScaleB/(countS*fScaleS);
return 1.0/(r + 1.0);
}
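//_______________________________________________________________________
// Gaussian kernel estimate of the event density at the test event from the
// training events found inside the volume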
Double_t TMVA::MethodPDERS::KernelEstimate( TMVA::Event& event,
vector<TMVA::Event*>& events, TMVA::Volume& v )
{
Double_t fac = 0.2;
Double_t *sigma = new Double_t[fNvar];
for (Int_t ivar=0; ivar<fNvar; ivar++)
sigma[ivar] = ((*v.fUpper)[ivar] - (*v.fLower)[ivar])*fac;
Double_t pdfSum = 0;
for (vector<TMVA::Event*>::iterator iev = events.begin(); iev != events.end(); iev++) {
Double_t pdf = 1;
for (Int_t ivar=0; ivar<fNvar; ivar++)
pdf *= TMath::Gaus( event.GetData(ivar), (*iev)->GetData(ivar), sigma[ivar], kTRUE );
pdfSum += pdf;
}
delete [] sigma;
return pdfSum;
}
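//_______________________________________________________________________
// statistical error on the PDERS ratio, propagated from the weighted
// signal and background counts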
Float_t TMVA::MethodPDERS::GetError( Float_t countS, Float_t countB,
Float_t sumW2S, Float_t sumW2B ) const
{
Float_t c = fScaleB/fScaleS;
Float_t d = countS + c*countB; d *= d;
if (d < 1e-10) return 1;
Float_t f = c*c/d/d;
Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
if (err < 1e-10) return 1;
return TMath::Sqrt(err);
}
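//_______________________________________________________________________
// write the variable names and normalisation limits, the PDERS options and
// a clone of the training tree to a ROOT weight file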
void TMVA::MethodPDERS::WriteWeightsToFile( void )
{
TString fname = GetWeightFileName() + ".root";
cout << "--- " << GetName() << ": creating weight file: " << fname << endl;
TFile *fout = new TFile( fname, "RECREATE" );
TList lvar;
TVectorD vmin( fNvar ), vmax( fNvar );
for (Int_t ivar=0; ivar<fNvar; ivar++) {
lvar.Add( new TNamed( (*fInputVars)[ivar], TString() ) );
vmin[ivar] = this->GetXminNorm( ivar );
vmax[ivar] = this->GetXmaxNorm( ivar );
}
lvar.Write();
vmin.Write( "vmin" );
vmax.Write( "vmax" );
lvar.Delete();
TVectorD pdersOptions( 6 );
pdersOptions(0) = (Double_t)fVRangeMode;
pdersOptions(1) = (Double_t)fDeltaFrac;
pdersOptions(2) = (Double_t)fNEventsMin;
pdersOptions(3) = (Double_t)fNEventsMax;
pdersOptions(4) = (Double_t)fMaxVIterations;
pdersOptions(5) = (Double_t)fInitialScale;
pdersOptions.Write( "PdersOptions" );
TObjArrayIter branchIter( fTrainingTree->GetListOfBranches(), kIterForward );
TBranch* branch = NULL;
const int nBranches = fTrainingTree->GetListOfBranches()->GetSize();
Float_t *branchVar = new Float_t[nBranches];
Int_t theType, jvar = -1;
while ((branch = (TBranch*)branchIter.Next()) != 0) {
if ((TString)branch->GetName() == "type")
fTrainingTree->SetBranchAddress( branch->GetName(), &theType );
else
fTrainingTree->SetBranchAddress( branch->GetName(), &branchVar[++jvar] );
}
fTrainingTree->SetBranchStatus( "*", 1 );
TTree *trainingTreeClone = fTrainingTree->CloneTree();
trainingTreeClone->Write( "trainingTree" );
fout->Close();
delete[] branchVar;
delete fout;
}
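//_______________________________________________________________________
// read the variable names, the normalisation limits, the PDERS options and
// the training tree back from the ROOT weight file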
void TMVA::MethodPDERS::ReadWeightsFromFile( void )
{
TString fname = GetWeightFileName();
if (!fname.EndsWith( ".root" )) fname += ".root";
cout << "--- " << GetName() << ": reading weight file: " << fname << endl;
fFin = new TFile( fname );
TList lvar;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
TNamed t;
t.Read( (*fInputVars)[ivar] );
if (t.GetName() != (*fInputVars)[ivar]) {
cout << "--- " << GetName() << ": Error while reading weight file; "
<< "unknown variable: " << t.GetName() << " at position: " << ivar << ". "
<< "Expected variable: " << (*fInputVars)[ivar] << endl;
throw std::invalid_argument( "Abort" );
}
}
TVectorD vmin( fNvar ), vmax( fNvar );
TVectorD *tmp = (TVectorD*)fFin->Get( "vmin" );
vmin = *tmp;
tmp = (TVectorD*)fFin->Get( "vmax" );
vmax = *tmp;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
this->SetXminNorm( ivar, vmin[ivar] );
this->SetXmaxNorm( ivar, vmax[ivar] );
}
TVectorD* pdersOptions = (TVectorD*)fFin->Get( "PdersOptions" );
fVRangeMode = (VolumeRangeMode)(Int_t)(*pdersOptions)(0);
fDeltaFrac = (*pdersOptions)(1);
fNEventsMin = (Int_t)(*pdersOptions)(2);
fNEventsMax = (Int_t)(*pdersOptions)(3);
fMaxVIterations = (Int_t)(*pdersOptions)(4);
fInitialScale = (*pdersOptions)(5);
fTrainingTree = (TTree*)fFin->Get( "trainingTree" );
if (NULL == fTrainingTree) {
cout << "--- " << GetName() << ": Error while reading 'trainingTree': zero pointer "
<< endl;
throw std::invalid_argument( "Abort" );
}
}
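//_______________________________________________________________________
// no PDERS-specific histograms are produced; only report the target directory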
void TMVA::MethodPDERS::WriteHistosToFile( void )
{
cout << "--- " << GetName() << ": write " << GetName()
<<" special histos to file: " << fBaseDir->GetPath() << endl;
}