doc/hackathon/MethodFDA_8cxx_source.html

// @(#)root/tmva $Id$

// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer


/**********************************************************************************

 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *

 * Package: TMVA                                                                  *

 * Class  : MethodFDA                                                             *

 *                                             *

 *                                                                                *

 * Description:                                                                   *

 *      Implementation                                                            *

 *                                                                                *

 * Authors (alphabetical):                                                        *

 *      Andreas Hoecker  <Andreas.Hocker@cern.ch> - CERN, Switzerland             *

 *      Peter Speckmayer <speckmay@mail.cern.ch>  - CERN, Switzerland             *

 *      Joerg Stelzer    <stelzer@cern.ch>        - DESY, Germany                 *

 *      Maciej Kruk      <mkruk@cern.ch>          - IFJ PAN & AGH, Poland         *

 *                                                                                *

 * Copyright (c) 2005-2006:                                                       *

 *      CERN, Switzerland                                                         *

 *      MPI-K Heidelberg, Germany                                                 *

 *                                                                                *

 * Redistribution and use in source and binary forms, with or without             *

 * modification, are permitted according to the terms listed in LICENSE           *

 * (see tmva/doc/LICENSE)                                          *

 **********************************************************************************/


/*! \class TMVA::MethodFDA

\ingroup TMVA


Function discriminant analysis (FDA).


This simple classifier

fits any user-defined TFormula (via option configuration string) to

the training data by requiring a formula response of 1 (0) to signal

(background) events. The parameter fitting is done via the abstract

class FitterBase, featuring Monte Carlo sampling, Genetic

Algorithm, Simulated Annealing, MINUIT and combinations of these.


Can compute regression value for one dimensional output

*/


#include "TMVA/MethodFDA.h"


#include "TMVA/ClassifierFactory.h"

#include "TMVA/Config.h"

#include "TMVA/Configurable.h"

#include "TMVA/DataSetInfo.h"

#include "TMVA/FitterBase.h"

#include "TMVA/GeneticFitter.h"

#include "TMVA/Interval.h"

#include "TMVA/IFitterTarget.h"

#include "TMVA/IMethod.h"

#include "TMVA/MCFitter.h"

#include "TMVA/MethodBase.h"

#include "TMVA/MinuitFitter.h"

#include "TMVA/MsgLogger.h"

#include "TMVA/Timer.h"

#include "TMVA/Tools.h"

#include "TMVA/TransformationHandler.h"

#include "TMVA/Types.h"

#include "TMVA/SimulatedAnnealingFitter.h"


#include "TList.h"

#include "TFormula.h"

#include "TString.h"

#include "TObjString.h"

#include "TRandom3.h"

#include "TMath.h"


#include <iostream>

#include <algorithm>

#include <iterator>

#include <stdexcept>

#include <sstream>


using std::stringstream;


REGISTER_METHOD(FDA)


////////////////////////////////////////////////////////////////////////////////

/// standard constructor


   TMVA::MethodFDA::MethodFDA( const TString& jobName, const TString& methodTitle,
                               DataSetInfo& theData, const TString& theOption )
   : MethodBase( jobName, Types::kFDA, methodTitle, theData, theOption ),
     IFitterTarget(),
     fFormula( 0 ),            // allocated in CreateFormula()
     fNPars( 0 ),              // derived from the "ParRanges" option in ProcessOptions()
     fFitter( 0 ),             // allocated in ProcessOptions()
     fConvergerFitter( 0 ),    // assigned in ProcessOptions()
     fSumOfWeightsSig( 0 ),
     fSumOfWeightsBkg( 0 ),
     fSumOfWeights( 0 ),
     fOutputDimensions( 0 )    // assigned in ProcessOptions() or ReadWeightsFromXML()
{
   // nothing else to do: all members start out zeroed
}


////////////////////////////////////////////////////////////////////////////////

/// constructor from weight file


TMVA::MethodFDA::MethodFDA( DataSetInfo& theData, const TString& theWeightFile )
   : MethodBase( Types::kFDA, theData, theWeightFile ),
     IFitterTarget(),
     fFormula( 0 ),            // allocated in CreateFormula()
     fNPars( 0 ),              // read back by ReadWeightsFromXML() / ReadWeightsFromStream()
     fFitter( 0 ),
     fConvergerFitter( 0 ),
     fSumOfWeightsSig( 0 ),
     fSumOfWeightsBkg( 0 ),
     fSumOfWeights( 0 ),
     fOutputDimensions( 0 )
{
   // nothing else to do: all members start out zeroed
}


////////////////////////////////////////////////////////////////////////////////

/// default initialisation


void TMVA::MethodFDA::Init( void )
{
   // parameter bookkeeping
   fNPars = 0;
   fBestPars.clear();

   // weight sums (filled again by Train())
   fSumOfWeightsSig = 0;
   fSumOfWeightsBkg = 0;
   fSumOfWeights    = 0;

   // persistent (P) and transient (T) versions of the option strings
   fFormulaStringP  = "";
   fFormulaStringT  = "";
   fParRangeStringP = "";
   fParRangeStringT = "";

   // fitter selection
   fFitMethod = "";
   fConverger = "";

   // the multiclass output buffer is only needed, and only created once, in multiclass mode
   if (DoMulticlass() && fMulticlassReturnVal == NULL) {
      fMulticlassReturnVal = new std::vector<Float_t>();
   }
}


////////////////////////////////////////////////////////////////////////////////

/// define the options (their key words) that can be set in the option string

///

/// format of function string:

///

///      "x0*(0)+((1)/x1)**(2)..."

///

/// where "(i)" are the parameters, and "xi" the input variables

///

/// format of parameter string:

///

///      "(-1.2,3.4);(-2.3,4.55);..."

///

/// where the numbers in "(a,b)" correspond to the a=min, b=max parameter ranges;

/// each parameter defined in the function string must have a corresponding range


void TMVA::MethodFDA::DeclareOptions()

{

   DeclareOptionRef( fFormulaStringP  = "(0)", "Formula",   "The discrimination formula" );

   DeclareOptionRef( fParRangeStringP = "()", "ParRanges", "Parameter ranges" );


   // fitter

   DeclareOptionRef( fFitMethod = "MINUIT", "FitMethod", "Optimisation Method");

   AddPreDefVal(TString("MC"));

   AddPreDefVal(TString("GA"));

   AddPreDefVal(TString("SA"));

   AddPreDefVal(TString("MINUIT"));


   DeclareOptionRef( fConverger = "None", "Converger", "FitMethod uses Converger to improve result");

   AddPreDefVal(TString("None"));

   AddPreDefVal(TString("MINUIT"));

}


////////////////////////////////////////////////////////////////////////////////

/// translate formula string into TFormula, and parameter string into par ranges


void TMVA::MethodFDA::CreateFormula()

{

   // process transient strings

   fFormulaStringT  = fFormulaStringP;


   // interpret formula string


   // replace the parameters "(i)" by the TFormula style "[i]"

   for (UInt_t ipar=0; ipar<fNPars; ipar++) {

      fFormulaStringT.ReplaceAll( TString::Format("(%i)",ipar), TString::Format("[%i]",ipar) );

   }


   // sanity check, there should be no "(i)", with 'i' a number anymore

   for (Int_t ipar=fNPars; ipar<1000; ipar++) {

      if (fFormulaStringT.Contains( TString::Format("(%i)",ipar) ))

         Log() << kFATAL

               << "<CreateFormula> Formula contains expression: \"" << TString::Format("(%i)",ipar) << "\", "

               << "which cannot be attributed to a parameter; "

               << "it may be that the number of variable ranges given via \"ParRanges\" "

               << "does not match the number of parameters in the formula expression, please verify!"

               << Endl;

   }


   // write the variables "xi" as additional parameters "[npar+i]"

   for (Int_t ivar=GetNvar()-1; ivar >= 0; ivar--) {

      fFormulaStringT.ReplaceAll( TString::Format("x%i",ivar), TString::Format("[%i]",ivar+fNPars) );

   }


   // sanity check, there should be no "xi", with 'i' a number anymore

   for (UInt_t ivar=GetNvar(); ivar<1000; ivar++) {

      if (fFormulaStringT.Contains( TString::Format("x%i",ivar) ))

         Log() << kFATAL

               << "<CreateFormula> Formula contains expression: \"" << TString::Format("x%i",ivar) << "\", "

               << "which cannot be attributed to an input variable" << Endl;

   }


   Log() << "User-defined formula string       : \"" << fFormulaStringP << "\"" << Endl;

   Log() << "TFormula-compatible formula string: \"" << fFormulaStringT << "\"" << Endl;

   Log() << kDEBUG << "Creating and compiling formula" << Endl;


   // create TF1

   if (fFormula) delete fFormula;

   fFormula = new TFormula( "FDA_Formula", fFormulaStringT );


   // is formula correct ?

   if (!fFormula->IsValid())

      Log() << kFATAL << "<ProcessOptions> Formula expression could not be properly compiled" << Endl;


   // other sanity checks

   if (fFormula->GetNpar() > (Int_t)(fNPars + GetNvar()))

      Log() << kFATAL << "<ProcessOptions> Dubious number of parameters in formula expression: "

            << fFormula->GetNpar() << " - compared to maximum allowed: " << fNPars + GetNvar() << Endl;

}


////////////////////////////////////////////////////////////////////////////////

/// the option string is decoded, for available options see "DeclareOptions"


void TMVA::MethodFDA::ProcessOptions()
{
   // Decodes the "ParRanges" option into one Interval per fit parameter, builds the TFormula,
   // replicates the ranges for every output dimension and instantiates the requested fitter(s).

   // work on a transient, blank-free copy of the persistent range string
   fParRangeStringT = fParRangeStringP;
   fParRangeStringT.ReplaceAll( " ", "" );

   // the number of fit parameters is taken to be the number of ')' in the range string
   fNPars = fParRangeStringT.CountChar( ')' );

   TList* parList = gTools().ParseFormatLine( fParRangeStringT, ";" );
   if ((UInt_t)parList->GetSize() != fNPars) {
      Log() << kFATAL << "<ProcessOptions> Mismatch in parameter string: "
            << "the number of parameters: " << fNPars << " != ranges defined: "
            << parList->GetSize() << "; the format of the \"ParRanges\" string "
            << "must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
            << "where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
            << "each parameter defined in the function string must have a corresponding range."
            << Endl;
   }

   fParRange.resize( fNPars );
   for (UInt_t ipar=0; ipar<fNPars; ipar++) fParRange[ipar] = 0;

   for (UInt_t ipar=0; ipar<fNPars; ipar++) {
      // split "(a,b)" at the comma: a sits between '(' and ',', b between ',' and ')'
      TString str = ((TObjString*)parList->At(ipar))->GetString();
      Ssiz_t istr = str.First( ',' );
      TString pminS(str(1,istr-1));
      TString pmaxS(str(istr+1,str.Length()-2-istr));

      // convert both bounds to numbers (they stay 0 when the text cannot be parsed)
      stringstream stmin; Float_t pmin=0; stmin << pminS.Data(); stmin >> pmin;
      stringstream stmax; Float_t pmax=0; stmax << pmaxS.Data(); stmax >> pmax;

      // sanity check: collapse numerically identical bounds, refuse inverted intervals
      if (TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
      if (pmin > pmax) Log() << kFATAL << "<ProcessOptions> min > max in interval for parameter: ["
                             << ipar << "] : [" << pmin  << ", " << pmax << "] " << Endl;

      Log() << kINFO << "Create parameter interval for parameter " << ipar << " : [" << pmin << "," << pmax << "]" << Endl;
      fParRange[ipar] = new Interval( pmin, pmax );
   }
   delete parList;

   // create formula
   CreateFormula();

   // one set of parameters per output dimension: a single one for two-class classification,
   // one per target for regression, one per class for multiclass
   fOutputDimensions = 1;
   if( DoRegression() )
      fOutputDimensions = DataInfo().GetNTargets();
   if( DoMulticlass() )
      fOutputDimensions = DataInfo().GetNClasses();

   // the additional dimensions share the Interval objects of the first one (pointer copies);
   // ClearAll() therefore deletes only the first fNPars entries
   for( Int_t dim = 1; dim < fOutputDimensions; ++dim ){
      for( UInt_t par = 0; par < fNPars; ++par ){
         fParRange.push_back( fParRange.at(par) );
      }
   }

   // create minimiser: without a converger the fitter targets this object directly,
   // otherwise it targets a MinuitFitter which in turn targets this object
   fConvergerFitter = (IFitterTarget*)this;
   if (fConverger == "MINUIT") {
      fConvergerFitter = new MinuitFitter( *this, TString::Format("%s_Converger_Minuit", GetName()), fParRange, GetOptions() );
      SetOptions(dynamic_cast<Configurable*>(fConvergerFitter)->GetOptions());
   }

   if(fFitMethod == "MC")
      fFitter = new MCFitter( *fConvergerFitter, TString::Format("%s_Fitter_MC", GetName()), fParRange, GetOptions() );
   else if (fFitMethod == "GA")
      fFitter = new GeneticFitter( *fConvergerFitter, TString::Format("%s_Fitter_GA", GetName()), fParRange, GetOptions() );
   else if (fFitMethod == "SA")
      fFitter = new SimulatedAnnealingFitter( *fConvergerFitter, TString::Format("%s_Fitter_SA", GetName()), fParRange, GetOptions() );
   else if (fFitMethod == "MINUIT")
      fFitter = new MinuitFitter( *fConvergerFitter, TString::Format("%s_Fitter_Minuit", GetName()), fParRange, GetOptions() );
   else {
      Log() << kFATAL << "<Train> Do not understand fit method:" << fFitMethod << Endl;
   }

   fFitter->CheckForUnusedOptions();
}


////////////////////////////////////////////////////////////////////////////////

/// destructor


TMVA::MethodFDA::~MethodFDA()
{
   // all clean-up is delegated to ClearAll()
   ClearAll();
}


////////////////////////////////////////////////////////////////////////////////

/// FDA accepts two-class classification, multiclass classification and regression (the number of targets is not checked)


Bool_t TMVA::MethodFDA::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
{
   switch (type) {
      case Types::kClassification:
         // plain classification is supported for exactly two classes only
         return (numberClasses == 2) ? kTRUE : kFALSE;
      case Types::kMulticlass:
      case Types::kRegression:
         return kTRUE;
      default:
         return kFALSE;
   }
}


////////////////////////////////////////////////////////////////////////////////

/// delete and clear all class members


void TMVA::MethodFDA::ClearAll( void )

{

   // if there is more than one output dimension, the paramater ranges are the same again (object has been copied).

   // hence, ... erase the copied pointers to assure, that they are deleted only once.

   //   fParRange.erase( fParRange.begin()+(fNPars), fParRange.end() );

   for (UInt_t ipar=0; ipar<fParRange.size() && ipar<fNPars; ipar++) {

      if (fParRange[ipar] != 0) { delete fParRange[ipar]; fParRange[ipar] = 0; }

   }

   fParRange.clear();


   if (fFormula  != 0) { delete fFormula; fFormula = 0; }

   fBestPars.clear();

}


////////////////////////////////////////////////////////////////////////////////

/// FDA training


void TMVA::MethodFDA::Train( void )

{

   // cache training events

   fSumOfWeights    = 0;

   fSumOfWeightsSig = 0;

   fSumOfWeightsBkg = 0;


   for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {


      // read the training event

      const Event* ev = GetEvent(ievt);


      // true event copy

      Float_t w  = ev->GetWeight();


      if (!DoRegression()) {

         if (DataInfo().IsSignal(ev)) { fSumOfWeightsSig += w; }

         else                { fSumOfWeightsBkg += w; }

      }

      fSumOfWeights += w;

   }


   // sanity check

   if (!DoRegression()) {

      if (fSumOfWeightsSig <= 0 || fSumOfWeightsBkg <= 0) {

         Log() << kFATAL << "<Train> Troubles in sum of weights: "

               << fSumOfWeightsSig << " (S) : " << fSumOfWeightsBkg << " (B)" << Endl;

      }

   }

   else if (fSumOfWeights <= 0) {

      Log() << kFATAL << "<Train> Troubles in sum of weights: "

            << fSumOfWeights << Endl;

   }


   // starting values (not used by all fitters)

   fBestPars.clear();

   for (std::vector<Interval*>::const_iterator parIt = fParRange.begin(); parIt != fParRange.end(); ++parIt) {

      fBestPars.push_back( (*parIt)->GetMean() );

   }


   // execute the fit

   Double_t estimator = fFitter->Run( fBestPars );


   // print results

   PrintResults( fFitMethod, fBestPars, estimator );


   delete fFitter; fFitter = 0;

   if (fConvergerFitter!=0 && fConvergerFitter!=(IFitterTarget*)this) {

      delete fConvergerFitter;

      fConvergerFitter = 0;

   }

   ExitFromTraining();

}


////////////////////////////////////////////////////////////////////////////////

/// display fit parameters

/// check maximum length of variable name


void TMVA::MethodFDA::PrintResults( const TString& fitter, std::vector<Double_t>& pars, const Double_t estimator ) const

{

   Log() << kINFO;

   Log() << kHEADER << "Results for parameter fit using \"" << fitter << "\" fitter:" << Endl;

   std::vector<TString>  parNames;

   for (UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back( TString::Format("Par(%i)",ipar ) );

   gTools().FormattedOutput( pars, parNames, "Parameter" , "Fit result", Log(), "%g" );

   Log() << "Discriminator expression: \"" << fFormulaStringP << "\"" << Endl;

   Log() << "Value of estimator at minimum: " << estimator << Endl;

}


////////////////////////////////////////////////////////////////////////////////

/// compute estimator for given parameter set (to be minimised)


Double_t TMVA::MethodFDA::EstimatorFunction( std::vector<Double_t>& pars )
{
   // Returns the quantity the fitter works on for the parameter set `pars`.
   // `pars` holds fNPars values per output dimension, laid out dimension after dimension
   // (same layout as used by CalculateMulticlassValues() and GetRegressionValues()).

   // index 0: background, 1: signal, 2: all events
   const Double_t sumOfWeights[] = { fSumOfWeightsBkg, fSumOfWeightsSig, fSumOfWeights };
   Double_t estimator[]          = { 0, 0, 0 };

   if( DoRegression() ){
      // weighted mean squared deviation between formula response and target
      for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
         const TMVA::Event* ev = GetEvent(ievt);

         for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
            // every target is described by its own slice of fNPars parameters; passing the whole
            // vector would push the input variables to the wrong formula slots as soon as there
            // is more than one target
            const Int_t    offset    = dim*fNPars;
            const Double_t desired   = ev->GetTarget( dim );
            const Double_t result    = InterpretFormula( ev, pars.begin()+offset, pars.begin()+offset+fNPars );
            const Double_t deviation = TMath::Power(result - desired, 2);
            estimator[2] += deviation * ev->GetWeight();
         }
      }
      // normalise to the total sum of weights
      estimator[2] /= sumOfWeights[2];
      return estimator[2];

   }else if( DoMulticlass() ){
      for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
         const TMVA::Event* ev = GetEvent(ievt);

         CalculateMulticlassValues( ev, pars, *fMulticlassReturnVal );

         // only the term of the event's true class contributes (t is 1 there and 0 elsewhere)
         // NOTE(review): the sum of t*log(y) is accumulated un-negated and y is the raw formula
         // response - confirm that this is the intended quantity for a minimiser.
         Double_t crossEntropy = 0.0;
         for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
            Double_t y = fMulticlassReturnVal->at(dim);
            Double_t t = (ev->GetClass() == static_cast<UInt_t>(dim) ? 1.0 : 0.0 );
            crossEntropy += t*log(y);
         }
         estimator[2] += ev->GetWeight()*crossEntropy;
      }
      // normalise to the total sum of weights
      estimator[2] /= sumOfWeights[2];
      return estimator[2];

   }else{
      // two-class classification: squared deviation from 1 (signal) or 0 (background),
      // accumulated separately per class
      for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
         const TMVA::Event* ev = GetEvent(ievt);

         const Double_t desired   = (DataInfo().IsSignal(ev) ? 1.0 : 0.0);
         const Double_t result    = InterpretFormula( ev, pars.begin(), pars.end() );
         const Double_t deviation = TMath::Power(result - desired, 2);
         estimator[Int_t(desired)] += deviation * ev->GetWeight();
      }
      estimator[0] /= sumOfWeights[0];
      estimator[1] /= sumOfWeights[1];
      // return value is sum over normalised signal and background contributions
      return estimator[0] + estimator[1];
   }
}


////////////////////////////////////////////////////////////////////////////////

/// formula interpretation


Double_t TMVA::MethodFDA::InterpretFormula( const Event* event, std::vector<Double_t>::iterator parBegin, std::vector<Double_t>::iterator parEnd )
{
   // the fit parameters in [parBegin, parEnd) fill the leading TFormula parameter slots ...
   Int_t slot = 0;
   for (std::vector<Double_t>::iterator par = parBegin; par != parEnd; ++par, ++slot) {
      fFormula->SetParameter( slot, *par );
   }

   // ... and the input variables of the event fill the slots right behind them
   const UInt_t nVars = GetNvar();
   for (UInt_t ivar = 0; ivar < nVars; ++ivar) {
      fFormula->SetParameter( ivar+slot, event->GetValue(ivar) );
   }

   // everything enters through parameters, so the evaluation point itself is a dummy
   return fFormula->Eval( 0 );
}


////////////////////////////////////////////////////////////////////////////////

/// returns MVA value for given event


Double_t TMVA::MethodFDA::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   const Event* currentEvent = GetEvent();

   // no error estimate is available for the response
   NoErrorCalc(err, errUpper);

   // response = formula evaluated with the best-fit parameters
   const Double_t response = InterpretFormula( currentEvent, fBestPars.begin(), fBestPars.end() );
   return response;
}


////////////////////////////////////////////////////////////////////////////////


const std::vector<Float_t>& TMVA::MethodFDA::GetRegressionValues()
{
   // Returns the regression response for the current event, one entry per output dimension,
   // after the inverse transformation of the transformation handler has been applied.
   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();

   const Event* ev = GetEvent();

   // scratch copy of the event that receives the formula responses as targets;
   // an automatic object, so it is released on every exit path
   Event evT( *ev );

   for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
      // each output dimension uses its own slice of fNPars best-fit parameters
      Int_t offset = dim*fNPars;
      evT.SetTarget(dim,InterpretFormula( ev, fBestPars.begin()+offset, fBestPars.begin()+offset+fNPars ) );
   }

   const Event* evT2 = GetTransformationHandler().InverseTransform( &evT );

   // report every computed target, not only the first one
   for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
      fRegressionReturnVal->push_back(evT2->GetTarget(dim));
   }

   return (*fRegressionReturnVal);
}


////////////////////////////////////////////////////////////////////////////////


const std::vector<Float_t>& TMVA::MethodFDA::GetMulticlassValues()

{

   if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();

   fMulticlassReturnVal->clear();

   std::vector<Float_t> temp;


   // returns MVA value for given event

   const TMVA::Event* evt = GetEvent();


   CalculateMulticlassValues( evt, fBestPars, temp );


   UInt_t nClasses = DataInfo().GetNClasses();

   for(UInt_t iClass=0; iClass<nClasses; iClass++){

      Double_t norm = 0.0;

      for(UInt_t j=0;j<nClasses;j++){

         if(iClass!=j)

            norm+=exp(temp[j]-temp[iClass]);

      }

      (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));

   }


   return (*fMulticlassReturnVal);

}


////////////////////////////////////////////////////////////////////////////////

/// calculate the values for multiclass


void TMVA::MethodFDA::CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values)
{
   values.clear();

   // one formula evaluation per output dimension (= class); dimension `dim` uses the
   // parameter slice [dim*fNPars, dim*fNPars + fNPars)
   const std::vector<Double_t>::iterator first = parameters.begin();
   for (Int_t dim = 0; dim < fOutputDimensions; ++dim) {
      const Int_t offset = dim*fNPars;
      const Double_t response = InterpretFormula( evt, first+offset, first+offset+fNPars );
      values.push_back( response );
   }
}


////////////////////////////////////////////////////////////////////////////////

/// read back the training results from a file (stream)


void  TMVA::MethodFDA::ReadWeightsFromStream( std::istream& istr )

{

   // retrieve best function parameters

   // coverity[tainted_data_argument]

   istr >> fNPars;


   fBestPars.clear();

   fBestPars.resize( fNPars );

   for (UInt_t ipar=0; ipar<fNPars; ipar++) istr >> fBestPars[ipar];

}


////////////////////////////////////////////////////////////////////////////////

/// create XML description for FDA classification and regression

/// (for arbitrary number of output classes/targets)


void TMVA::MethodFDA::AddWeightsXMLTo( void* parent ) const

{

   void* wght = gTools().AddChild(parent, "Weights");

   gTools().AddAttr( wght, "NPars",  fNPars );

   gTools().AddAttr( wght, "NDim",   fOutputDimensions );

   for (UInt_t ipar=0; ipar<fNPars*fOutputDimensions; ipar++) {

      void* coeffxml = gTools().AddChild( wght, "Parameter" );

      gTools().AddAttr( coeffxml, "Index", ipar   );

      gTools().AddAttr( coeffxml, "Value", fBestPars[ipar] );

   }


   // write formula

   gTools().AddAttr( wght, "Formula", fFormulaStringP );

}


////////////////////////////////////////////////////////////////////////////////

/// read coefficients from xml weight file


void TMVA::MethodFDA::ReadWeightsFromXML( void* wghtnode )

{

   gTools().ReadAttr( wghtnode, "NPars", fNPars );


   if(gTools().HasAttr( wghtnode, "NDim")) {

      gTools().ReadAttr( wghtnode, "NDim" , fOutputDimensions );

   } else {

      // older weight files don't have this attribute

      fOutputDimensions = 1;

   }


   fBestPars.clear();

   fBestPars.resize( fNPars*fOutputDimensions );


   void* ch = gTools().GetChild(wghtnode);

   Double_t par;

   UInt_t    ipar;

   while (ch) {

      gTools().ReadAttr( ch, "Index", ipar );

      gTools().ReadAttr( ch, "Value", par  );


      // sanity check

      if (ipar >= fNPars*fOutputDimensions) Log() << kFATAL << "<ReadWeightsFromXML> index out of range: "

                                                  << ipar << " >= " << fNPars << Endl;

      fBestPars[ipar] = par;


      ch = gTools().GetNextChild(ch);

   }


   // read formula

   gTools().ReadAttr( wghtnode, "Formula", fFormulaStringP );


   // create the TFormula

   CreateFormula();

}


////////////////////////////////////////////////////////////////////////////////

/// write FDA-specific classifier response


void TMVA::MethodFDA::MakeClassSpecific( std::ostream& fout, const TString& className ) const

{

   fout << "   double              fParameter[" << fNPars << "];" << std::endl;

   fout << "};" << std::endl;

   fout << "" << std::endl;

   fout << "inline void " << className << "::Initialize() " << std::endl;

   fout << "{" << std::endl;

   for(UInt_t ipar=0; ipar<fNPars; ipar++) {

      fout << "   fParameter[" << ipar << "] = " << fBestPars[ipar] << ";" << std::endl;

   }

   fout << "}" << std::endl;

   fout << std::endl;

   fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;

   fout << "{" << std::endl;

   fout << "   // interpret the formula" << std::endl;


   // replace parameters

   TString str = fFormulaStringT;

   for (UInt_t ipar=0; ipar<fNPars; ipar++) {

      str.ReplaceAll( TString::Format("[%i]", ipar), TString::Format("fParameter[%i]", ipar) );

   }


   // replace input variables

   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {

      str.ReplaceAll( TString::Format("[%i]", ivar+fNPars), TString::Format("inputValues[%i]", ivar) );

   }


   fout << "   double retval = " << str << ";" << std::endl;

   fout << std::endl;

   fout << "   return retval; " << std::endl;

   fout << "}" << std::endl;

   fout << std::endl;

   fout << "// Clean up" << std::endl;

   fout << "inline void " << className << "::Clear() " << std::endl;

   fout << "{" << std::endl;

   fout << "   // nothing to clear" << std::endl;

   fout << "}" << std::endl;

}


////////////////////////////////////////////////////////////////////////////////

/// get help message text

///

/// typical length of text line:

///         "|--------------------------------------------------------------|"


void TMVA::MethodFDA::GetHelpMessage() const

{

   Log() << Endl;

   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;

   Log() << Endl;

   Log() << "The function discriminant analysis (FDA) is a classifier suitable " << Endl;

   Log() << "to solve linear or simple nonlinear discrimination problems." << Endl;

   Log() << Endl;

   Log() << "The user provides the desired function with adjustable parameters" << Endl;

   Log() << "via the configuration option string, and FDA fits the parameters to" << Endl;

   Log() << "it, requiring the signal (background) function value to be as close" << Endl;

   Log() << "as possible to 1 (0). Its advantage over the more involved and" << Endl;

   Log() << "automatic nonlinear discriminators is the simplicity and transparency " << Endl;

   Log() << "of the discrimination expression. A shortcoming is that FDA will" << Endl;

   Log() << "underperform for involved problems with complicated, phase space" << Endl;

   Log() << "dependent nonlinear correlations." << Endl;

   Log() << Endl;

   Log() << "Please consult the Users Guide for the format of the formula string" << Endl;

   Log() << "and the allowed parameter ranges:" << Endl;

   if (gConfig().WriteOptionsReference()) {

      Log() << "<a href=\"https://github.com/root-project/root/blob/master/documentation/tmva/UsersGuide/TMVAUsersGuide.pdf\">"

            << "TMVAUsersGuide.pdf</a>" << Endl;

   }

   else Log() << "documentation/tmva/UsersGuide/TMVAUsersGuide.pdf" << Endl;

   Log() << Endl;

   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;

   Log() << Endl;

   Log() << "The FDA performance depends on the complexity and fidelity of the" << Endl;

   Log() << "user-defined discriminator function. As a general rule, it should" << Endl;

   Log() << "be able to reproduce the discrimination power of any linear" << Endl;

   Log() << "discriminant analysis. To reach into the nonlinear domain, it is" << Endl;

   Log() << "useful to inspect the correlation profiles of the input variables," << Endl;

   Log() << "and add quadratic and higher polynomial terms between variables as" << Endl;

   Log() << "necessary. Comparison with more involved nonlinear classifiers can" << Endl;

   Log() << "be used as a guide." << Endl;

   Log() << Endl;

   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;

   Log() << Endl;

   Log() << "Depending on the function used, the choice of \"FitMethod\" is" << Endl;

   Log() << "crucial for getting valuable solutions with FDA. As a guideline it" << Endl;

   Log() << "is recommended to start with \"FitMethod=MINUIT\". When more complex" << Endl;

   Log() << "functions are used where MINUIT does not converge to reasonable" << Endl;

   Log() << "results, the user should switch to non-gradient FitMethods such" << Endl;

   Log() << "as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" << Endl;

   Log() << "useful to combine GA (or MC) with MINUIT by setting the option" << Endl;

   Log() << "\"Converger=MINUIT\". GA (MC) will then set the starting parameters" << Endl;

   Log() << "for MINUIT such that the basic quality of GA (MC) of finding global" << Endl;

   Log() << "minima is combined with the efficacy of MINUIT of finding local" << Endl;

   Log() << "minima." << Endl;

}


ClassifierFactory.h

REGISTER_METHOD
#define REGISTER_METHOD(CLASS)
for example
Definition ClassifierFactory.h:124

Configurable.h

DataSetInfo.h

FitterBase.h

GeneticFitter.h

IFitterTarget.h

IMethod.h

Interval.h

MCFitter.h

MethodBase.h

MethodFDA.h

MinuitFitter.h

MsgLogger.h

Int_t
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59

Ssiz_t
int Ssiz_t
String size (currently int).
Definition RtypesCore.h:81

UInt_t
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
Definition RtypesCore.h:60

Bool_t
bool Bool_t
Boolean (0=false, 1=true) (bool).
Definition RtypesCore.h:77

kFALSE
constexpr Bool_t kFALSE
Definition RtypesCore.h:108

Double_t
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73

Float_t
float Float_t
Float 4 bytes (float).
Definition RtypesCore.h:71

kTRUE
constexpr Bool_t kTRUE
Definition RtypesCore.h:107

SimulatedAnnealingFitter.h

TFormula.h

TList.h

TMath.h

err
Double_t err
Definition TMultiDimFit.cxx:1939

TObjString.h

TRandom3.h

TString.h

Timer.h

Tools.h

TransformationHandler.h

TCollection::GetSize
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition TCollection.h:186

TFormula
The Formula class.
Definition TFormula.h:89

TList
A doubly linked list.
Definition TList.h:38

TList::At
TObject * At(Int_t idx) const override
Returns the object at position idx. Returns 0 if idx is out of range.
Definition TList.cxx:487

TMVA::Configurable
Definition Configurable.h:45

TMVA::Configurable::DeclareOptionRef
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")

TMVA::Configurable::AddPreDefVal
void AddPreDefVal(const T &)
Definition Configurable.h:168

TMVA::Configurable::GetOptions
const TString & GetOptions() const
Definition Configurable.h:84

TMVA::Configurable::Log
MsgLogger & Log() const
Definition Configurable.h:122

TMVA::Configurable::SetOptions
void SetOptions(const TString &s)
Definition Configurable.h:85

TMVA::DataSetInfo
Class that contains all the data information.
Definition DataSetInfo.h:62

TMVA::Event
Definition Event.h:51

TMVA::Event::GetValue
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition Event.cxx:236

TMVA::Event::SetTarget
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition Event.cxx:367

TMVA::Event::GetWeight
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is set or not.
Definition Event.cxx:389

TMVA::Event::GetClass
UInt_t GetClass() const
Definition Event.h:86

TMVA::Event::GetTarget
Float_t GetTarget(UInt_t itgt) const
Definition Event.h:102

TMVA::GeneticFitter
Fitter using a Genetic Algorithm.
Definition GeneticFitter.h:44

TMVA::IFitterTarget::IFitterTarget
IFitterTarget()
constructor
Definition IFitterTarget.cxx:42

TMVA::Interval
The TMVA::Interval Class.
Definition Interval.h:61

TMVA::MCFitter
Fitter using Monte Carlo sampling of parameters.
Definition MCFitter.h:44

TMVA::MethodBase::MethodBase
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Definition MethodBase.cxx:236

TMVA::MethodBase::GetName
const char * GetName() const override
Definition MethodBase.h:337

TMVA::MethodBase::DoMulticlass
Bool_t DoMulticlass() const
Definition MethodBase.h:442

TMVA::MethodBase::ExitFromTraining
void ExitFromTraining()
Definition MethodBase.h:467

TMVA::MethodBase::GetNEvents
UInt_t GetNEvents() const
Definition MethodBase.h:419

TMVA::MethodBase::DoRegression
Bool_t DoRegression() const
Definition MethodBase.h:441

TMVA::MethodBase::fRegressionReturnVal
std::vector< Float_t > * fRegressionReturnVal
Definition MethodBase.h:600

TMVA::MethodBase::fMulticlassReturnVal
std::vector< Float_t > * fMulticlassReturnVal
Definition MethodBase.h:601

TMVA::MethodBase::GetEvent
const Event * GetEvent() const
Definition MethodBase.h:754

TMVA::MethodBase::DataInfo
DataSetInfo & DataInfo() const
Definition MethodBase.h:413

TMVA::MethodBase::GetNvar
UInt_t GetNvar() const
Definition MethodBase.h:347

TMVA::MethodBase::GetTransformationHandler
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition MethodBase.h:397

TMVA::MethodBase::NoErrorCalc
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition MethodBase.cxx:900

TMVA::MethodFDA::HasAnalysisType
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
FDA can handle classification with 2 classes and regression with one regression-target.
Definition MethodFDA.cxx:333

TMVA::MethodFDA::fFormulaStringT
TString fFormulaStringT
string with function
Definition MethodFDA.h:133

TMVA::MethodFDA::GetRegressionValues
const std::vector< Float_t > & GetRegressionValues() override
Definition MethodFDA.cxx:529

TMVA::MethodFDA::~MethodFDA
virtual ~MethodFDA(void)
destructor
Definition MethodFDA.cxx:325

TMVA::MethodFDA::InterpretFormula
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition MethodFDA.cxx:498

TMVA::MethodFDA::Train
void Train(void) override
FDA training.
Definition MethodFDA.cxx:362

TMVA::MethodFDA::CalculateMulticlassValues
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition MethodFDA.cxx:580

TMVA::MethodFDA::ReadWeightsFromStream
void ReadWeightsFromStream(std::istream &i) override
read back the training results from a file (stream)
Definition MethodFDA.cxx:601

TMVA::MethodFDA::fSumOfWeightsBkg
Double_t fSumOfWeightsBkg
sum of weights (background)
Definition MethodFDA.h:148

TMVA::MethodFDA::fOutputDimensions
Int_t fOutputDimensions
number of output values
Definition MethodFDA.h:152

TMVA::MethodFDA::MethodFDA
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition MethodFDA.cxx:85

TMVA::MethodFDA::ClearAll
void ClearAll()
delete and clear all class members
Definition MethodFDA.cxx:345

TMVA::MethodFDA::fParRange
std::vector< Interval * > fParRange
ranges of parameters
Definition MethodFDA.h:138

TMVA::MethodFDA::PrintResults
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Definition MethodFDA.cxx:420

TMVA::MethodFDA::GetMvaValue
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
returns MVA value for given event
Definition MethodFDA.cxx:517

TMVA::MethodFDA::fSumOfWeightsSig
Double_t fSumOfWeightsSig
sum of weights (signal)
Definition MethodFDA.h:147

TMVA::MethodFDA::fParRangeStringP
TString fParRangeStringP
string with ranges of parameters
Definition MethodFDA.h:132

TMVA::MethodFDA::MakeClassSpecific
void MakeClassSpecific(std::ostream &, const TString &) const override
write FDA-specific classifier response
Definition MethodFDA.cxx:673

TMVA::MethodFDA::fFormula
TFormula * fFormula
the discrimination function
Definition MethodFDA.h:136

TMVA::MethodFDA::fBestPars
std::vector< Double_t > fBestPars
the pars that optimise (minimise) the estimator
Definition MethodFDA.h:139

TMVA::MethodFDA::AddWeightsXMLTo
void AddWeightsXMLTo(void *parent) const override
create XML description for FDA classification and regression (for arbitrary number of output classes/targets)
Definition MethodFDA.cxx:616

TMVA::MethodFDA::fConvergerFitter
IFitterTarget * fConvergerFitter
intermediate fitter
Definition MethodFDA.h:143

TMVA::MethodFDA::ProcessOptions
void ProcessOptions() override
the option string is decoded, for available options see "DeclareOptions"
Definition MethodFDA.cxx:239

TMVA::MethodFDA::fFitter
FitterBase * fFitter
the fitter used in the training
Definition MethodFDA.h:142

TMVA::MethodFDA::ReadWeightsFromXML
void ReadWeightsFromXML(void *wghtnode) override
read coefficients from xml weight file
Definition MethodFDA.cxx:634

TMVA::MethodFDA::EstimatorFunction
Double_t EstimatorFunction(std::vector< Double_t > &) override
compute estimator for given parameter set (to be minimised)
Definition MethodFDA.cxx:434

TMVA::MethodFDA::fSumOfWeights
Double_t fSumOfWeights
sum of weights
Definition MethodFDA.h:149

TMVA::MethodFDA::fParRangeStringT
TString fParRangeStringT
string with ranges of parameters
Definition MethodFDA.h:134

TMVA::MethodFDA::DeclareOptions
void DeclareOptions() override
define the options (their key words) that can be set in the option string
Definition MethodFDA.cxx:162

TMVA::MethodFDA::GetMulticlassValues
const std::vector< Float_t > & GetMulticlassValues() override
Definition MethodFDA.cxx:552

TMVA::MethodFDA::fFitMethod
TString fFitMethod
estimator optimisation method
Definition MethodFDA.h:140

TMVA::MethodFDA::CreateFormula
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition MethodFDA.cxx:182

TMVA::MethodFDA::fNPars
UInt_t fNPars
number of parameters
Definition MethodFDA.h:137

TMVA::MethodFDA::fConverger
TString fConverger
fit method uses fConverger as intermediate step to converge into local minima
Definition MethodFDA.h:141

TMVA::MethodFDA::fFormulaStringP
TString fFormulaStringP
string with function
Definition MethodFDA.h:131

TMVA::MethodFDA::Init
void Init(void) override
default initialisation
Definition MethodFDA.cxx:123

TMVA::MethodFDA::GetHelpMessage
void GetHelpMessage() const override
get help message text
Definition MethodFDA.cxx:718

TMVA::MinuitFitter
Fitter using MINUIT.
Definition MinuitFitter.h:48

TMVA::SimulatedAnnealingFitter
Fitter using a Simulated Annealing Algorithm.
Definition SimulatedAnnealingFitter.h:49

TMVA::Tools::FormattedOutput
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition Tools.cxx:862

TMVA::Tools::ParseFormatLine
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition Tools.cxx:376

TMVA::Tools::Color
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:803

TMVA::Tools::ReadAttr
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329

TMVA::Tools::GetChild
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1125

TMVA::Tools::AddAttr
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347

TMVA::Tools::AddChild
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1099

TMVA::Tools::GetNextChild
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1137

TMVA::Types
Singleton class for Global types used by TMVA.
Definition Types.h:71

TMVA::Types::EAnalysisType
EAnalysisType
Definition Types.h:126

TMVA::Types::kMulticlass
@ kMulticlass
Definition Types.h:129

TMVA::Types::kClassification
@ kClassification
Definition Types.h:127

TMVA::Types::kRegression
@ kRegression
Definition Types.h:128

TObjString
Collectable string class.
Definition TObjString.h:28

TString
Basic string class.
Definition TString.h:138

TString::Data
const char * Data() const
Definition TString.h:384

TString::Format
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2385

y
Double_t y[n]
Definition legend1.C:17

TMVA::DNN::crossEntropy
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition NeuralNet.icc:412

TMVA
create variable transformations
Definition GeneticMinimizer.h:22

TMVA::gConfig
Config & gConfig()

TMVA::gTools
Tools & gTools()

TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148

TMath::Power
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Definition TMath.h:732

TMath::Abs
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:122

Config.h

Types.h