Logo ROOT   6.07/09
Reference Guide
MethodFDA.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodFDA *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * *
19  * Copyright (c) 2005-2006: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 //_______________________________________________________________________
29 //
30 // Function discriminant analysis (FDA). This simple classifier //
31 // fits any user-defined TFormula (via option configuration string) to //
32 // the training data by requiring a formula response of 1 (0) to signal //
33 // (background) events. The parameter fitting is done via the abstract //
34 // class FitterBase, featuring Monte Carlo sampling, Genetic //
35 // Algorithm, Simulated Annealing, MINUIT and combinations of these. //
36 // //
37 // Can compute regression value for one dimensional output //
38 //_______________________________________________________________________
39 
40 #include "TMVA/MethodFDA.h"
41 
42 #include "TMVA/ClassifierFactory.h"
43 #include "TMVA/Config.h"
44 #include "TMVA/Configurable.h"
45 #include "TMVA/DataSetInfo.h"
46 #include "TMVA/FitterBase.h"
47 #include "TMVA/GeneticFitter.h"
48 #include "TMVA/Interval.h"
49 #include "TMVA/IFitterTarget.h"
50 #include "TMVA/IMethod.h"
51 #include "TMVA/MCFitter.h"
52 #include "TMVA/MethodBase.h"
53 #include "TMVA/MinuitFitter.h"
54 #include "TMVA/MsgLogger.h"
55 #include "TMVA/Timer.h"
56 #include "TMVA/Tools.h"
58 #include "TMVA/Types.h"
60 
61 #include "Riostream.h"
62 #include "TList.h"
63 #include "TFormula.h"
64 #include "TString.h"
65 #include "TObjString.h"
66 #include "TRandom3.h"
67 #include "TMath.h"
68 
69 #include <algorithm>
70 #include <iterator>
71 #include <stdexcept>
72 #include <sstream>
73 
74 using std::stringstream;
75 
76 REGISTER_METHOD(FDA)
77 
78 ClassImp(TMVA::MethodFDA)
79 
80 ////////////////////////////////////////////////////////////////////////////////
81 /// standard constructor
82 
83  TMVA::MethodFDA::MethodFDA( const TString& jobName,
84  const TString& methodTitle,
85  DataSetInfo& theData,
86  const TString& theOption)
87  : MethodBase( jobName, Types::kFDA, methodTitle, theData, theOption),
88  IFitterTarget (),
89  fFormula ( 0 ),
90  fNPars ( 0 ),
91  fFitter ( 0 ),
92  fConvergerFitter( 0 ),
93  fSumOfWeightsSig( 0 ),
94  fSumOfWeightsBkg( 0 ),
95  fSumOfWeights ( 0 ),
96  fOutputDimensions( 0 )
97 {
98 }
99 
100 ////////////////////////////////////////////////////////////////////////////////
101 /// constructor from weight file
102 
104  const TString& theWeightFile)
105  : MethodBase( Types::kFDA, theData, theWeightFile),
106  IFitterTarget (),
107  fFormula ( 0 ),
108  fNPars ( 0 ),
109  fFitter ( 0 ),
110  fConvergerFitter( 0 ),
111  fSumOfWeightsSig( 0 ),
112  fSumOfWeightsBkg( 0 ),
113  fSumOfWeights ( 0 ),
114  fOutputDimensions( 0 )
115 {
116 }
117 
118 ////////////////////////////////////////////////////////////////////////////////
119 /// default initialisation
120 
122 {
123  fNPars = 0;
124 
125  fBestPars.clear();
126 
127  fSumOfWeights = 0;
128  fSumOfWeightsSig = 0;
129  fSumOfWeightsBkg = 0;
130 
131  fFormulaStringP = "";
132  fParRangeStringP = "";
133  fFormulaStringT = "";
134  fParRangeStringT = "";
135 
136  fFitMethod = "";
137  fConverger = "";
138 
139  if( DoMulticlass() )
140  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
141 
142 }
143 
144 ////////////////////////////////////////////////////////////////////////////////
145 /// define the options (their key words) that can be set in the option string
146 ///
147 /// format of function string:
148 /// "x0*(0)+((1)/x1)**(2)..."
149 /// where "(i)" are the parameters, and "xi" the input variables
150 ///
151 /// format of parameter string:
152 /// "(-1.2,3.4);(-2.3,4.55);..."
153 /// where the numbers in "(a,b)" correspond to the a=min, b=max parameter ranges;
154 /// each parameter defined in the function string must have a corresponding range
155 ///
156 
158 {
159  DeclareOptionRef( fFormulaStringP = "(0)", "Formula", "The discrimination formula" );
160  DeclareOptionRef( fParRangeStringP = "()", "ParRanges", "Parameter ranges" );
161 
162  // fitter
163  DeclareOptionRef( fFitMethod = "MINUIT", "FitMethod", "Optimisation Method");
164  AddPreDefVal(TString("MC"));
165  AddPreDefVal(TString("GA"));
166  AddPreDefVal(TString("SA"));
167  AddPreDefVal(TString("MINUIT"));
168 
169  DeclareOptionRef( fConverger = "None", "Converger", "FitMethod uses Converger to improve result");
170  AddPreDefVal(TString("None"));
171  AddPreDefVal(TString("MINUIT"));
172 }
173 
174 ////////////////////////////////////////////////////////////////////////////////
175 /// translate formula string into TFormula, and parameter string into par ranges
176 
178 {
179  // process transient strings
181 
182  // intepret formula string
183 
184  // replace the parameters "(i)" by the TFormula style "[i]"
185  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
186  fFormulaStringT.ReplaceAll( Form("(%i)",ipar), Form("[%i]",ipar) );
187  }
188 
189  // sanity check, there should be no "(i)", with 'i' a number anymore
190  for (Int_t ipar=fNPars; ipar<1000; ipar++) {
191  if (fFormulaStringT.Contains( Form("(%i)",ipar) ))
192  Log() << kFATAL
193  << "<CreateFormula> Formula contains expression: \"" << Form("(%i)",ipar) << "\", "
194  << "which cannot be attributed to a parameter; "
195  << "it may be that the number of variable ranges given via \"ParRanges\" "
196  << "does not match the number of parameters in the formula expression, please verify!"
197  << Endl;
198  }
199 
200  // write the variables "xi" as additional parameters "[npar+i]"
201  for (Int_t ivar=GetNvar()-1; ivar >= 0; ivar--) {
202  fFormulaStringT.ReplaceAll( Form("x%i",ivar), Form("[%i]",ivar+fNPars) );
203  }
204 
205  // sanity check, there should be no "xi", with 'i' a number anymore
206  for (UInt_t ivar=GetNvar(); ivar<1000; ivar++) {
207  if (fFormulaStringT.Contains( Form("x%i",ivar) ))
208  Log() << kFATAL
209  << "<CreateFormula> Formula contains expression: \"" << Form("x%i",ivar) << "\", "
210  << "which cannot be attributed to an input variable" << Endl;
211  }
212 
213  Log() << "User-defined formula string : \"" << fFormulaStringP << "\"" << Endl;
214  Log() << "TFormula-compatible formula string: \"" << fFormulaStringT << "\"" << Endl;
215  Log() << kDEBUG << "Creating and compiling formula" << Endl;
216 
217  // create TF1
218  if (fFormula) delete fFormula;
219  fFormula = new TFormula( "FDA_Formula", fFormulaStringT );
220 
221  // is formula correct ?
222  if (!fFormula->IsValid())
223  Log() << kFATAL << "<ProcessOptions> Formula expression could not be properly compiled" << Endl;
224 
225  // other sanity checks
226  if (fFormula->GetNpar() > (Int_t)(fNPars + GetNvar()))
227  Log() << kFATAL << "<ProcessOptions> Dubious number of parameters in formula expression: "
228  << fFormula->GetNpar() << " - compared to maximum allowed: " << fNPars + GetNvar() << Endl;
229 }
230 
231 ////////////////////////////////////////////////////////////////////////////////
232 /// the option string is decoded, for available options see "DeclareOptions"
233 
235 {
236  // process transient strings
238 
239  // interpret parameter string
240  fParRangeStringT.ReplaceAll( " ", "" );
242 
243  TList* parList = gTools().ParseFormatLine( fParRangeStringT, ";" );
244  if ((UInt_t)parList->GetSize() != fNPars) {
245  Log() << kFATAL << "<ProcessOptions> Mismatch in parameter string: "
246  << "the number of parameters: " << fNPars << " != ranges defined: "
247  << parList->GetSize() << "; the format of the \"ParRanges\" string "
248  << "must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
249  << "where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
250  << "each parameter defined in the function string must have a corresponding rang."
251  << Endl;
252  }
253 
254  fParRange.resize( fNPars );
255  for (UInt_t ipar=0; ipar<fNPars; ipar++) fParRange[ipar] = 0;
256 
257  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
258  // parse (a,b)
259  TString str = ((TObjString*)parList->At(ipar))->GetString();
260  Ssiz_t istr = str.First( ',' );
261  TString pminS(str(1,istr-1));
262  TString pmaxS(str(istr+1,str.Length()-2-istr));
263 
264  stringstream stmin; Float_t pmin=0; stmin << pminS.Data(); stmin >> pmin;
265  stringstream stmax; Float_t pmax=0; stmax << pmaxS.Data(); stmax >> pmax;
266 
267  // sanity check
268  if (TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
269  if (pmin > pmax) Log() << kFATAL << "<ProcessOptions> max > min in interval for parameter: ["
270  << ipar << "] : [" << pmin << ", " << pmax << "] " << Endl;
271 
272  Log() << kINFO << "Create parameter interval for parameter " << ipar << " : [" << pmin << "," << pmax << "]" << Endl;
273  fParRange[ipar] = new Interval( pmin, pmax );
274  }
275  delete parList;
276 
277  // create formula
278  CreateFormula();
279 
280 
281  // copy parameter ranges for each output dimension ==================
282  fOutputDimensions = 1;
283  if( DoRegression() )
285  if( DoMulticlass() )
287 
288  for( Int_t dim = 1; dim < fOutputDimensions; ++dim ){
289  for( UInt_t par = 0; par < fNPars; ++par ){
290  fParRange.push_back( fParRange.at(par) );
291  }
292  }
293  // ====================
294 
295  // create minimiser
297  if (fConverger == "MINUIT") {
298  fConvergerFitter = new MinuitFitter( *this, Form("%s_Converger_Minuit", GetName()), fParRange, GetOptions() );
299  SetOptions(dynamic_cast<Configurable*>(fConvergerFitter)->GetOptions());
300  }
301 
302  if(fFitMethod == "MC")
303  fFitter = new MCFitter( *fConvergerFitter, Form("%s_Fitter_MC", GetName()), fParRange, GetOptions() );
304  else if (fFitMethod == "GA")
305  fFitter = new GeneticFitter( *fConvergerFitter, Form("%s_Fitter_GA", GetName()), fParRange, GetOptions() );
306  else if (fFitMethod == "SA")
308  else if (fFitMethod == "MINUIT")
309  fFitter = new MinuitFitter( *fConvergerFitter, Form("%s_Fitter_Minuit", GetName()), fParRange, GetOptions() );
310  else {
311  Log() << kFATAL << "<Train> Do not understand fit method:" << fFitMethod << Endl;
312  }
313 
315 }
316 
317 ////////////////////////////////////////////////////////////////////////////////
318 /// destructor
319 
321 {
322  ClearAll();
323 }
324 
325 ////////////////////////////////////////////////////////////////////////////////
326 /// FDA can handle classification with 2 classes, multiclass classification, and regression
327 
329 {
330  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
331  if (type == Types::kMulticlass ) return kTRUE;
332  if (type == Types::kRegression ) return kTRUE;
333  return kFALSE;
334 }
335 
336 
337 ////////////////////////////////////////////////////////////////////////////////
338 /// delete and clear all class members
339 
341 {
342  // if there is more than one output dimension, the paramater ranges are the same again (object has been copied).
343  // hence, ... erase the copied pointers to assure, that they are deleted only once.
344  // fParRange.erase( fParRange.begin()+(fNPars), fParRange.end() );
345  for (UInt_t ipar=0; ipar<fParRange.size() && ipar<fNPars; ipar++) {
346  if (fParRange[ipar] != 0) { delete fParRange[ipar]; fParRange[ipar] = 0; }
347  }
348  fParRange.clear();
349 
350  if (fFormula != 0) { delete fFormula; fFormula = 0; }
351  fBestPars.clear();
352 }
353 
354 ////////////////////////////////////////////////////////////////////////////////
355 /// FDA training
356 
358 {
359  // cache training events
360  fSumOfWeights = 0;
361  fSumOfWeightsSig = 0;
362  fSumOfWeightsBkg = 0;
363 
364  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
365 
366  // read the training event
367  const Event* ev = GetEvent(ievt);
368 
369  // true event copy
370  Float_t w = ev->GetWeight();
371 
372  if (!DoRegression()) {
373  if (DataInfo().IsSignal(ev)) { fSumOfWeightsSig += w; }
374  else { fSumOfWeightsBkg += w; }
375  }
376  fSumOfWeights += w;
377  }
378 
379  // sanity check
380  if (!DoRegression()) {
381  if (fSumOfWeightsSig <= 0 || fSumOfWeightsBkg <= 0) {
382  Log() << kFATAL << "<Train> Troubles in sum of weights: "
383  << fSumOfWeightsSig << " (S) : " << fSumOfWeightsBkg << " (B)" << Endl;
384  }
385  }
386  else if (fSumOfWeights <= 0) {
387  Log() << kFATAL << "<Train> Troubles in sum of weights: "
388  << fSumOfWeights << Endl;
389  }
390 
391  // starting values (not used by all fitters)
392  fBestPars.clear();
393  for (std::vector<Interval*>::const_iterator parIt = fParRange.begin(); parIt != fParRange.end(); parIt++) {
394  fBestPars.push_back( (*parIt)->GetMean() );
395  }
396 
397  // execute the fit
398  Double_t estimator = fFitter->Run( fBestPars );
399 
400  // print results
401  PrintResults( fFitMethod, fBestPars, estimator );
402 
403  delete fFitter; fFitter = 0;
404  if (fConvergerFitter!=0 && fConvergerFitter!=(IFitterTarget*)this) {
405  delete fConvergerFitter;
406  fConvergerFitter = 0;
407  }
409 }
410 
411 ////////////////////////////////////////////////////////////////////////////////
412 /// display fit parameters
413 /// check maximum length of variable name
414 
415 void TMVA::MethodFDA::PrintResults( const TString& fitter, std::vector<Double_t>& pars, const Double_t estimator ) const
416 {
417  Log() << kINFO;
418  Log() << kHEADER << "Results for parameter fit using \"" << fitter << "\" fitter:" << Endl;
419  std::vector<TString> parNames;
420  for (UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back( Form("Par(%i)",ipar ) );
421  gTools().FormattedOutput( pars, parNames, "Parameter" , "Fit result", Log(), "%g" );
422  Log() << "Discriminator expression: \"" << fFormulaStringP << "\"" << Endl;
423  Log() << "Value of estimator at minimum: " << estimator << Endl;
424 }
425 
426 
427 ////////////////////////////////////////////////////////////////////////////////
428 /// compute estimator for given parameter set (to be minimised)
429 /// const Double_t sumOfWeights[] = { fSumOfWeightsSig, fSumOfWeightsBkg, fSumOfWeights };
430 
431 Double_t TMVA::MethodFDA::EstimatorFunction( std::vector<Double_t>& pars )
432 {
433  const Double_t sumOfWeights[] = { fSumOfWeightsBkg, fSumOfWeightsSig, fSumOfWeights };
434  Double_t estimator[] = { 0, 0, 0 };
435 
436  Double_t result, deviation;
437  Double_t desired = 0.0;
438 
439  // calculate the deviation from the desired value
440  if( DoRegression() ){
441  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
442  // read the training event
443  const TMVA::Event* ev = GetEvent(ievt);
444 
445  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
446  desired = ev->GetTarget( dim );
447  result = InterpretFormula( ev, pars.begin(), pars.end() );
448  deviation = TMath::Power(result - desired, 2);
449  estimator[2] += deviation * ev->GetWeight();
450  }
451  }
452  estimator[2] /= sumOfWeights[2];
453  // return value is sum over normalised signal and background contributions
454  return estimator[2];
455 
456  }else if( DoMulticlass() ){
457  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
458  // read the training event
459  const TMVA::Event* ev = GetEvent(ievt);
460 
462 
463  Double_t crossEntropy = 0.0;
464  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
465  Double_t y = fMulticlassReturnVal->at(dim);
466  Double_t t = (ev->GetClass() == static_cast<UInt_t>(dim) ? 1.0 : 0.0 );
467  crossEntropy += t*log(y);
468  }
469  estimator[2] += ev->GetWeight()*crossEntropy;
470  }
471  estimator[2] /= sumOfWeights[2];
472  // return value is sum over normalised signal and background contributions
473  return estimator[2];
474 
475  }else{
476  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
477  // read the training event
478  const TMVA::Event* ev = GetEvent(ievt);
479 
480  desired = (DataInfo().IsSignal(ev) ? 1.0 : 0.0);
481  result = InterpretFormula( ev, pars.begin(), pars.end() );
482  deviation = TMath::Power(result - desired, 2);
483  estimator[Int_t(desired)] += deviation * ev->GetWeight();
484  }
485  estimator[0] /= sumOfWeights[0];
486  estimator[1] /= sumOfWeights[1];
487  // return value is sum over normalised signal and background contributions
488  return estimator[0] + estimator[1];
489  }
490 }
491 
492 ////////////////////////////////////////////////////////////////////////////////
493 /// formula interpretation
494 
495 Double_t TMVA::MethodFDA::InterpretFormula( const Event* event, std::vector<Double_t>::iterator parBegin, std::vector<Double_t>::iterator parEnd )
496 {
497  Int_t ipar = 0;
498  // std::cout << "pars ";
499  for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
500  // std::cout << " i" << ipar << " val" << (*it);
501  fFormula->SetParameter( ipar, (*it) );
502  ++ipar;
503  }
504  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) fFormula->SetParameter( ivar+ipar, event->GetValue(ivar) );
505 
506  Double_t result = fFormula->Eval( 0 );
507  // std::cout << " result " << result << std::endl;
508  return result;
509 }
510 
511 ////////////////////////////////////////////////////////////////////////////////
512 /// returns MVA value for given event
513 
515 {
516  const Event* ev = GetEvent();
517 
518  // cannot determine error
519  NoErrorCalc(err, errUpper);
520 
521  return InterpretFormula( ev, fBestPars.begin(), fBestPars.end() );
522 }
523 
524 ////////////////////////////////////////////////////////////////////////////////
525 
526 const std::vector<Float_t>& TMVA::MethodFDA::GetRegressionValues()
527 {
528  if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
529  fRegressionReturnVal->clear();
530 
531  const Event* ev = GetEvent();
532 
533  Event* evT = new Event(*ev);
534 
535  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
536  Int_t offset = dim*fNPars;
537  evT->SetTarget(dim,InterpretFormula( ev, fBestPars.begin()+offset, fBestPars.begin()+offset+fNPars ) );
538  }
539  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
540  fRegressionReturnVal->push_back(evT2->GetTarget(0));
541 
542  delete evT;
543 
544  return (*fRegressionReturnVal);
545 }
546 
547 
548 ////////////////////////////////////////////////////////////////////////////////
549 
550 const std::vector<Float_t>& TMVA::MethodFDA::GetMulticlassValues()
551 {
552  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
553  fMulticlassReturnVal->clear();
554  std::vector<Float_t> temp;
555 
556  // returns MVA value for given event
557  const TMVA::Event* evt = GetEvent();
558 
559  CalculateMulticlassValues( evt, fBestPars, temp );
560 
561  UInt_t nClasses = DataInfo().GetNClasses();
562  for(UInt_t iClass=0; iClass<nClasses; iClass++){
563  Double_t norm = 0.0;
564  for(UInt_t j=0;j<nClasses;j++){
565  if(iClass!=j)
566  norm+=exp(temp[j]-temp[iClass]);
567  }
568  (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
569  }
570 
571  return (*fMulticlassReturnVal);
572 }
573 
574 
575 ////////////////////////////////////////////////////////////////////////////////
576 /// calculate the values for multiclass
577 
578 void TMVA::MethodFDA::CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values)
579 {
580  values.clear();
581 
582  // std::copy( parameters.begin(), parameters.end(), std::ostream_iterator<double>( std::cout, " " ) );
583  // std::cout << std::endl;
584 
585  // char inp;
586  // std::cin >> inp;
587 
588  Double_t sum=0;
589  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){ // check for all other dimensions (=classes)
590  Int_t offset = dim*fNPars;
591  Double_t value = InterpretFormula( evt, parameters.begin()+offset, parameters.begin()+offset+fNPars );
592  // std::cout << "dim : " << dim << " value " << value << " offset " << offset << std::endl;
593  values.push_back( value );
594  sum += value;
595  }
596 
597  // // normalize to sum of value (commented out, .. have to think of how to treat negative classifier values)
598  // std::transform( fMulticlassReturnVal.begin(), fMulticlassReturnVal.end(), fMulticlassReturnVal.begin(), bind2nd( std::divides<float>(), sum) );
599 }
600 
601 
602 
603 ////////////////////////////////////////////////////////////////////////////////
604 /// read back the training results from a file (stream)
605 
606 void TMVA::MethodFDA::ReadWeightsFromStream( std::istream& istr )
607 {
608  // retrieve best function parameters
609  // coverity[tainted_data_argument]
610  istr >> fNPars;
611 
612  fBestPars.clear();
613  fBestPars.resize( fNPars );
614  for (UInt_t ipar=0; ipar<fNPars; ipar++) istr >> fBestPars[ipar];
615 }
616 
617 ////////////////////////////////////////////////////////////////////////////////
618 /// create XML description for LD classification and regression
619 /// (for arbitrary number of output classes/targets)
620 
621 void TMVA::MethodFDA::AddWeightsXMLTo( void* parent ) const
622 {
623  void* wght = gTools().AddChild(parent, "Weights");
624  gTools().AddAttr( wght, "NPars", fNPars );
625  gTools().AddAttr( wght, "NDim", fOutputDimensions );
626  for (UInt_t ipar=0; ipar<fNPars*fOutputDimensions; ipar++) {
627  void* coeffxml = gTools().AddChild( wght, "Parameter" );
628  gTools().AddAttr( coeffxml, "Index", ipar );
629  gTools().AddAttr( coeffxml, "Value", fBestPars[ipar] );
630  }
631 
632  // write formula
633  gTools().AddAttr( wght, "Formula", fFormulaStringP );
634 }
635 
636 ////////////////////////////////////////////////////////////////////////////////
637 /// read coefficients from xml weight file
638 
640 {
641  gTools().ReadAttr( wghtnode, "NPars", fNPars );
642 
643  if(gTools().HasAttr( wghtnode, "NDim")) {
644  gTools().ReadAttr( wghtnode, "NDim" , fOutputDimensions );
645  } else {
646  // older weight files don't have this attribute
647  fOutputDimensions = 1;
648  }
649 
650  fBestPars.clear();
652 
653  void* ch = gTools().GetChild(wghtnode);
654  Double_t par;
655  UInt_t ipar;
656  while (ch) {
657  gTools().ReadAttr( ch, "Index", ipar );
658  gTools().ReadAttr( ch, "Value", par );
659 
660  // sanity check
661  if (ipar >= fNPars*fOutputDimensions) Log() << kFATAL << "<ReadWeightsFromXML> index out of range: "
662  << ipar << " >= " << fNPars << Endl;
663  fBestPars[ipar] = par;
664 
665  ch = gTools().GetNextChild(ch);
666  }
667 
668  // read formula
669  gTools().ReadAttr( wghtnode, "Formula", fFormulaStringP );
670 
671  // create the TFormula
672  CreateFormula();
673 }
674 
675 ////////////////////////////////////////////////////////////////////////////////
676 /// write FDA-specific classifier response
677 
678 void TMVA::MethodFDA::MakeClassSpecific( std::ostream& fout, const TString& className ) const
679 {
680  fout << " double fParameter[" << fNPars << "];" << std::endl;
681  fout << "};" << std::endl;
682  fout << "" << std::endl;
683  fout << "inline void " << className << "::Initialize() " << std::endl;
684  fout << "{" << std::endl;
685  for(UInt_t ipar=0; ipar<fNPars; ipar++) {
686  fout << " fParameter[" << ipar << "] = " << fBestPars[ipar] << ";" << std::endl;
687  }
688  fout << "}" << std::endl;
689  fout << std::endl;
690  fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
691  fout << "{" << std::endl;
692  fout << " // interpret the formula" << std::endl;
693 
694  // replace parameters
695  TString str = fFormulaStringT;
696  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
697  str.ReplaceAll( Form("[%i]", ipar), Form("fParameter[%i]", ipar) );
698  }
699 
700  // replace input variables
701  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
702  str.ReplaceAll( Form("[%i]", ivar+fNPars), Form("inputValues[%i]", ivar) );
703  }
704 
705  fout << " double retval = " << str << ";" << std::endl;
706  fout << std::endl;
707  fout << " return retval; " << std::endl;
708  fout << "}" << std::endl;
709  fout << std::endl;
710  fout << "// Clean up" << std::endl;
711  fout << "inline void " << className << "::Clear() " << std::endl;
712  fout << "{" << std::endl;
713  fout << " // nothing to clear" << std::endl;
714  fout << "}" << std::endl;
715 }
716 
717 ////////////////////////////////////////////////////////////////////////////////
718 /// get help message text
719 ///
720 /// typical length of text line:
721 /// "|--------------------------------------------------------------|"
722 
724 {
725  Log() << Endl;
726  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
727  Log() << Endl;
728  Log() << "The function discriminant analysis (FDA) is a classifier suitable " << Endl;
729  Log() << "to solve linear or simple nonlinear discrimination problems." << Endl;
730  Log() << Endl;
731  Log() << "The user provides the desired function with adjustable parameters" << Endl;
732  Log() << "via the configuration option string, and FDA fits the parameters to" << Endl;
733  Log() << "it, requiring the signal (background) function value to be as close" << Endl;
734  Log() << "as possible to 1 (0). Its advantage over the more involved and" << Endl;
735  Log() << "automatic nonlinear discriminators is the simplicity and transparency " << Endl;
736  Log() << "of the discrimination expression. A shortcoming is that FDA will" << Endl;
737  Log() << "underperform for involved problems with complicated, phase space" << Endl;
738  Log() << "dependent nonlinear correlations." << Endl;
739  Log() << Endl;
740  Log() << "Please consult the Users Guide for the format of the formula string" << Endl;
741  Log() << "and the allowed parameter ranges:" << Endl;
742  if (gConfig().WriteOptionsReference()) {
743  Log() << "<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">"
744  << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" << Endl;
745  }
746  else Log() << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" << Endl;
747  Log() << Endl;
748  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
749  Log() << Endl;
750  Log() << "The FDA performance depends on the complexity and fidelity of the" << Endl;
751  Log() << "user-defined discriminator function. As a general rule, it should" << Endl;
752  Log() << "be able to reproduce the discrimination power of any linear" << Endl;
753  Log() << "discriminant analysis. To reach into the nonlinear domain, it is" << Endl;
754  Log() << "useful to inspect the correlation profiles of the input variables," << Endl;
755  Log() << "and add quadratic and higher polynomial terms between variables as" << Endl;
756  Log() << "necessary. Comparison with more involved nonlinear classifiers can" << Endl;
757  Log() << "be used as a guide." << Endl;
758  Log() << Endl;
759  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
760  Log() << Endl;
761  Log() << "Depending on the function used, the choice of \"FitMethod\" is" << Endl;
762  Log() << "crucial for getting valuable solutions with FDA. As a guideline it" << Endl;
763  Log() << "is recommended to start with \"FitMethod=MINUIT\". When more complex" << Endl;
764  Log() << "functions are used where MINUIT does not converge to reasonable" << Endl;
765  Log() << "results, the user should switch to non-gradient FitMethods such" << Endl;
766  Log() << "as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" << Endl;
767  Log() << "useful to combine GA (or MC) with MINUIT by setting the option" << Endl;
768  Log() << "\"Converger=MINUIT\". GA (MC) will then set the starting parameters" << Endl;
769  Log() << "for MINUIT such that the basic quality of GA (MC) of finding global" << Endl;
770  Log() << "minima is combined with the efficacy of MINUIT of finding local" << Endl;
771  Log() << "minima." << Endl;
772 }
Config & gConfig()
Definition: Config.cxx:43
double par[1]
Definition: unuranDistr.cxx:38
void Init(void)
default initialisation
Definition: MethodFDA.cxx:121
static long int sum(long int i)
Definition: Factory.cxx:1785
Double_t fSumOfWeightsBkg
Definition: MethodFDA.h:151
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodFDA.cxx:157
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void ClearAll()
delete and clear all class members
Definition: MethodFDA.cxx:340
#define REGISTER_METHOD(CLASS)
for example
Double_t Eval(Double_t x) const
Definition: TFormula.cxx:2546
Ssiz_t Length() const
Definition: TString.h:390
Collectable string class.
Definition: TObjString.h:32
float Float_t
Definition: RtypesCore.h:53
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
const char * GetName() const
Definition: MethodBase.h:330
void CheckForUnusedOptions() const
checks for unused options in option string
UInt_t GetNClasses() const
Definition: DataSetInfo.h:154
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
UInt_t GetNvar() const
Definition: MethodBase.h:340
EAnalysisType
Definition: Types.h:128
UInt_t GetNTargets() const
Definition: DataSetInfo.h:129
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition: MethodFDA.cxx:495
Basic string class.
Definition: TString.h:137
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition: MethodFDA.cxx:177
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition: MethodFDA.cxx:83
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:309
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:311
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
TString fFormulaStringP
Definition: MethodFDA.h:134
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:233
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters check maximum length of variable name
Definition: MethodFDA.cxx:415
const char * Data() const
Definition: TString.h:349
TString fFitMethod
Definition: MethodFDA.h:143
Tools & gTools()
Definition: Tools.cxx:79
std::vector< Double_t > fBestPars
Definition: MethodFDA.h:142
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodFDA.cxx:514
Double_t Run()
estimator function interface for fitting
Definition: FitterBase.cxx:80
Bool_t IsSignal(const Event *ev) const
std::vector< Interval * > fParRange
Definition: MethodFDA.h:141
TString fConverger
Definition: MethodFDA.h:144
void SetParameter(const char *name, Double_t value)
Definition: TFormula.cxx:2329
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
Bool_t DoMulticlass() const
Definition: MethodBase.h:435
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:413
void SetOptions(const TString &s)
Definition: Configurable.h:91
FitterBase * fFitter
Definition: MethodFDA.h:145
TString fFormulaStringT
Definition: MethodFDA.h:136
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodFDA.cxx:328
A doubly linked list.
Definition: TList.h:47
TFormula * fFormula
Definition: MethodFDA.h:139
Int_t GetNpar() const
Definition: TFormula.h:175
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition: MethodFDA.cxx:578
The F O R M U L A class.
Definition: TFormula.h:89
unsigned int UInt_t
Definition: RtypesCore.h:42
const Event * GetEvent() const
Definition: MethodBase.h:745
char * Form(const char *fmt,...)
IFitterTarget * fConvergerFitter
Definition: MethodFDA.h:146
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:356
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:296
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Definition: MethodFDA.cxx:639
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Definition: TString.cxx:444
void Train(void)
FDA training.
Definition: MethodFDA.cxx:357
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodFDA.cxx:234
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Definition: MethodFDA.cxx:678
TString fParRangeStringP
Definition: MethodFDA.h:135
int Ssiz_t
Definition: RtypesCore.h:63
virtual Int_t GetSize() const
Definition: TCollection.h:95
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:592
int type
Definition: TGX11.cxx:120
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised) const Double_t sumOfWeights[] = { fSumOfW...
Definition: MethodFDA.cxx:431
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
Double_t y[n]
Definition: legend1.C:17
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Definition: MethodFDA.cxx:606
MsgLogger & Log() const
Definition: Configurable.h:128
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
void AddPreDefVal(const T &)
Definition: Configurable.h:174
UInt_t GetClass() const
Definition: Event.h:89
void ExitFromTraining()
Definition: MethodBase.h:458
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:896
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
Int_t fOutputDimensions
Definition: MethodFDA.h:155
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:104
Double_t fSumOfWeightsSig
Definition: MethodFDA.h:150
Bool_t DoRegression() const
Definition: MethodBase.h:434
Abstract ClassifierFactory template that handles arbitrary types.
TString fParRangeStringT
Definition: MethodFDA.h:137
Bool_t IsValid() const
Definition: TFormula.h:186
const TString & GetOptions() const
Definition: Configurable.h:90
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodFDA.cxx:526
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
#define NULL
Definition: Rtypes.h:82
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodFDA.cxx:550
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:591
Double_t fSumOfWeights
Definition: MethodFDA.h:152
double result[121]
void GetHelpMessage() const
get help message text
Definition: MethodFDA.cxx:723
double exp(double)
const Bool_t kTRUE
Definition: Rtypes.h:91
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:405
virtual ~MethodFDA(void)
destructor
Definition: MethodFDA.cxx:320
double log(double)
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:467
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
Definition: MethodFDA.cxx:621
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:819
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const