Logo ROOT   6.10/09
Reference Guide
MethodFDA.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodFDA *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * *
19  * Copyright (c) 2005-2006: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 /*! \class TMVA::MethodFDA
29 \ingroup TMVA
30 
31 Function discriminant analysis (FDA).
32 
33 This simple classifier
34 fits any user-defined TFormula (via option configuration string) to
35 the training data by requiring a formula response of 1 (0) to signal
36 (background) events. The parameter fitting is done via the abstract
37 class FitterBase, featuring Monte Carlo sampling, Genetic
38 Algorithm, Simulated Annealing, MINUIT and combinations of these.
39 
40 It can also compute the regression value for a one-dimensional output.
41 */
42 
43 #include "TMVA/MethodFDA.h"
44 
45 #include "TMVA/ClassifierFactory.h"
46 #include "TMVA/Config.h"
47 #include "TMVA/Configurable.h"
48 #include "TMVA/DataSetInfo.h"
49 #include "TMVA/FitterBase.h"
50 #include "TMVA/GeneticFitter.h"
51 #include "TMVA/Interval.h"
52 #include "TMVA/IFitterTarget.h"
53 #include "TMVA/IMethod.h"
54 #include "TMVA/MCFitter.h"
55 #include "TMVA/MethodBase.h"
56 #include "TMVA/MinuitFitter.h"
57 #include "TMVA/MsgLogger.h"
58 #include "TMVA/Timer.h"
59 #include "TMVA/Tools.h"
61 #include "TMVA/Types.h"
63 
64 #include "Riostream.h"
65 #include "TList.h"
66 #include "TFormula.h"
67 #include "TString.h"
68 #include "TObjString.h"
69 #include "TRandom3.h"
70 #include "TMath.h"
71 
72 #include <algorithm>
73 #include <iterator>
74 #include <stdexcept>
75 #include <sstream>
76 
77 using std::stringstream;
78 
79 REGISTER_METHOD(FDA)
80 
82 
83 ////////////////////////////////////////////////////////////////////////////////
84 /// standard constructor
85 
86  TMVA::MethodFDA::MethodFDA( const TString& jobName,
87  const TString& methodTitle,
88  DataSetInfo& theData,
89  const TString& theOption)
90  : MethodBase( jobName, Types::kFDA, methodTitle, theData, theOption),
91  IFitterTarget (),
92  fFormula ( 0 ),
93  fNPars ( 0 ),
94  fFitter ( 0 ),
95  fConvergerFitter( 0 ),
96  fSumOfWeightsSig( 0 ),
97  fSumOfWeightsBkg( 0 ),
98  fSumOfWeights ( 0 ),
99  fOutputDimensions( 0 )
100 {
101 }
102 
103 ////////////////////////////////////////////////////////////////////////////////
104 /// constructor from weight file
///
/// Hands the dataset info and the weight-file name to MethodBase (tagged as
/// Types::kFDA) and starts every FDA-specific member at zero/null.
/// NOTE(review): the first line of the signature is not visible in this
/// listing; only the `theWeightFile` parameter line is shown.
105 
107  const TString& theWeightFile)
108  : MethodBase( Types::kFDA, theData, theWeightFile),
109  IFitterTarget (),
110  fFormula ( 0 ),
111  fNPars ( 0 ),
112  fFitter ( 0 ),
113  fConvergerFitter( 0 ),
114  fSumOfWeightsSig( 0 ),
115  fSumOfWeightsBkg( 0 ),
116  fSumOfWeights ( 0 ),
117  fOutputDimensions( 0 )
118 {
119 }
120 
121 ////////////////////////////////////////////////////////////////////////////////
122 /// default initialisation
///
/// Resets the parameter count, the best-fit parameters, the three weight sums
/// and all option strings to zero/empty. In multiclass mode it also makes
/// sure the multiclass result vector exists.
123 
125 {
126  fNPars = 0;
127 
128  fBestPars.clear();
129 
130  fSumOfWeights = 0;
131  fSumOfWeightsSig = 0;
132  fSumOfWeightsBkg = 0;
133 
// "P" strings are the ones bound to the options in DeclareOptions; the "T"
// strings are the transient working copies that get rewritten
134  fFormulaStringP = "";
135  fParRangeStringP = "";
136  fFormulaStringT = "";
137  fParRangeStringT = "";
138 
139  fFitMethod = "";
140  fConverger = "";
141 
// allocate the multiclass result vector once; an existing one is kept
142  if( DoMulticlass() )
143  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
144 
145 }
146 
147 ////////////////////////////////////////////////////////////////////////////////
148 /// define the options (their key words) that can be set in the option string
149 ///
150 /// format of function string:
151 ///
152 /// "x0*(0)+((1)/x1)**(2)..."
153 ///
154 /// where "(i)" are the parameters, and "xi" the input variables
155 ///
156 /// format of parameter string:
157 ///
158 /// "(-1.2,3.4);(-2.3,4.55);..."
159 ///
160 /// where the numbers in "(a,b)" correspond to the a=min, b=max parameter ranges;
161 /// each parameter defined in the function string must have a corresponding range
///
/// Declared options and their defaults:
///  - Formula   ("(0)")    : the discrimination formula
///  - ParRanges ("()")     : the parameter ranges
///  - FitMethod ("MINUIT") : one of MC, GA, SA, MINUIT
///  - Converger ("None")   : one of None, MINUIT
162 
164 {
// each call assigns the default to the member and registers it under the option name
165  DeclareOptionRef( fFormulaStringP = "(0)", "Formula", "The discrimination formula" );
166  DeclareOptionRef( fParRangeStringP = "()", "ParRanges", "Parameter ranges" );
167 
168  // fitter
169  DeclareOptionRef( fFitMethod = "MINUIT", "FitMethod", "Optimisation Method");
170  AddPreDefVal(TString("MC"));
171  AddPreDefVal(TString("GA"));
172  AddPreDefVal(TString("SA"));
173  AddPreDefVal(TString("MINUIT"));
174 
// optional second stage
175  DeclareOptionRef( fConverger = "None", "Converger", "FitMethod uses Converger to improve result");
176  AddPreDefVal(TString("None"));
177  AddPreDefVal(TString("MINUIT"));
178 }
179 
180 ////////////////////////////////////////////////////////////////////////////////
181 /// translate formula string into TFormula, and parameter string into par ranges
///
/// The lines visible here only deal with the formula: the user syntax "(i)"
/// (fit parameter) and "xi" (input variable) is rewritten into TFormula
/// parameters "[i]" and "[fNPars+i]", the result is compiled into fFormula
/// and a few sanity checks are made. Any failed check is reported at kFATAL.
/// NOTE(review): the signature line and the statement following "process
/// transient strings" are not visible in this listing.
182 
184 {
185  // process transient strings
187 
188  // interpret formula string
189 
190  // replace the parameters "(i)" by the TFormula style "[i]"
191  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
192  fFormulaStringT.ReplaceAll( Form("(%i)",ipar), Form("[%i]",ipar) );
193  }
194 
195  // sanity check, there should be no "(i)", with 'i' a number anymore
// only indices fNPars..999 are probed
196  for (Int_t ipar=fNPars; ipar<1000; ipar++) {
197  if (fFormulaStringT.Contains( Form("(%i)",ipar) ))
198  Log() << kFATAL
199  << "<CreateFormula> Formula contains expression: \"" << Form("(%i)",ipar) << "\", "
200  << "which cannot be attributed to a parameter; "
201  << "it may be that the number of variable ranges given via \"ParRanges\" "
202  << "does not match the number of parameters in the formula expression, please verify!"
203  << Endl;
204  }
205 
206  // write the variables "xi" as additional parameters "[npar+i]"
// highest index first, so that replacing "x1" cannot hit the prefix of "x10"
207  for (Int_t ivar=GetNvar()-1; ivar >= 0; ivar--) {
208  fFormulaStringT.ReplaceAll( Form("x%i",ivar), Form("[%i]",ivar+fNPars) );
209  }
210 
211  // sanity check, there should be no "xi", with 'i' a number anymore
// only indices GetNvar()..999 are probed
212  for (UInt_t ivar=GetNvar(); ivar<1000; ivar++) {
213  if (fFormulaStringT.Contains( Form("x%i",ivar) ))
214  Log() << kFATAL
215  << "<CreateFormula> Formula contains expression: \"" << Form("x%i",ivar) << "\", "
216  << "which cannot be attributed to an input variable" << Endl;
217  }
218 
219  Log() << "User-defined formula string : \"" << fFormulaStringP << "\"" << Endl;
220  Log() << "TFormula-compatible formula string: \"" << fFormulaStringT << "\"" << Endl;
221  Log() << kDEBUG << "Creating and compiling formula" << Endl;
222 
223  // create the TFormula, replacing any previous one
224  if (fFormula) delete fFormula;
225  fFormula = new TFormula( "FDA_Formula", fFormulaStringT );
226 
227  // is formula correct ?
// NOTE(review): the messages below are tagged <ProcessOptions> although they
// are emitted from this formula-building code
228  if (!fFormula->IsValid())
229  Log() << kFATAL << "<ProcessOptions> Formula expression could not be properly compiled" << Endl;
230 
231  // other sanity checks
// the formula may use at most one slot per fit parameter plus one per input variable
232  if (fFormula->GetNpar() > (Int_t)(fNPars + GetNvar()))
233  Log() << kFATAL << "<ProcessOptions> Dubious number of parameters in formula expression: "
234  << fFormula->GetNpar() << " - compared to maximum allowed: " << fNPars + GetNvar() << Endl;
235 }
236 
237 ////////////////////////////////////////////////////////////////////////////////
238 /// the option string is decoded, for available options see "DeclareOptions"
///
/// Visible steps: parse the ";"-separated "(min,max)" ranges into one
/// Interval per parameter, build the formula, replicate the range pointers
/// for every further output dimension, and create the fitter objects selected
/// by the FitMethod and Converger options.
/// NOTE(review): the signature and several statements of this function are
/// not visible in this listing (gaps in the embedded line numbers); the
/// comments below describe the visible lines only.
239 
241 {
242  // process transient strings
244 
245  // interpret parameter string
246  fParRangeStringT.ReplaceAll( " ", "" );
248 
// one list entry per ";"-separated range; the count must equal fNPars
249  TList* parList = gTools().ParseFormatLine( fParRangeStringT, ";" );
250  if ((UInt_t)parList->GetSize() != fNPars) {
// NOTE(review): the message below ends in "rang." (typo for "range.")
251  Log() << kFATAL << "<ProcessOptions> Mismatch in parameter string: "
252  << "the number of parameters: " << fNPars << " != ranges defined: "
253  << parList->GetSize() << "; the format of the \"ParRanges\" string "
254  << "must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
255  << "where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
256  << "each parameter defined in the function string must have a corresponding rang."
257  << Endl;
258  }
259 
// start from null pointers; the loop below fills them in
260  fParRange.resize( fNPars );
261  for (UInt_t ipar=0; ipar<fNPars; ipar++) fParRange[ipar] = 0;
262 
263  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
264  // parse (a,b)
// pminS: text between the first character and the first comma;
// pmaxS: text after that comma, without the last character
265  TString str = ((TObjString*)parList->At(ipar))->GetString();
266  Ssiz_t istr = str.First( ',' );
267  TString pminS(str(1,istr-1));
268  TString pmaxS(str(istr+1,str.Length()-2-istr));
269 
// the bounds are parsed as Float_t; a failed extraction leaves the 0 initialiser
270  stringstream stmin; Float_t pmin=0; stmin << pminS.Data(); stmin >> pmin;
271  stringstream stmax; Float_t pmax=0; stmax << pmaxS.Data(); stmax >> pmax;
272 
273  // sanity check
// bounds closer than 1e-30 are collapsed to a single point
// NOTE(review): the fatal message says "max > min" but the condition that
// triggers it is pmin > pmax
274  if (TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
275  if (pmin > pmax) Log() << kFATAL << "<ProcessOptions> max > min in interval for parameter: ["
276  << ipar << "] : [" << pmin << ", " << pmax << "] " << Endl;
277 
278  Log() << kINFO << "Create parameter interval for parameter " << ipar << " : [" << pmin << "," << pmax << "]" << Endl;
279  fParRange[ipar] = new Interval( pmin, pmax );
280  }
281  delete parList;
282 
283  // create formula
284  CreateFormula();
285 
286 
287  // copy parameter ranges for each output dimension ==================
// NOTE(review): the statements controlled by the two `if`s below are not
// visible in this listing
288  fOutputDimensions = 1;
289  if( DoRegression() )
291  if( DoMulticlass() )
293 
// the Interval pointers themselves are appended again (no new Interval
// objects), so entries beyond the first fNPars alias the first fNPars
294  for( Int_t dim = 1; dim < fOutputDimensions; ++dim ){
295  for( UInt_t par = 0; par < fNPars; ++par ){
296  fParRange.push_back( fParRange.at(par) );
297  }
298  }
299  // ====================
300 
301  // create minimiser
// NOTE(review): the result of the dynamic_cast below is dereferenced without
// a null check
303  if (fConverger == "MINUIT") {
304  fConvergerFitter = new MinuitFitter( *this, Form("%s_Converger_Minuit", GetName()), fParRange, GetOptions() );
305  SetOptions(dynamic_cast<Configurable*>(fConvergerFitter)->GetOptions());
306  }
307 
// the main fitter is built on *fConvergerFitter
// NOTE(review): presumably fConvergerFitter refers to this object when no
// converger is requested (Train compares it against `this`) — confirm; the
// assignment is not visible here. The body of the "SA" branch is not
// visible either.
308  if(fFitMethod == "MC")
309  fFitter = new MCFitter( *fConvergerFitter, Form("%s_Fitter_MC", GetName()), fParRange, GetOptions() );
310  else if (fFitMethod == "GA")
311  fFitter = new GeneticFitter( *fConvergerFitter, Form("%s_Fitter_GA", GetName()), fParRange, GetOptions() );
312  else if (fFitMethod == "SA")
314  else if (fFitMethod == "MINUIT")
315  fFitter = new MinuitFitter( *fConvergerFitter, Form("%s_Fitter_Minuit", GetName()), fParRange, GetOptions() );
316  else {
317  Log() << kFATAL << "<Train> Do not understand fit method:" << fFitMethod << Endl;
318  }
319 
321 }
322 
323 ////////////////////////////////////////////////////////////////////////////////
324 /// destructor
///
/// Releases the members handled by ClearAll().
325 
327 {
328  ClearAll();
329 }
330 
331 ////////////////////////////////////////////////////////////////////////////////
332 /// FDA can handle classification with 2 classes, multiclass classification and regression
///
/// Returns kTRUE for two-class classification, for multiclass and for
/// regression, kFALSE otherwise. The number of regression targets is not
/// checked in the lines below.
333 
335 {
336  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
337  if (type == Types::kMulticlass ) return kTRUE;
338  if (type == Types::kRegression ) return kTRUE;
339  return kFALSE;
340 }
341 
342 
343 ////////////////////////////////////////////////////////////////////////////////
344 /// delete and clear all class members
///
/// Deletes the Interval objects in the first fNPars slots of fParRange,
/// empties the vector, deletes the formula and drops the best-fit parameters.
345 
347 {
348  // if there is more than one output dimension, the parameter ranges are the same again (object has been copied).
349  // hence, ... erase the copied pointers to assure, that they are deleted only once.
350  // fParRange.erase( fParRange.begin()+(fNPars), fParRange.end() );
// only the first fNPars entries are deleted; later entries are left to clear()
351  for (UInt_t ipar=0; ipar<fParRange.size() && ipar<fNPars; ipar++) {
352  if (fParRange[ipar] != 0) { delete fParRange[ipar]; fParRange[ipar] = 0; }
353  }
354  fParRange.clear();
355 
356  if (fFormula != 0) { delete fFormula; fFormula = 0; }
357  fBestPars.clear();
358 }
359 
360 ////////////////////////////////////////////////////////////////////////////////
361 /// FDA training
///
/// Sums the event weights (in total and, unless in regression mode, split
/// into signal and background), checks that the sums are positive, seeds
/// fBestPars with the mean of every parameter interval, runs the fitter,
/// prints the result and deletes the fitter objects.
/// NOTE(review): the signature line and one statement just before the closing
/// brace are not visible in this listing.
362 
364 {
365  // accumulate the sums of event weights
366  fSumOfWeights = 0;
367  fSumOfWeightsSig = 0;
368  fSumOfWeightsBkg = 0;
369 
370  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
371 
372  // read the training event
373  const Event* ev = GetEvent(ievt);
374 
375  // event weight
376  Float_t w = ev->GetWeight();
377 
// the signal/background split is skipped only in regression mode
378  if (!DoRegression()) {
379  if (DataInfo().IsSignal(ev)) { fSumOfWeightsSig += w; }
380  else { fSumOfWeightsBkg += w; }
381  }
382  fSumOfWeights += w;
383  }
384 
385  // sanity check
// outside regression mode both the signal and the background sum must be positive
386  if (!DoRegression()) {
387  if (fSumOfWeightsSig <= 0 || fSumOfWeightsBkg <= 0) {
388  Log() << kFATAL << "<Train> Troubles in sum of weights: "
389  << fSumOfWeightsSig << " (S) : " << fSumOfWeightsBkg << " (B)" << Endl;
390  }
391  }
392  else if (fSumOfWeights <= 0) {
393  Log() << kFATAL << "<Train> Troubles in sum of weights: "
394  << fSumOfWeights << Endl;
395  }
396 
397  // starting values (not used by all fitters)
// one start value per fParRange entry: the mean of its interval
398  fBestPars.clear();
399  for (std::vector<Interval*>::const_iterator parIt = fParRange.begin(); parIt != fParRange.end(); parIt++) {
400  fBestPars.push_back( (*parIt)->GetMean() );
401  }
402 
403  // execute the fit
404  Double_t estimator = fFitter->Run( fBestPars );
405 
406  // print results
407  PrintResults( fFitMethod, fBestPars, estimator );
408 
// the converger is deleted only when it is a separate object, not this method itself
409  delete fFitter; fFitter = 0;
410  if (fConvergerFitter!=0 && fConvergerFitter!=(IFitterTarget*)this) {
411  delete fConvergerFitter;
412  fConvergerFitter = 0;
413  }
415 }
416 
417 ////////////////////////////////////////////////////////////////////////////////
418 /// display fit parameters
419 /// check maximum length of variable name
420 
421 void TMVA::MethodFDA::PrintResults( const TString& fitter, std::vector<Double_t>& pars, const Double_t estimator ) const
422 {
423  Log() << kINFO;
424  Log() << kHEADER << "Results for parameter fit using \"" << fitter << "\" fitter:" << Endl;
425  std::vector<TString> parNames;
426  for (UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back( Form("Par(%i)",ipar ) );
427  gTools().FormattedOutput( pars, parNames, "Parameter" , "Fit result", Log(), "%g" );
428  Log() << "Discriminator expression: \"" << fFormulaStringP << "\"" << Endl;
429  Log() << "Value of estimator at minimum: " << estimator << Endl;
430 }
431 
432 ////////////////////////////////////////////////////////////////////////////////
433 /// compute estimator for given parameter set (to be minimised)
///
/// Three modes:
///  - regression: weighted mean squared deviation between formula value and
///    event target, summed over the output dimensions;
///  - multiclass: weighted mean of sum_dim t*log(y), with t = 1 for the
///    event's own class and 0 otherwise;
///  - two-class: weighted mean squared deviation from 1 (signal) or 0
///    (background), normalised separately per class and then added.
///
/// \param pars parameter set to evaluate
/// \return the estimator value
434 
435 Double_t TMVA::MethodFDA::EstimatorFunction( std::vector<Double_t>& pars )
436 {
// index 0 = background, 1 = signal, 2 = all events
437  const Double_t sumOfWeights[] = { fSumOfWeightsBkg, fSumOfWeightsSig, fSumOfWeights };
438  Double_t estimator[] = { 0, 0, 0 };
439 
440  Double_t result, deviation;
441  Double_t desired = 0.0;
442 
443  // calculate the deviation from the desired value
444  if( DoRegression() ){
445  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
446  // read the training event
447  const TMVA::Event* ev = GetEvent(ievt);
448 
// NOTE(review): the formula is evaluated with the full parameter range for
// every dim; no per-dimension slice is taken here
449  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
450  desired = ev->GetTarget( dim );
451  result = InterpretFormula( ev, pars.begin(), pars.end() );
452  deviation = TMath::Power(result - desired, 2);
453  estimator[2] += deviation * ev->GetWeight();
454  }
455  }
456  estimator[2] /= sumOfWeights[2];
457  // return value is the weighted squared deviation normalised to the total sum of weights
458  return estimator[2];
459 
460  }else if( DoMulticlass() ){
461  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
462  // read the training event
463  const TMVA::Event* ev = GetEvent(ievt);
464 
// NOTE(review): a line is missing from this listing here; presumably it
// fills fMulticlassReturnVal for this event — confirm against the original
466 
// NOTE(review): t*log(y) is accumulated with a positive sign although the
// estimator is documented as "to be minimised" — confirm the intended sign
467  Double_t crossEntropy = 0.0;
468  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
469  Double_t y = fMulticlassReturnVal->at(dim);
470  Double_t t = (ev->GetClass() == static_cast<UInt_t>(dim) ? 1.0 : 0.0 );
471  crossEntropy += t*log(y);
472  }
473  estimator[2] += ev->GetWeight()*crossEntropy;
474  }
475  estimator[2] /= sumOfWeights[2];
476  // return value is the weighted sum normalised to the total sum of weights
477  return estimator[2];
478 
479  }else{
480  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
481  // read the training event
482  const TMVA::Event* ev = GetEvent(ievt);
483 
// desired doubles as the array index: 1 -> signal slot, 0 -> background slot
484  desired = (DataInfo().IsSignal(ev) ? 1.0 : 0.0);
485  result = InterpretFormula( ev, pars.begin(), pars.end() );
486  deviation = TMath::Power(result - desired, 2);
487  estimator[Int_t(desired)] += deviation * ev->GetWeight();
488  }
489  estimator[0] /= sumOfWeights[0];
490  estimator[1] /= sumOfWeights[1];
491  // return value is sum over normalised signal and background contributions
492  return estimator[0] + estimator[1];
493  }
494 }
495 
496 ////////////////////////////////////////////////////////////////////////////////
497 /// formula interpretation
498 
499 Double_t TMVA::MethodFDA::InterpretFormula( const Event* event, std::vector<Double_t>::iterator parBegin, std::vector<Double_t>::iterator parEnd )
500 {
501  Int_t ipar = 0;
502  // std::cout << "pars ";
503  for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
504  // std::cout << " i" << ipar << " val" << (*it);
505  fFormula->SetParameter( ipar, (*it) );
506  ++ipar;
507  }
508  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) fFormula->SetParameter( ivar+ipar, event->GetValue(ivar) );
509 
510  Double_t result = fFormula->Eval( 0 );
511  // std::cout << " result " << result << std::endl;
512  return result;
513 }
514 
515 ////////////////////////////////////////////////////////////////////////////////
516 /// returns MVA value for given event
///
/// Evaluates the formula for the current event with the best-fit parameters.
/// No error estimate is computed; `err` and `errUpper` are handed to
/// NoErrorCalc().
517 
519 {
520  const Event* ev = GetEvent();
521 
522  // cannot determine error
523  NoErrorCalc(err, errUpper);
524 
// the whole fBestPars vector is used as the parameter set
525  return InterpretFormula( ev, fBestPars.begin(), fBestPars.end() );
526 }
527 
528 ////////////////////////////////////////////////////////////////////////////////
529 
530 const std::vector<Float_t>& TMVA::MethodFDA::GetRegressionValues()
531 {
532  if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
533  fRegressionReturnVal->clear();
534 
535  const Event* ev = GetEvent();
536 
537  Event* evT = new Event(*ev);
538 
539  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
540  Int_t offset = dim*fNPars;
541  evT->SetTarget(dim,InterpretFormula( ev, fBestPars.begin()+offset, fBestPars.begin()+offset+fNPars ) );
542  }
543  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
544  fRegressionReturnVal->push_back(evT2->GetTarget(0));
545 
546  delete evT;
547 
548  return (*fRegressionReturnVal);
549 }
550 
551 ////////////////////////////////////////////////////////////////////////////////
552 
553 const std::vector<Float_t>& TMVA::MethodFDA::GetMulticlassValues()
554 {
555  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
556  fMulticlassReturnVal->clear();
557  std::vector<Float_t> temp;
558 
559  // returns MVA value for given event
560  const TMVA::Event* evt = GetEvent();
561 
562  CalculateMulticlassValues( evt, fBestPars, temp );
563 
564  UInt_t nClasses = DataInfo().GetNClasses();
565  for(UInt_t iClass=0; iClass<nClasses; iClass++){
566  Double_t norm = 0.0;
567  for(UInt_t j=0;j<nClasses;j++){
568  if(iClass!=j)
569  norm+=exp(temp[j]-temp[iClass]);
570  }
571  (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
572  }
573 
574  return (*fMulticlassReturnVal);
575 }
576 
577 
578 ////////////////////////////////////////////////////////////////////////////////
579 /// calculate the values for multiclass
580 
581 void TMVA::MethodFDA::CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values)
582 {
583  values.clear();
584 
585  // std::copy( parameters.begin(), parameters.end(), std::ostream_iterator<double>( std::cout, " " ) );
586  // std::cout << std::endl;
587 
588  // char inp;
589  // std::cin >> inp;
590 
591  Double_t sum=0;
592  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){ // check for all other dimensions (=classes)
593  Int_t offset = dim*fNPars;
594  Double_t value = InterpretFormula( evt, parameters.begin()+offset, parameters.begin()+offset+fNPars );
595  // std::cout << "dim : " << dim << " value " << value << " offset " << offset << std::endl;
596  values.push_back( value );
597  sum += value;
598  }
599 
600  // // normalize to sum of value (commented out, .. have to think of how to treat negative classifier values)
601  // std::transform( fMulticlassReturnVal.begin(), fMulticlassReturnVal.end(), fMulticlassReturnVal.begin(), bind2nd( std::divides<float>(), sum) );
602 }
603 
604 ////////////////////////////////////////////////////////////////////////////////
605 /// read back the training results from a file (stream)
606 
607 void TMVA::MethodFDA::ReadWeightsFromStream( std::istream& istr )
608 {
609  // retrieve best function parameters
610  // coverity[tainted_data_argument]
611  istr >> fNPars;
612 
613  fBestPars.clear();
614  fBestPars.resize( fNPars );
615  for (UInt_t ipar=0; ipar<fNPars; ipar++) istr >> fBestPars[ipar];
616 }
617 
618 ////////////////////////////////////////////////////////////////////////////////
619 /// create XML description for LD classification and regression
620 /// (for arbitrary number of output classes/targets)
621 
622 void TMVA::MethodFDA::AddWeightsXMLTo( void* parent ) const
623 {
624  void* wght = gTools().AddChild(parent, "Weights");
625  gTools().AddAttr( wght, "NPars", fNPars );
626  gTools().AddAttr( wght, "NDim", fOutputDimensions );
627  for (UInt_t ipar=0; ipar<fNPars*fOutputDimensions; ipar++) {
628  void* coeffxml = gTools().AddChild( wght, "Parameter" );
629  gTools().AddAttr( coeffxml, "Index", ipar );
630  gTools().AddAttr( coeffxml, "Value", fBestPars[ipar] );
631  }
632 
633  // write formula
634  gTools().AddAttr( wght, "Formula", fFormulaStringP );
635 }
636 
637 ////////////////////////////////////////////////////////////////////////////////
638 /// read coefficients from xml weight file
///
/// Reads NPars and (when present) NDim from the weights node, then the Index
/// and Value attributes of every child node, then the Formula attribute, and
/// finally rebuilds the TFormula.
/// NOTE(review): the signature line and one statement after
/// `fBestPars.clear()` are not visible in this listing.
639 
641 {
642  gTools().ReadAttr( wghtnode, "NPars", fNPars );
643 
644  if(gTools().HasAttr( wghtnode, "NDim")) {
645  gTools().ReadAttr( wghtnode, "NDim" , fOutputDimensions );
646  } else {
647  // older weight files don't have this attribute
648  fOutputDimensions = 1;
649  }
650 
// NOTE(review): fBestPars is indexed in the loop below; the statement that
// sizes it is presumably the line missing from this listing — confirm
651  fBestPars.clear();
653 
// walk over all children of the weights node
654  void* ch = gTools().GetChild(wghtnode);
655  Double_t par;
656  UInt_t ipar;
657  while (ch) {
658  gTools().ReadAttr( ch, "Index", ipar );
659  gTools().ReadAttr( ch, "Value", par );
660 
661  // sanity check
// NOTE(review): the check is against fNPars*fOutputDimensions, but the
// message prints fNPars only
662  if (ipar >= fNPars*fOutputDimensions) Log() << kFATAL << "<ReadWeightsFromXML> index out of range: "
663  << ipar << " >= " << fNPars << Endl;
664  fBestPars[ipar] = par;
665 
666  ch = gTools().GetNextChild(ch);
667  }
668 
669  // read formula
670  gTools().ReadAttr( wghtnode, "Formula", fFormulaStringP );
671 
672  // create the TFormula
673  CreateFormula();
674 }
675 
676 ////////////////////////////////////////////////////////////////////////////////
677 /// write FDA-specific classifier response
678 
679 void TMVA::MethodFDA::MakeClassSpecific( std::ostream& fout, const TString& className ) const
680 {
681  fout << " double fParameter[" << fNPars << "];" << std::endl;
682  fout << "};" << std::endl;
683  fout << "" << std::endl;
684  fout << "inline void " << className << "::Initialize() " << std::endl;
685  fout << "{" << std::endl;
686  for(UInt_t ipar=0; ipar<fNPars; ipar++) {
687  fout << " fParameter[" << ipar << "] = " << fBestPars[ipar] << ";" << std::endl;
688  }
689  fout << "}" << std::endl;
690  fout << std::endl;
691  fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
692  fout << "{" << std::endl;
693  fout << " // interpret the formula" << std::endl;
694 
695  // replace parameters
696  TString str = fFormulaStringT;
697  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
698  str.ReplaceAll( Form("[%i]", ipar), Form("fParameter[%i]", ipar) );
699  }
700 
701  // replace input variables
702  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
703  str.ReplaceAll( Form("[%i]", ivar+fNPars), Form("inputValues[%i]", ivar) );
704  }
705 
706  fout << " double retval = " << str << ";" << std::endl;
707  fout << std::endl;
708  fout << " return retval; " << std::endl;
709  fout << "}" << std::endl;
710  fout << std::endl;
711  fout << "// Clean up" << std::endl;
712  fout << "inline void " << className << "::Clear() " << std::endl;
713  fout << "{" << std::endl;
714  fout << " // nothing to clear" << std::endl;
715  fout << "}" << std::endl;
716 }
717 
718 ////////////////////////////////////////////////////////////////////////////////
719 /// get help message text
720 ///
721 /// typical length of text line:
722 /// "|--------------------------------------------------------------|"
///
/// Writes three sections to the logger: a short description, notes on
/// performance optimisation, and notes on tuning via configuration options.
/// The Users Guide link is written as an HTML anchor when
/// gConfig().WriteOptionsReference() is set, and as a plain URL otherwise.
723 
725 {
726  Log() << Endl;
727  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
728  Log() << Endl;
729  Log() << "The function discriminant analysis (FDA) is a classifier suitable " << Endl;
730  Log() << "to solve linear or simple nonlinear discrimination problems." << Endl;
731  Log() << Endl;
732  Log() << "The user provides the desired function with adjustable parameters" << Endl;
733  Log() << "via the configuration option string, and FDA fits the parameters to" << Endl;
734  Log() << "it, requiring the signal (background) function value to be as close" << Endl;
735  Log() << "as possible to 1 (0). Its advantage over the more involved and" << Endl;
736  Log() << "automatic nonlinear discriminators is the simplicity and transparency " << Endl;
737  Log() << "of the discrimination expression. A shortcoming is that FDA will" << Endl;
738  Log() << "underperform for involved problems with complicated, phase space" << Endl;
739  Log() << "dependent nonlinear correlations." << Endl;
740  Log() << Endl;
741  Log() << "Please consult the Users Guide for the format of the formula string" << Endl;
742  Log() << "and the allowed parameter ranges:" << Endl;
// HTML anchor for the options reference, plain URL otherwise
743  if (gConfig().WriteOptionsReference()) {
744  Log() << "<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">"
745  << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" << Endl;
746  }
747  else Log() << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" << Endl;
748  Log() << Endl;
749  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
750  Log() << Endl;
751  Log() << "The FDA performance depends on the complexity and fidelity of the" << Endl;
752  Log() << "user-defined discriminator function. As a general rule, it should" << Endl;
753  Log() << "be able to reproduce the discrimination power of any linear" << Endl;
754  Log() << "discriminant analysis. To reach into the nonlinear domain, it is" << Endl;
755  Log() << "useful to inspect the correlation profiles of the input variables," << Endl;
756  Log() << "and add quadratic and higher polynomial terms between variables as" << Endl;
757  Log() << "necessary. Comparison with more involved nonlinear classifiers can" << Endl;
758  Log() << "be used as a guide." << Endl;
759  Log() << Endl;
760  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
761  Log() << Endl;
762  Log() << "Depending on the function used, the choice of \"FitMethod\" is" << Endl;
763  Log() << "crucial for getting valuable solutions with FDA. As a guideline it" << Endl;
764  Log() << "is recommended to start with \"FitMethod=MINUIT\". When more complex" << Endl;
765  Log() << "functions are used where MINUIT does not converge to reasonable" << Endl;
766  Log() << "results, the user should switch to non-gradient FitMethods such" << Endl;
767  Log() << "as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" << Endl;
768  Log() << "useful to combine GA (or MC) with MINUIT by setting the option" << Endl;
769  Log() << "\"Converger=MINUIT\". GA (MC) will then set the starting parameters" << Endl;
770  Log() << "for MINUIT such that the basic quality of GA (MC) of finding global" << Endl;
771  Log() << "minima is combined with the efficacy of MINUIT of finding local" << Endl;
772  Log() << "minima." << Endl;
773 }
double par[1]
Definition: unuranDistr.cxx:38
void Init(void)
default initialisation
Definition: MethodFDA.cxx:124
static long int sum(long int i)
Definition: Factory.cxx:2162
Double_t fSumOfWeightsBkg
Definition: MethodFDA.h:147
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodFDA.cxx:163
Double_t Eval(Double_t x) const
Sets first variable (e.g. x) and evaluate formula.
Definition: TFormula.cxx:2642
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
void ClearAll()
delete and clear all class members
Definition: MethodFDA.cxx:346
Singleton class for Global types used by TMVA.
Definition: Types.h:73
Collectable string class.
Definition: TObjString.h:28
float Float_t
Definition: RtypesCore.h:53
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:640
UInt_t GetNvar() const
Definition: MethodBase.h:328
Config & gConfig()
MsgLogger & Log() const
Definition: Configurable.h:122
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
EAnalysisType
Definition: Types.h:125
Virtual base Class for all MVA method.
Definition: MethodBase.h:106
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition: MethodFDA.cxx:499
Basic string class.
Definition: TString.h:129
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition: MethodFDA.cxx:183
Int_t GetNpar() const
Definition: TFormula.h:170
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:378
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition: MethodFDA.cxx:86
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
#define NULL
Definition: RtypesCore.h:88
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:308
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1135
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:628
TString fFormulaStringP
Definition: MethodFDA.h:130
TString fFitMethod
Definition: MethodFDA.h:139
std::vector< Double_t > fBestPars
Definition: MethodFDA.h:138
/Fitter using MINUIT
Definition: MinuitFitter.h:47
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodFDA.cxx:518
Double_t Run()
estimator function interface for fitting
Definition: FitterBase.cxx:74
std::vector< Interval * > fParRange
Definition: MethodFDA.h:137
const Event * GetEvent() const
Definition: MethodBase.h:733
TString fConverger
Definition: MethodFDA.h:140
void SetParameter(const char *name, Double_t value)
Sets parameter value.
Definition: TFormula.cxx:2405
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
UInt_t GetClass() const
Definition: Event.h:81
DataSetInfo & DataInfo() const
Definition: MethodBase.h:394
void SetOptions(const TString &s)
Definition: Configurable.h:85
FitterBase * fFitter
Definition: MethodFDA.h:141
Bool_t DoRegression() const
Definition: MethodBase.h:422
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:477
Class that contains all the data information.
Definition: DataSetInfo.h:60
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
TString fFormulaStringT
Definition: MethodFDA.h:132
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Definition: MethodFDA.cxx:421
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodFDA.cxx:334
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:401
A doubly linked list.
Definition: TList.h:43
TFormula * fFormula
Definition: MethodFDA.h:135
Bool_t DoMulticlass() const
Definition: MethodBase.h:423
void GetHelpMessage() const
get help message text
Definition: MethodFDA.cxx:724
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
UInt_t GetNTargets() const
Definition: DataSetInfo.h:111
const char * GetName() const
Definition: MethodBase.h:318
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Definition: MethodFDA.cxx:679
The TMVA::Interval Class.
Definition: Interval.h:61
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition: MethodFDA.cxx:581
The Formula class.
Definition: TFormula.h:83
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
IFitterTarget * fConvergerFitter
Definition: MethodFDA.h:142
Ssiz_t Length() const
Definition: TString.h:388
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:290
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:315
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Definition: MethodFDA.cxx:640
Tools & gTools()
void Train(void)
FDA training.
Definition: MethodFDA.cxx:363
Fitter using Monte Carlo sampling of parameters.
Definition: MCFitter.h:43
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodFDA.cxx:240
TString fParRangeStringP
Definition: MethodFDA.h:131
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
Definition: MethodFDA.cxx:622
const Bool_t kFALSE
Definition: RtypesCore.h:92
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
int Ssiz_t
Definition: RtypesCore.h:63
#define ClassImp(name)
Definition: Rtypes.h:336
double Double_t
Definition: RtypesCore.h:55
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:580
int type
Definition: TGX11.cxx:120
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised)
Definition: MethodFDA.cxx:435
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1173
Double_t y[n]
Definition: legend1.C:17
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Definition: MethodFDA.cxx:607
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:572
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void AddPreDefVal(const T &)
Definition: Configurable.h:168
void ExitFromTraining()
Definition: MethodBase.h:446
const TString & GetOptions() const
Definition: Configurable.h:84
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:898
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:839
Int_t fOutputDimensions
Definition: MethodFDA.h:151
Double_t fSumOfWeightsSig
Definition: MethodFDA.h:146
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
TString fParRangeStringT
Definition: MethodFDA.h:133
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodFDA.cxx:530
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:412
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Definition: TString.cxx:454
Bool_t IsSignal(const Event *ev) const
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodFDA.cxx:553
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:579
Double_t fSumOfWeights
Definition: MethodFDA.h:148
double result[121]
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
virtual Int_t GetSize() const
Definition: TCollection.h:89
double exp(double)
Bool_t IsValid() const
Definition: TFormula.h:181
const Bool_t kTRUE
Definition: RtypesCore.h:91
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
Fitter using a Genetic Algorithm.
Definition: GeneticFitter.h:43
void CheckForUnusedOptions() const
checks for unused options in option string
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:410
virtual ~MethodFDA(void)
destructor
Definition: MethodFDA.cxx:326
double log(double)
Fitter using a Simulated Annealing Algorithm.
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:829
const char * Data() const
Definition: TString.h:347
Function discriminant analysis (FDA).
Definition: MethodFDA.h:60