ROOT  6.06/09
Reference Guide
MethodFDA.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodFDA *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * *
19  * Copyright (c) 2005-2006: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 //_______________________________________________________________________
29 //
30 // Function discriminant analysis (FDA). This simple classifier //
31 // fits any user-defined TFormula (via option configuration string) to //
32 // the training data by requiring a formula response of 1 (0) to signal //
33 // (background) events. The parameter fitting is done via the abstract //
34 // class FitterBase, featuring Monte Carlo sampling, Genetic //
35 // Algorithm, Simulated Annealing, MINUIT and combinations of these. //
36 // //
37 // Can compute regression value for one dimensional output //
38 //_______________________________________________________________________
39 
40 #include "Riostream.h"
41 #include "TList.h"
42 #include "TFormula.h"
43 #include "TString.h"
44 #include "TObjString.h"
45 #include "TRandom3.h"
46 #include "TMath.h"
47 #include <sstream>
48 
49 #include <algorithm>
50 #include <iterator>
51 #include <stdexcept>
52 
53 #include "TMVA/ClassifierFactory.h"
54 #include "TMVA/MethodFDA.h"
55 #include "TMVA/Tools.h"
56 #include "TMVA/Interval.h"
57 #include "TMVA/Timer.h"
58 #include "TMVA/GeneticFitter.h"
60 #include "TMVA/MinuitFitter.h"
61 #include "TMVA/MCFitter.h"
62 #include "TMVA/Config.h"
63 
64 using std::stringstream;
65 
66 REGISTER_METHOD(FDA)
67 
68 ClassImp(TMVA::MethodFDA)
69 
70 ////////////////////////////////////////////////////////////////////////////////
71 /// standard constructor
72 
73 TMVA::MethodFDA::MethodFDA( const TString& jobName,
74  const TString& methodTitle,
75  DataSetInfo& theData,
76  const TString& theOption,
77  TDirectory* theTargetDir )
78  : MethodBase( jobName, Types::kFDA, methodTitle, theData, theOption, theTargetDir ),
79  IFitterTarget (),
80  fFormula ( 0 ),
81  fNPars ( 0 ),
82  fFitter ( 0 ),
83  fConvergerFitter( 0 ),
84  fSumOfWeightsSig( 0 ),
85  fSumOfWeightsBkg( 0 ),
86  fSumOfWeights ( 0 ),
87  fOutputDimensions( 0 )
88 {
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// constructor from weight file
93 
95  const TString& theWeightFile,
96  TDirectory* theTargetDir )
97  : MethodBase( Types::kFDA, theData, theWeightFile, theTargetDir ),
98  IFitterTarget (),
99  fFormula ( 0 ),
100  fNPars ( 0 ),
101  fFitter ( 0 ),
102  fConvergerFitter( 0 ),
103  fSumOfWeightsSig( 0 ),
104  fSumOfWeightsBkg( 0 ),
105  fSumOfWeights ( 0 ),
106  fOutputDimensions( 0 )
107 {
108 }
109 
110 ////////////////////////////////////////////////////////////////////////////////
111 /// default initialisation
112 
114 {
115  fNPars = 0;
116 
117  fBestPars.clear();
118 
119  fSumOfWeights = 0;
120  fSumOfWeightsSig = 0;
121  fSumOfWeightsBkg = 0;
122 
123  fFormulaStringP = "";
124  fParRangeStringP = "";
125  fFormulaStringT = "";
126  fParRangeStringT = "";
127 
128  fFitMethod = "";
129  fConverger = "";
130 
131  if( DoMulticlass() )
132  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
133 
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// define the options (their key words) that can be set in the option string
138 ///
139 /// format of function string:
140 /// "x0*(0)+((1)/x1)**(2)..."
141 /// where "[i]" are the parameters, and "xi" the input variables
142 ///
143 /// format of parameter string:
144 /// "(-1.2,3.4);(-2.3,4.55);..."
145 /// where the numbers in "(a,b)" correspond to the a=min, b=max parameter ranges;
146 /// each parameter defined in the function string must have a corresponding range
147 ///
148 
150 {
151  DeclareOptionRef( fFormulaStringP = "(0)", "Formula", "The discrimination formula" );
152  DeclareOptionRef( fParRangeStringP = "()", "ParRanges", "Parameter ranges" );
153 
154  // fitter
155  DeclareOptionRef( fFitMethod = "MINUIT", "FitMethod", "Optimisation Method");
156  AddPreDefVal(TString("MC"));
157  AddPreDefVal(TString("GA"));
158  AddPreDefVal(TString("SA"));
159  AddPreDefVal(TString("MINUIT"));
160 
161  DeclareOptionRef( fConverger = "None", "Converger", "FitMethod uses Converger to improve result");
162  AddPreDefVal(TString("None"));
163  AddPreDefVal(TString("MINUIT"));
164 }
165 
166 ////////////////////////////////////////////////////////////////////////////////
167 /// translate formula string into TFormula, and parameter string into par ranges
168 
170 {
171  // process transient strings
172  fFormulaStringT = fFormulaStringP;
173 
174  // intepret formula string
175 
176  // replace the parameters "(i)" by the TFormula style "[i]"
177  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
178  fFormulaStringT.ReplaceAll( Form("(%i)",ipar), Form("[%i]",ipar) );
179  }
180 
181  // sanity check, there should be no "(i)", with 'i' a number anymore
182  for (Int_t ipar=fNPars; ipar<1000; ipar++) {
183  if (fFormulaStringT.Contains( Form("(%i)",ipar) ))
184  Log() << kFATAL
185  << "<CreateFormula> Formula contains expression: \"" << Form("(%i)",ipar) << "\", "
186  << "which cannot be attributed to a parameter; "
187  << "it may be that the number of variable ranges given via \"ParRanges\" "
188  << "does not match the number of parameters in the formula expression, please verify!"
189  << Endl;
190  }
191 
192  // write the variables "xi" as additional parameters "[npar+i]"
193  for (Int_t ivar=GetNvar()-1; ivar >= 0; ivar--) {
194  fFormulaStringT.ReplaceAll( Form("x%i",ivar), Form("[%i]",ivar+fNPars) );
195  }
196 
197  // sanity check, there should be no "xi", with 'i' a number anymore
198  for (UInt_t ivar=GetNvar(); ivar<1000; ivar++) {
199  if (fFormulaStringT.Contains( Form("x%i",ivar) ))
200  Log() << kFATAL
201  << "<CreateFormula> Formula contains expression: \"" << Form("x%i",ivar) << "\", "
202  << "which cannot be attributed to an input variable" << Endl;
203  }
204 
205  Log() << "User-defined formula string : \"" << fFormulaStringP << "\"" << Endl;
206  Log() << "TFormula-compatible formula string: \"" << fFormulaStringT << "\"" << Endl;
207  Log() << "Creating and compiling formula" << Endl;
208 
209  // create TF1
210  if (fFormula) delete fFormula;
211  fFormula = new TFormula( "FDA_Formula", fFormulaStringT );
212 
213  // is formula correct ?
214  if (!fFormula->IsValid())
215  Log() << kFATAL << "<ProcessOptions> Formula expression could not be properly compiled" << Endl;
216 
217  // other sanity checks
218  if (fFormula->GetNpar() > (Int_t)(fNPars + GetNvar()))
219  Log() << kFATAL << "<ProcessOptions> Dubious number of parameters in formula expression: "
220  << fFormula->GetNpar() << " - compared to maximum allowed: " << fNPars + GetNvar() << Endl;
221 }
222 
223 ////////////////////////////////////////////////////////////////////////////////
224 /// the option string is decoded, for availabel options see "DeclareOptions"
225 
227 {
228  // process transient strings
229  fParRangeStringT = fParRangeStringP;
230 
231  // interpret parameter string
232  fParRangeStringT.ReplaceAll( " ", "" );
233  fNPars = fParRangeStringT.CountChar( ')' );
234 
235  TList* parList = gTools().ParseFormatLine( fParRangeStringT, ";" );
236  if ((UInt_t)parList->GetSize() != fNPars) {
237  Log() << kFATAL << "<ProcessOptions> Mismatch in parameter string: "
238  << "the number of parameters: " << fNPars << " != ranges defined: "
239  << parList->GetSize() << "; the format of the \"ParRanges\" string "
240  << "must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
241  << "where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
242  << "each parameter defined in the function string must have a corresponding rang."
243  << Endl;
244  }
245 
246  fParRange.resize( fNPars );
247  for (UInt_t ipar=0; ipar<fNPars; ipar++) fParRange[ipar] = 0;
248 
249  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
250  // parse (a,b)
251  TString str = ((TObjString*)parList->At(ipar))->GetString();
252  Ssiz_t istr = str.First( ',' );
253  TString pminS(str(1,istr-1));
254  TString pmaxS(str(istr+1,str.Length()-2-istr));
255 
256  stringstream stmin; Float_t pmin=0; stmin << pminS.Data(); stmin >> pmin;
257  stringstream stmax; Float_t pmax=0; stmax << pmaxS.Data(); stmax >> pmax;
258 
259  // sanity check
260  if (TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
261  if (pmin > pmax) Log() << kFATAL << "<ProcessOptions> max > min in interval for parameter: ["
262  << ipar << "] : [" << pmin << ", " << pmax << "] " << Endl;
263 
264  Log() << kINFO << "Create parameter interval for parameter " << ipar << " : [" << pmin << "," << pmax << "]" << Endl;
265  fParRange[ipar] = new Interval( pmin, pmax );
266  }
267  delete parList;
268 
269  // create formula
270  CreateFormula();
271 
272 
273  // copy parameter ranges for each output dimension ==================
274  fOutputDimensions = 1;
275  if( DoRegression() )
276  fOutputDimensions = DataInfo().GetNTargets();
277  if( DoMulticlass() )
278  fOutputDimensions = DataInfo().GetNClasses();
279 
280  for( Int_t dim = 1; dim < fOutputDimensions; ++dim ){
281  for( UInt_t par = 0; par < fNPars; ++par ){
282  fParRange.push_back( fParRange.at(par) );
283  }
284  }
285  // ====================
286 
287  // create minimiser
288  fConvergerFitter = (IFitterTarget*)this;
289  if (fConverger == "MINUIT") {
290  fConvergerFitter = new MinuitFitter( *this, Form("%s_Converger_Minuit", GetName()), fParRange, GetOptions() );
291  SetOptions(dynamic_cast<Configurable*>(fConvergerFitter)->GetOptions());
292  }
293 
294  if(fFitMethod == "MC")
295  fFitter = new MCFitter( *fConvergerFitter, Form("%s_Fitter_MC", GetName()), fParRange, GetOptions() );
296  else if (fFitMethod == "GA")
297  fFitter = new GeneticFitter( *fConvergerFitter, Form("%s_Fitter_GA", GetName()), fParRange, GetOptions() );
298  else if (fFitMethod == "SA")
299  fFitter = new SimulatedAnnealingFitter( *fConvergerFitter, Form("%s_Fitter_SA", GetName()), fParRange, GetOptions() );
300  else if (fFitMethod == "MINUIT")
301  fFitter = new MinuitFitter( *fConvergerFitter, Form("%s_Fitter_Minuit", GetName()), fParRange, GetOptions() );
302  else {
303  Log() << kFATAL << "<Train> Do not understand fit method:" << fFitMethod << Endl;
304  }
305 
306  fFitter->CheckForUnusedOptions();
307 }
308 
309 ////////////////////////////////////////////////////////////////////////////////
310 /// destructor
311 
313 {
314  ClearAll();
315 }
316 
317 ////////////////////////////////////////////////////////////////////////////////
318 /// FDA can handle classification with 2 classes and regression with one regression-target
319 
321 {
322  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
323  if (type == Types::kMulticlass ) return kTRUE;
324  if (type == Types::kRegression ) return kTRUE;
325  return kFALSE;
326 }
327 
328 
329 ////////////////////////////////////////////////////////////////////////////////
330 /// delete and clear all class members
331 
333 {
334  // if there is more than one output dimension, the paramater ranges are the same again (object has been copied).
335  // hence, ... erase the copied pointers to assure, that they are deleted only once.
336 // fParRange.erase( fParRange.begin()+(fNPars), fParRange.end() );
337  for (UInt_t ipar=0; ipar<fParRange.size() && ipar<fNPars; ipar++) {
338  if (fParRange[ipar] != 0) { delete fParRange[ipar]; fParRange[ipar] = 0; }
339  }
340  fParRange.clear();
341 
342  if (fFormula != 0) { delete fFormula; fFormula = 0; }
343  fBestPars.clear();
344 }
345 
346 ////////////////////////////////////////////////////////////////////////////////
347 /// FDA training
348 
350 {
351  // cache training events
352  fSumOfWeights = 0;
353  fSumOfWeightsSig = 0;
354  fSumOfWeightsBkg = 0;
355 
356  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
357 
358  // read the training event
359  const Event* ev = GetEvent(ievt);
360 
361  // true event copy
362  Float_t w = ev->GetWeight();
363 
364  if (!DoRegression()) {
365  if (DataInfo().IsSignal(ev)) { fSumOfWeightsSig += w; }
366  else { fSumOfWeightsBkg += w; }
367  }
368  fSumOfWeights += w;
369  }
370 
371  // sanity check
372  if (!DoRegression()) {
373  if (fSumOfWeightsSig <= 0 || fSumOfWeightsBkg <= 0) {
374  Log() << kFATAL << "<Train> Troubles in sum of weights: "
375  << fSumOfWeightsSig << " (S) : " << fSumOfWeightsBkg << " (B)" << Endl;
376  }
377  }
378  else if (fSumOfWeights <= 0) {
379  Log() << kFATAL << "<Train> Troubles in sum of weights: "
380  << fSumOfWeights << Endl;
381  }
382 
383  // starting values (not used by all fitters)
384  fBestPars.clear();
385  for (std::vector<Interval*>::const_iterator parIt = fParRange.begin(); parIt != fParRange.end(); parIt++) {
386  fBestPars.push_back( (*parIt)->GetMean() );
387  }
388 
389  // execute the fit
390  Double_t estimator = fFitter->Run( fBestPars );
391 
392  // print results
393  PrintResults( fFitMethod, fBestPars, estimator );
394 
395  delete fFitter; fFitter = 0;
396  if (fConvergerFitter!=0 && fConvergerFitter!=(IFitterTarget*)this) {
397  delete fConvergerFitter;
398  fConvergerFitter = 0;
399  }
400 }
401 
402 ////////////////////////////////////////////////////////////////////////////////
403 /// display fit parameters
404 /// check maximum length of variable name
405 
406 void TMVA::MethodFDA::PrintResults( const TString& fitter, std::vector<Double_t>& pars, const Double_t estimator ) const
407 {
408  Log() << kINFO;
409  Log() << "Results for parameter fit using \"" << fitter << "\" fitter:" << Endl;
410  std::vector<TString> parNames;
411  for (UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back( Form("Par(%i)",ipar ) );
412  gTools().FormattedOutput( pars, parNames, "Parameter" , "Fit result", Log(), "%g" );
413  Log() << "Discriminator expression: \"" << fFormulaStringP << "\"" << Endl;
414  Log() << "Value of estimator at minimum: " << estimator << Endl;
415 }
416 
417 
418 ////////////////////////////////////////////////////////////////////////////////
419 /// compute estimator for given parameter set (to be minimised)
420 /// const Double_t sumOfWeights[] = { fSumOfWeightsSig, fSumOfWeightsBkg, fSumOfWeights };
421 
422 Double_t TMVA::MethodFDA::EstimatorFunction( std::vector<Double_t>& pars )
423 {
424  const Double_t sumOfWeights[] = { fSumOfWeightsBkg, fSumOfWeightsSig, fSumOfWeights };
425  Double_t estimator[] = { 0, 0, 0 };
426 
427  Double_t result, deviation;
428  Double_t desired = 0.0;
429 
430  // calculate the deviation from the desired value
431  if( DoRegression() ){
432  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
433  // read the training event
434  const TMVA::Event* ev = GetEvent(ievt);
435 
436  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
437  desired = ev->GetTarget( dim );
438  result = InterpretFormula( ev, pars.begin(), pars.end() );
439  deviation = TMath::Power(result - desired, 2);
440  estimator[2] += deviation * ev->GetWeight();
441  }
442  }
443  estimator[2] /= sumOfWeights[2];
444  // return value is sum over normalised signal and background contributions
445  return estimator[2];
446 
447  }else if( DoMulticlass() ){
448  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
449  // read the training event
450  const TMVA::Event* ev = GetEvent(ievt);
451 
452  CalculateMulticlassValues( ev, pars, *fMulticlassReturnVal );
453 
454  Double_t crossEntropy = 0.0;
455  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
456  Double_t y = fMulticlassReturnVal->at(dim);
457  Double_t t = (ev->GetClass() == static_cast<UInt_t>(dim) ? 1.0 : 0.0 );
458  crossEntropy += t*log(y);
459  }
460  estimator[2] += ev->GetWeight()*crossEntropy;
461  }
462  estimator[2] /= sumOfWeights[2];
463  // return value is sum over normalised signal and background contributions
464  return estimator[2];
465 
466  }else{
467  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
468  // read the training event
469  const TMVA::Event* ev = GetEvent(ievt);
470 
471  desired = (DataInfo().IsSignal(ev) ? 1.0 : 0.0);
472  result = InterpretFormula( ev, pars.begin(), pars.end() );
473  deviation = TMath::Power(result - desired, 2);
474  estimator[Int_t(desired)] += deviation * ev->GetWeight();
475  }
476  estimator[0] /= sumOfWeights[0];
477  estimator[1] /= sumOfWeights[1];
478  // return value is sum over normalised signal and background contributions
479  return estimator[0] + estimator[1];
480  }
481 }
482 
483 ////////////////////////////////////////////////////////////////////////////////
484 /// formula interpretation
485 
486 Double_t TMVA::MethodFDA::InterpretFormula( const Event* event, std::vector<Double_t>::iterator parBegin, std::vector<Double_t>::iterator parEnd )
487 {
488  Int_t ipar = 0;
489 // std::cout << "pars ";
490  for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
491 // std::cout << " i" << ipar << " val" << (*it);
492  fFormula->SetParameter( ipar, (*it) );
493  ++ipar;
494  }
495  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) fFormula->SetParameter( ivar+ipar, event->GetValue(ivar) );
496 
497  Double_t result = fFormula->Eval( 0 );
498 // std::cout << " result " << result << std::endl;
499  return result;
500 }
501 
502 ////////////////////////////////////////////////////////////////////////////////
503 /// returns MVA value for given event
504 
506 {
507  const Event* ev = GetEvent();
508 
509  // cannot determine error
510  NoErrorCalc(err, errUpper);
511 
512  return InterpretFormula( ev, fBestPars.begin(), fBestPars.end() );
513 }
514 
515 ////////////////////////////////////////////////////////////////////////////////
516 
517 const std::vector<Float_t>& TMVA::MethodFDA::GetRegressionValues()
518 {
519  if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
520  fRegressionReturnVal->clear();
521 
522  const Event* ev = GetEvent();
523 
524  Event* evT = new Event(*ev);
525 
526  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
527  Int_t offset = dim*fNPars;
528  evT->SetTarget(dim,InterpretFormula( ev, fBestPars.begin()+offset, fBestPars.begin()+offset+fNPars ) );
529  }
530  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
531  fRegressionReturnVal->push_back(evT2->GetTarget(0));
532 
533  delete evT;
534 
535  return (*fRegressionReturnVal);
536 }
537 
538 
539 ////////////////////////////////////////////////////////////////////////////////
540 
541 const std::vector<Float_t>& TMVA::MethodFDA::GetMulticlassValues()
542 {
543  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
544  fMulticlassReturnVal->clear();
545  std::vector<Float_t> temp;
546 
547  // returns MVA value for given event
548  const TMVA::Event* evt = GetEvent();
549 
550  CalculateMulticlassValues( evt, fBestPars, temp );
551 
552  UInt_t nClasses = DataInfo().GetNClasses();
553  for(UInt_t iClass=0; iClass<nClasses; iClass++){
554  Double_t norm = 0.0;
555  for(UInt_t j=0;j<nClasses;j++){
556  if(iClass!=j)
557  norm+=exp(temp[j]-temp[iClass]);
558  }
559  (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
560  }
561 
562  return (*fMulticlassReturnVal);
563 }
564 
565 
566 ////////////////////////////////////////////////////////////////////////////////
567 /// calculate the values for multiclass
568 
569 void TMVA::MethodFDA::CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values)
570 {
571  values.clear();
572 
573 // std::copy( parameters.begin(), parameters.end(), std::ostream_iterator<double>( std::cout, " " ) );
574 // std::cout << std::endl;
575 
576 // char inp;
577 // std::cin >> inp;
578 
579  Double_t sum=0;
580  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){ // check for all other dimensions (=classes)
581  Int_t offset = dim*fNPars;
582  Double_t value = InterpretFormula( evt, parameters.begin()+offset, parameters.begin()+offset+fNPars );
583 // std::cout << "dim : " << dim << " value " << value << " offset " << offset << std::endl;
584  values.push_back( value );
585  sum += value;
586  }
587 
588 // // normalize to sum of value (commented out, .. have to think of how to treat negative classifier values)
589 // std::transform( fMulticlassReturnVal.begin(), fMulticlassReturnVal.end(), fMulticlassReturnVal.begin(), bind2nd( std::divides<float>(), sum) );
590 }
591 
592 
593 
594 ////////////////////////////////////////////////////////////////////////////////
595 /// read back the training results from a file (stream)
596 
597 void TMVA::MethodFDA::ReadWeightsFromStream( std::istream& istr )
598 {
599  // retrieve best function parameters
600  // coverity[tainted_data_argument]
601  istr >> fNPars;
602 
603  fBestPars.clear();
604  fBestPars.resize( fNPars );
605  for (UInt_t ipar=0; ipar<fNPars; ipar++) istr >> fBestPars[ipar];
606 }
607 
608 ////////////////////////////////////////////////////////////////////////////////
609 /// create XML description for LD classification and regression
610 /// (for arbitrary number of output classes/targets)
611 
612 void TMVA::MethodFDA::AddWeightsXMLTo( void* parent ) const
613 {
614  void* wght = gTools().AddChild(parent, "Weights");
615  gTools().AddAttr( wght, "NPars", fNPars );
616  gTools().AddAttr( wght, "NDim", fOutputDimensions );
617  for (UInt_t ipar=0; ipar<fNPars*fOutputDimensions; ipar++) {
618  void* coeffxml = gTools().AddChild( wght, "Parameter" );
619  gTools().AddAttr( coeffxml, "Index", ipar );
620  gTools().AddAttr( coeffxml, "Value", fBestPars[ipar] );
621  }
622 
623  // write formula
624  gTools().AddAttr( wght, "Formula", fFormulaStringP );
625 }
626 
627 ////////////////////////////////////////////////////////////////////////////////
628 /// read coefficients from xml weight file
629 
631 {
632  gTools().ReadAttr( wghtnode, "NPars", fNPars );
633 
634  if(gTools().HasAttr( wghtnode, "NDim")) {
635  gTools().ReadAttr( wghtnode, "NDim" , fOutputDimensions );
636  } else {
637  // older weight files don't have this attribute
638  fOutputDimensions = 1;
639  }
640 
641  fBestPars.clear();
642  fBestPars.resize( fNPars*fOutputDimensions );
643 
644  void* ch = gTools().GetChild(wghtnode);
645  Double_t par;
646  UInt_t ipar;
647  while (ch) {
648  gTools().ReadAttr( ch, "Index", ipar );
649  gTools().ReadAttr( ch, "Value", par );
650 
651  // sanity check
652  if (ipar >= fNPars*fOutputDimensions) Log() << kFATAL << "<ReadWeightsFromXML> index out of range: "
653  << ipar << " >= " << fNPars << Endl;
654  fBestPars[ipar] = par;
655 
656  ch = gTools().GetNextChild(ch);
657  }
658 
659  // read formula
660  gTools().ReadAttr( wghtnode, "Formula", fFormulaStringP );
661 
662  // create the TFormula
663  CreateFormula();
664 }
665 
666 ////////////////////////////////////////////////////////////////////////////////
667 /// write FDA-specific classifier response
668 
669 void TMVA::MethodFDA::MakeClassSpecific( std::ostream& fout, const TString& className ) const
670 {
671  fout << " double fParameter[" << fNPars << "];" << std::endl;
672  fout << "};" << std::endl;
673  fout << "" << std::endl;
674  fout << "inline void " << className << "::Initialize() " << std::endl;
675  fout << "{" << std::endl;
676  for(UInt_t ipar=0; ipar<fNPars; ipar++) {
677  fout << " fParameter[" << ipar << "] = " << fBestPars[ipar] << ";" << std::endl;
678  }
679  fout << "}" << std::endl;
680  fout << std::endl;
681  fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
682  fout << "{" << std::endl;
683  fout << " // interpret the formula" << std::endl;
684 
685  // replace parameters
686  TString str = fFormulaStringT;
687  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
688  str.ReplaceAll( Form("[%i]", ipar), Form("fParameter[%i]", ipar) );
689  }
690 
691  // replace input variables
692  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
693  str.ReplaceAll( Form("[%i]", ivar+fNPars), Form("inputValues[%i]", ivar) );
694  }
695 
696  fout << " double retval = " << str << ";" << std::endl;
697  fout << std::endl;
698  fout << " return retval; " << std::endl;
699  fout << "}" << std::endl;
700  fout << std::endl;
701  fout << "// Clean up" << std::endl;
702  fout << "inline void " << className << "::Clear() " << std::endl;
703  fout << "{" << std::endl;
704  fout << " // nothing to clear" << std::endl;
705  fout << "}" << std::endl;
706 }
707 
708 ////////////////////////////////////////////////////////////////////////////////
709 /// get help message text
710 ///
711 /// typical length of text line:
712 /// "|--------------------------------------------------------------|"
713 
715 {
716  Log() << Endl;
717  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
718  Log() << Endl;
719  Log() << "The function discriminant analysis (FDA) is a classifier suitable " << Endl;
720  Log() << "to solve linear or simple nonlinear discrimination problems." << Endl;
721  Log() << Endl;
722  Log() << "The user provides the desired function with adjustable parameters" << Endl;
723  Log() << "via the configuration option string, and FDA fits the parameters to" << Endl;
724  Log() << "it, requiring the signal (background) function value to be as close" << Endl;
725  Log() << "as possible to 1 (0). Its advantage over the more involved and" << Endl;
726  Log() << "automatic nonlinear discriminators is the simplicity and transparency " << Endl;
727  Log() << "of the discrimination expression. A shortcoming is that FDA will" << Endl;
728  Log() << "underperform for involved problems with complicated, phase space" << Endl;
729  Log() << "dependent nonlinear correlations." << Endl;
730  Log() << Endl;
731  Log() << "Please consult the Users Guide for the format of the formula string" << Endl;
732  Log() << "and the allowed parameter ranges:" << Endl;
733  if (gConfig().WriteOptionsReference()) {
734  Log() << "<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">"
735  << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" << Endl;
736  }
737  else Log() << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" << Endl;
738  Log() << Endl;
739  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
740  Log() << Endl;
741  Log() << "The FDA performance depends on the complexity and fidelity of the" << Endl;
742  Log() << "user-defined discriminator function. As a general rule, it should" << Endl;
743  Log() << "be able to reproduce the discrimination power of any linear" << Endl;
744  Log() << "discriminant analysis. To reach into the nonlinear domain, it is" << Endl;
745  Log() << "useful to inspect the correlation profiles of the input variables," << Endl;
746  Log() << "and add quadratic and higher polynomial terms between variables as" << Endl;
747  Log() << "necessary. Comparison with more involved nonlinear classifiers can" << Endl;
748  Log() << "be used as a guide." << Endl;
749  Log() << Endl;
750  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
751  Log() << Endl;
752  Log() << "Depending on the function used, the choice of \"FitMethod\" is" << Endl;
753  Log() << "crucial for getting valuable solutions with FDA. As a guideline it" << Endl;
754  Log() << "is recommended to start with \"FitMethod=MINUIT\". When more complex" << Endl;
755  Log() << "functions are used where MINUIT does not converge to reasonable" << Endl;
756  Log() << "results, the user should switch to non-gradient FitMethods such" << Endl;
757  Log() << "as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" << Endl;
758  Log() << "useful to combine GA (or MC) with MINUIT by setting the option" << Endl;
759  Log() << "\"Converger=MINUIT\". GA (MC) will then set the starting parameters" << Endl;
760  Log() << "for MINUIT such that the basic quality of GA (MC) of finding global" << Endl;
761  Log() << "minima is combined with the efficacy of MINUIT of finding local" << Endl;
762  Log() << "minima." << Endl;
763 }
double par[1]
Definition: unuranDistr.cxx:38
void Init(void)
default initialisation
Definition: MethodFDA.cxx:113
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodFDA.cxx:149
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void ClearAll()
delete and clear all class members
Definition: MethodFDA.cxx:332
Ssiz_t Length() const
Definition: TString.h:390
Collectable string class.
Definition: TObjString.h:32
float Float_t
Definition: RtypesCore.h:53
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
Config & gConfig()
EAnalysisType
Definition: Types.h:124
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition: MethodFDA.cxx:486
Basic string class.
Definition: TString.h:137
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition: MethodFDA.cxx:169
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:376
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:308
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:310
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:231
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters check maximum length of variable name
Definition: MethodFDA.cxx:406
const char * Data() const
Definition: TString.h:349
Tools & gTools()
Definition: Tools.cxx:79
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodFDA.cxx:505
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodFDA.cxx:320
A doubly linked list.
Definition: TList.h:47
ClassImp(TMVA::MethodFDA) TMVA
standard constructor
Definition: MethodFDA.cxx:68
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition: MethodFDA.cxx:569
The F O R M U L A class.
Definition: TFormula.h:89
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:354
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:295
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Definition: MethodFDA.cxx:630
void Train(void)
FDA training.
Definition: MethodFDA.cxx:349
void ProcessOptions()
the option string is decoded, for availabel options see "DeclareOptions"
Definition: MethodFDA.cxx:226
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Definition: MethodFDA.cxx:669
int Ssiz_t
Definition: RtypesCore.h:63
virtual Int_t GetSize() const
Definition: TCollection.h:95
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:41
int type
Definition: TGX11.cxx:120
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised) const Double_t sumOfWeights[] = { fSumOfW...
Definition: MethodFDA.cxx:422
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
Double_t y[n]
Definition: legend1.C:17
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Definition: MethodFDA.cxx:597
UInt_t GetClass() const
Definition: Event.h:86
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=0)
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:896
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:101
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodFDA.cxx:517
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
#define NULL
Definition: Rtypes.h:82
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodFDA.cxx:541
double result[121]
void GetHelpMessage() const
get help message text
Definition: MethodFDA.cxx:714
double exp(double)
const Bool_t kTRUE
Definition: Rtypes.h:91
float value
Definition: math.cpp:443
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
virtual ~MethodFDA(void)
destructor
Definition: MethodFDA.cxx:312
Definition: math.cpp:60
double log(double)
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:466
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
Definition: MethodFDA.cxx:612