ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
MethodFDA.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodFDA *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * *
19  * Copyright (c) 2005-2006: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 //_______________________________________________________________________
29 //
30 // Function discriminant analysis (FDA). This simple classifier //
31 // fits any user-defined TFormula (via option configuration string) to //
32 // the training data by requiring a formula response of 1 (0) to signal //
33 // (background) events. The parameter fitting is done via the abstract //
34 // class FitterBase, featuring Monte Carlo sampling, Genetic //
35 // Algorithm, Simulated Annealing, MINUIT and combinations of these. //
36 // //
37 // Can compute regression value for one dimensional output //
38 //_______________________________________________________________________
39 
40 #include "TMVA/MethodFDA.h"
41 
42 #include "Riostream.h"
43 #include "TList.h"
44 #include "TFormula.h"
45 #include "TString.h"
46 #include "TObjString.h"
47 #include "TRandom3.h"
48 #include "TMath.h"
49 #include <sstream>
50 
51 #include <algorithm>
52 #include <iterator>
53 #include <stdexcept>
54 
55 #include "TMVA/ClassifierFactory.h"
56 #include "TMVA/Config.h"
57 #include "TMVA/Configurable.h"
58 #include "TMVA/DataSetInfo.h"
59 #include "TMVA/FitterBase.h"
60 #include "TMVA/GeneticFitter.h"
61 #include "TMVA/Interval.h"
62 #include "TMVA/IFitterTarget.h"
63 #include "TMVA/MCFitter.h"
64 #include "TMVA/MinuitFitter.h"
65 #include "TMVA/MsgLogger.h"
66 #include "TMVA/Timer.h"
67 #include "TMVA/Tools.h"
69 #include "TMVA/Types.h"
71 
72 using std::stringstream;
73 
74 REGISTER_METHOD(FDA)
75 
76 ClassImp(TMVA::MethodFDA)
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 /// standard constructor
80 
81 TMVA::MethodFDA::MethodFDA( const TString& jobName,
82  const TString& methodTitle,
83  DataSetInfo& theData,
84  const TString& theOption,
85  TDirectory* theTargetDir )
86  : MethodBase( jobName, Types::kFDA, methodTitle, theData, theOption, theTargetDir ),
87  IFitterTarget (),
88  fFormula ( 0 ),
89  fNPars ( 0 ),
90  fFitter ( 0 ),
91  fConvergerFitter( 0 ),
92  fSumOfWeightsSig( 0 ),
93  fSumOfWeightsBkg( 0 ),
94  fSumOfWeights ( 0 ),
95  fOutputDimensions( 0 )
96 {
97 }
98 
99 ////////////////////////////////////////////////////////////////////////////////
100 /// constructor from weight file
101 
103  const TString& theWeightFile,
104  TDirectory* theTargetDir )
105  : MethodBase( Types::kFDA, theData, theWeightFile, theTargetDir ),
106  IFitterTarget (),
107  fFormula ( 0 ),
108  fNPars ( 0 ),
109  fFitter ( 0 ),
110  fConvergerFitter( 0 ),
111  fSumOfWeightsSig( 0 ),
112  fSumOfWeightsBkg( 0 ),
113  fSumOfWeights ( 0 ),
114  fOutputDimensions( 0 )
115 {
116 }
117 
118 ////////////////////////////////////////////////////////////////////////////////
119 /// default initialisation
120 
122 {
123  fNPars = 0;
124 
125  fBestPars.clear();
126 
127  fSumOfWeights = 0;
128  fSumOfWeightsSig = 0;
129  fSumOfWeightsBkg = 0;
130 
131  fFormulaStringP = "";
132  fParRangeStringP = "";
133  fFormulaStringT = "";
134  fParRangeStringT = "";
135 
136  fFitMethod = "";
137  fConverger = "";
138 
139  if( DoMulticlass() )
140  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
141 
142 }
143 
144 ////////////////////////////////////////////////////////////////////////////////
145 /// define the options (their key words) that can be set in the option string
146 ///
147 /// format of function string:
148 /// "x0*(0)+((1)/x1)**(2)..."
149 /// where "[i]" are the parameters, and "xi" the input variables
150 ///
151 /// format of parameter string:
152 /// "(-1.2,3.4);(-2.3,4.55);..."
153 /// where the numbers in "(a,b)" correspond to the a=min, b=max parameter ranges;
154 /// each parameter defined in the function string must have a corresponding range
155 ///
156 
158 {
159  DeclareOptionRef( fFormulaStringP = "(0)", "Formula", "The discrimination formula" );
160  DeclareOptionRef( fParRangeStringP = "()", "ParRanges", "Parameter ranges" );
161 
162  // fitter
163  DeclareOptionRef( fFitMethod = "MINUIT", "FitMethod", "Optimisation Method");
164  AddPreDefVal(TString("MC"));
165  AddPreDefVal(TString("GA"));
166  AddPreDefVal(TString("SA"));
167  AddPreDefVal(TString("MINUIT"));
168 
169  DeclareOptionRef( fConverger = "None", "Converger", "FitMethod uses Converger to improve result");
170  AddPreDefVal(TString("None"));
171  AddPreDefVal(TString("MINUIT"));
172 }
173 
174 ////////////////////////////////////////////////////////////////////////////////
175 /// translate formula string into TFormula, and parameter string into par ranges
176 
178 {
179  // process transient strings
180  fFormulaStringT = fFormulaStringP;
181 
182  // intepret formula string
183 
184  // replace the parameters "(i)" by the TFormula style "[i]"
185  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
186  fFormulaStringT.ReplaceAll( Form("(%i)",ipar), Form("[%i]",ipar) );
187  }
188 
189  // sanity check, there should be no "(i)", with 'i' a number anymore
190  for (Int_t ipar=fNPars; ipar<1000; ipar++) {
191  if (fFormulaStringT.Contains( Form("(%i)",ipar) ))
192  Log() << kFATAL
193  << "<CreateFormula> Formula contains expression: \"" << Form("(%i)",ipar) << "\", "
194  << "which cannot be attributed to a parameter; "
195  << "it may be that the number of variable ranges given via \"ParRanges\" "
196  << "does not match the number of parameters in the formula expression, please verify!"
197  << Endl;
198  }
199 
200  // write the variables "xi" as additional parameters "[npar+i]"
201  for (Int_t ivar=GetNvar()-1; ivar >= 0; ivar--) {
202  fFormulaStringT.ReplaceAll( Form("x%i",ivar), Form("[%i]",ivar+fNPars) );
203  }
204 
205  // sanity check, there should be no "xi", with 'i' a number anymore
206  for (UInt_t ivar=GetNvar(); ivar<1000; ivar++) {
207  if (fFormulaStringT.Contains( Form("x%i",ivar) ))
208  Log() << kFATAL
209  << "<CreateFormula> Formula contains expression: \"" << Form("x%i",ivar) << "\", "
210  << "which cannot be attributed to an input variable" << Endl;
211  }
212 
213  Log() << "User-defined formula string : \"" << fFormulaStringP << "\"" << Endl;
214  Log() << "TFormula-compatible formula string: \"" << fFormulaStringT << "\"" << Endl;
215  Log() << "Creating and compiling formula" << Endl;
216 
217  // create TF1
218  if (fFormula) delete fFormula;
219  fFormula = new TFormula( "FDA_Formula", fFormulaStringT );
220 
221  // is formula correct ?
222  if (!fFormula->IsValid())
223  Log() << kFATAL << "<ProcessOptions> Formula expression could not be properly compiled" << Endl;
224 
225  // other sanity checks
226  if (fFormula->GetNpar() > (Int_t)(fNPars + GetNvar()))
227  Log() << kFATAL << "<ProcessOptions> Dubious number of parameters in formula expression: "
228  << fFormula->GetNpar() << " - compared to maximum allowed: " << fNPars + GetNvar() << Endl;
229 }
230 
231 ////////////////////////////////////////////////////////////////////////////////
232 /// the option string is decoded, for availabel options see "DeclareOptions"
233 
235 {
236  // process transient strings
237  fParRangeStringT = fParRangeStringP;
238 
239  // interpret parameter string
240  fParRangeStringT.ReplaceAll( " ", "" );
241  fNPars = fParRangeStringT.CountChar( ')' );
242 
243  TList* parList = gTools().ParseFormatLine( fParRangeStringT, ";" );
244  if ((UInt_t)parList->GetSize() != fNPars) {
245  Log() << kFATAL << "<ProcessOptions> Mismatch in parameter string: "
246  << "the number of parameters: " << fNPars << " != ranges defined: "
247  << parList->GetSize() << "; the format of the \"ParRanges\" string "
248  << "must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
249  << "where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
250  << "each parameter defined in the function string must have a corresponding rang."
251  << Endl;
252  }
253 
254  fParRange.resize( fNPars );
255  for (UInt_t ipar=0; ipar<fNPars; ipar++) fParRange[ipar] = 0;
256 
257  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
258  // parse (a,b)
259  TString str = ((TObjString*)parList->At(ipar))->GetString();
260  Ssiz_t istr = str.First( ',' );
261  TString pminS(str(1,istr-1));
262  TString pmaxS(str(istr+1,str.Length()-2-istr));
263 
264  stringstream stmin; Float_t pmin=0; stmin << pminS.Data(); stmin >> pmin;
265  stringstream stmax; Float_t pmax=0; stmax << pmaxS.Data(); stmax >> pmax;
266 
267  // sanity check
268  if (TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
269  if (pmin > pmax) Log() << kFATAL << "<ProcessOptions> max > min in interval for parameter: ["
270  << ipar << "] : [" << pmin << ", " << pmax << "] " << Endl;
271 
272  Log() << kINFO << "Create parameter interval for parameter " << ipar << " : [" << pmin << "," << pmax << "]" << Endl;
273  fParRange[ipar] = new Interval( pmin, pmax );
274  }
275  delete parList;
276 
277  // create formula
278  CreateFormula();
279 
280 
281  // copy parameter ranges for each output dimension ==================
282  fOutputDimensions = 1;
283  if( DoRegression() )
284  fOutputDimensions = DataInfo().GetNTargets();
285  if( DoMulticlass() )
286  fOutputDimensions = DataInfo().GetNClasses();
287 
288  for( Int_t dim = 1; dim < fOutputDimensions; ++dim ){
289  for( UInt_t par = 0; par < fNPars; ++par ){
290  fParRange.push_back( fParRange.at(par) );
291  }
292  }
293  // ====================
294 
295  // create minimiser
296  fConvergerFitter = (IFitterTarget*)this;
297  if (fConverger == "MINUIT") {
298  fConvergerFitter = new MinuitFitter( *this, Form("%s_Converger_Minuit", GetName()), fParRange, GetOptions() );
299  SetOptions(dynamic_cast<Configurable*>(fConvergerFitter)->GetOptions());
300  }
301 
302  if(fFitMethod == "MC")
303  fFitter = new MCFitter( *fConvergerFitter, Form("%s_Fitter_MC", GetName()), fParRange, GetOptions() );
304  else if (fFitMethod == "GA")
305  fFitter = new GeneticFitter( *fConvergerFitter, Form("%s_Fitter_GA", GetName()), fParRange, GetOptions() );
306  else if (fFitMethod == "SA")
307  fFitter = new SimulatedAnnealingFitter( *fConvergerFitter, Form("%s_Fitter_SA", GetName()), fParRange, GetOptions() );
308  else if (fFitMethod == "MINUIT")
309  fFitter = new MinuitFitter( *fConvergerFitter, Form("%s_Fitter_Minuit", GetName()), fParRange, GetOptions() );
310  else {
311  Log() << kFATAL << "<Train> Do not understand fit method:" << fFitMethod << Endl;
312  }
313 
314  fFitter->CheckForUnusedOptions();
315 }
316 
317 ////////////////////////////////////////////////////////////////////////////////
318 /// destructor
319 
321 {
322  ClearAll();
323 }
324 
325 ////////////////////////////////////////////////////////////////////////////////
326 /// FDA can handle classification with 2 classes and regression with one regression-target
327 
329 {
330  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
331  if (type == Types::kMulticlass ) return kTRUE;
332  if (type == Types::kRegression ) return kTRUE;
333  return kFALSE;
334 }
335 
336 
337 ////////////////////////////////////////////////////////////////////////////////
338 /// delete and clear all class members
339 
341 {
342  // if there is more than one output dimension, the paramater ranges are the same again (object has been copied).
343  // hence, ... erase the copied pointers to assure, that they are deleted only once.
344 // fParRange.erase( fParRange.begin()+(fNPars), fParRange.end() );
345  for (UInt_t ipar=0; ipar<fParRange.size() && ipar<fNPars; ipar++) {
346  if (fParRange[ipar] != 0) { delete fParRange[ipar]; fParRange[ipar] = 0; }
347  }
348  fParRange.clear();
349 
350  if (fFormula != 0) { delete fFormula; fFormula = 0; }
351  fBestPars.clear();
352 }
353 
354 ////////////////////////////////////////////////////////////////////////////////
355 /// FDA training
356 
358 {
359  // cache training events
360  fSumOfWeights = 0;
361  fSumOfWeightsSig = 0;
362  fSumOfWeightsBkg = 0;
363 
364  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
365 
366  // read the training event
367  const Event* ev = GetEvent(ievt);
368 
369  // true event copy
370  Float_t w = ev->GetWeight();
371 
372  if (!DoRegression()) {
373  if (DataInfo().IsSignal(ev)) { fSumOfWeightsSig += w; }
374  else { fSumOfWeightsBkg += w; }
375  }
376  fSumOfWeights += w;
377  }
378 
379  // sanity check
380  if (!DoRegression()) {
381  if (fSumOfWeightsSig <= 0 || fSumOfWeightsBkg <= 0) {
382  Log() << kFATAL << "<Train> Troubles in sum of weights: "
383  << fSumOfWeightsSig << " (S) : " << fSumOfWeightsBkg << " (B)" << Endl;
384  }
385  }
386  else if (fSumOfWeights <= 0) {
387  Log() << kFATAL << "<Train> Troubles in sum of weights: "
388  << fSumOfWeights << Endl;
389  }
390 
391  // starting values (not used by all fitters)
392  fBestPars.clear();
393  for (std::vector<Interval*>::const_iterator parIt = fParRange.begin(); parIt != fParRange.end(); parIt++) {
394  fBestPars.push_back( (*parIt)->GetMean() );
395  }
396 
397  // execute the fit
398  Double_t estimator = fFitter->Run( fBestPars );
399 
400  // print results
401  PrintResults( fFitMethod, fBestPars, estimator );
402 
403  delete fFitter; fFitter = 0;
404  if (fConvergerFitter!=0 && fConvergerFitter!=(IFitterTarget*)this) {
405  delete fConvergerFitter;
406  fConvergerFitter = 0;
407  }
408 }
409 
410 ////////////////////////////////////////////////////////////////////////////////
411 /// display fit parameters
412 /// check maximum length of variable name
413 
414 void TMVA::MethodFDA::PrintResults( const TString& fitter, std::vector<Double_t>& pars, const Double_t estimator ) const
415 {
416  Log() << kINFO;
417  Log() << "Results for parameter fit using \"" << fitter << "\" fitter:" << Endl;
418  std::vector<TString> parNames;
419  for (UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back( Form("Par(%i)",ipar ) );
420  gTools().FormattedOutput( pars, parNames, "Parameter" , "Fit result", Log(), "%g" );
421  Log() << "Discriminator expression: \"" << fFormulaStringP << "\"" << Endl;
422  Log() << "Value of estimator at minimum: " << estimator << Endl;
423 }
424 
425 
426 ////////////////////////////////////////////////////////////////////////////////
427 /// compute estimator for given parameter set (to be minimised)
428 /// const Double_t sumOfWeights[] = { fSumOfWeightsSig, fSumOfWeightsBkg, fSumOfWeights };
429 
430 Double_t TMVA::MethodFDA::EstimatorFunction( std::vector<Double_t>& pars )
431 {
432  const Double_t sumOfWeights[] = { fSumOfWeightsBkg, fSumOfWeightsSig, fSumOfWeights };
433  Double_t estimator[] = { 0, 0, 0 };
434 
435  Double_t result, deviation;
436  Double_t desired = 0.0;
437 
438  // calculate the deviation from the desired value
439  if( DoRegression() ){
440  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
441  // read the training event
442  const TMVA::Event* ev = GetEvent(ievt);
443 
444  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
445  desired = ev->GetTarget( dim );
446  result = InterpretFormula( ev, pars.begin(), pars.end() );
447  deviation = TMath::Power(result - desired, 2);
448  estimator[2] += deviation * ev->GetWeight();
449  }
450  }
451  estimator[2] /= sumOfWeights[2];
452  // return value is sum over normalised signal and background contributions
453  return estimator[2];
454 
455  }else if( DoMulticlass() ){
456  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
457  // read the training event
458  const TMVA::Event* ev = GetEvent(ievt);
459 
460  CalculateMulticlassValues( ev, pars, *fMulticlassReturnVal );
461 
462  Double_t crossEntropy = 0.0;
463  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
464  Double_t y = fMulticlassReturnVal->at(dim);
465  Double_t t = (ev->GetClass() == static_cast<UInt_t>(dim) ? 1.0 : 0.0 );
466  crossEntropy += t*log(y);
467  }
468  estimator[2] += ev->GetWeight()*crossEntropy;
469  }
470  estimator[2] /= sumOfWeights[2];
471  // return value is sum over normalised signal and background contributions
472  return estimator[2];
473 
474  }else{
475  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
476  // read the training event
477  const TMVA::Event* ev = GetEvent(ievt);
478 
479  desired = (DataInfo().IsSignal(ev) ? 1.0 : 0.0);
480  result = InterpretFormula( ev, pars.begin(), pars.end() );
481  deviation = TMath::Power(result - desired, 2);
482  estimator[Int_t(desired)] += deviation * ev->GetWeight();
483  }
484  estimator[0] /= sumOfWeights[0];
485  estimator[1] /= sumOfWeights[1];
486  // return value is sum over normalised signal and background contributions
487  return estimator[0] + estimator[1];
488  }
489 }
490 
491 ////////////////////////////////////////////////////////////////////////////////
492 /// formula interpretation
493 
494 Double_t TMVA::MethodFDA::InterpretFormula( const Event* event, std::vector<Double_t>::iterator parBegin, std::vector<Double_t>::iterator parEnd )
495 {
496  Int_t ipar = 0;
497 // std::cout << "pars ";
498  for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
499 // std::cout << " i" << ipar << " val" << (*it);
500  fFormula->SetParameter( ipar, (*it) );
501  ++ipar;
502  }
503  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) fFormula->SetParameter( ivar+ipar, event->GetValue(ivar) );
504 
505  Double_t result = fFormula->Eval( 0 );
506 // std::cout << " result " << result << std::endl;
507  return result;
508 }
509 
510 ////////////////////////////////////////////////////////////////////////////////
511 /// returns MVA value for given event
512 
514 {
515  const Event* ev = GetEvent();
516 
517  // cannot determine error
518  NoErrorCalc(err, errUpper);
519 
520  return InterpretFormula( ev, fBestPars.begin(), fBestPars.end() );
521 }
522 
523 ////////////////////////////////////////////////////////////////////////////////
524 
525 const std::vector<Float_t>& TMVA::MethodFDA::GetRegressionValues()
526 {
527  if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
528  fRegressionReturnVal->clear();
529 
530  const Event* ev = GetEvent();
531 
532  Event* evT = new Event(*ev);
533 
534  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
535  Int_t offset = dim*fNPars;
536  evT->SetTarget(dim,InterpretFormula( ev, fBestPars.begin()+offset, fBestPars.begin()+offset+fNPars ) );
537  }
538  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
539  fRegressionReturnVal->push_back(evT2->GetTarget(0));
540 
541  delete evT;
542 
543  return (*fRegressionReturnVal);
544 }
545 
546 
547 ////////////////////////////////////////////////////////////////////////////////
548 
549 const std::vector<Float_t>& TMVA::MethodFDA::GetMulticlassValues()
550 {
551  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
552  fMulticlassReturnVal->clear();
553  std::vector<Float_t> temp;
554 
555  // returns MVA value for given event
556  const TMVA::Event* evt = GetEvent();
557 
558  CalculateMulticlassValues( evt, fBestPars, temp );
559 
560  UInt_t nClasses = DataInfo().GetNClasses();
561  for(UInt_t iClass=0; iClass<nClasses; iClass++){
562  Double_t norm = 0.0;
563  for(UInt_t j=0;j<nClasses;j++){
564  if(iClass!=j)
565  norm+=exp(temp[j]-temp[iClass]);
566  }
567  (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
568  }
569 
570  return (*fMulticlassReturnVal);
571 }
572 
573 
574 ////////////////////////////////////////////////////////////////////////////////
575 /// calculate the values for multiclass
576 
577 void TMVA::MethodFDA::CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values)
578 {
579  values.clear();
580 
581 // std::copy( parameters.begin(), parameters.end(), std::ostream_iterator<double>( std::cout, " " ) );
582 // std::cout << std::endl;
583 
584 // char inp;
585 // std::cin >> inp;
586 
587  Double_t sum=0;
588  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){ // check for all other dimensions (=classes)
589  Int_t offset = dim*fNPars;
590  Double_t value = InterpretFormula( evt, parameters.begin()+offset, parameters.begin()+offset+fNPars );
591 // std::cout << "dim : " << dim << " value " << value << " offset " << offset << std::endl;
592  values.push_back( value );
593  sum += value;
594  }
595 
596 // // normalize to sum of value (commented out, .. have to think of how to treat negative classifier values)
597 // std::transform( fMulticlassReturnVal.begin(), fMulticlassReturnVal.end(), fMulticlassReturnVal.begin(), bind2nd( std::divides<float>(), sum) );
598 }
599 
600 
601 
602 ////////////////////////////////////////////////////////////////////////////////
603 /// read back the training results from a file (stream)
604 
605 void TMVA::MethodFDA::ReadWeightsFromStream( std::istream& istr )
606 {
607  // retrieve best function parameters
608  // coverity[tainted_data_argument]
609  istr >> fNPars;
610 
611  fBestPars.clear();
612  fBestPars.resize( fNPars );
613  for (UInt_t ipar=0; ipar<fNPars; ipar++) istr >> fBestPars[ipar];
614 }
615 
616 ////////////////////////////////////////////////////////////////////////////////
617 /// create XML description for LD classification and regression
618 /// (for arbitrary number of output classes/targets)
619 
620 void TMVA::MethodFDA::AddWeightsXMLTo( void* parent ) const
621 {
622  void* wght = gTools().AddChild(parent, "Weights");
623  gTools().AddAttr( wght, "NPars", fNPars );
624  gTools().AddAttr( wght, "NDim", fOutputDimensions );
625  for (UInt_t ipar=0; ipar<fNPars*fOutputDimensions; ipar++) {
626  void* coeffxml = gTools().AddChild( wght, "Parameter" );
627  gTools().AddAttr( coeffxml, "Index", ipar );
628  gTools().AddAttr( coeffxml, "Value", fBestPars[ipar] );
629  }
630 
631  // write formula
632  gTools().AddAttr( wght, "Formula", fFormulaStringP );
633 }
634 
635 ////////////////////////////////////////////////////////////////////////////////
636 /// read coefficients from xml weight file
637 
639 {
640  gTools().ReadAttr( wghtnode, "NPars", fNPars );
641 
642  if(gTools().HasAttr( wghtnode, "NDim")) {
643  gTools().ReadAttr( wghtnode, "NDim" , fOutputDimensions );
644  } else {
645  // older weight files don't have this attribute
646  fOutputDimensions = 1;
647  }
648 
649  fBestPars.clear();
650  fBestPars.resize( fNPars*fOutputDimensions );
651 
652  void* ch = gTools().GetChild(wghtnode);
653  Double_t par;
654  UInt_t ipar;
655  while (ch) {
656  gTools().ReadAttr( ch, "Index", ipar );
657  gTools().ReadAttr( ch, "Value", par );
658 
659  // sanity check
660  if (ipar >= fNPars*fOutputDimensions) Log() << kFATAL << "<ReadWeightsFromXML> index out of range: "
661  << ipar << " >= " << fNPars << Endl;
662  fBestPars[ipar] = par;
663 
664  ch = gTools().GetNextChild(ch);
665  }
666 
667  // read formula
668  gTools().ReadAttr( wghtnode, "Formula", fFormulaStringP );
669 
670  // create the TFormula
671  CreateFormula();
672 }
673 
674 ////////////////////////////////////////////////////////////////////////////////
675 /// write FDA-specific classifier response
676 
677 void TMVA::MethodFDA::MakeClassSpecific( std::ostream& fout, const TString& className ) const
678 {
679  fout << " double fParameter[" << fNPars << "];" << std::endl;
680  fout << "};" << std::endl;
681  fout << "" << std::endl;
682  fout << "inline void " << className << "::Initialize() " << std::endl;
683  fout << "{" << std::endl;
684  for(UInt_t ipar=0; ipar<fNPars; ipar++) {
685  fout << " fParameter[" << ipar << "] = " << fBestPars[ipar] << ";" << std::endl;
686  }
687  fout << "}" << std::endl;
688  fout << std::endl;
689  fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
690  fout << "{" << std::endl;
691  fout << " // interpret the formula" << std::endl;
692 
693  // replace parameters
694  TString str = fFormulaStringT;
695  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
696  str.ReplaceAll( Form("[%i]", ipar), Form("fParameter[%i]", ipar) );
697  }
698 
699  // replace input variables
700  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
701  str.ReplaceAll( Form("[%i]", ivar+fNPars), Form("inputValues[%i]", ivar) );
702  }
703 
704  fout << " double retval = " << str << ";" << std::endl;
705  fout << std::endl;
706  fout << " return retval; " << std::endl;
707  fout << "}" << std::endl;
708  fout << std::endl;
709  fout << "// Clean up" << std::endl;
710  fout << "inline void " << className << "::Clear() " << std::endl;
711  fout << "{" << std::endl;
712  fout << " // nothing to clear" << std::endl;
713  fout << "}" << std::endl;
714 }
715 
716 ////////////////////////////////////////////////////////////////////////////////
717 /// get help message text
718 ///
719 /// typical length of text line:
720 /// "|--------------------------------------------------------------|"
721 
723 {
724  Log() << Endl;
725  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
726  Log() << Endl;
727  Log() << "The function discriminant analysis (FDA) is a classifier suitable " << Endl;
728  Log() << "to solve linear or simple nonlinear discrimination problems." << Endl;
729  Log() << Endl;
730  Log() << "The user provides the desired function with adjustable parameters" << Endl;
731  Log() << "via the configuration option string, and FDA fits the parameters to" << Endl;
732  Log() << "it, requiring the signal (background) function value to be as close" << Endl;
733  Log() << "as possible to 1 (0). Its advantage over the more involved and" << Endl;
734  Log() << "automatic nonlinear discriminators is the simplicity and transparency " << Endl;
735  Log() << "of the discrimination expression. A shortcoming is that FDA will" << Endl;
736  Log() << "underperform for involved problems with complicated, phase space" << Endl;
737  Log() << "dependent nonlinear correlations." << Endl;
738  Log() << Endl;
739  Log() << "Please consult the Users Guide for the format of the formula string" << Endl;
740  Log() << "and the allowed parameter ranges:" << Endl;
741  if (gConfig().WriteOptionsReference()) {
742  Log() << "<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">"
743  << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" << Endl;
744  }
745  else Log() << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" << Endl;
746  Log() << Endl;
747  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
748  Log() << Endl;
749  Log() << "The FDA performance depends on the complexity and fidelity of the" << Endl;
750  Log() << "user-defined discriminator function. As a general rule, it should" << Endl;
751  Log() << "be able to reproduce the discrimination power of any linear" << Endl;
752  Log() << "discriminant analysis. To reach into the nonlinear domain, it is" << Endl;
753  Log() << "useful to inspect the correlation profiles of the input variables," << Endl;
754  Log() << "and add quadratic and higher polynomial terms between variables as" << Endl;
755  Log() << "necessary. Comparison with more involved nonlinear classifiers can" << Endl;
756  Log() << "be used as a guide." << Endl;
757  Log() << Endl;
758  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
759  Log() << Endl;
760  Log() << "Depending on the function used, the choice of \"FitMethod\" is" << Endl;
761  Log() << "crucial for getting valuable solutions with FDA. As a guideline it" << Endl;
762  Log() << "is recommended to start with \"FitMethod=MINUIT\". When more complex" << Endl;
763  Log() << "functions are used where MINUIT does not converge to reasonable" << Endl;
764  Log() << "results, the user should switch to non-gradient FitMethods such" << Endl;
765  Log() << "as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" << Endl;
766  Log() << "useful to combine GA (or MC) with MINUIT by setting the option" << Endl;
767  Log() << "\"Converger=MINUIT\". GA (MC) will then set the starting parameters" << Endl;
768  Log() << "for MINUIT such that the basic quality of GA (MC) of finding global" << Endl;
769  Log() << "minima is combined with the efficacy of MINUIT of finding local" << Endl;
770  Log() << "minima." << Endl;
771 }
double par[1]
Definition: unuranDistr.cxx:38
void Init(void)
default initialisation
Definition: MethodFDA.cxx:121
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodFDA.cxx:157
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void ClearAll()
delete and clear all class members
Definition: MethodFDA.cxx:340
Ssiz_t Length() const
Definition: TString.h:390
Collectable string class.
Definition: TObjString.h:32
float Float_t
Definition: RtypesCore.h:53
tuple offset
Definition: tree.py:93
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
Config & gConfig()
std::vector< double > values
Definition: TwoHistoFit2D.C:32
EAnalysisType
Definition: Types.h:124
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition: MethodFDA.cxx:494
Basic string class.
Definition: TString.h:137
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition: MethodFDA.cxx:177
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:354
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:376
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:308
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:311
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters check maximum length of variable name
Definition: MethodFDA.cxx:414
const char * Data() const
Definition: TString.h:349
Tools & gTools()
Definition: Tools.cxx:79
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodFDA.cxx:513
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodFDA.cxx:328
A doubly linked list.
Definition: TList.h:47
ClassImp(TMVA::MethodFDA) TMVA
standard constructor
Definition: MethodFDA.cxx:76
TThread * t[5]
Definition: threadsh1.C:13
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition: MethodFDA.cxx:577
The F O R M U L A class.
Definition: TFormula.h:89
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
tuple w
Definition: qtexample.py:51
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:295
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Definition: MethodFDA.cxx:638
void Train(void)
FDA training.
Definition: MethodFDA.cxx:357
void ProcessOptions()
the option string is decoded, for availabel options see "DeclareOptions"
Definition: MethodFDA.cxx:234
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Definition: MethodFDA.cxx:677
int Ssiz_t
Definition: RtypesCore.h:63
virtual Int_t GetSize() const
Definition: TCollection.h:95
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:44
int type
Definition: TGX11.cxx:120
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised) const Double_t sumOfWeights[] = { fSumOfW...
Definition: MethodFDA.cxx:430
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:231
Double_t y[n]
Definition: legend1.C:17
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Definition: MethodFDA.cxx:605
UInt_t GetClass() const
Definition: Event.h:86
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=0)
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:896
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:101
#define REGISTER_METHOD(CLASS)
for example
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodFDA.cxx:525
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
#define NULL
Definition: Rtypes.h:82
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodFDA.cxx:549
double result[121]
void GetHelpMessage() const
get help message text
Definition: MethodFDA.cxx:722
double exp(double)
const Bool_t kTRUE
Definition: Rtypes.h:91
float value
Definition: math.cpp:443
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
virtual ~MethodFDA(void)
destructor
Definition: MethodFDA.cxx:320
Definition: math.cpp:60
double log(double)
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:453
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
Definition: MethodFDA.cxx:620