Logo ROOT   6.07/09
Reference Guide
MethodFisher.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Xavier Prudent, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6  * Package: TMVA *
7  * Class : MethodFisher *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Original author of this Fisher-Discriminant implementation: *
14  * Andre Gaidot, CEA-France; *
15  * (Translation from FORTRAN) *
16  * *
17  * Authors (alphabetical): *
18  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
19  * Xavier Prudent <prudent@lapp.in2p3.fr> - LAPP, France *
20  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
21  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
22  * *
23  * Copyright (c) 2005: *
24  * CERN, Switzerland *
25  * U. of Victoria, Canada *
26  * MPI-K Heidelberg, Germany *
27  * LAPP, Annecy, France *
28  * *
29  * Redistribution and use in source and binary forms, with or without *
30  * modification, are permitted according to the terms listed in LICENSE *
31  * (http://tmva.sourceforge.net/LICENSE) *
32  **********************************************************************************/
33 
34 ////////////////////////////////////////////////////////////////////////////////
35 
36 /* Begin_Html
37  Fisher and Mahalanobis Discriminants (Linear Discriminant Analysis)
38 
39  <p>
40  In the method of Fisher discriminants event selection is performed
41  in a transformed variable space with zero linear correlations, by
42  distinguishing the mean values of the signal and background
43  distributions.<br></p>
44 
45  <p>
46  The linear discriminant analysis determines an axis in the (correlated)
47  hyperspace of the input variables
48  such that, when projecting the output classes (signal and background)
49  upon this axis, they are pushed as far as possible away from each other,
50  while events of a same class are confined in a close vicinity.
51  The linearity property of this method is reflected in the metric with
52  which "far apart" and "close vicinity" are determined: the covariance
53  matrix of the discriminant variable space.
54  </p>
55 
56  <p>
57  The classification of the events in signal and background classes
58  relies on the following characteristics (only): overall sample means,
59  <i><my:o>x</my:o><sub>i</sub></i>, for each input variable, <i>i</i>,
60  class-specific sample means, <i><my:o>x</my:o><sub>S(B),i</sub></i>,
61  and total covariance matrix <i>T<sub>ij</sub></i>. The covariance matrix
62  can be decomposed into the sum of a <i>within-</i> (<i>W<sub>ij</sub></i>)
63  and a <i>between-class</i> (<i>B<sub>ij</sub></i>) class matrix. They describe
64  the dispersion of events relative to the means of their own class (within-class
65  matrix), and relative to the overall sample means (between-class matrix).
66  The Fisher coefficients, <i>F<sub>i</sub></i>, are then given by <br>
67  <center>
68  <img vspace=6 src="gif/tmva_fisherC.gif" align="bottom" >
69  </center>
70  where in TMVA is set <i>N<sub>S</sub>=N<sub>B</sub></i>, so that the factor
71  in front of the sum simplifies to &frac12;.
72  The Fisher discriminant then reads<br>
73  <center>
74  <img vspace=6 src="gif/tmva_fisherD.gif" align="bottom" >
75  </center>
76  The offset <i>F</i><sub>0</sub> centers the sample mean of <i>x</i><sub>Fi</sub>
77  at zero. Instead of using the within-class matrix, the Mahalanobis variant
78  determines the Fisher coefficients as follows:<br>
79  <center>
80  <img vspace=6 src="gif/tmva_mahaC.gif" align="bottom" >
81  </center>
82  with resulting <i>x</i><sub>Ma</sub> that are very similar to the
83  <i>x</i><sub>Fi</sub>. <br></p>
84 
85  TMVA provides two outputs for the ranking of the input variables:<br><p></p>
86  <ul>
87  <li> <u>Fisher test:</u> the Fisher analysis aims at simultaneously maximising
88  the between-class separation, while minimising the within-class dispersion.
89  A useful measure of the discrimination power of a variable is hence given
90  by the diagonal quantity: <i>B<sub>ii</sub>/W<sub>ii</sub></i>.
91  </li>
92 
93  <li> <u>Discrimination power:</u> the value of the Fisher coefficient is a
94  measure of the discriminating power of a variable. The discrimination power
95  of set of input variables can therefore be measured by the scalar
96  <center>
97  <img vspace=6 src="gif/tmva_discpower.gif" align="bottom" >
98  </center>
99  </li>
100  </ul>
101  The corresponding numbers are printed on standard output.
102  End_Html */
103 //_______________________________________________________________________
104 
105 #include "TMVA/MethodFisher.h"
106 
107 #include "TMVA/ClassifierFactory.h"
108 #include "TMVA/Configurable.h"
109 #include "TMVA/DataSet.h"
110 #include "TMVA/DataSetInfo.h"
111 #include "TMVA/Event.h"
112 #include "TMVA/IMethod.h"
113 #include "TMVA/MethodBase.h"
114 #include "TMVA/MsgLogger.h"
115 #include "TMVA/Ranking.h"
116 #include "TMVA/Tools.h"
118 #include "TMVA/Types.h"
120 
121 #include "TMath.h"
122 #include "TMatrix.h"
123 #include "TList.h"
124 #include "Riostream.h"
125 
#include <cassert>
#include <iomanip>
#include <vector>
128 
129 REGISTER_METHOD(Fisher)
130 
131 ClassImp(TMVA::MethodFisher);
132 
133 ////////////////////////////////////////////////////////////////////////////////
134 /// standard constructor for the "Fisher"
135 
136 TMVA::MethodFisher::MethodFisher( const TString& jobName,
137  const TString& methodTitle,
138  DataSetInfo& dsi,
139  const TString& theOption ) :
140  MethodBase( jobName, Types::kFisher, methodTitle, dsi, theOption),
141  fMeanMatx ( 0 ),
142  fTheMethod ( "Fisher" ),
143  fFisherMethod ( kFisher ),
144  fBetw ( 0 ),
145  fWith ( 0 ),
146  fCov ( 0 ),
147  fSumOfWeightsS( 0 ),
148  fSumOfWeightsB( 0 ),
149  fDiscrimPow ( 0 ),
150  fFisherCoeff ( 0 ),
151  fF0 ( 0 )
152 {
153 }
154 
155 ////////////////////////////////////////////////////////////////////////////////
156 /// constructor from weight file
157 
159  const TString& theWeightFile) :
160  MethodBase( Types::kFisher, dsi, theWeightFile),
161  fMeanMatx ( 0 ),
162  fTheMethod ( "Fisher" ),
163  fFisherMethod ( kFisher ),
164  fBetw ( 0 ),
165  fWith ( 0 ),
166  fCov ( 0 ),
167  fSumOfWeightsS( 0 ),
168  fSumOfWeightsB( 0 ),
169  fDiscrimPow ( 0 ),
170  fFisherCoeff ( 0 ),
171  fF0 ( 0 )
172 {
173 }
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 /// default initialization called by all constructors
177 
179 {
180  // allocate Fisher coefficients
181  fFisherCoeff = new std::vector<Double_t>( GetNvar() );
182 
183  // the minimum requirement to declare an event signal-like
184  SetSignalReferenceCut( 0.0 );
185 
186  // this is the preparation for training
187  InitMatrices();
188 }
189 
190 ////////////////////////////////////////////////////////////////////////////////
191 ///
192 /// MethodFisher options:
193 /// format and syntax of option string: "type"
194 /// where type is "Fisher" or "Mahalanobis"
195 ///
196 
198 {
199  DeclareOptionRef( fTheMethod = "Fisher", "Method", "Discrimination method" );
200  AddPreDefVal(TString("Fisher"));
201  AddPreDefVal(TString("Mahalanobis"));
202 }
203 
204 ////////////////////////////////////////////////////////////////////////////////
205 /// process user options
206 
208 {
209  if (fTheMethod == "Fisher" ) fFisherMethod = kFisher;
211 
212  // this is the preparation for training
213  InitMatrices();
214 }
215 
216 ////////////////////////////////////////////////////////////////////////////////
217 /// destructor
218 
220 {
221  if (fBetw ) { delete fBetw; fBetw = 0; }
222  if (fWith ) { delete fWith; fWith = 0; }
223  if (fCov ) { delete fCov; fCov = 0; }
224  if (fDiscrimPow ) { delete fDiscrimPow; fDiscrimPow = 0; }
225  if (fFisherCoeff) { delete fFisherCoeff; fFisherCoeff = 0; }
226 }
227 
228 ////////////////////////////////////////////////////////////////////////////////
229 /// Fisher can only handle classification with 2 classes
230 
232 {
233  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
234  return kFALSE;
235 }
236 
237 ////////////////////////////////////////////////////////////////////////////////
238 /// computation of Fisher coefficients by series of matrix operations
239 
241 {
242  // get mean value of each variables for signal, backgd and signal+backgd
243  GetMean();
244 
245  // get the matrix of covariance 'within class'
247 
248  // get the matrix of covariance 'between class'
250 
251  // get the matrix of covariance 'between class'
252  GetCov_Full();
253 
254  //--------------------------------------------------------------
255 
256  // get the Fisher coefficients
257  GetFisherCoeff();
258 
259  // get the discriminating power of each variables
260  GetDiscrimPower();
261 
262  // nice output
264 
266 }
267 
268 ////////////////////////////////////////////////////////////////////////////////
269 /// returns the Fisher value (no fixed range)
270 
272 {
273  const Event * ev = GetEvent();
274  Double_t result = fF0;
275  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
276  result += (*fFisherCoeff)[ivar]*ev->GetValue(ivar);
277 
278  // cannot determine error
279  NoErrorCalc(err, errUpper);
280 
281  return result;
282 
283 }
284 
285 ////////////////////////////////////////////////////////////////////////////////
286 /// initializaton method; creates global matrices and vectors
287 
289 {
290  // average value of each variables for S, B, S+B
291  fMeanMatx = new TMatrixD( GetNvar(), 3 );
292 
293  // the covariance 'within class' and 'between class' matrices
294  fBetw = new TMatrixD( GetNvar(), GetNvar() );
295  fWith = new TMatrixD( GetNvar(), GetNvar() );
296  fCov = new TMatrixD( GetNvar(), GetNvar() );
297 
298  // discriminating power
299  fDiscrimPow = new std::vector<Double_t>( GetNvar() );
300 }
301 
302 ////////////////////////////////////////////////////////////////////////////////
303 /// compute mean values of variables in each sample, and the overall means
304 
306 {
307  // initialize internal sum-of-weights variables
308  fSumOfWeightsS = 0;
309  fSumOfWeightsB = 0;
310 
311  const UInt_t nvar = DataInfo().GetNVariables();
312 
313  // init vectors
314  Double_t* sumS = new Double_t[nvar];
315  Double_t* sumB = new Double_t[nvar];
316  for (UInt_t ivar=0; ivar<nvar; ivar++) { sumS[ivar] = sumB[ivar] = 0; }
317 
318  // compute sample means
319  for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
320 
321  // read the Training Event into "event"
322  const Event * ev = GetEvent(ievt);
323 
324  // sum of weights
325  Double_t weight = ev->GetWeight();
326  if (DataInfo().IsSignal(ev)) fSumOfWeightsS += weight;
327  else fSumOfWeightsB += weight;
328 
329  Double_t* sum = DataInfo().IsSignal(ev) ? sumS : sumB;
330 
331  for (UInt_t ivar=0; ivar<nvar; ivar++) sum[ivar] += ev->GetValue( ivar )*weight;
332  }
333 
334  for (UInt_t ivar=0; ivar<nvar; ivar++) {
335  (*fMeanMatx)( ivar, 2 ) = sumS[ivar];
336  (*fMeanMatx)( ivar, 0 ) = sumS[ivar]/fSumOfWeightsS;
337 
338  (*fMeanMatx)( ivar, 2 ) += sumB[ivar];
339  (*fMeanMatx)( ivar, 1 ) = sumB[ivar]/fSumOfWeightsB;
340 
341  // signal + background
342  (*fMeanMatx)( ivar, 2 ) /= (fSumOfWeightsS + fSumOfWeightsB);
343  }
344 
345  // fMeanMatx->Print();
346  delete [] sumS;
347  delete [] sumB;
348 }
349 
350 ////////////////////////////////////////////////////////////////////////////////
351 /// the matrix of covariance 'within class' reflects the dispersion of the
352 /// events relative to the center of gravity of their own class
353 
355 {
356  // assert required
357  assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0 );
358 
359  // product matrices (x-<x>)(y-<y>) where x;y are variables
360 
361  // init
362  const Int_t nvar = GetNvar();
363  const Int_t nvar2 = nvar*nvar;
364  Double_t *sumSig = new Double_t[nvar2];
365  Double_t *sumBgd = new Double_t[nvar2];
366  Double_t *xval = new Double_t[nvar];
367  memset(sumSig,0,nvar2*sizeof(Double_t));
368  memset(sumBgd,0,nvar2*sizeof(Double_t));
369 
370  // 'within class' covariance
371  for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
372 
373  // read the Training Event into "event"
374  const Event* ev = GetEvent(ievt);
375 
376  Double_t weight = ev->GetWeight(); // may ignore events with negative weights
377 
378  for (Int_t x=0; x<nvar; x++) xval[x] = ev->GetValue( x );
379  Int_t k=0;
380  for (Int_t x=0; x<nvar; x++) {
381  for (Int_t y=0; y<nvar; y++) {
382  if (DataInfo().IsSignal(ev)) {
383  Double_t v = ( (xval[x] - (*fMeanMatx)(x, 0))*(xval[y] - (*fMeanMatx)(y, 0)) )*weight;
384  sumSig[k] += v;
385  }else{
386  Double_t v = ( (xval[x] - (*fMeanMatx)(x, 1))*(xval[y] - (*fMeanMatx)(y, 1)) )*weight;
387  sumBgd[k] += v;
388  }
389  k++;
390  }
391  }
392  }
393  Int_t k=0;
394  for (Int_t x=0; x<nvar; x++) {
395  for (Int_t y=0; y<nvar; y++) {
396  //(*fWith)(x, y) = (sumSig[k] + sumBgd[k])/(fSumOfWeightsS + fSumOfWeightsB);
397  // HHV: I am still convinced that THIS is how it should be (below) However, while
398  // the old version corresponded so nicely with LD, the FIXED version does not, unless
399  // we agree to change LD. For LD, it is not "defined" to my knowledge how the weights
400  // are weighted, while it is clear how the "Within" matrix for Fisher should be calcuated
401  // (i.e. as seen below). In order to agree with the Fisher classifier, one would have to
402  // weigh signal and background such that they correspond to the same number of effective
403  // (weithed) events.
404  // THAT is NOT done currently, but just "event weights" are used.
405  (*fWith)(x, y) = sumSig[k]/fSumOfWeightsS + sumBgd[k]/fSumOfWeightsB;
406  k++;
407  }
408  }
409 
410  delete [] sumSig;
411  delete [] sumBgd;
412  delete [] xval;
413 }
414 
415 ////////////////////////////////////////////////////////////////////////////////
416 /// the matrix of covariance 'between class' reflects the dispersion of the
417 /// events of a class relative to the global center of gravity of all the class
418 /// hence the separation between classes
419 
421 {
422  // assert required
423  assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0);
424 
425  Double_t prodSig, prodBgd;
426 
427  for (UInt_t x=0; x<GetNvar(); x++) {
428  for (UInt_t y=0; y<GetNvar(); y++) {
429 
430  prodSig = ( ((*fMeanMatx)(x, 0) - (*fMeanMatx)(x, 2))*
431  ((*fMeanMatx)(y, 0) - (*fMeanMatx)(y, 2)) );
432  prodBgd = ( ((*fMeanMatx)(x, 1) - (*fMeanMatx)(x, 2))*
433  ((*fMeanMatx)(y, 1) - (*fMeanMatx)(y, 2)) );
434 
435  (*fBetw)(x, y) = (fSumOfWeightsS*prodSig + fSumOfWeightsB*prodBgd) / (fSumOfWeightsS + fSumOfWeightsB);
436  }
437  }
438 }
439 
440 ////////////////////////////////////////////////////////////////////////////////
441 /// compute full covariance matrix from sum of within and between matrices
442 
444 {
445  for (UInt_t x=0; x<GetNvar(); x++)
446  for (UInt_t y=0; y<GetNvar(); y++)
447  (*fCov)(x, y) = (*fWith)(x, y) + (*fBetw)(x, y);
448 }
449 
450 ////////////////////////////////////////////////////////////////////////////////
451 /// Fisher = Sum { [coeff]*[variables] }
452 ///
453 /// let Xs be the array of the mean values of variables for signal evts
454 /// let Xb be the array of the mean values of variables for backgd evts
455 /// let InvWith be the inverse matrix of the 'within class' correlation matrix
456 ///
457 /// then the array of Fisher coefficients is
458 /// [coeff] =sqrt(fNsig*fNbgd)/fNevt*transpose{Xs-Xb}*InvWith
459 
461 {
462  // assert required
463  assert( fSumOfWeightsS > 0 && fSumOfWeightsB > 0);
464 
465  // invert covariance matrix
466  TMatrixD* theMat = 0;
467  switch (GetFisherMethod()) {
468  case kFisher:
469  theMat = fWith;
470  break;
471  case kMahalanobis:
472  theMat = fCov;
473  break;
474  default:
475  Log() << kFATAL << "<GetFisherCoeff> undefined method" << GetFisherMethod() << Endl;
476  }
477 
478  TMatrixD invCov( *theMat );
479 
480  if ( TMath::Abs(invCov.Determinant()) < 10E-24 ) {
481  Log() << kWARNING << "<GetFisherCoeff> matrix is almost singular with deterninant="
482  << TMath::Abs(invCov.Determinant())
483  << " did you use the variables that are linear combinations or highly correlated?"
484  << Endl;
485  }
486  if ( TMath::Abs(invCov.Determinant()) < 10E-120 ) {
487  theMat->Print();
488  Log() << kFATAL << "<GetFisherCoeff> matrix is singular with determinant="
489  << TMath::Abs(invCov.Determinant())
490  << " did you use the variables that are linear combinations? \n"
491  << " do you any clue as to what went wrong in above printout of the covariance matrix? "
492  << Endl;
493  }
494 
495  invCov.Invert();
496 
497  // apply rescaling factor
499 
500  // compute difference of mean values
501  std::vector<Double_t> diffMeans( GetNvar() );
502  UInt_t ivar, jvar;
503  for (ivar=0; ivar<GetNvar(); ivar++) {
504  (*fFisherCoeff)[ivar] = 0;
505 
506  for (jvar=0; jvar<GetNvar(); jvar++) {
507  Double_t d = (*fMeanMatx)(jvar, 0) - (*fMeanMatx)(jvar, 1);
508  (*fFisherCoeff)[ivar] += invCov(ivar, jvar)*d;
509  }
510  // rescale
511  (*fFisherCoeff)[ivar] *= xfact;
512  }
513 
514 
515  // offset correction
516  fF0 = 0.0;
517  for (ivar=0; ivar<GetNvar(); ivar++){
518  fF0 += (*fFisherCoeff)[ivar]*((*fMeanMatx)(ivar, 0) + (*fMeanMatx)(ivar, 1));
519  }
520  fF0 /= -2.0;
521 }
522 
523 ////////////////////////////////////////////////////////////////////////////////
524 /// computation of discrimination power indicator for each variable
525 /// small values of "fWith" indicates little compactness of sig & of backgd
526 /// big values of "fBetw" indicates large separation between sig & backgd
527 ///
528 /// we want signal & backgd classes as compact and separated as possible
529 /// the discriminating power is then defined as the ration "fBetw/fWith"
530 
532 {
533  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
534  if ((*fCov)(ivar, ivar) != 0)
535  (*fDiscrimPow)[ivar] = (*fBetw)(ivar, ivar)/(*fCov)(ivar, ivar);
536  else
537  (*fDiscrimPow)[ivar] = 0;
538  }
539 }
540 
541 ////////////////////////////////////////////////////////////////////////////////
542 /// computes ranking of input variables
543 
545 {
546  // create the ranking object
547  fRanking = new Ranking( GetName(), "Discr. power" );
548 
549  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
550  fRanking->AddRank( Rank( GetInputLabel(ivar), (*fDiscrimPow)[ivar] ) );
551  }
552 
553  return fRanking;
554 }
555 
556 ////////////////////////////////////////////////////////////////////////////////
557 /// display Fisher coefficients and discriminating power for each variable
558 /// check maximum length of variable name
559 
561 {
562  Log() << kHEADER << "Results for Fisher coefficients:" << Endl;
563 
564  if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
565  Log() << kINFO << "NOTE: The coefficients must be applied to TRANFORMED variables" << Endl;
566  Log() << kINFO << " List of the transformation: " << Endl;
567  TListIter trIt(&GetTransformationHandler().GetTransformationList());
568  while (VariableTransformBase *trf = (VariableTransformBase*) trIt()) {
569  Log() << kINFO << " -- " << trf->GetName() << Endl;
570  }
571  }
572  std::vector<TString> vars;
573  std::vector<Double_t> coeffs;
574  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
575  vars .push_back( GetInputLabel(ivar) );
576  coeffs.push_back( (*fFisherCoeff)[ivar] );
577  }
578  vars .push_back( "(offset)" );
579  coeffs.push_back( fF0 );
580  TMVA::gTools().FormattedOutput( coeffs, vars, "Variable" , "Coefficient", Log() );
581 
582  // for (int i=0; i<coeffs.size(); i++)
583  // std::cout << "fisher coeff["<<i<<"]="<<coeffs[i]<<std::endl;
584 
585  if (IsNormalised()) {
586  Log() << kINFO << "NOTE: You have chosen to use the \"Normalise\" booking option. Hence, the" << Endl;
587  Log() << kINFO << " coefficients must be applied to NORMALISED (') variables as follows:" << Endl;
588  Int_t maxL = 0;
589  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) if (GetInputLabel(ivar).Length() > maxL) maxL = GetInputLabel(ivar).Length();
590 
591  // Print normalisation expression (see Tools.cxx): "2*(x - xmin)/(xmax - xmin) - 1.0"
592  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
593  Log() << kINFO
594  << std::setw(maxL+9) << TString("[") + GetInputLabel(ivar) + "]' = 2*("
595  << std::setw(maxL+2) << TString("[") + GetInputLabel(ivar) + "]"
596  << std::setw(3) << (GetXmin(ivar) > 0 ? " - " : " + ")
597  << std::setw(6) << TMath::Abs(GetXmin(ivar)) << std::setw(3) << ")/"
598  << std::setw(6) << (GetXmax(ivar) - GetXmin(ivar) )
599  << std::setw(3) << " - 1"
600  << Endl;
601  }
602  Log() << kINFO << "The TMVA Reader will properly account for this normalisation, but if the" << Endl;
603  Log() << kINFO << "Fisher classifier is applied outside the Reader, the transformation must be" << Endl;
604  Log() << kINFO << "implemented -- or the \"Normalise\" option is removed and Fisher retrained." << Endl;
605  Log() << kINFO << Endl;
606  }
607 }
608 
609 ////////////////////////////////////////////////////////////////////////////////
610 /// read Fisher coefficients from weight file
611 
613 {
614  istr >> fF0;
615  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) istr >> (*fFisherCoeff)[ivar];
616 }
617 
618 ////////////////////////////////////////////////////////////////////////////////
619 /// create XML description of Fisher classifier
620 
621 void TMVA::MethodFisher::AddWeightsXMLTo( void* parent ) const
622 {
623  void* wght = gTools().AddChild(parent, "Weights");
624  gTools().AddAttr( wght, "NCoeff", GetNvar()+1 );
625  void* coeffxml = gTools().AddChild(wght, "Coefficient");
626  gTools().AddAttr( coeffxml, "Index", 0 );
627  gTools().AddAttr( coeffxml, "Value", fF0 );
628  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
629  coeffxml = gTools().AddChild( wght, "Coefficient" );
630  gTools().AddAttr( coeffxml, "Index", ivar+1 );
631  gTools().AddAttr( coeffxml, "Value", (*fFisherCoeff)[ivar] );
632  }
633 }
634 
635 ////////////////////////////////////////////////////////////////////////////////
636 /// read Fisher coefficients from xml weight file
637 
639 {
640  UInt_t ncoeff, coeffidx;
641  gTools().ReadAttr( wghtnode, "NCoeff", ncoeff );
642  fFisherCoeff->resize(ncoeff-1);
643 
644  void* ch = gTools().GetChild(wghtnode);
645  Double_t coeff;
646  while (ch) {
647  gTools().ReadAttr( ch, "Index", coeffidx );
648  gTools().ReadAttr( ch, "Value", coeff );
649  if (coeffidx==0) fF0 = coeff;
650  else (*fFisherCoeff)[coeffidx-1] = coeff;
651  ch = gTools().GetNextChild(ch);
652  }
653 }
654 
655 ////////////////////////////////////////////////////////////////////////////////
656 /// write Fisher-specific classifier response
657 
658 void TMVA::MethodFisher::MakeClassSpecific( std::ostream& fout, const TString& className ) const
659 {
660  Int_t dp = fout.precision();
661  fout << " double fFisher0;" << std::endl;
662  fout << " std::vector<double> fFisherCoefficients;" << std::endl;
663  fout << "};" << std::endl;
664  fout << "" << std::endl;
665  fout << "inline void " << className << "::Initialize() " << std::endl;
666  fout << "{" << std::endl;
667  fout << " fFisher0 = " << std::setprecision(12) << fF0 << ";" << std::endl;
668  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
669  fout << " fFisherCoefficients.push_back( " << std::setprecision(12) << (*fFisherCoeff)[ivar] << " );" << std::endl;
670  }
671  fout << std::endl;
672  fout << " // sanity check" << std::endl;
673  fout << " if (fFisherCoefficients.size() != fNvars) {" << std::endl;
674  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\"::Initialize: mismatch in number of input values\"" << std::endl;
675  fout << " << fFisherCoefficients.size() << \" != \" << fNvars << std::endl;" << std::endl;
676  fout << " fStatusIsClean = false;" << std::endl;
677  fout << " } " << std::endl;
678  fout << "}" << std::endl;
679  fout << std::endl;
680  fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
681  fout << "{" << std::endl;
682  fout << " double retval = fFisher0;" << std::endl;
683  fout << " for (size_t ivar = 0; ivar < fNvars; ivar++) {" << std::endl;
684  fout << " retval += fFisherCoefficients[ivar]*inputValues[ivar];" << std::endl;
685  fout << " }" << std::endl;
686  fout << std::endl;
687  fout << " return retval;" << std::endl;
688  fout << "}" << std::endl;
689  fout << std::endl;
690  fout << "// Clean up" << std::endl;
691  fout << "inline void " << className << "::Clear() " << std::endl;
692  fout << "{" << std::endl;
693  fout << " // clear coefficients" << std::endl;
694  fout << " fFisherCoefficients.clear(); " << std::endl;
695  fout << "}" << std::endl;
696  fout << std::setprecision(dp);
697 }
698 
699 ////////////////////////////////////////////////////////////////////////////////
700 /// get help message text
701 ///
702 /// typical length of text line:
703 /// "|--------------------------------------------------------------|"
704 
706 {
707  Log() << Endl;
708  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
709  Log() << Endl;
710  Log() << "Fisher discriminants select events by distinguishing the mean " << Endl;
711  Log() << "values of the signal and background distributions in a trans- " << Endl;
712  Log() << "formed variable space where linear correlations are removed." << Endl;
713  Log() << Endl;
714  Log() << " (More precisely: the \"linear discriminator\" determines" << Endl;
715  Log() << " an axis in the (correlated) hyperspace of the input " << Endl;
716  Log() << " variables such that, when projecting the output classes " << Endl;
717  Log() << " (signal and background) upon this axis, they are pushed " << Endl;
718  Log() << " as far as possible away from each other, while events" << Endl;
719  Log() << " of a same class are confined in a close vicinity. The " << Endl;
720  Log() << " linearity property of this classifier is reflected in the " << Endl;
721  Log() << " metric with which \"far apart\" and \"close vicinity\" are " << Endl;
722  Log() << " determined: the covariance matrix of the discriminating" << Endl;
723  Log() << " variable space.)" << Endl;
724  Log() << Endl;
725  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
726  Log() << Endl;
727  Log() << "Optimal performance for Fisher discriminants is obtained for " << Endl;
728  Log() << "linearly correlated Gaussian-distributed variables. Any deviation" << Endl;
729  Log() << "from this ideal reduces the achievable separation power. In " << Endl;
730  Log() << "particular, no discrimination at all is achieved for a variable" << Endl;
731  Log() << "that has the same sample mean for signal and background, even if " << Endl;
732  Log() << "the shapes of the distributions are very different. Thus, Fisher " << Endl;
733  Log() << "discriminants often benefit from suitable transformations of the " << Endl;
734  Log() << "input variables. For example, if a variable x in [-1,1] has a " << Endl;
735  Log() << "a parabolic signal distributions, and a uniform background" << Endl;
736  Log() << "distributions, their mean value is zero in both cases, leading " << Endl;
737  Log() << "to no separation. The simple transformation x -> |x| renders this " << Endl;
738  Log() << "variable powerful for the use in a Fisher discriminant." << Endl;
739  Log() << Endl;
740  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
741  Log() << Endl;
742  Log() << "<None>" << Endl;
743 }
void GetCov_BetweenClass(void)
the matrix of covariance &#39;between class&#39; reflects the dispersion of the events of a class relative to...
const Ranking * CreateRanking()
computes ranking of input variables
void MakeClassSpecific(std::ostream &, const TString &) const
write Fisher-specific classifier response
static long int sum(long int i)
Definition: Factory.cxx:1785
MethodFisher(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="Fisher")
standard constructor for the "Fisher"
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
#define REGISTER_METHOD(CLASS)
for example
void ReadWeightsFromStream(std::istream &i)
read Fisher coefficients from weight file
virtual ~MethodFisher(void)
destructor
Ssiz_t Length() const
Definition: TString.h:390
void AddWeightsXMLTo(void *parent) const
create XML description of Fisher classifier
void GetCov_Full(void)
compute full covariance matrix from sum of within and between matrices
void GetDiscrimPower(void)
computation of discrimination power indicator for each variable small values of "fWith" indicates lit...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Fisher can only handle classification with 2 classes.
const char * GetName() const
Definition: MethodBase.h:330
EFisherMethod GetFisherMethod(void)
Definition: MethodFisher.h:91
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
DataSet * Data() const
Definition: MethodBase.h:405
UInt_t GetNvar() const
Definition: MethodBase.h:340
Double_t fSumOfWeightsS
Definition: MethodFisher.h:148
EAnalysisType
Definition: Types.h:128
Bool_t IsNormalised() const
Definition: MethodBase.h:490
Basic string class.
Definition: TString.h:137
TMatrixD * fMeanMatx
Definition: MethodFisher.h:136
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Double_t > * fFisherCoeff
Definition: MethodFisher.h:152
const Bool_t kFALSE
Definition: Rtypes.h:92
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:309
UInt_t GetNVariables() const
Definition: DataSetInfo.h:128
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
Iterator of linked list.
Definition: TList.h:187
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:233
Tools & gTools()
Definition: Tools.cxx:79
Double_t x[n]
Definition: legend1.C:17
Double_t fSumOfWeightsB
Definition: MethodFisher.h:149
Bool_t IsSignal(const Event *ev) const
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
EFisherMethod fFisherMethod
Definition: MethodFisher.h:140
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
Definition: TMatrixT.cxx:1396
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:24
void Print(Option_t *name="") const
Print the matrix as a table of elements.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns the Fisher value (no fixed range)
void Train(void)
computation of Fisher coefficients by series of matrix operations
void ReadWeightsFromXML(void *wghtnode)
read Fisher coefficients from xml weight file
void ProcessOptions()
process user options
SVector< double, 2 > v
Definition: Dict.h:5
unsigned int UInt_t
Definition: RtypesCore.h:42
const Event * GetEvent() const
Definition: MethodBase.h:745
Double_t E()
Definition: TMath.h:54
void InitMatrices(void)
initializaton method; creates global matrices and vectors
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:296
void GetHelpMessage() const
get help message text
void DeclareOptions()
MethodFisher options: format and syntax of option string: "type" where type is "Fisher" or "Mahalanob...
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
void GetMean(void)
compute mean values of variables in each sample, and the overall means
void GetCov_WithinClass(void)
the matrix of covariance &#39;within class&#39; reflects the dispersion of the events relative to the center ...
int type
Definition: TGX11.cxx:120
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:229
Double_t y[n]
Definition: legend1.C:17
MsgLogger & Log() const
Definition: Configurable.h:128
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
void AddPreDefVal(const T &)
Definition: Configurable.h:174
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:346
void ExitFromTraining()
Definition: MethodBase.h:458
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:896
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
Abstract ClassifierFactory template that handles arbitrary types.
Ranking * fRanking
Definition: MethodBase.h:581
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:353
virtual void AddRank(const Rank &rank)
Add a new rank take ownership of it.
Definition: Ranking.cxx:86
virtual Double_t Determinant() const
Return the matrix determinant.
Definition: TMatrixT.cxx:1361
void PrintCoefficients(void)
display Fisher coefficients and discriminating power for each variable check maximum length of variab...
double result[121]
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
void GetFisherCoeff(void)
Fisher = Sum { [coeff]*[variables] }.
const Bool_t kTRUE
Definition: Rtypes.h:91
std::vector< Double_t > * fDiscrimPow
Definition: MethodFisher.h:151
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:819
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:360
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:352
void Init(void)
default initialization called by all constructors