Logo ROOT   6.07/09
Reference Guide
LDA.cxx
Go to the documentation of this file.
1 // $Id$
2 /**********************************************************************************
3  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
4  * Package: TMVA *
5  * Class : LDA *
6  * Web : http://tmva.sourceforge.net *
7  * *
8  * Description: *
9  * Local LDA method used by MethodKNN to compute MVA value. *
10  * This is experimental code under development. This class computes *
11  * parameters of signal and background PDFs using Gaussian aproximation. *
12  * *
13  * Author: *
14  * John Alison John.Alison@cern.ch - University of Pennsylvania, USA *
15  * *
16  * Copyright (c) 2007: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * University of Pennsylvania, USA *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 // Local
27 #include "TMVA/LDA.h"
28 
29 // C/C++
30 #include <iostream>
31 
32 #ifndef ROOT_TDecompSVD
33 #include "TDecompSVD.h"
34 #endif
35 #ifndef ROOT_TMatrixF
36 #include "TMatrixF.h"
37 #endif
38 #ifndef ROOT_TMath
39 #include "TMath.h"
40 #endif
41 
42 #ifndef ROOT_TMVA_Types
43 #include "TMVA/Types.h"
44 #endif
45 #ifndef ROOT_TMVA_MsgLogger
46 #include "TMVA/MsgLogger.h"
47 #endif
48 
49 ////////////////////////////////////////////////////////////////////////////////
50 /// constructor
51 
53  : fTolerence(tolerence),
54  fNumParams(0),
55  fSigma(0),
56  fSigmaInverse(0),
57  fDebug(debug),
58  fLogger( new MsgLogger("LDA", (debug?kINFO:kDEBUG)) )
59 {
60 }
61 
62 ////////////////////////////////////////////////////////////////////////////////
63 /// destructor
64 
66 {
67  delete fLogger;
68 }
69 
70 ////////////////////////////////////////////////////////////////////////////////
71 /// Create LDA matrix using local events found by knn method
72 
73 void TMVA::LDA::Initialize(const LDAEvents& inputSignalEvents, const LDAEvents& inputBackgroundEvents)
74 {
75  Log() << kDEBUG << "There are: " << inputSignalEvents.size() << " input signal events " << Endl;
76  Log() << kDEBUG << "There are: " << inputBackgroundEvents.size() << " input background events " << Endl;
77 
78  fNumParams = inputSignalEvents[0].size();
79 
80  UInt_t numSignalEvents = inputSignalEvents.size();
81  UInt_t numBackEvents = inputBackgroundEvents.size();
82  UInt_t numTotalEvents = numSignalEvents + numBackEvents;
83  fEventFraction[0] = (Float_t)numBackEvents/numTotalEvents;
84  fEventFraction[1] = (Float_t)numSignalEvents/numTotalEvents;
85  UInt_t K = 2;
86 
87  // get the mean of the signal and background for each parameter
88  std::vector<Float_t> m_muSignal (fNumParams,0.0);
89  std::vector<Float_t> m_muBackground (fNumParams,0.0);
90  for (UInt_t param=0; param < fNumParams; ++param) {
91  for (UInt_t eventNumber=0; eventNumber < numSignalEvents; ++eventNumber)
92  m_muSignal[param] += inputSignalEvents[eventNumber][param];
93  for (UInt_t eventNumber=0; eventNumber < numBackEvents; ++eventNumber)
94  m_muBackground[param] += inputBackgroundEvents[eventNumber][param]/numBackEvents;
95  if (numSignalEvents > 0) m_muSignal[param] /= numSignalEvents;
96  if (numBackEvents > 0 ) m_muBackground[param] /= numBackEvents;
97  }
98  fMu[0] = m_muBackground;
99  fMu[1] = m_muSignal;
100 
101  if (fDebug) {
102  Log() << kDEBUG << "the signal means" << Endl;
103  for (UInt_t param=0; param < fNumParams; ++param)
104  Log() << kDEBUG << m_muSignal[param] << Endl;
105  Log() << kDEBUG << "the background means" << Endl;
106  for (UInt_t param=0; param < inputBackgroundEvents[0].size(); ++param)
107  Log() << kDEBUG << m_muBackground[param] << Endl;
108  }
109 
110  // sigma is a sum of two symmetric matrices, one for the background and one for signal
111  // get the matricies seperately (def not be the best way to do it!)
112 
113  // the signal, background, and total matrix
114  TMatrixF sigmaSignal(fNumParams, fNumParams);
115  TMatrixF sigmaBack(fNumParams, fNumParams);
116  if (fSigma!=0) delete fSigma;
117  fSigma = new TMatrixF(fNumParams, fNumParams);
118  for (UInt_t row=0; row < fNumParams; ++row) {
119  for (UInt_t col=0; col < fNumParams; ++col) {
120  sigmaSignal[row][col] = 0;
121  sigmaBack[row][col] = 0;
122  (*fSigma)[row][col] = 0;
123  }
124  }
125 
126  for (UInt_t eventNumber=0; eventNumber < numSignalEvents; ++eventNumber) {
127  for (UInt_t row=0; row < fNumParams; ++row) {
128  for (UInt_t col=0; col < fNumParams; ++col) {
129  sigmaSignal[row][col] += (inputSignalEvents[eventNumber][row] - m_muSignal[row]) * (inputSignalEvents[eventNumber][col] - m_muSignal[col] );
130  }
131  }
132  }
133 
134  for (UInt_t eventNumber=0; eventNumber < numBackEvents; ++eventNumber) {
135  for (UInt_t row=0; row < fNumParams; ++row) {
136  for (UInt_t col=0; col < fNumParams; ++col) {
137  sigmaBack[row][col] += (inputBackgroundEvents[eventNumber][row] - m_muBackground[row]) * (inputBackgroundEvents[eventNumber][col] - m_muBackground[col] );
138  }
139  }
140  }
141 
142  // the total matrix
143  *fSigma = sigmaBack + sigmaSignal;
144  *fSigma *= 1.0/(numTotalEvents - K);
145 
146  if (fDebug) {
147  Log() << "after filling sigmaSignal" <<Endl;
148  sigmaSignal.Print();
149  Log() << "after filling sigmaBack" <<Endl;
150  sigmaBack.Print();
151  Log() << "after filling total Sigma" <<Endl;
152  fSigma->Print();
153  }
154 
155  TDecompSVD solutionSVD = TDecompSVD( *fSigma );
156  TMatrixF decomposed = TMatrixF( fNumParams, fNumParams );
157  TMatrixF diag ( fNumParams, fNumParams );
158  TMatrixF uTrans( fNumParams, fNumParams );
159  TMatrixF vTrans( fNumParams, fNumParams );
160  if (solutionSVD.Decompose()) {
161  for (UInt_t i = 0; i< fNumParams; ++i) {
162  if (solutionSVD.GetSig()[i] > fTolerence)
163  diag(i,i) = solutionSVD.GetSig()[i];
164  else
165  diag(i,i) = fTolerence;
166  }
167 
168  if (fDebug) {
169  Log() << "the diagonal" <<Endl;
170  diag.Print();
171  }
172 
173  decomposed = solutionSVD.GetV();
174  decomposed *= diag;
175  decomposed *= solutionSVD.GetU();
176 
177  if (fDebug) {
178  Log() << "the decomposition " <<Endl;
179  decomposed.Print();
180  }
181 
182  *fSigmaInverse = uTrans.Transpose(solutionSVD.GetU());
183  *fSigmaInverse /= diag;
184  *fSigmaInverse *= vTrans.Transpose(solutionSVD.GetV());
185 
186  if (fDebug) {
187  Log() << "the SigmaInverse " <<Endl;
188  fSigmaInverse->Print();
189 
190  Log() << "the real " <<Endl;
191  fSigma->Invert();
192  fSigma->Print();
193 
194  Bool_t problem = false;
195  for (UInt_t i =0; i< fNumParams; ++i) {
196  for (UInt_t j =0; j< fNumParams; ++j) {
197  if (TMath::Abs((Float_t)(*fSigma)(i,j) - (Float_t)(*fSigmaInverse)(i,j)) > 0.01) {
198  Log() << "problem, i= "<< i << " j= " << j << Endl;
199  Log() << "Sigma(i,j)= "<< (*fSigma)(i,j) << " SigmaInverse(i,j)= " << (*fSigmaInverse)(i,j) <<Endl;
200  Log() << "The difference is : " << TMath::Abs((Float_t)(*fSigma)(i,j) - (Float_t)(*fSigmaInverse)(i,j)) <<Endl;
201  problem = true;
202  }
203  }
204  }
205  if (problem) Log() << kWARNING << "Problem with the inversion!" << Endl;
206 
207  }
208  }
209 }
210 
211 ////////////////////////////////////////////////////////////////////////////////
212 ///
213 /// Probability value using Gaussian approximation
214 ///
215 
216 Float_t TMVA::LDA::FSub(const std::vector<Float_t>& x, Int_t k)
217 {
218  Float_t prefactor = 1.0/(TMath::TwoPi()*TMath::Sqrt(fSigma->Determinant()));
219  std::vector<Float_t> m_transPoseTimesSigmaInverse;
220 
221  for (UInt_t j=0; j < fNumParams; ++j) {
222  Float_t m_temp = 0;
223  for (UInt_t i=0; i < fNumParams; ++i) {
224  m_temp += (x[i] - fMu[k][i]) * (*fSigmaInverse)(j,i);
225  }
226  m_transPoseTimesSigmaInverse.push_back(m_temp);
227  }
228 
229  Float_t exponent = 0.0;
230  for (UInt_t i=0; i< fNumParams; ++i) {
231  exponent += m_transPoseTimesSigmaInverse[i]*(x[i] - fMu[k][i]);
232  }
233 
234  exponent *= -0.5;
235 
236  return prefactor*TMath::Exp( exponent );
237 }
238 
239 ////////////////////////////////////////////////////////////////////////////////
240 ///
241 /// Signal probability with Gaussian approximation
242 ///
243 
244 Float_t TMVA::LDA::GetProb(const std::vector<Float_t>& x, Int_t k)
245 {
246  Float_t m_numerator = FSub(x,k)*fEventFraction[k];
247  Float_t m_denominator = FSub(x,0)*fEventFraction[0]+FSub(x,1)*fEventFraction[1];
248 
249  return m_numerator/m_denominator;
250 }
251 
252 ////////////////////////////////////////////////////////////////////////////////
253 ///
254 /// Log likelihood function with Gaussian approximation
255 ///
256 
257 Float_t TMVA::LDA::GetLogLikelihood( const std::vector<Float_t>& x, Int_t k )
258 {
259  return TMath::Log( FSub(x,k)/FSub(x,!k) ) + TMath::Log( fEventFraction[k]/fEventFraction[!k] );
260 }
LDA(Float_t tolerence=1.0e-5, Bool_t debug=false)
constructor
Definition: LDA.cxx:52
Float_t FSub(const std::vector< Float_t > &x, Int_t k)
Probability value using Gaussian approximation.
Definition: LDA.cxx:216
const TVectorD & GetSig()
Definition: TDecompSVD.h:61
TMatrixF * fSigmaInverse
Definition: LDA.h:78
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
TMatrixT< Element > & Transpose(const TMatrixT< Element > &source)
Transpose matrix source.
Definition: TMatrixT.cxx:1469
~LDA()
destructor
Definition: LDA.cxx:65
Double_t Log(Double_t x)
Definition: TMath.h:526
float Float_t
Definition: RtypesCore.h:53
TMatrixT< Float_t > TMatrixF
Definition: TMatrixFfwd.h:24
MsgLogger * fLogger
Definition: LDA.h:82
MsgLogger & Log() const
Definition: LDA.h:69
std::map< Int_t, std::vector< Float_t > > fMu
Definition: LDA.h:76
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Float_t fTolerence
Definition: LDA.h:74
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
TMatrixT.
Definition: TMatrixDfwd.h:24
Single Value Decomposition class.
Definition: TDecompSVD.h:25
Double_t x[n]
Definition: legend1.C:17
Float_t GetProb(const std::vector< Float_t > &x, Int_t k)
Signal probability with Gaussian approximation.
Definition: LDA.cxx:244
std::map< Int_t, Float_t > fEventFraction
Definition: LDA.h:79
Double_t TwoPi()
Definition: TMath.h:45
const TMatrixD & GetU()
Definition: TDecompSVD.h:57
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
Definition: TMatrixT.cxx:1396
virtual Bool_t Decompose()
SVD decomposition of matrix If the decomposition succeeds, bit kDecomposed is set ...
Definition: TDecompSVD.cxx:123
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:595
void Print(Option_t *name="") const
Print the matrix as a table of elements.
Float_t GetLogLikelihood(const std::vector< Float_t > &x, Int_t k)
Log likelihood function with Gaussian approximation.
Definition: LDA.cxx:257
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t K()
Definition: TMath.h:95
UInt_t fNumParams
Definition: LDA.h:75
void Initialize(const LDAEvents &inputSignal, const LDAEvents &inputBackground)
Create LDA matrix using local events found by knn method.
Definition: LDA.cxx:73
Double_t Exp(Double_t x)
Definition: TMath.h:495
Bool_t fDebug
Definition: LDA.h:80
std::vector< std::vector< Float_t > > LDAEvents
Definition: LDA.h:42
virtual Double_t Determinant() const
Return the matrix determinant.
Definition: TMatrixT.cxx:1361
bool debug
TMatrixF * fSigma
Definition: LDA.h:77
const TMatrixD & GetV()
Definition: TDecompSVD.h:59
Double_t Sqrt(Double_t x)
Definition: TMath.h:464