Logo ROOT   6.14/05
Reference Guide
LDA.cxx
Go to the documentation of this file.
1 // $Id$
2 /**********************************************************************************
3  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
4  * Package: TMVA *
5  * Class : LDA *
6  * Web : http://tmva.sourceforge.net *
7  * *
8  * Description: *
9  * Local LDA method used by MethodKNN to compute MVA value. *
10  * This is experimental code under development. This class computes *
11  * parameters of signal and background PDFs using Gaussian approximation. *
12  * *
13  * Author: *
14  * John Alison John.Alison@cern.ch - University of Pennsylvania, USA *
15  * *
16  * Copyright (c) 2007: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * University of Pennsylvania, USA *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 /*! \class TMVA::LDA
27 \ingroup TMVA
28 
29 */
30 
31 // Local
32 #include "TMVA/LDA.h"
33 
34 // C/C++
35 #include <iostream>
36 
37 #include "TDecompSVD.h"
38 #include "TMatrixF.h"
39 #include "TMath.h"
40 
41 #include "TMVA/Types.h"
42 #include "TMVA/MsgLogger.h"
43 
44 /////////////////////////////////////////////////////////////////////////////////
45 /// constructor
46 
47 TMVA::LDA::LDA( Float_t tolerence, Bool_t debug )
48  : fTolerence(tolerence),
49  fNumParams(0),
50  fSigma(0),
51  fSigmaInverse(0),
52  fDebug(debug),
53  fLogger( new MsgLogger("LDA", (debug?kINFO:kDEBUG)) )
54 {
55 }
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// destructor
59 
61 {
62  delete fLogger;
63 }
64 
65 ////////////////////////////////////////////////////////////////////////////////
66 /// Create LDA matrix using local events found by knn method
67 
68 void TMVA::LDA::Initialize(const LDAEvents& inputSignalEvents, const LDAEvents& inputBackgroundEvents)
69 {
70  Log() << kDEBUG << "There are: " << inputSignalEvents.size() << " input signal events " << Endl;
71  Log() << kDEBUG << "There are: " << inputBackgroundEvents.size() << " input background events " << Endl;
72 
73  fNumParams = inputSignalEvents[0].size();
74 
75  UInt_t numSignalEvents = inputSignalEvents.size();
76  UInt_t numBackEvents = inputBackgroundEvents.size();
77  UInt_t numTotalEvents = numSignalEvents + numBackEvents;
78  fEventFraction[0] = (Float_t)numBackEvents/numTotalEvents;
79  fEventFraction[1] = (Float_t)numSignalEvents/numTotalEvents;
80  UInt_t K = 2;
81 
82  // get the mean of the signal and background for each parameter
83  std::vector<Float_t> m_muSignal (fNumParams,0.0);
84  std::vector<Float_t> m_muBackground (fNumParams,0.0);
85  for (UInt_t param=0; param < fNumParams; ++param) {
86  for (UInt_t eventNumber=0; eventNumber < numSignalEvents; ++eventNumber)
87  m_muSignal[param] += inputSignalEvents[eventNumber][param];
88  for (UInt_t eventNumber=0; eventNumber < numBackEvents; ++eventNumber)
89  m_muBackground[param] += inputBackgroundEvents[eventNumber][param]/numBackEvents;
90  if (numSignalEvents > 0) m_muSignal[param] /= numSignalEvents;
91  if (numBackEvents > 0 ) m_muBackground[param] /= numBackEvents;
92  }
93  fMu[0] = m_muBackground;
94  fMu[1] = m_muSignal;
95 
96  if (fDebug) {
97  Log() << kDEBUG << "the signal means" << Endl;
98  for (UInt_t param=0; param < fNumParams; ++param)
99  Log() << kDEBUG << m_muSignal[param] << Endl;
100  Log() << kDEBUG << "the background means" << Endl;
101  for (UInt_t param=0; param < inputBackgroundEvents[0].size(); ++param)
102  Log() << kDEBUG << m_muBackground[param] << Endl;
103  }
104 
105  // sigma is a sum of two symmetric matrices, one for the background and one for signal
106  // get the matrices separately (def not be the best way to do it!)
107 
108  // the signal, background, and total matrix
109  TMatrixF sigmaSignal(fNumParams, fNumParams);
110  TMatrixF sigmaBack(fNumParams, fNumParams);
111  if (fSigma!=0) delete fSigma;
112  fSigma = new TMatrixF(fNumParams, fNumParams);
113  for (UInt_t row=0; row < fNumParams; ++row) {
114  for (UInt_t col=0; col < fNumParams; ++col) {
115  sigmaSignal[row][col] = 0;
116  sigmaBack[row][col] = 0;
117  (*fSigma)[row][col] = 0;
118  }
119  }
120 
121  for (UInt_t eventNumber=0; eventNumber < numSignalEvents; ++eventNumber) {
122  for (UInt_t row=0; row < fNumParams; ++row) {
123  for (UInt_t col=0; col < fNumParams; ++col) {
124  sigmaSignal[row][col] += (inputSignalEvents[eventNumber][row] - m_muSignal[row]) * (inputSignalEvents[eventNumber][col] - m_muSignal[col] );
125  }
126  }
127  }
128 
129  for (UInt_t eventNumber=0; eventNumber < numBackEvents; ++eventNumber) {
130  for (UInt_t row=0; row < fNumParams; ++row) {
131  for (UInt_t col=0; col < fNumParams; ++col) {
132  sigmaBack[row][col] += (inputBackgroundEvents[eventNumber][row] - m_muBackground[row]) * (inputBackgroundEvents[eventNumber][col] - m_muBackground[col] );
133  }
134  }
135  }
136 
137  // the total matrix
138  *fSigma = sigmaBack + sigmaSignal;
139  *fSigma *= 1.0/(numTotalEvents - K);
140 
141  if (fDebug) {
142  Log() << "after filling sigmaSignal" <<Endl;
143  sigmaSignal.Print();
144  Log() << "after filling sigmaBack" <<Endl;
145  sigmaBack.Print();
146  Log() << "after filling total Sigma" <<Endl;
147  fSigma->Print();
148  }
149 
150  TDecompSVD solutionSVD = TDecompSVD( *fSigma );
151  TMatrixF decomposed = TMatrixF( fNumParams, fNumParams );
152  TMatrixF diag ( fNumParams, fNumParams );
153  TMatrixF uTrans( fNumParams, fNumParams );
154  TMatrixF vTrans( fNumParams, fNumParams );
155  if (solutionSVD.Decompose()) {
156  for (UInt_t i = 0; i< fNumParams; ++i) {
157  if (solutionSVD.GetSig()[i] > fTolerence)
158  diag(i,i) = solutionSVD.GetSig()[i];
159  else
160  diag(i,i) = fTolerence;
161  }
162 
163  if (fDebug) {
164  Log() << "the diagonal" <<Endl;
165  diag.Print();
166  }
167 
168  decomposed = solutionSVD.GetV();
169  decomposed *= diag;
170  decomposed *= solutionSVD.GetU();
171 
172  if (fDebug) {
173  Log() << "the decomposition " <<Endl;
174  decomposed.Print();
175  }
176 
177  *fSigmaInverse = uTrans.Transpose(solutionSVD.GetU());
178  *fSigmaInverse /= diag;
179  *fSigmaInverse *= vTrans.Transpose(solutionSVD.GetV());
180 
181  if (fDebug) {
182  Log() << "the SigmaInverse " <<Endl;
183  fSigmaInverse->Print();
184 
185  Log() << "the real " <<Endl;
186  fSigma->Invert();
187  fSigma->Print();
188 
189  Bool_t problem = false;
190  for (UInt_t i =0; i< fNumParams; ++i) {
191  for (UInt_t j =0; j< fNumParams; ++j) {
192  if (TMath::Abs((Float_t)(*fSigma)(i,j) - (Float_t)(*fSigmaInverse)(i,j)) > 0.01) {
193  Log() << "problem, i= "<< i << " j= " << j << Endl;
194  Log() << "Sigma(i,j)= "<< (*fSigma)(i,j) << " SigmaInverse(i,j)= " << (*fSigmaInverse)(i,j) <<Endl;
195  Log() << "The difference is : " << TMath::Abs((Float_t)(*fSigma)(i,j) - (Float_t)(*fSigmaInverse)(i,j)) <<Endl;
196  problem = true;
197  }
198  }
199  }
200  if (problem) Log() << kWARNING << "Problem with the inversion!" << Endl;
201 
202  }
203  }
204 }
205 
206 ////////////////////////////////////////////////////////////////////////////////
207 ///
208 /// Probability value using Gaussian approximation
209 ///
210 
211 Float_t TMVA::LDA::FSub(const std::vector<Float_t>& x, Int_t k)
212 {
213  Float_t prefactor = 1.0/(TMath::TwoPi()*TMath::Sqrt(fSigma->Determinant()));
214  std::vector<Float_t> m_transPoseTimesSigmaInverse;
215 
216  for (UInt_t j=0; j < fNumParams; ++j) {
217  Float_t m_temp = 0;
218  for (UInt_t i=0; i < fNumParams; ++i) {
219  m_temp += (x[i] - fMu[k][i]) * (*fSigmaInverse)(j,i);
220  }
221  m_transPoseTimesSigmaInverse.push_back(m_temp);
222  }
223 
224  Float_t exponent = 0.0;
225  for (UInt_t i=0; i< fNumParams; ++i) {
226  exponent += m_transPoseTimesSigmaInverse[i]*(x[i] - fMu[k][i]);
227  }
228 
229  exponent *= -0.5;
230 
231  return prefactor*TMath::Exp( exponent );
232 }
233 
234 ////////////////////////////////////////////////////////////////////////////////
235 ///
236 /// Signal probability with Gaussian approximation
237 ///
238 
239 Float_t TMVA::LDA::GetProb(const std::vector<Float_t>& x, Int_t k)
240 {
241  Float_t m_numerator = FSub(x,k)*fEventFraction[k];
242  Float_t m_denominator = FSub(x,0)*fEventFraction[0]+FSub(x,1)*fEventFraction[1];
243 
244  return m_numerator/m_denominator;
245 }
246 
247 ////////////////////////////////////////////////////////////////////////////////
248 ///
249 /// Log likelihood function with Gaussian approximation
250 ///
251 
252 Float_t TMVA::LDA::GetLogLikelihood( const std::vector<Float_t>& x, Int_t k )
253 {
254  return TMath::Log( FSub(x,k)/FSub(x,!k) ) + TMath::Log( fEventFraction[k]/fEventFraction[!k] );
255 }
LDA(Float_t tolerence=1.0e-5, Bool_t debug=false)
constructor
Definition: LDA.cxx:47
Float_t FSub(const std::vector< Float_t > &x, Int_t k)
Probability value using Gaussian approximation.
Definition: LDA.cxx:211
const TVectorD & GetSig()
Definition: TDecompSVD.h:59
TMatrixF * fSigmaInverse
Definition: LDA.h:74
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
TMatrixT< Element > & Transpose(const TMatrixT< Element > &source)
Transpose matrix source.
Definition: TMatrixT.cxx:1469
~LDA()
destructor
Definition: LDA.cxx:60
Double_t Log(Double_t x)
Definition: TMath.h:759
float Float_t
Definition: RtypesCore.h:53
constexpr Double_t TwoPi()
Definition: TMath.h:45
TMatrixT< Float_t > TMatrixF
Definition: TMatrixFfwd.h:22
MsgLogger * fLogger
Definition: LDA.h:78
std::map< Int_t, std::vector< Float_t > > fMu
Definition: LDA.h:72
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
Float_t fTolerence
Definition: LDA.h:70
virtual void Print(Option_t *option="") const
This method must be overridden when a class wants to print itself.
Definition: TObject.cxx:550
virtual Double_t Determinant() const
Return the matrix determinant.
Definition: TMatrixT.cxx:1361
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
TMatrixT.
Definition: TMatrixDfwd.h:22
Single Value Decomposition class.
Definition: TDecompSVD.h:23
Double_t x[n]
Definition: legend1.C:17
Float_t GetProb(const std::vector< Float_t > &x, Int_t k)
Signal probability with Gaussian approximation.
Definition: LDA.cxx:239
std::map< Int_t, Float_t > fEventFraction
Definition: LDA.h:75
const TMatrixD & GetU()
Definition: TDecompSVD.h:55
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
Definition: TMatrixT.cxx:1396
virtual Bool_t Decompose()
SVD decomposition of matrix If the decomposition succeeds, bit kDecomposed is set ...
Definition: TDecompSVD.cxx:123
static const uint32_t K[64]
Definition: RSha256.hxx:148
Float_t GetLogLikelihood(const std::vector< Float_t > &x, Int_t k)
Log likelihood function with Gaussian approximation.
Definition: LDA.cxx:252
unsigned int UInt_t
Definition: RtypesCore.h:42
UInt_t fNumParams
Definition: LDA.h:71
void Initialize(const LDAEvents &inputSignal, const LDAEvents &inputBackground)
Create LDA matrix using local events found by knn method.
Definition: LDA.cxx:68
Double_t Exp(Double_t x)
Definition: TMath.h:726
MsgLogger & Log() const
Definition: LDA.h:65
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
void Print(Option_t *name="") const
Print the matrix as a table of elements.
Bool_t fDebug
Definition: LDA.h:76
std::vector< std::vector< Float_t > > LDAEvents
Definition: LDA.h:38
TMatrixF * fSigma
Definition: LDA.h:73
const TMatrixD & GetV()
Definition: TDecompSVD.h:57
Double_t Sqrt(Double_t x)
Definition: TMath.h:690