Logo ROOT   6.14/05
Reference Guide
VariablePCATransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariablePCATransform *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * U. of Bonn, Germany *
24  * *
25  * Redistribution and use in source and binary forms, with or without *
26  * modification, are permitted according to the terms listed in LICENSE *
27  * (http://tmva.sourceforge.net/LICENSE) *
28  **********************************************************************************/
29 
30 /*! \class TMVA::VariablePCATransform
31 \ingroup TMVA
32 Linear interpolation class
33 */
34 
36 
37 #include "TMVA/DataSet.h"
38 #include "TMVA/Event.h"
39 #include "TMVA/MsgLogger.h"
40 #include "TMVA/Tools.h"
41 #include "TMVA/Types.h"
42 
43 #include "TMatrixD.h"
44 #include "TMatrixDBase.h"
45 #include "TPrincipal.h"
46 #include "TVectorD.h"
47 #include "TVectorF.h"
48 
49 #include <iostream>
50 #include <iomanip>
51 #include <stdexcept>
52 #include <algorithm>
53 
55 
56 ////////////////////////////////////////////////////////////////////////////////
57 /// constructor
58 
60 : VariableTransformBase( dsi, Types::kPCA, "PCA" )
61 {
62 }
63 
64 ////////////////////////////////////////////////////////////////////////////////
65 /// destructor
66 
68 {
69  for (UInt_t i=0; i<fMeanValues.size(); i++) {
70  if (fMeanValues.at(i) != 0) delete fMeanValues.at(i);
71  if (fEigenVectors.at(i) != 0) delete fEigenVectors.at(i);
72  }
73 }
74 
75 ////////////////////////////////////////////////////////////////////////////////
76 /// initialization of the transformation.
77 /// Has to be called in the preparation and not in the constructor,
78 /// since the number of classes it not known at construction, but
79 /// only after the creation of the DataSet which might be later.
80 
82 {
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 /// calculate the principal components using the ROOT class TPrincipal
87 /// and the normalization
88 
89 Bool_t TMVA::VariablePCATransform::PrepareTransformation (const std::vector<Event*>& events)
90 {
91  Initialize();
92 
93  if (!IsEnabled() || IsCreated()) return kTRUE;
94 
95  Log() << kINFO << "Preparing the Principle Component (PCA) transformation..." << Endl;
96 
97  UInt_t inputSize = fGet.size();
98 
99  SetNVariables(inputSize);
100 
101  // TPrincipal doesn't support PCA transformation for 1 or less variables
102  if (inputSize <= 1) {
103  Log() << kFATAL << "Cannot perform PCA transformation for " << inputSize << " variable only" << Endl;
104  return kFALSE;
105  }
106 
107  if (inputSize > 200) {
108  Log() << kINFO << "----------------------------------------------------------------------------"
109  << Endl;
110  Log() << kINFO
111  << ": More than 200 variables, will not calculate PCA!" << Endl;
112  Log() << kINFO << "----------------------------------------------------------------------------"
113  << Endl;
114  return kFALSE;
115  }
116 
118 
119  SetCreated( kTRUE );
120 
121  return kTRUE;
122 }
123 
124 ////////////////////////////////////////////////////////////////////////////////
125 /// apply the principal component analysis
126 
127 const TMVA::Event* TMVA::VariablePCATransform::Transform( const Event* const ev, Int_t cls ) const
128 {
129  if (!IsCreated()) return 0;
130 
131  // const Int_t inputSize = fGet.size();
132  // const UInt_t nCls = GetNClasses();
133 
134  // if we have more than one class, take the last PCA analysis where all classes are combined if
135  // the cls parameter is outside the defined classes
136  // If there is only one class, then no extra class for all events of all classes has to be created
137 
138  //if (cls < 0 || cls > GetNClasses()) cls = (fMeanValues.size()==1?0:2);//( GetNClasses() == 1 ? 0 : 1 ); ;
139  // EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
140  if (cls < 0 || cls >= (int) fMeanValues.size()) cls = fMeanValues.size()-1;
141  // EVT workaround end
142 
143  // Perform PCA and put it into PCAed events tree
144 
145  if (fTransformedEvent==0 ) {
146  fTransformedEvent = new Event();
147  }
148 
149  std::vector<Float_t> input;
150  std::vector<Char_t> mask;
151  std::vector<Float_t> principalComponents;
152 
153  Bool_t hasMaskedEntries = GetInput( ev, input, mask );
154 
155  if( hasMaskedEntries ){ // targets might be masked (for events where the targets have not been computed yet)
156  UInt_t numMasked = std::count(mask.begin(), mask.end(), (Char_t)kTRUE);
157  UInt_t numOK = std::count(mask.begin(), mask.end(), (Char_t)kFALSE);
158  if( numMasked>0 && numOK>0 ){
159  Log() << kFATAL << "You mixed variables and targets in the decorrelation transformation. This is not possible." << Endl;
160  }
161  SetOutput( fTransformedEvent, input, mask, ev );
162  return fTransformedEvent;
163  }
164 
165  X2P( principalComponents, input, cls );
166  SetOutput( fTransformedEvent, principalComponents, mask, ev );
167 
168  return fTransformedEvent;
169 }
170 
171 ////////////////////////////////////////////////////////////////////////////////
172 /// apply the principal component analysis
173 /// TODO: implementation of inverse transformation
174 /// Log() << kFATAL << "Inverse transformation for PCA transformation not yet implemented. Hence, this transformation cannot be applied together with regression. Please contact the authors if necessary." << Endl;
175 
177 {
178  if (!IsCreated()) return 0;
179  // const Int_t inputSize = fGet.size();
180  const UInt_t nCls = GetNClasses();
181  //UInt_t evCls = ev->GetClass();
182 
183  // if we have more than one class, take the last PCA analysis where all classes are combined if
184  // the cls parameter is outside the defined classes
185  // If there is only one class, then no extra class for all events of all classes has to be created
186  if (cls < 0 || UInt_t(cls) > nCls) cls = (fMeanValues.size()==1?0:2);//( GetNClasses() == 1 ? 0 : 1 ); ;
187  // Perform PCA and put it into PCAed events tree
188 
190 
191  std::vector<Float_t> principalComponents;
192  std::vector<Char_t> mask;
193  std::vector<Float_t> output;
194 
195  GetInput( ev, principalComponents, mask, kTRUE );
196  P2X( output, principalComponents, cls );
197  SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
198 
199  return fBackTransformedEvent;
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 /// calculate the principal components for the signal and the background data
204 /// it uses the MakePrincipal method of ROOT's TPrincipal class
205 
206 void TMVA::VariablePCATransform::CalculatePrincipalComponents( const std::vector< Event*>& events )
207 {
208  UInt_t nvars = 0, ntgts = 0, nspcts = 0;
209  CountVariableTypes( nvars, ntgts, nspcts );
210  if( nvars>0 && ntgts>0 )
211  Log() << kFATAL << "Variables and targets cannot be mixed in PCA transformation." << Endl;
212 
213  const Int_t inputSize = fGet.size();
214 
215  // if we have more than one class, add another PCA analysis which combines all classes
216  const UInt_t nCls = GetNClasses();
217  const UInt_t maxPCA = (nCls<=1) ? nCls : nCls+1;
218 
219  // PCA [signal/background/class x/class y/... /all classes]
220  std::vector<TPrincipal*> pca(maxPCA);
221  for (UInt_t i=0; i<maxPCA; i++) pca[i] = new TPrincipal(nvars,"");
222 
223  // !! Not normalizing and not storing input data, for performance reasons. Should perhaps restore normalization.
224  // But this can be done afterwards by adding a normalisation transformation (user defined)
225 
226  Long64_t ievt, entries = events.size();
227  Double_t *dvec = new Double_t[inputSize];
228 
229  std::vector<Float_t> input;
230  std::vector<Char_t> mask;
231  for (ievt=0; ievt<entries; ievt++) {
232  const Event* ev = events[ievt];
233  UInt_t cls = ev->GetClass();
234 
235  Bool_t hasMaskedEntries = GetInput( ev, input, mask );
236  if (hasMaskedEntries){
237  Log() << kWARNING << "Print event which triggers an error" << Endl;
238  std::ostringstream oss;
239  ev->Print(oss);
240  Log() << oss.str();
241  Log() << kFATAL << "Masked entries found in event read in when calculating the principal components for the PCA transformation." << Endl;
242  }
243 
244  UInt_t iinp = 0;
245  for( std::vector<Float_t>::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp )
246  {
247  Float_t value = (*itInp);
248  dvec[iinp] = (Double_t)value;
249  ++iinp;
250  }
251 
252  pca.at(cls)->AddRow( dvec );
253  if (nCls > 1) pca.at(maxPCA-1)->AddRow( dvec );
254  }
255 
256  // delete possible leftovers
257  for (UInt_t i=0; i<fMeanValues.size(); i++) if (fMeanValues[i] != 0) delete fMeanValues[i];
258  for (UInt_t i=0; i<fEigenVectors.size(); i++) if (fEigenVectors[i] != 0) delete fEigenVectors[i];
259  fMeanValues.resize(maxPCA,0);
260  fEigenVectors.resize(maxPCA,0);
261 
262  for (UInt_t i=0; i<maxPCA; i++ ) {
263  pca.at(i)->MakePrincipals();
264 
265  // retrieve mean values, eigenvectors and sigmas
266  fMeanValues[i] = new TVectorD( *(pca.at(i)->GetMeanValues()) ); // need to copy since we want to own
267  fEigenVectors[i] = new TMatrixD( *(pca.at(i)->GetEigenVectors()) );
268  }
269 
270  for (UInt_t i=0; i<maxPCA; i++) delete pca.at(i);
271  delete [] dvec;
272 }
273 
274 ////////////////////////////////////////////////////////////////////////////////
275 /// Calculate the principal components from the original data vector
276 /// x, and return it in p (function extracted from TPrincipal::X2P)
277 /// It's the users responsibility to make sure that both x and p are
278 /// of the right size (i.e., memory must be allocated for p)
279 
280 void TMVA::VariablePCATransform::X2P( std::vector<Float_t>& pc, const std::vector<Float_t>& x, Int_t cls ) const
281 {
282  const Int_t nInput = x.size();
283  pc.assign(nInput,0);
284 
285  for (Int_t i = 0; i < nInput; i++) {
286  Double_t pv = 0;
287  for (Int_t j = 0; j < nInput; j++)
288  pv += (((Double_t)x.at(j)) - (*fMeanValues.at(cls))(j)) * (*fEigenVectors.at(cls))(j,i);
289  pc[i] = pv;
290  }
291 }
292 
293 ////////////////////////////////////////////////////////////////////////////////
294 /// Perform the back-transformation from the principal components
295 /// pc, and return x
296 /// It's the users responsibility to make sure that both x and pc are
297 /// of the right size (i.e., memory must be allocated for p)
298 
299 void TMVA::VariablePCATransform::P2X( std::vector<Float_t>& x, const std::vector<Float_t>& pc, Int_t cls ) const
300 {
301  const Int_t nInput = pc.size();
302  x.assign(nInput,0);
303 
304  for (Int_t i = 0; i < nInput; i++) {
305  Double_t xv = 0;
306  for (Int_t j = 0; j < nInput; j++)
307  xv += (((Double_t)pc.at(j)) * (*fEigenVectors.at(cls))(i,j) ) + (*fMeanValues.at(cls))(j);
308  x[i] = xv;
309  }
310 }
311 
312 ////////////////////////////////////////////////////////////////////////////////
313 /// write mean values to stream
314 
316 {
317  for (Int_t sbType=0; sbType<2; sbType++) {
318  o << "# PCA mean values " << std::endl;
319  const TVectorD* means = fMeanValues[sbType];
320  o << (sbType==0 ? "Signal" : "Background") << " " << means->GetNrows() << std::endl;
321  for (Int_t row = 0; row<means->GetNrows(); row++) {
322  o << std::setprecision(12) << std::setw(20) << (*means)[row];
323  }
324  o << std::endl;
325  }
326  o << "##" << std::endl;
327 
328  // write eigenvectors to stream
329  for (Int_t sbType=0; sbType<2; sbType++) {
330  o << "# PCA eigenvectors " << std::endl;
331  const TMatrixD* mat = fEigenVectors[sbType];
332  o << (sbType==0 ? "Signal" : "Background") << " " << mat->GetNrows() << " x " << mat->GetNcols() << std::endl;
333  for (Int_t row = 0; row<mat->GetNrows(); row++) {
334  for (Int_t col = 0; col<mat->GetNcols(); col++) {
335  o << std::setprecision(12) << std::setw(20) << (*mat)[row][col] << " ";
336  }
337  o << std::endl;
338  }
339  }
340  o << "##" << std::endl;
341 }
342 
343 ////////////////////////////////////////////////////////////////////////////////
344 /// create XML description of PCA transformation
345 
347  void* trfxml = gTools().AddChild(parent, "Transform");
348  gTools().AddAttr(trfxml, "Name", "PCA");
349 
351 
352  // write mean values to stream
353  for (UInt_t sbType=0; sbType<fMeanValues.size(); sbType++) {
354  void* meanxml = gTools().AddChild( trfxml, "Statistics");
355  const TVectorD* means = fMeanValues[sbType];
356  gTools().AddAttr( meanxml, "Class", (sbType==0 ? "Signal" :(sbType==1 ? "Background":"Combined")) );
357  gTools().AddAttr( meanxml, "ClassIndex", sbType );
358  gTools().AddAttr( meanxml, "NRows", means->GetNrows() );
359  TString meansdef = "";
360  for (Int_t row = 0; row<means->GetNrows(); row++)
361  meansdef += gTools().StringFromDouble((*means)[row]) + " ";
362  gTools().AddRawLine( meanxml, meansdef );
363  }
364 
365  // write eigenvectors to stream
366  for (UInt_t sbType=0; sbType<fEigenVectors.size(); sbType++) {
367  void* evxml = gTools().AddChild( trfxml, "Eigenvectors");
368  const TMatrixD* mat = fEigenVectors[sbType];
369  gTools().AddAttr( evxml, "Class", (sbType==0 ? "Signal" :(sbType==1 ? "Background":"Combined") ) );
370  gTools().AddAttr( evxml, "ClassIndex", sbType );
371  gTools().AddAttr( evxml, "NRows", mat->GetNrows() );
372  gTools().AddAttr( evxml, "NCols", mat->GetNcols() );
373  TString evdef = "";
374  for (Int_t row = 0; row<mat->GetNrows(); row++)
375  for (Int_t col = 0; col<mat->GetNcols(); col++)
376  evdef += gTools().StringFromDouble((*mat)[row][col]) + " ";
377  gTools().AddRawLine( evxml, evdef );
378  }
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////////
382 /// Read the transformation matrices from the xml node
383 
385 {
386  Int_t nrows, ncols;
387  UInt_t clsIdx;
388  TString classtype;
389  TString nodeName;
390 
391  Bool_t newFormat = kFALSE;
392 
393  void* inpnode = NULL;
394 
395  inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
396  if( inpnode!=NULL )
397  newFormat = kTRUE; // new xml format
398 
399  if( newFormat ){
400  // ------------- new format --------------------
401  // read input
403 
404  }
405 
406  void* ch = gTools().GetChild(trfnode);
407  while (ch) {
408  nodeName = gTools().GetName(ch);
409  if (nodeName == "Statistics") {
410  // read mean values
411  gTools().ReadAttr(ch, "Class", classtype);
412  gTools().ReadAttr(ch, "ClassIndex", clsIdx);
413  gTools().ReadAttr(ch, "NRows", nrows);
414 
415  // set the correct size
416  if (fMeanValues.size()<=clsIdx) fMeanValues.resize(clsIdx+1,0);
417  if (fMeanValues[clsIdx]==0) fMeanValues[clsIdx] = new TVectorD( nrows );
418  fMeanValues[clsIdx]->ResizeTo( nrows );
419 
420  // now read vector entries
421  std::stringstream s(gTools().GetContent(ch));
422  for (Int_t row = 0; row<nrows; row++) s >> (*fMeanValues[clsIdx])(row);
423  }
424  else if ( nodeName == "Eigenvectors" ) {
425  // Read eigenvectors
426  gTools().ReadAttr(ch, "Class", classtype);
427  gTools().ReadAttr(ch, "ClassIndex", clsIdx);
428  gTools().ReadAttr(ch, "NRows", nrows);
429  gTools().ReadAttr(ch, "NCols", ncols);
430 
431  if (fEigenVectors.size()<=clsIdx) fEigenVectors.resize(clsIdx+1,0);
432  if (fEigenVectors[clsIdx]==0) fEigenVectors[clsIdx] = new TMatrixD( nrows, ncols );
433  fEigenVectors[clsIdx]->ResizeTo( nrows, ncols );
434 
435  // now read matrix entries
436  std::stringstream s(gTools().GetContent(ch));
437  for (Int_t row = 0; row<nrows; row++)
438  for (Int_t col = 0; col<ncols; col++)
439  s >> (*fEigenVectors[clsIdx])[row][col];
440  } // done reading eigenvectors
441  ch = gTools().GetNextChild(ch);
442  }
443 
444  SetCreated();
445 }
446 
447 ////////////////////////////////////////////////////////////////////////////////
448 /// Read mean values from input stream
449 
450 void TMVA::VariablePCATransform::ReadTransformationFromStream( std::istream& istr, const TString& classname )
451 {
452  char buf[512];
453  istr.getline(buf,512);
454  TString strvar, dummy;
455  Int_t nrows(0), ncols(0);
456  UInt_t classIdx=(classname=="signal"?0:1);
457 
458  for (UInt_t i=0; i<fMeanValues.size(); i++) {
459  if (fMeanValues.at(i) != 0) delete fMeanValues.at(i);
460  if (fEigenVectors.at(i) != 0) delete fEigenVectors.at(i);
461  }
462  fMeanValues.resize(3);
463  fEigenVectors.resize(3);
464 
465  Log() << kINFO << "VariablePCATransform::ReadTransformationFromStream(): " << Endl;
466 
467  while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
468  char* p = buf;
469  while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
470  if (*p=='#' || *p=='\0') {
471  istr.getline(buf,512);
472  continue; // if comment or empty line, read the next line
473  }
474  std::stringstream sstr(buf);
475  sstr >> strvar;
476  if (strvar=="signal" || strvar=="background") {
477 
478  sstr >> nrows;
479  Int_t sbType = (strvar=="signal" ? 0 : 1);
480 
481  if (fMeanValues[sbType] == 0) fMeanValues[sbType] = new TVectorD( nrows );
482  else fMeanValues[sbType]->ResizeTo( nrows );
483 
484  // now read vector entries
485  for (Int_t row = 0; row<nrows; row++) istr >> (*fMeanValues[sbType])(row);
486 
487  } // done reading vector
488 
489  istr.getline(buf,512); // reading the next line
490  }
491 
492  // Read eigenvectors from input stream
493  istr.getline(buf,512);
494  while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
495  char* p = buf;
496  while(*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
497  if (*p=='#' || *p=='\0') {
498  istr.getline(buf,512);
499  continue; // if comment or empty line, read the next line
500  }
501  std::stringstream sstr(buf);
502  sstr >> strvar;
503  if (strvar=="signal" || strvar=="background") {
504 
505  // coverity[tainted_data_argument]
506  sstr >> nrows >> dummy >> ncols;
507  Int_t sbType = (strvar=="signal" ? 0 : 1);
508 
509  if (fEigenVectors[sbType] == 0) fEigenVectors[sbType] = new TMatrixD( nrows, ncols );
510  else fEigenVectors[sbType]->ResizeTo( nrows, ncols );
511 
512  // now read matrix entries
513  for (Int_t row = 0; row<fEigenVectors[sbType]->GetNrows(); row++) {
514  for (Int_t col = 0; col<fEigenVectors[sbType]->GetNcols(); col++) {
515  istr >> (*fEigenVectors[sbType])[row][col];
516  }
517  }
518 
519  } // done reading matrix
520  istr.getline(buf,512); // reading the next line
521  }
522  fMeanValues[2] = new TVectorD( *fMeanValues[classIdx] );
523  fEigenVectors[2] = new TMatrixD( *fEigenVectors[classIdx] );
524 
525  SetCreated();
526 }
527 
528 ////////////////////////////////////////////////////////////////////////////////
529 /// creates C++ code fragment of the PCA transform for inclusion in standalone C++ class
530 
531 void TMVA::VariablePCATransform::MakeFunction( std::ostream& fout, const TString& fcncName,
532  Int_t part, UInt_t trCounter, Int_t )
533 {
534  UInt_t nvar = fEigenVectors[0]->GetNrows();
535 
536  // creates a PCA transformation function
537  UInt_t numC = fMeanValues.size();
538  if (part==1) {
539  fout << std::endl;
540  fout << " void X2P_"<<trCounter<<"( const double*, double*, int ) const;" << std::endl;
541  fout << " double fMeanValues_"<<trCounter<<"["<<numC<<"]["
542  << fMeanValues[0]->GetNrows() << "];" << std::endl; // mean values
543  fout << " double fEigenVectors_"<<trCounter<<"["<<numC<<"]["
544  << fEigenVectors[0]->GetNrows() << "]["
545  << fEigenVectors[0]->GetNcols() <<"];" << std::endl; // eigenvectors
546  fout << std::endl;
547  }
548 
549  // sanity check
550  if (numC>1){
551  if (fMeanValues[0]->GetNrows() != fMeanValues[1]->GetNrows() ||
552  fEigenVectors[0]->GetNrows() != fEigenVectors[1]->GetNrows() ||
553  fEigenVectors[0]->GetNcols() != fEigenVectors[1]->GetNcols()) {
554  Log() << kFATAL << "<MakeFunction> Mismatch in vector/matrix dimensions" << Endl;
555  }
556  }
557 
558  if (part==2) {
559 
560  fout << std::endl;
561  fout << "//_______________________________________________________________________" << std::endl;
562  fout << "inline void " << fcncName << "::X2P_"<<trCounter<<"( const double* x, double* p, int index ) const" << std::endl;
563  fout << "{" << std::endl;
564  fout << " // Calculate the principal components from the original data vector" << std::endl;
565  fout << " // x, and return it in p (function extracted from TPrincipal::X2P)" << std::endl;
566  fout << " // It's the users responsibility to make sure that both x and p are" << std::endl;
567  fout << " // of the right size (i.e., memory must be allocated for p)." << std::endl;
568  fout << " const int nVar = " << nvar << ";" << std::endl;
569  fout << std::endl;
570  fout << " for (int i = 0; i < nVar; i++) {" << std::endl;
571  fout << " p[i] = 0;" << std::endl;
572  fout << " for (int j = 0; j < nVar; j++) p[i] += (x[j] - fMeanValues_"<<trCounter<<"[index][j]) * fEigenVectors_"<<trCounter<<"[index][j][i];" << std::endl;
573  fout << " }" << std::endl;
574  fout << "}" << std::endl;
575  fout << std::endl;
576  fout << "//_______________________________________________________________________" << std::endl;
577  fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
578  fout << "{" << std::endl;
579  fout << " // PCA transformation, initialisation" << std::endl;
580 
581  // fill vector of mean values
582  fout << " // initialise vector of mean values" << std::endl;
583  std::streamsize dp = fout.precision();
584  for (UInt_t index=0; index<numC; index++) {
585  for (int i=0; i<fMeanValues[index]->GetNrows(); i++) {
586  fout << " fMeanValues_"<<trCounter<<"["<<index<<"]["<<i<<"] = " << std::setprecision(12)
587  << (*fMeanValues[index])(i) << ";" << std::endl;
588  }
589  }
590 
591  // fill matrix of eigenvectors
592  fout << std::endl;
593  fout << " // initialise matrix of eigenvectors" << std::endl;
594  for (UInt_t index=0; index<numC; index++) {
595  for (int i=0; i<fEigenVectors[index]->GetNrows(); i++) {
596  for (int j=0; j<fEigenVectors[index]->GetNcols(); j++) {
597  fout << " fEigenVectors_"<<trCounter<<"["<<index<<"]["<<i<<"]["<<j<<"] = " << std::setprecision(12)
598  << (*fEigenVectors[index])(i,j) << ";" << std::endl;
599  }
600  }
601  }
602  fout << std::setprecision(dp);
603  fout << "}" << std::endl;
604  fout << std::endl;
605  fout << "//_______________________________________________________________________" << std::endl;
606  fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int cls ) const" << std::endl;
607  fout << "{" << std::endl;
608  fout << " // PCA transformation" << std::endl;
609  fout << " const int nVar = " << nvar << ";" << std::endl;
610  fout << " double *dv = new double[nVar];" << std::endl;
611  fout << " double *rv = new double[nVar];" << std::endl;
612  fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
613  fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
614  fout << " else cls = "<<(numC==1?0:2)<<";"<< std::endl;
615  fout << " }"<< std::endl;
616 
617  VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
618 
619  fout << " for (int ivar=0; ivar<nVar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)];" << std::endl;
620 
621  fout << std::endl;
622  fout << " // Perform PCA and put it into PCAed events tree" << std::endl;
623  fout << " this->X2P_"<<trCounter<<"( dv, rv, cls );" << std::endl;
624  fout << " for (int ivar=0; ivar<nVar; ivar++) iv[indicesPut.at(ivar)] = rv[ivar];" << std::endl;
625 
626  fout << std::endl;
627  fout << " delete [] dv;" << std::endl;
628  fout << " delete [] rv;" << std::endl;
629  fout << "}" << std::endl;
630  }
631 }
Principal Components Analysis (PCA)
Definition: TPrincipal.h:20
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
void P2X(std::vector< Float_t > &, const std::vector< Float_t > &, Int_t cls) const
Perform the back-transformation from the principal components pc, and return x It&#39;s the users respons...
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
float Float_t
Definition: RtypesCore.h:53
void ReadTransformationFromStream(std::istream &, const TString &)
Read mean values from input stream.
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
Int_t GetNcols() const
Definition: TMatrixTBase.h:125
virtual void CountVariableTypes(UInt_t &nvars, UInt_t &ntgts, UInt_t &nspcts) const
count variables, targets and spectators
TVectorT.
Definition: TMatrixTBase.h:77
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Basic string class.
Definition: TString.h:131
Int_t GetNrows() const
Definition: TVectorT.h:75
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void WriteTransformationToStream(std::ostream &) const
write mean values to stream
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:353
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1136
virtual void AttachXMLTo(void *parent)
create XML description of PCA transformation
virtual Bool_t GetInput(const Event *event, std::vector< Float_t > &input, std::vector< Char_t > &mask, Bool_t backTransform=kFALSE) const
select the values from the event
Double_t x[n]
Definition: legend1.C:17
virtual void SetOutput(Event *event, std::vector< Float_t > &output, std::vector< Char_t > &mask, const Event *oldEvent=0, Bool_t backTransform=kFALSE) const
select the values from the event
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
UInt_t GetClass() const
Definition: Event.h:81
Class that contains all the data information.
Definition: DataSetInfo.h:60
TVectorT< Double_t > TVectorD
Definition: TVectorDfwd.h:22
Bool_t AddRawLine(void *node, const char *raw)
XML helpers.
Definition: Tools.cxx:1202
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
void CalculatePrincipalComponents(const std::vector< Event *> &)
calculate the principal components for the signal and the background data it uses the MakePrincipal m...
Linear interpolation class.
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1245
Bool_t PrepareTransformation(const std::vector< Event *> &)
calculate the principal components using the ROOT class TPrincipal and the normalization ...
unsigned int UInt_t
Definition: RtypesCore.h:42
Linear interpolation class.
void Print(std::ostream &o) const
print method
Definition: Event.cxx:352
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
Tools & gTools()
virtual void ReadFromXML(void *trfnode)
Read the transformation matrices from the xml node.
Int_t GetNrows() const
Definition: TMatrixTBase.h:122
const Bool_t kFALSE
Definition: RtypesCore.h:88
#define ClassImp(name)
Definition: Rtypes.h:359
double Double_t
Definition: RtypesCore.h:55
static RooMathCoreReg dummy
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
static constexpr double s
virtual const Event * Transform(const Event *const, Int_t cls) const
apply the principal component analysis
void Initialize()
initialization of the transformation.
char Char_t
Definition: RtypesCore.h:29
std::vector< TMatrixD * > fEigenVectors
VariablePCATransform(DataSetInfo &dsi)
constructor
const char * GetName(void *node)
XML helpers.
Definition: Tools.cxx:1194
virtual ~VariablePCATransform(void)
destructor
static constexpr double pc
std::vector< TVectorD * > fMeanValues
void X2P(std::vector< Float_t > &, const std::vector< Float_t > &, Int_t cls) const
Calculate the principal components from the original data vector x, and return it in p (function extr...
const Bool_t kTRUE
Definition: RtypesCore.h:87
virtual const Event * InverseTransform(const Event *const, Int_t cls) const
apply the principal component analysis TODO: implementation of inverse transformation Log() << kFATAL...
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)
creates C++ code fragment of the PCA transform for inclusion in standalone C++ class ...