42 #ifndef ROOT_TMVA_MsgLogger
53 TMVA::VariablePCATransform::VariablePCATransform( DataSetInfo& dsi )
54 : VariableTransformBase( dsi, Types::
kPCA, "PCA" )
87 if (!IsEnabled() || IsCreated())
return kTRUE;
89 Log() <<
kINFO <<
"Preparing the Principle Component (PCA) transformation..." <<
Endl;
91 UInt_t inputSize = fGet.size();
93 SetNVariables(inputSize);
97 Log() <<
kFATAL <<
"Cannot perform PCA transformation for " << inputSize <<
" variable only" <<
Endl;
101 if (inputSize > 200) {
102 Log() <<
kINFO <<
"----------------------------------------------------------------------------"
105 <<
": More than 200 variables, will not calculate PCA!" <<
Endl;
106 Log() <<
kINFO <<
"----------------------------------------------------------------------------"
111 CalculatePrincipalComponents( events );
123 if (!IsCreated())
return 0;
134 if (cls < 0 || cls >= (
int) fMeanValues.size()) cls = fMeanValues.size()-1;
139 if (fTransformedEvent==0 ) {
140 fTransformedEvent =
new Event();
143 std::vector<Float_t> input;
144 std::vector<Char_t> mask;
145 std::vector<Float_t> principalComponents;
147 Bool_t hasMaskedEntries = GetInput( ev, input, mask );
149 if( hasMaskedEntries ){
152 if( numMasked>0 && numOK>0 ){
153 Log() <<
kFATAL <<
"You mixed variables and targets in the decorrelation transformation. This is not possible." <<
Endl;
155 SetOutput( fTransformedEvent, input, mask, ev );
156 return fTransformedEvent;
159 X2P( principalComponents, input, cls );
160 SetOutput( fTransformedEvent, principalComponents, mask, ev );
162 return fTransformedEvent;
172 if (!IsCreated())
return 0;
174 const UInt_t nCls = GetNClasses();
180 if (cls < 0 ||
UInt_t(cls) > nCls) cls = (fMeanValues.size()==1?0:2);
183 if (fBackTransformedEvent==0 ) fBackTransformedEvent =
new Event();
185 std::vector<Float_t> principalComponents;
186 std::vector<Char_t> mask;
187 std::vector<Float_t>
output;
189 GetInput( ev, principalComponents, mask,
kTRUE );
190 P2X( output, principalComponents, cls );
191 SetOutput( fBackTransformedEvent, output, mask, ev,
kTRUE );
193 return fBackTransformedEvent;
202 UInt_t nvars = 0, ntgts = 0, nspcts = 0;
203 CountVariableTypes( nvars, ntgts, nspcts );
204 if( nvars>0 && ntgts>0 )
205 Log() <<
kFATAL <<
"Variables and targets cannot be mixed in PCA transformation." <<
Endl;
207 const Int_t inputSize = fGet.size();
210 const UInt_t nCls = GetNClasses();
211 const UInt_t maxPCA = (nCls<=1) ? nCls : nCls+1;
214 std::vector<TPrincipal*> pca(maxPCA);
220 Long64_t ievt, entries = events.size();
223 std::vector<Float_t> input;
224 std::vector<Char_t> mask;
225 for (ievt=0; ievt<entries; ievt++) {
226 const Event* ev = events[ievt];
229 Bool_t hasMaskedEntries = GetInput( ev, input, mask );
230 if (hasMaskedEntries){
233 Log() <<
kFATAL <<
"Masked entries found in event read in when calculating the principal components for the PCA transformation." <<
Endl;
237 for( std::vector<Float_t>::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp )
244 pca.at(cls)->AddRow( dvec );
245 if (nCls > 1) pca.at(maxPCA-1)->AddRow( dvec );
249 for (
UInt_t i=0; i<fMeanValues.size(); i++)
if (fMeanValues[i] != 0)
delete fMeanValues[i];
250 for (
UInt_t i=0; i<fEigenVectors.size(); i++)
if (fEigenVectors[i] != 0)
delete fEigenVectors[i];
251 fMeanValues.resize(maxPCA,0);
252 fEigenVectors.resize(maxPCA,0);
254 for (
UInt_t i=0; i<maxPCA; i++ ) {
255 pca.at(i)->MakePrincipals();
258 fMeanValues[i] =
new TVectorD( *(pca.at(i)->GetMeanValues()) );
259 fEigenVectors[i] =
new TMatrixD( *(pca.at(i)->GetEigenVectors()) );
262 for (
UInt_t i=0; i<maxPCA; i++)
delete pca.at(i);
274 const Int_t nInput = x.size();
277 for (
Int_t i = 0; i < nInput; i++) {
279 for (
Int_t j = 0; j < nInput; j++)
280 pv += (((
Double_t)x.at(j)) - (*fMeanValues.at(cls))(j)) * (*fEigenVectors.at(cls))(j,i);
293 const Int_t nInput = pc.size();
296 for (
Int_t i = 0; i < nInput; i++) {
298 for (
Int_t j = 0; j < nInput; j++)
299 xv += (((
Double_t)pc.at(j)) * (*fEigenVectors.at(cls))(i,j) ) + (*fMeanValues.at(cls))(j);
309 for (
Int_t sbType=0; sbType<2; sbType++) {
310 o <<
"# PCA mean values " << std::endl;
311 const TVectorD* means = fMeanValues[sbType];
312 o << (sbType==0 ?
"Signal" :
"Background") <<
" " << means->
GetNrows() << std::endl;
314 o << std::setprecision(12) << std::setw(20) << (*means)[row];
318 o <<
"##" << std::endl;
321 for (
Int_t sbType=0; sbType<2; sbType++) {
322 o <<
"# PCA eigenvectors " << std::endl;
323 const TMatrixD* mat = fEigenVectors[sbType];
324 o << (sbType==0 ?
"Signal" :
"Background") <<
" " << mat->
GetNrows() <<
" x " << mat->
GetNcols() << std::endl;
327 o << std::setprecision(12) << std::setw(20) << (*mat)[row][col] <<
" ";
332 o <<
"##" << std::endl;
345 for (
UInt_t sbType=0; sbType<fMeanValues.size(); sbType++) {
347 const TVectorD* means = fMeanValues[sbType];
348 gTools().
AddAttr( meanxml,
"Class", (sbType==0 ?
"Signal" :(sbType==1 ?
"Background":
"Combined")) );
358 for (
UInt_t sbType=0; sbType<fEigenVectors.size(); sbType++) {
360 const TMatrixD* mat = fEigenVectors[sbType];
361 gTools().
AddAttr( evxml,
"Class", (sbType==0 ?
"Signal" :(sbType==1 ?
"Background":
"Combined") ) );
385 void* inpnode =
NULL;
401 if (nodeName ==
"Statistics") {
408 if (fMeanValues.size()<=clsIdx) fMeanValues.resize(clsIdx+1,0);
409 if (fMeanValues[clsIdx]==0) fMeanValues[clsIdx] =
new TVectorD( nrows );
410 fMeanValues[clsIdx]->ResizeTo( nrows );
413 std::stringstream s(
gTools().GetContent(ch));
414 for (
Int_t row = 0; row<nrows; row++) s >> (*fMeanValues[clsIdx])(row);
416 else if ( nodeName ==
"Eigenvectors" ) {
423 if (fEigenVectors.size()<=clsIdx) fEigenVectors.resize(clsIdx+1,0);
424 if (fEigenVectors[clsIdx]==0) fEigenVectors[clsIdx] =
new TMatrixD( nrows, ncols );
425 fEigenVectors[clsIdx]->ResizeTo( nrows, ncols );
428 std::stringstream s(
gTools().GetContent(ch));
429 for (
Int_t row = 0; row<nrows; row++)
430 for (
Int_t col = 0; col<ncols; col++)
431 s >> (*fEigenVectors[clsIdx])[row][col];
445 istr.getline(buf,512);
447 Int_t nrows(0), ncols(0);
448 UInt_t classIdx=(classname==
"signal"?0:1);
450 for (
UInt_t i=0; i<fMeanValues.size(); i++) {
451 if (fMeanValues.at(i) != 0)
delete fMeanValues.at(i);
452 if (fEigenVectors.at(i) != 0)
delete fEigenVectors.at(i);
454 fMeanValues.resize(3);
455 fEigenVectors.resize(3);
457 Log() <<
kINFO <<
"VariablePCATransform::ReadTransformationFromStream(): " <<
Endl;
459 while (!(buf[0]==
'#'&& buf[1]==
'#')) {
461 while (*p==
' ' || *p==
'\t') p++;
462 if (*p==
'#' || *p==
'\0') {
463 istr.getline(buf,512);
466 std::stringstream sstr(buf);
468 if (strvar==
"signal" || strvar==
"background") {
471 Int_t sbType = (strvar==
"signal" ? 0 : 1);
473 if (fMeanValues[sbType] == 0) fMeanValues[sbType] =
new TVectorD( nrows );
474 else fMeanValues[sbType]->ResizeTo( nrows );
477 for (
Int_t row = 0; row<nrows; row++) istr >> (*fMeanValues[sbType])(row);
481 istr.getline(buf,512);
485 istr.getline(buf,512);
486 while (!(buf[0]==
'#'&& buf[1]==
'#')) {
488 while(*p==
' ' || *p==
'\t') p++;
489 if (*p==
'#' || *p==
'\0') {
490 istr.getline(buf,512);
493 std::stringstream sstr(buf);
495 if (strvar==
"signal" || strvar==
"background") {
498 sstr >> nrows >> dummy >> ncols;
499 Int_t sbType = (strvar==
"signal" ? 0 : 1);
501 if (fEigenVectors[sbType] == 0) fEigenVectors[sbType] =
new TMatrixD( nrows, ncols );
502 else fEigenVectors[sbType]->ResizeTo( nrows, ncols );
505 for (
Int_t row = 0; row<fEigenVectors[sbType]->GetNrows(); row++) {
506 for (
Int_t col = 0; col<fEigenVectors[sbType]->GetNcols(); col++) {
507 istr >> (*fEigenVectors[sbType])[row][col];
512 istr.getline(buf,512);
514 fMeanValues[2] =
new TVectorD( *fMeanValues[classIdx] );
515 fEigenVectors[2] =
new TMatrixD( *fEigenVectors[classIdx] );
526 UInt_t nvar = fEigenVectors[0]->GetNrows();
529 UInt_t numC = fMeanValues.size();
532 fout <<
" void X2P_"<<trCounter<<
"( const double*, double*, int ) const;" << std::endl;
533 fout <<
" double fMeanValues_"<<trCounter<<
"["<<numC<<
"]["
534 << fMeanValues[0]->GetNrows() <<
"];" << std::endl;
535 fout <<
" double fEigenVectors_"<<trCounter<<
"["<<numC<<
"]["
536 << fEigenVectors[0]->GetNrows() <<
"]["
537 << fEigenVectors[0]->GetNcols() <<
"];" << std::endl;
543 if (fMeanValues[0]->GetNrows() != fMeanValues[1]->GetNrows() ||
544 fEigenVectors[0]->GetNrows() != fEigenVectors[1]->GetNrows() ||
545 fEigenVectors[0]->GetNcols() != fEigenVectors[1]->GetNcols()) {
546 Log() <<
kFATAL <<
"<MakeFunction> Mismatch in vector/matrix dimensions" <<
Endl;
553 fout <<
"//_______________________________________________________________________" << std::endl;
554 fout <<
"inline void " << fcncName <<
"::X2P_"<<trCounter<<
"( const double* x, double* p, int index ) const" << std::endl;
555 fout <<
"{" << std::endl;
556 fout <<
" // Calculate the principal components from the original data vector" << std::endl;
557 fout <<
" // x, and return it in p (function extracted from TPrincipal::X2P)" << std::endl;
558 fout <<
" // It's the users responsibility to make sure that both x and p are" << std::endl;
559 fout <<
" // of the right size (i.e., memory must be allocated for p)." << std::endl;
560 fout <<
" const int nVar = " << nvar <<
";" << std::endl;
562 fout <<
" for (int i = 0; i < nVar; i++) {" << std::endl;
563 fout <<
" p[i] = 0;" << std::endl;
564 fout <<
" for (int j = 0; j < nVar; j++) p[i] += (x[j] - fMeanValues_"<<trCounter<<
"[index][j]) * fEigenVectors_"<<trCounter<<
"[index][j][i];" << std::endl;
565 fout <<
" }" << std::endl;
566 fout <<
"}" << std::endl;
568 fout <<
"//_______________________________________________________________________" << std::endl;
569 fout <<
"inline void " << fcncName <<
"::InitTransform_"<<trCounter<<
"()" << std::endl;
570 fout <<
"{" << std::endl;
571 fout <<
" // PCA transformation, initialisation" << std::endl;
574 fout <<
" // initialise vector of mean values" << std::endl;
575 std::streamsize dp = fout.precision();
576 for (
UInt_t index=0; index<numC; index++) {
577 for (
int i=0; i<fMeanValues[index]->GetNrows(); i++) {
578 fout <<
" fMeanValues_"<<trCounter<<
"["<<index<<
"]["<<i<<
"] = " << std::setprecision(12)
579 << (*fMeanValues[index])(i) <<
";" << std::endl;
585 fout <<
" // initialise matrix of eigenvectors" << std::endl;
586 for (
UInt_t index=0; index<numC; index++) {
587 for (
int i=0; i<fEigenVectors[index]->GetNrows(); i++) {
588 for (
int j=0; j<fEigenVectors[index]->GetNcols(); j++) {
589 fout <<
" fEigenVectors_"<<trCounter<<
"["<<index<<
"]["<<i<<
"]["<<j<<
"] = " << std::setprecision(12)
590 << (*fEigenVectors[index])(i,j) <<
";" << std::endl;
594 fout << std::setprecision(dp);
595 fout <<
"}" << std::endl;
597 fout <<
"//_______________________________________________________________________" << std::endl;
598 fout <<
"inline void " << fcncName <<
"::Transform_"<<trCounter<<
"( std::vector<double>& iv, int cls ) const" << std::endl;
599 fout <<
"{" << std::endl;
600 fout <<
" // PCA transformation" << std::endl;
601 fout <<
" const int nVar = " << nvar <<
";" << std::endl;
602 fout <<
" double *dv = new double[nVar];" << std::endl;
603 fout <<
" double *rv = new double[nVar];" << std::endl;
604 fout <<
" if (cls < 0 || cls > "<<GetNClasses()<<
") {"<< std::endl;
605 fout <<
" if ("<<GetNClasses()<<
" > 1 ) cls = "<<GetNClasses()<<
";"<< std::endl;
606 fout <<
" else cls = "<<(numC==1?0:2)<<
";"<< std::endl;
607 fout <<
" }"<< std::endl;
611 fout <<
" for (int ivar=0; ivar<nVar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)];" << std::endl;
614 fout <<
" // Perform PCA and put it into PCAed events tree" << std::endl;
615 fout <<
" this->X2P_"<<trCounter<<
"( dv, rv, cls );" << std::endl;
616 fout <<
" for (int ivar=0; ivar<nVar; ivar++) iv[indicesPut.at(ivar)] = rv[ivar];" << std::endl;
619 fout <<
" delete [] dv;" << std::endl;
620 fout <<
" delete [] rv;" << std::endl;
621 fout <<
"}" << std::endl;
Principal Components Analysis (PCA)
MsgLogger & Endl(MsgLogger &ml)
TVectorT< Double_t > TVectorD
TMatrixT< Double_t > TMatrixD
void Initialize(Bool_t useTMVAStyle=kTRUE)
void Print(std::ostream &o) const
print method
static RooMathCoreReg dummy
Abstract ClassifierFactory template that handles arbitrary types.
static void output(int code)