// NOTE(review): excerpt of a larger routine. The enclosing signature, closing
// braces and several statements are not part of this listing, so only the
// visible lines are described. The leading integers look like line numbers
// carried over from the original listing.
// Announce at INFO level that the PCA transformation is being prepared.
94 Log() << kINFO <<
"Preparing the Principle Component (PCA) transformation..." <<
Endl;
// Guard: an input size of 0 or 1 is reported at FATAL level.
101 if (inputSize <= 1) {
102 Log() << kFATAL <<
"Cannot perform PCA transformation for " << inputSize <<
" variable only" <<
Endl;
// Guard: above 200 inputs an INFO banner (message framed by dashed lines)
// states that the PCA will not be calculated.
106 if (inputSize > 200) {
107 Log() << kINFO <<
"----------------------------------------------------------------------------"
110 <<
": More than 200 variables, will not calculate PCA!" <<
Endl;
111 Log() << kINFO <<
"----------------------------------------------------------------------------"
// NOTE(review): excerpt. The enclosing function header and the statements
// that fill these buffers are not part of this listing.
// Local buffers: raw input values, a Char_t mask, and the projected values.
148 std::vector<Float_t> input;
149 std::vector<Char_t> mask;
150 std::vector<Float_t> principalComponents;
// Only when masked entries were reported: having both numMasked and numOK
// greater than zero is rejected with a FATAL message.
// (numOK presumably counts the unmasked entries; confirm where it is set.)
154 if( hasMaskedEntries ){
157 if( numMasked>0 && numOK>0 ){
158 Log() << kFATAL <<
"You mixed variables and targets in the decorrelation transformation. This is not possible." <<
Endl;
// Pass the buffers and the class index to X2P. Presumably X2P fills
// principalComponents from input; confirm against its definition.
164 X2P( principalComponents, input, cls );
// NOTE(review): excerpt. The enclosing function header and the statements
// that fill these buffers are not part of this listing.
// Local buffers: principal-component values, a Char_t mask, and the result.
190 std::vector<Float_t> principalComponents;
191 std::vector<Char_t> mask;
192 std::vector<Float_t> output;
// Pass the buffers and the class index to P2X. Presumably P2X fills output
// from principalComponents; confirm against its definition.
195 P2X( output, principalComponents, cls );
// NOTE(review): excerpt. The enclosing function header, the code that sets
// the counters, allocates the TPrincipal objects and builds dvec, and all
// closing braces are not part of this listing.
// Counters start at zero; the code that updates them is not shown.
207 UInt_t nvars = 0, ntgts = 0, nspcts = 0;
// Having both variables and targets counted is a FATAL configuration error.
209 if( nvars>0 && ntgts>0 )
210 Log() << kFATAL <<
"Variables and targets cannot be mixed in PCA transformation." <<
Endl;
// Number of TPrincipal slots: nCls when there is at most one class, otherwise
// nCls+1. The extra last slot receives every row (see the AddRow calls below).
216 const UInt_t maxPCA = (nCls<=1) ? nCls : nCls+1;
219 std::vector<TPrincipal*> pca(maxPCA);
// Event loop over the whole events container.
225 Long64_t ievt, entries = events.size();
228 std::vector<Float_t> input;
229 std::vector<Char_t> mask;
230 for (ievt=0; ievt<entries; ievt++) {
231 const Event* ev = events[ievt];
// Masked entries at this stage produce a WARNING followed by a FATAL message.
// The ostringstream is declared here; how it is used is not shown.
235 if (hasMaskedEntries){
236 Log() << kWARNING <<
"Print event which triggers an error" <<
Endl;
237 std::ostringstream oss;
240 Log() << kFATAL <<
"Masked entries found in event read in when calculating the principal components for the PCA transformation." <<
Endl;
// Walk the input values; the loop body is not part of this listing.
244 for( std::vector<Float_t>::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp )
// Add the row to the slot of the event class and, with more than one class,
// also to the last slot.
251 pca.at(cls)->AddRow( dvec );
252 if (nCls > 1) pca.at(maxPCA-1)->AddRow( dvec );
// Call MakePrincipals on every slot once all rows have been added.
261 for (
UInt_t i=0; i<maxPCA; i++ ) {
262 pca.at(i)->MakePrincipals();
// Release every TPrincipal object held in the vector.
269 for (
UInt_t i=0; i<maxPCA; i++)
delete pca.at(i);
// NOTE(review): excerpt. The function header and both loop bodies are not
// part of this listing.
// The dimension is taken from the size of x and stored as Int_t.
281 const Int_t nInput =
x.size();
// Nested i/j sweep over an nInput x nInput index space.
284 for (
Int_t i = 0; i < nInput; i++) {
286 for (
Int_t j = 0; j < nInput; j++)
// NOTE(review): excerpt. The function header and both loop bodies are not
// part of this listing.
// The dimension is taken from the size of pc and stored as Int_t.
300 const Int_t nInput = pc.size();
// Nested i/j sweep over an nInput x nInput index space.
303 for (
Int_t i = 0; i < nInput; i++) {
305 for (
Int_t j = 0; j < nInput; j++)
// NOTE(review): excerpt. The function header, the lookup of means/mat and the
// row/col loops are not part of this listing.
// Section 1: mean values, written once for sbType 0 and once for sbType 1.
316 for (
Int_t sbType=0; sbType<2; sbType++) {
317 o <<
"# PCA mean values " << std::endl;
// Header line: class label (Signal for 0, Background otherwise) and row count.
319 o << (sbType==0 ?
"Signal" :
"Background") <<
" " << means->
GetNrows() << std::endl;
// Each value is written with precision 12 in a field of width 20.
321 o << std::setprecision(12) << std::setw(20) << (*means)[row];
// A line holding only ## is written after the mean-value output.
325 o <<
"##" << std::endl;
// Section 2: eigenvector matrices, again for sbType 0 and 1.
328 for (
Int_t sbType=0; sbType<2; sbType++) {
329 o <<
"# PCA eigenvectors " << std::endl;
// Header line: class label followed by the dimensions as rows x cols.
331 o << (sbType==0 ?
"Signal" :
"Background") <<
" " << mat->
GetNrows() <<
" x " << mat->
GetNcols() << std::endl;
// Matrix elements use the same precision and width, separated by a blank.
334 o << std::setprecision(12) << std::setw(20) << (*mat)[row][col] <<
" ";
// A line holding only ## is written after the eigenvector output.
339 o <<
"##" << std::endl;
// NOTE(review): excerpt. The function header, the creation of meanxml/evxml
// and the loop that defines sbType are not part of this listing.
// Set the Class attribute on the mean-value node: Signal for sbType 0,
// Background for sbType 1, Combined for any other value.
355 gTools().
AddAttr( meanxml,
"Class", (sbType==0 ?
"Signal" :(sbType==1 ?
"Background":
"Combined")) );
// Same labelling for the eigenvector node.
368 gTools().
AddAttr( evxml,
"Class", (sbType==0 ?
"Signal" :(sbType==1 ?
"Background":
"Combined") ) );
// NOTE(review): excerpt. The function header, the child-node iteration and
// the statements that consume the string streams are not part of this listing.
392 void* inpnode = NULL;
// Node named Statistics: its text content is wrapped in a string stream.
408 if (nodeName ==
"Statistics") {
420 std::stringstream s(
gTools().GetContent(ch));
// Node named Eigenvectors: same approach, followed by a row/column sweep.
// The loop body is not shown.
423 else if ( nodeName ==
"Eigenvectors" ) {
435 std::stringstream s(
gTools().GetContent(ch));
436 for (
Int_t row = 0; row<nrows; row++)
437 for (
Int_t col = 0; col<ncols; col++)
// NOTE(review): excerpt. The function header, the declarations of buf, p,
// strvar and dummy, and the code that stores the parsed values are not part
// of this listing.
// Read one line into buf; every getline call here passes a limit of 512.
452 istr.getline(buf,512);
454 Int_t nrows(0), ncols(0);
// Class index: 0 when classname equals "signal", otherwise 1.
455 UInt_t classIdx=(classname==
"signal"?0:1);
464 Log() << kINFO <<
"VariablePCATransform::ReadTransformationFromStream(): " <<
Endl;
// First section: loop until a line starting with two # characters is in buf.
466 while (!(buf[0]==
'#'&& buf[1]==
'#')) {
// Skip leading blanks and tabs through the cursor p.
468 while (*p==
' ' || *p==
'\t') p++;
// Lines that are empty or start with # after trimming: fetch the next line.
469 if (*p==
'#' || *p==
'\0') {
470 istr.getline(buf,512);
// Otherwise parse the line through a string stream.
473 std::stringstream sstr(buf);
// Only lines whose first token is "signal" or "background" are handled.
475 if (strvar==
"signal" || strvar==
"background") {
478 Int_t sbType = (strvar==
"signal" ? 0 : 1);
488 istr.getline(buf,512);
// Second section: same scanning scheme, again terminated by a ## line.
492 istr.getline(buf,512);
493 while (!(buf[0]==
'#'&& buf[1]==
'#')) {
495 while(*p==
' ' || *p==
'\t') p++;
496 if (*p==
'#' || *p==
'\0') {
497 istr.getline(buf,512);
500 std::stringstream sstr(buf);
502 if (strvar==
"signal" || strvar==
"background") {
// Read the two dimensions; dummy presumably absorbs the token between them.
// Confirm against the writer format.
505 sstr >> nrows >> dummy >> ncols;
506 Int_t sbType = (strvar==
"signal" ? 0 : 1);
519 istr.getline(buf,512);
// NOTE(review): excerpt. The function header and the condition guarding the
// FATAL message are not part of this listing; the third statement is cut off.
// Emit the declaration of the generated helper X2P_<trCounter>.
539 fout <<
" void X2P_"<<trCounter<<
"( const double*, double*, int ) const;" << std::endl;
// Emit the mean-value array declaration, sized [numC][rows of fMeanValues[0]].
540 fout <<
" double fMeanValues_"<<trCounter<<
"["<<numC<<
"]["
541 <<
fMeanValues[0]->GetNrows() <<
"];" << std::endl;
// Start of the eigenvector array declaration; the rest is not shown.
542 fout <<
" double fEigenVectors_"<<trCounter<<
"["<<numC<<
"]["
// FATAL message for a vector/matrix dimension mismatch.
553 Log() << kFATAL <<
"<MakeFunction> Mismatch in vector/matrix dimensions" <<
Endl;
// NOTE(review): excerpt. This run of statements writes the source text of the
// generated function <fcncName>::X2P_<trCounter> to fout.
// Separator comment line in the generated file.
560 fout <<
"//_______________________________________________________________________" << std::endl;
// Signature of the generated function.
561 fout <<
"inline void " << fcncName <<
"::X2P_"<<trCounter<<
"( const double* x, double* p, int index ) const" << std::endl;
562 fout <<
"{" << std::endl;
// Explanatory comments that end up inside the generated function.
563 fout <<
" // Calculate the principal components from the original data vector" << std::endl;
564 fout <<
" // x, and return it in p (function extracted from TPrincipal::X2P)" << std::endl;
565 fout <<
" // It's the users responsibility to make sure that both x and p are" << std::endl;
566 fout <<
" // of the right size (i.e., memory must be allocated for p)." << std::endl;
// The variable count is written into the generated code as a literal.
567 fout <<
" const int nVar = " << nvar <<
";" << std::endl;
// Generated loop: p[i] is the sum over j of (x[j] - mean[index][j]) times
// eigenvector[index][j][i].
569 fout <<
" for (int i = 0; i < nVar; i++) {" << std::endl;
570 fout <<
" p[i] = 0;" << std::endl;
571 fout <<
" for (int j = 0; j < nVar; j++) p[i] += (x[j] - fMeanValues_"<<trCounter<<
"[index][j]) * fEigenVectors_"<<trCounter<<
"[index][j][i];" << std::endl;
572 fout <<
" }" << std::endl;
573 fout <<
"}" << std::endl;
// NOTE(review): excerpt. This run of statements writes the source text of the
// generated function <fcncName>::InitTransform_<trCounter> to fout. The
// right-hand sides of the emitted assignments and the inner i/j loops of the
// eigenvector part are not part of this listing.
575 fout <<
"//_______________________________________________________________________" << std::endl;
576 fout <<
"inline void " << fcncName <<
"::InitTransform_"<<trCounter<<
"()" << std::endl;
577 fout <<
"{" << std::endl;
578 fout <<
" // PCA transformation, initialisation" << std::endl;
581 fout <<
" // initialise vector of mean values" << std::endl;
// Remember the current stream precision; it is restored after the
// 12-digit output below.
582 std::streamsize dp = fout.precision();
// One emitted assignment per class index and mean-vector row.
583 for (
UInt_t index=0; index<numC; index++) {
584 for (
int i=0; i<
fMeanValues[index]->GetNrows(); i++) {
585 fout <<
" fMeanValues_"<<trCounter<<
"["<<index<<
"]["<<i<<
"] = " << std::setprecision(12)
592 fout <<
" // initialise matrix of eigenvectors" << std::endl;
// One emitted assignment per class index and matrix element [i][j].
593 for (
UInt_t index=0; index<numC; index++) {
596 fout <<
" fEigenVectors_"<<trCounter<<
"["<<index<<
"]["<<i<<
"]["<<j<<
"] = " << std::setprecision(12)
// Restore the precision saved in dp and close the generated function.
601 fout << std::setprecision(dp);
602 fout <<
"}" << std::endl;
// NOTE(review): excerpt. This run of statements writes the source text of the
// generated function <fcncName>::Transform_<trCounter> to fout. Some emitted
// lines are missing from this listing, including the first branch inside the
// class-range check and the setup of indicesGet/indicesPut.
604 fout <<
"//_______________________________________________________________________" << std::endl;
605 fout <<
"inline void " << fcncName <<
"::Transform_"<<trCounter<<
"( std::vector<double>& iv, int cls ) const" << std::endl;
606 fout <<
"{" << std::endl;
607 fout <<
" // PCA transformation" << std::endl;
608 fout <<
" const int nVar = " << nvar <<
";" << std::endl;
// The generated code allocates two scratch arrays with new[]; the matching
// delete[] lines are emitted at the end of the function.
// NOTE(review): if the emitted .at() calls can throw, those delete[] lines
// would be skipped in the generated code. Confirm the types of
// indicesGet/indicesPut.
609 fout <<
" double *dv = new double[nVar];" << std::endl;
610 fout <<
" double *rv = new double[nVar];" << std::endl;
// Emitted range check: cls below 0 or above GetNClasses() enters the fallback
// block, so cls equal to GetNClasses() counts as in range.
611 fout <<
" if (cls < 0 || cls > "<<
GetNClasses()<<
") {"<< std::endl;
// Emitted else-branch of the fallback: 0 when numC is 1, otherwise 2.
613 fout <<
" else cls = "<<(numC==1?0:2)<<
";"<< std::endl;
614 fout <<
" }"<< std::endl;
// Emitted gather: copy the selected inputs from iv into dv.
618 fout <<
" for (int ivar=0; ivar<nVar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)];" << std::endl;
621 fout <<
" // Perform PCA and put it into PCAed events tree" << std::endl;
// Emitted call of the generated X2P_<trCounter> helper with dv, rv and cls.
622 fout <<
" this->X2P_"<<trCounter<<
"( dv, rv, cls );" << std::endl;
// Emitted scatter: write the results from rv back into iv.
623 fout <<
" for (int ivar=0; ivar<nVar; ivar++) iv[indicesPut.at(ivar)] = rv[ivar];" << std::endl;
626 fout <<
" delete [] dv;" << std::endl;
627 fout <<
" delete [] rv;" << std::endl;
628 fout <<
"}" << std::endl;
int Int_t
Signed integer 4 bytes (int).
char Char_t
Character 1 byte (char).
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
bool Bool_t
Boolean (0=false, 1=true) (bool).
double Double_t
Double 8 bytes.
long long Long64_t
Portable signed long integer 8 bytes.
float Float_t
Float 4 bytes (float).
TMatrixT< Double_t > TMatrixD
TVectorT< Double_t > TVectorD
Class that contains all the data information.
void Print(std::ostream &o) const
print method
Singleton class for Global types used by TMVA.
MsgLogger & Endl(MsgLogger &ml)