Logo ROOT  
Reference Guide
Loading...
Searching...
No Matches
VariableGaussTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableGaussTransform *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2005-2011: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * U. of Bonn, Germany *
24 * *
25 * Redistribution and use in source and binary forms, with or without *
26 * modification, are permitted according to the terms listed in LICENSE *
27 * (see tmva/doc/LICENSE) *
28 **********************************************************************************/
29
30/*! \class TMVA::VariableGaussTransform
31\ingroup TMVA
32Gaussian Transformation of input variables.
33*/
34
36
37#include "TMVA/DataSetInfo.h"
38#include "TMVA/MsgLogger.h"
39#include "TMVA/PDF.h"
40#include "TMVA/Tools.h"
41#include "TMVA/Types.h"
42#include "TMVA/Version.h"
43
44#include "TH1F.h"
45#include "TMath.h"
46#include "TVectorF.h"
47#include "TVectorD.h"
48
49#include <exception>
50#include <iostream>
51#include <list>
52#include <limits>
53#include <stdexcept>
54
55
56////////////////////////////////////////////////////////////////////////////////
57/// constructor
58/// can only be applied one after the other when they are created. But in order to
59/// determine the Gauss transformation
60
62: VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
67{
68 if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
69 SetName("Uniform");
70 }
71}
72
73////////////////////////////////////////////////////////////////////////////////
74/// destructor
75
80
81////////////////////////////////////////////////////////////////////////////////
82
86
87////////////////////////////////////////////////////////////////////////////////
88/// calculate the cumulative distributions
89
91{
92 Initialize();
93
94 if (!IsEnabled() || IsCreated()) return kTRUE;
95
96 Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
97
98 UInt_t inputSize = fGet.size();
99 SetNVariables(inputSize);
100
101 if (inputSize > 200) {
102 Log() << kWARNING << "----------------------------------------------------------------------------"
103 << Endl;
104 Log() << kWARNING
105 << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
106 Log() << kWARNING << "----------------------------------------------------------------------------"
107 << Endl;
108 // return kFALSE;
109 }
110
111 GetCumulativeDist( events );
112
113 SetCreated( kTRUE );
114
115 return kTRUE;
116}
117
118////////////////////////////////////////////////////////////////////////////////
119/// apply the Gauss transformation
120
122{
123 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
124 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
125 //EVT if (cls <0 || cls > GetNClasses() ) {
126 //EVT cls = GetNClasses();
127 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
128 //EVT}
129 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
130 //EVT workaround end
131
132 // get the variable vector of the current event
133 UInt_t inputSize = fGet.size();
134
135 std::vector<Float_t> input(0);
136 std::vector<Float_t> output(0);
137
138 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
139 GetInput( ev, input, mask );
140
141 std::vector<Char_t>::iterator itMask = mask.begin();
142
143 // TVectorD vec( inputSize );
144 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
145 Double_t cumulant;
146 //transformation
147 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
148
149 if ( (*itMask) ){
150 ++itMask;
151 continue;
152 }
153
154 if (0 != fCumulativePDF[ivar][cls]) {
155 // first make it flat
156 if(fTMVAVersion>TMVA_VERSION(3,9,7))
157 cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
158 else
159 cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
160 cumulant = TMath::Min(cumulant,1.-10e-10);
161 cumulant = TMath::Max(cumulant,0.+10e-10);
162
163 if (fFlatNotGauss)
164 output.push_back( cumulant );
165 else {
166 // sanity correction for out-of-range values
167 Double_t maxErfInvArgRange = 0.99999999;
168 Double_t arg = 2.0*cumulant - 1.0;
169 arg = TMath::Min(+maxErfInvArgRange,arg);
170 arg = TMath::Max(-maxErfInvArgRange,arg);
171
172 output.push_back( 1.414213562*TMath::ErfInverse(arg) );
173 }
174 }
175 }
176
177 if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
179 fTransformedEvent = new Event();
180 }
181
182 SetOutput( fTransformedEvent, output, mask, ev );
183
184 return fTransformedEvent;
185}
186
187////////////////////////////////////////////////////////////////////////////////
188/// apply the inverse Gauss or inverse uniform transformation
189
191{
192 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
193 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
194 //EVT if (cls <0 || cls > GetNClasses() ) {
195 //EVT cls = GetNClasses();
196 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
197 //EVT}
198 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
199 //EVT workaround end
200
201 // get the variable vector of the current event
202 UInt_t inputSize = fGet.size();
203
204 std::vector<Float_t> input(0);
205 std::vector<Float_t> output(0);
206
207 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
208 GetInput( ev, input, mask, kTRUE );
209
210 std::vector<Char_t>::iterator itMask = mask.begin();
211
212 // TVectorD vec( inputSize );
213 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
214 Double_t invCumulant;
215 //transformation
216 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
217
218 if ( (*itMask) ){
219 ++itMask;
220 continue;
221 }
222
223 if (0 != fCumulativePDF[ivar][cls]) {
224 invCumulant = input.at(ivar);
225
226 // first de-gauss ist if gaussianized
227 if (!fFlatNotGauss)
228 invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
229
230 // then de-uniform the values
231 if(fTMVAVersion>TMVA_VERSION(4,0,0))
232 invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
233 else
234 Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
235
236 output.push_back(invCumulant);
237 }
238 }
239
241
242 SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
243
245}
246
247////////////////////////////////////////////////////////////////////////////////
248/// fill the cumulative distributions
249
250void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
251{
252 const UInt_t inputSize = fGet.size();
253 // const UInt_t nCls = GetNClasses();
254
255 // const UInt_t nvar = GetNVariables();
256 UInt_t nevt = events.size();
257
258 const UInt_t nClasses = GetNClasses();
259 UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes separately + one where all classes are treated (added) together
260
261 if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
262
263 UInt_t **nbins = new UInt_t*[numDist];
264
265 std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
266 std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
267 for (UInt_t i=0; i < numDist; i++) {
268 listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
269 vsForBinning[i] = new std::vector<Float_t> [inputSize];
270 nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
271 }
272
273 std::vector<Float_t> input;
274 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
275
276 // perform event loop
277 Float_t *sumOfWeights = new Float_t[numDist];
278 Float_t *minWeight = new Float_t[numDist];
279 Float_t *maxWeight = new Float_t[numDist];
280 for (UInt_t i=0; i<numDist; i++) {
281 sumOfWeights[i]=0;
282 minWeight[i]=10E10; // TODO: change this to std::max ?
283 maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
284 }
285 for (UInt_t ievt=0; ievt < nevt; ievt++) {
286 const Event* ev= events[ievt];
287 Int_t cls = ev->GetClass();
288 Float_t eventWeight = ev->GetWeight();
289 sumOfWeights[cls] += eventWeight;
290 if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
291 if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
292 if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
293
294 Bool_t hasMaskedEntries = GetInput( ev, input, mask );
295 if( hasMaskedEntries ){
296 Log() << kWARNING << "Incomplete event" << Endl;
297 std::ostringstream oss;
298 ev->Print(oss);
299 Log() << oss.str();
300 Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
301 }
302
303
304 Int_t ivar = 0;
305 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
306 Float_t value = (*itInput);
307 listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
308 if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
309 ++ivar;
310 }
311 }
312 if (numDist > 1) {
313 for (UInt_t icl=0; icl<numDist-1; icl++){
314 minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
315 maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
316 }
317 }
318
319 // Sorting the lists, getting nbins ...
320 const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
321 const UInt_t nbinsmax=2000; // maximum number of bins
322
323 for (UInt_t icl=0; icl< numDist; icl++){
324 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
325 listsForBinning[icl][ivar].sort();
326 std::list< TMVA::TMVAGaussPair >::iterator it;
327 Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
328 sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
329 Float_t sum=0;
330 Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
331 Float_t lastev_value=ev_value;
332 const Float_t eps = 1.e-4;
333 vsForBinning[icl][ivar].push_back(ev_value-eps);
334 vsForBinning[icl][ivar].push_back(ev_value);
335
336 for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); ++it){
337 sum+= it->GetWeight();
338 if (sum >= sumPerBin) {
339 ev_value=it->GetValue();
340 if (ev_value>lastev_value) { // protection against bin width of 0
341 vsForBinning[icl][ivar].push_back(ev_value);
342 sum = 0.;
343 lastev_value=ev_value;
344 }
345 }
346 }
347 if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
348 nbins[icl][ivar] = vsForBinning[icl][ivar].size();
349 }
350 }
351
352 delete[] sumOfWeights;
353 delete[] minWeight;
354 delete[] maxWeight;
355
356 // create histogram for the cumulative distribution.
357 fCumulativeDist.resize(inputSize);
358 for (UInt_t icls = 0; icls < numDist; icls++) {
359 for (UInt_t ivar=0; ivar < inputSize; ivar++){
360 Float_t* binnings = new Float_t[nbins[icls][ivar]];
361 //the binning for this particular histogram:
362 for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
363 binnings[k] = vsForBinning[icls][ivar][k];
364 }
365 fCumulativeDist[ivar].resize(numDist);
366 if (fCumulativeDist[ivar][icls] ) {
367 delete fCumulativeDist[ivar][icls];
368 }
369 fCumulativeDist[ivar][icls] = new TH1F(TString::Format("Cumulative_Var%d_cls%d",ivar,icls),
370 TString::Format("Cumulative_Var%d_cls%d",ivar,icls),
371 nbins[icls][ivar] -1, // class icls
372 binnings);
373 fCumulativeDist[ivar][icls]->SetDirectory(nullptr);
374 delete [] binnings;
375 }
376 }
377
378 // Deallocation
379 for (UInt_t i=0; i<numDist; i++) {
380 delete [] listsForBinning[numDist-i-1];
381 delete [] vsForBinning[numDist-i-1];
382 delete [] nbins[numDist-i-1];
383 }
384 delete [] listsForBinning;
385 delete [] vsForBinning;
386 delete [] nbins;
387
388 // perform event loop
389 std::vector<Int_t> ic(numDist);
390 for (UInt_t ievt=0; ievt<nevt; ievt++) {
391
392 const Event* ev= events[ievt];
393 Int_t cls = ev->GetClass();
394 Float_t eventWeight = ev->GetWeight();
395
396 GetInput( ev, input, mask );
397
398 Int_t ivar = 0;
399 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
400 Float_t value = (*itInput);
401 fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
402 if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
403
404 ++ivar;
405 }
406 }
407
408 // clean up
410
411 // now sum up in order to get the real cumulative distribution
412 Double_t sum = 0, total=0;
413 fCumulativePDF.resize(inputSize);
414 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
415 // fCumulativePDF.resize(ivar+1);
416 for (UInt_t icls=0; icls<numDist; icls++) {
417 (fCumulativeDist[ivar][icls])->Smooth();
418 sum = 0;
419 total = 0.;
420 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
421 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
422 if (val>0) total += val;
423 }
424 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
425 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
426 if (val>0) sum += val;
427 (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
428 }
429 // create PDf
430 fCumulativePDF[ivar].push_back(new PDF( TString::Format("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
431 }
432 }
433}
434
435////////////////////////////////////////////////////////////////////////////////
436
438{
439 Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
440}
441
442////////////////////////////////////////////////////////////////////////////////
443/// clean up of cumulative arrays
444
446 if (opt == "ALL" || opt == "PDF"){
447 for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
448 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
449 if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
450 }
451 }
452 fCumulativePDF.clear();
453 }
454 if (opt == "ALL" || opt == "Dist"){
455 for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
456 for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
457 if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
458 }
459 }
460 fCumulativeDist.clear();
461 }
462}
463////////////////////////////////////////////////////////////////////////////////
464/// create XML description of Gauss transformation
465
467 void* trfxml = gTools().AddChild(parent, "Transform");
468 gTools().AddAttr(trfxml, "Name", "Gauss");
469 gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
470
472
473 UInt_t nvar = fGet.size();
474 for (UInt_t ivar=0; ivar<nvar; ivar++) {
475 void* varxml = gTools().AddChild( trfxml, "Variable");
476 // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
477 gTools().AddAttr( varxml, "VarIndex", ivar );
478
479 if ( fCumulativePDF[ivar][0]==0 ||
480 (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
481 Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
482
483 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
484 void* pdfxml = gTools().AddChild( varxml, TString::Format("CumulativePDF_cls%d",icls));
485 (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
486 }
487 }
488}
489
490////////////////////////////////////////////////////////////////////////////////
491/// Read the transformation matrices from the xml node
492
494 // clean up first
496 TString FlatOrGauss;
497
498 gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
499
500 if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
501 else fFlatNotGauss = kFALSE;
502
503 Bool_t newFormat = kFALSE;
504
505 void* inpnode = NULL;
506
507 inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
508 if( inpnode!=NULL )
509 newFormat = kTRUE; // new xml format
510
511 void* varnode = NULL;
512 if( newFormat ){
513 // ------------- new format --------------------
514 // read input
516
517 varnode = gTools().GetNextChild(inpnode);
518 }else
519 varnode = gTools().GetChild(trfnode);
520
521 // Read the cumulative distribution
522
523 TString varname, histname, classname;
524 UInt_t ivar;
525 while(varnode) {
526 if( gTools().HasAttr(varnode,"Name") )
527 gTools().ReadAttr(varnode, "Name", varname);
528 gTools().ReadAttr(varnode, "VarIndex", ivar);
529
530 void* clsnode = gTools().GetChild( varnode);
531
532 while(clsnode) {
533 void* pdfnode = gTools().GetChild( clsnode);
534 PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
535 pdfToRead->ReadXML(pdfnode); // pdfnode
536 // push_back PDF
537 fCumulativePDF.resize( ivar+1 );
538 fCumulativePDF[ivar].push_back(pdfToRead);
539 clsnode = gTools().GetNextChild(clsnode);
540 }
541
542 varnode = gTools().GetNextChild(varnode);
543 }
544 SetCreated();
545}
546
547////////////////////////////////////////////////////////////////////////////////
548/// Read the cumulative distribution
549
551{
552 TDirectory::TContext dirCtx{nullptr}; // Don't register histograms to current directory
553 char buf[512];
554 istr.getline(buf,512);
555
556 TString strvar, dummy;
557
558 while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
559 char* p = buf;
560 while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
561 if (*p=='#' || *p=='\0') {
562 istr.getline(buf,512);
563 continue; // if comment or empty line, read the next line
564 }
565 std::stringstream sstr(buf);
566 sstr >> strvar;
567
568 if (strvar=="CumulativeHistogram") {
569 UInt_t type(0), ivar(0);
570 TString devnullS(""),hname("");
571 Int_t nbins(0);
572
573 // coverity[tainted_data_argument]
574 sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
575
576 Float_t *Binnings = new Float_t[nbins+1];
577 Float_t val;
578 istr >> devnullS; // read the line "BinBoundaries" ..
579 for (Int_t ibin=0; ibin<nbins+1; ibin++) {
580 istr >> val;
581 Binnings[ibin]=val;
582 }
583
584 if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
585 if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
586
587 TH1F * histToRead = fCumulativeDist[ivar][type];
588 if ( histToRead !=0 ) delete histToRead;
589 // recreate the cumulative histogram to be filled with the values read
590 histToRead = new TH1F( hname, hname, nbins, Binnings );
591 histToRead->SetDirectory(nullptr);
592 fCumulativeDist[ivar][type]=histToRead;
593
594 istr >> devnullS; // read the line "BinContent" ..
595 for (Int_t ibin=0; ibin<nbins; ibin++) {
596 istr >> val;
597 histToRead->SetBinContent(ibin+1,val);
598 }
599
600 PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
601 // push_back PDF
602 fCumulativePDF.resize(ivar+1);
603 fCumulativePDF[ivar].resize(type+1);
604 fCumulativePDF[ivar][type] = pdf;
605 delete [] Binnings;
606 }
607
608 // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
609 if (strvar=="Uniform") { // don't correct this spelling mistake
610 sstr >> fFlatNotGauss;
611 istr.getline(buf,512);
612 break;
613 }
614
615 istr.getline(buf,512); // reading the next line
616 }
617
618 UInt_t classIdx=(classname=="signal")?0:1;
619 for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
620 PDF* src = fCumulativePDF[ivar][classIdx];
621 fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
622 }
623
625
626 SetCreated();
627}
628
629////////////////////////////////////////////////////////////////////////////////
630
632
633 Int_t bin = h->FindBin(x);
634 bin = TMath::Max(bin,1);
635 bin = TMath::Min(bin,h->GetNbinsX());
636
637 Double_t cumulant;
638 Double_t x0, x1, y0, y1;
639 Double_t total = h->GetNbinsX()*fElementsperbin;
640 Double_t supmin = 0.5/total;
641
642 x0 = h->GetBinLowEdge(TMath::Max(bin,1));
643 x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
644
645 y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
646 y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
647
648 if (bin == 0) {
649 y0 = supmin;
650 y1 = supmin;
651 }
652 if (bin == 1) {
653 y0 = supmin;
654 }
655 if (bin > h->GetNbinsX()) {
656 y0 = 1.-supmin;
657 y1 = 1.-supmin;
658 }
659 if (bin == h->GetNbinsX()) {
660 y1 = 1.-supmin;
661 }
662
663 if (x0 == x1) {
664 cumulant = y1;
665 } else {
666 cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
667 }
668
669 if (x <= h->GetBinLowEdge(1)){
670 cumulant = supmin;
671 }
672 if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
673 cumulant = 1-supmin;
674 }
675 return cumulant;
676}
677
678////////////////////////////////////////////////////////////////////////////////
679/// prints the transformation
680
682{
683 Int_t cls = 0;
684 Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
685 cls++;
686}
687
688////////////////////////////////////////////////////////////////////////////////
689/// creates the transformation function
690///
691
692void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName,
693 Int_t part, UInt_t trCounter, Int_t )
694{
695 const UInt_t nvar = fGet.size();
696 UInt_t numDist = GetNClasses() + 1;
697 Int_t nBins = -1;
698 for (UInt_t icls=0; icls<numDist; icls++) {
699 for (UInt_t ivar=0; ivar<nvar; ivar++) {
700 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
701 if (nbin > nBins) nBins=nbin;
702 }
703 }
704
705 // creates the gauss transformation function
706 if (part==1) {
707 fout << std::endl;
708 fout << " int nvar;" << std::endl;
709 fout << std::endl;
710 // declare variables
711 fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
712 fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
713 fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
714 fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
715 fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
716 }
717 if (part==2) {
718 fout << std::endl;
719 fout << "#include \"math.h\"" << std::endl;
720 fout << std::endl;
721 fout << "//_______________________________________________________________________" << std::endl;
722 fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
723 fout << "{" << std::endl;
724 fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
725 fout << " nvar=" << nvar << ";" << std::endl;
726 for (UInt_t icls=0; icls<numDist; icls++) {
727 for (UInt_t ivar=0; ivar<nvar; ivar++) {
728 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
729 fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
730 }
731 }
732
733 // fill meat here
734 // loop over nvar , cls, loop over nBins
735 // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
736 for (UInt_t icls=0; icls<numDist; icls++) {
737 for (UInt_t ivar=0; ivar<nvar; ivar++) {
738 // Int_t idx = 0;
739 try{
740 // idx = fGet.at(ivar).second;
741 Char_t type = fGet.at(ivar).first;
742 if( type != 'v' ){
743 Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
744 }
745 }catch( std::out_of_range &){
746 Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
747 }
748
749 // Double_t xmn=Variables()[idx].GetMin();
750 // Double_t xmx=Variables()[idx].GetMax();
751 Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
752 Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
753
754 fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
755 fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
756 for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
757 fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
758 fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
759
760 }
761 }
762 }
763 fout << "}" << std::endl;
764 fout << std::endl;
765 fout << "//_______________________________________________________________________" << std::endl;
766 fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
767 fout << "{" << std::endl;
768 fout << " // Gauss/Uniform transformation" << std::endl;
769 fout << " int cls=clsIn;" << std::endl;
770 fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
771 fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
772 fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
773 fout << " }"<< std::endl;
774
775 fout << " // copy the variables which are going to be transformed "<< std::endl;
776 VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
777 fout << " static std::vector<double> dv; "<< std::endl;
778 fout << " dv.resize(nvar); "<< std::endl;
779 fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
780 fout << " "<< std::endl;
781 fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
782 fout << " double cumulant; "<< std::endl;
783 fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
784 fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
785 fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
786 fout << " int ibin=0; "<< std::endl;
787 fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
788 fout << " "<< std::endl;
789 fout << " if (ibin<0) { ibin=0;} "<< std::endl;
790 fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
791 fout << " int nextbin = ibin; "<< std::endl;
792 fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
793 fout << " nextbin++; "<< std::endl;
794 fout << " else "<< std::endl;
795 fout << " nextbin--; "<< std::endl;
796 fout << " "<< std::endl;
797 fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
798 fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
799 fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
800 fout << " "<< std::endl;
801 fout << " "<< std::endl;
802 fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
803 fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
804 fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
805 fout << " else { "<< std::endl;
806 fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
807 fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
808 fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
809 fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
810 fout << " double inverf=0., stp=1. ; "<< std::endl;
811 fout << " while (stp >1.e-10){; "<< std::endl;
812 fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
813 fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
814 fout << " else inverf += stp; "<< std::endl;
815 fout << " } ; "<< std::endl;
816 fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
817 fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
818 fout << " } "<< std::endl;
819 fout << " } "<< std::endl;
820 fout << " // copy the transformed variables back "<< std::endl;
821 fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
822 fout << "} "<< std::endl;
823 }
824}
#define h(i)
Definition RSha256.hxx:106
#define e(i)
Definition RSha256.hxx:103
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
int Int_t
Signed integer 4 bytes (int).
Definition RtypesCore.h:59
char Char_t
Character 1 byte (char).
Definition RtypesCore.h:51
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int).
Definition RtypesCore.h:60
bool Bool_t
Boolean (0=false, 1=true) (bool).
Definition RtypesCore.h:77
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
float Float_t
Float 4 bytes (float).
Definition RtypesCore.h:71
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
static unsigned int total
#define TMVA_VERSION(a, b, c)
Definition Version.h:48
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:878
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
virtual void SetDirectory(TDirectory *dir)
By default, when a histogram is created, it is added to the list of histogram objects in the current ...
Definition TH1.cxx:9074
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition TH1.cxx:9356
Class that contains all the data information.
Definition DataSetInfo.h:62
UInt_t GetNVariables() const
accessor to the number of variables
Definition Event.cxx:316
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition Event.cxx:389
UInt_t GetClass() const
Definition Event.h:86
void Print(std::ostream &o) const
print method
Definition Event.cxx:359
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
const char * GetName() const override
Returns name of object.
Definition PDF.h:116
void ReadXML(void *pdfnode)
XML file reading.
Definition PDF.cxx:960
@ kSpline1
Definition PDF.h:70
@ kSpline0
Definition PDF.h:70
TString StringFromDouble(Double_t d)
string tools
Definition Tools.cxx:1208
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1125
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1099
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1137
Singleton class for Global types used by TMVA.
Definition Types.h:71
VariableGaussTransform(DataSetInfo &dsi, TString strcor="")
constructor can only be applied one after the other when they are created.
void ReadFromXML(void *trfnode) override
Read the transformation matrices from the xml node.
const Event * InverseTransform(const Event *const, Int_t cls) const override
apply the inverse Gauss or inverse uniform transformation
void AttachXMLTo(void *parent) override
create XML description of Gauss transformation
const Event * Transform(const Event *const, Int_t cls) const override
apply the Gauss transformation
Bool_t PrepareTransformation(const std::vector< Event * > &) override
calculate the cumulative distributions
std::vector< std::vector< TH1F * > > fCumulativeDist
! The Cumulative distributions
virtual ~VariableGaussTransform(void)
destructor
void WriteTransformationToStream(std::ostream &) const override
void GetCumulativeDist(const std::vector< Event * > &)
fill the cumulative distributions
std::vector< std::vector< PDF * > > fCumulativePDF
The cumulative PDF.
void ReadTransformationFromStream(std::istream &, const TString &) override
Read the cumulative distribution.
void PrintTransformation(std::ostream &o) override
prints the transformation
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls) override
creates the transformation function
Double_t OldCumulant(Float_t x, TH1 *h) const
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
virtual void SetOutput(Event *event, std::vector< Float_t > &output, std::vector< Char_t > &mask, const Event *oldEvent=nullptr, Bool_t backTransform=kFALSE) const
select the values from the event
virtual Bool_t GetInput(const Event *event, std::vector< Float_t > &input, std::vector< Char_t > &mask, Bool_t backTransform=kFALSE) const
select the values from the event
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
Event * fBackTransformedEvent
holds the current back-transformed event
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Event * fTransformedEvent
holds the current transformed event
VariableTransformBase(DataSetInfo &dsi, Types::EVariableTransform tf, const TString &trfName)
standard constructor
VectorOfCharAndInt fGet
get variables/targets/spectators
void SetTMVAVersion(TMVAVersion_t v)
Basic string class.
Definition TString.h:138
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2385
Double_t x[n]
Definition legend1.C:17
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:249
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition TMath.cxx:190
Double_t ErfInverse(Double_t x)
Returns the inverse error function.
Definition TMath.cxx:208
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:197
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2338