Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
VariableGaussTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableGaussTransform *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2005-2011: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * U. of Bonn, Germany *
24 * *
25 * Redistribution and use in source and binary forms, with or without *
26 * modification, are permitted according to the terms listed in LICENSE *
27 * (see tmva/doc/LICENSE) *
28 **********************************************************************************/
29
30/*! \class TMVA::VariableGaussTransform
31\ingroup TMVA
32Gaussian Transformation of input variables.
33*/
34
36
37#include "TMVA/DataSetInfo.h"
38#include "TMVA/MsgLogger.h"
39#include "TMVA/PDF.h"
40#include "TMVA/Tools.h"
41#include "TMVA/Types.h"
42#include "TMVA/Version.h"
43
44#include "TH1F.h"
45#include "TMath.h"
46#include "TVectorF.h"
47#include "TVectorD.h"
48
49#include <exception>
50#include <iostream>
51#include <list>
52#include <limits>
53#include <stdexcept>
54
55
56////////////////////////////////////////////////////////////////////////////////
57/// constructor
58/// can only be applied one after the other when they are created. But in order to
59/// determine the Gauss transformation
60
63 fFlatNotGauss(kFALSE),
64 fPdfMinSmooth(0),
65 fPdfMaxSmooth(0),
66 fElementsperbin(0)
67{
68 if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
69 SetName("Uniform");
70 }
71}
72
73////////////////////////////////////////////////////////////////////////////////
74/// destructor
75
77{
78 CleanUpCumulativeArrays();
79}
80
81////////////////////////////////////////////////////////////////////////////////
82
86
87////////////////////////////////////////////////////////////////////////////////
88/// calculate the cumulative distributions
89
91{
92 Initialize();
93
94 if (!IsEnabled() || IsCreated()) return kTRUE;
95
96 Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
97
98 UInt_t inputSize = fGet.size();
99 SetNVariables(inputSize);
100
101 if (inputSize > 200) {
102 Log() << kWARNING << "----------------------------------------------------------------------------"
103 << Endl;
104 Log() << kWARNING
105 << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
106 Log() << kWARNING << "----------------------------------------------------------------------------"
107 << Endl;
108 // return kFALSE;
109 }
110
111 GetCumulativeDist( events );
112
113 SetCreated( kTRUE );
114
115 return kTRUE;
116}
117
118////////////////////////////////////////////////////////////////////////////////
119/// apply the Gauss transformation
120
122{
123 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
124 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
125 //EVT if (cls <0 || cls > GetNClasses() ) {
126 //EVT cls = GetNClasses();
127 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
128 //EVT}
129 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
130 //EVT workaround end
131
132 // get the variable vector of the current event
133 UInt_t inputSize = fGet.size();
134
135 std::vector<Float_t> input(0);
136 std::vector<Float_t> output(0);
137
138 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
139 GetInput( ev, input, mask );
140
141 std::vector<Char_t>::iterator itMask = mask.begin();
142
143 // TVectorD vec( inputSize );
144 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
146 //transformation
147 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
148
149 if ( (*itMask) ){
150 ++itMask;
151 continue;
152 }
153
154 if (0 != fCumulativePDF[ivar][cls]) {
155 // first make it flat
156 if(fTMVAVersion>TMVA_VERSION(3,9,7))
157 cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
158 else
159 cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
160 cumulant = TMath::Min(cumulant,1.-10e-10);
161 cumulant = TMath::Max(cumulant,0.+10e-10);
162
163 if (fFlatNotGauss)
164 output.push_back( cumulant );
165 else {
166 // sanity correction for out-of-range values
167 Double_t maxErfInvArgRange = 0.99999999;
168 Double_t arg = 2.0*cumulant - 1.0;
169 arg = TMath::Min(+maxErfInvArgRange,arg);
170 arg = TMath::Max(-maxErfInvArgRange,arg);
171
172 output.push_back( 1.414213562*TMath::ErfInverse(arg) );
173 }
174 }
175 }
176
177 if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
178 if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
179 fTransformedEvent = new Event();
180 }
181
182 SetOutput( fTransformedEvent, output, mask, ev );
183
184 return fTransformedEvent;
185}
186
187////////////////////////////////////////////////////////////////////////////////
188/// apply the inverse Gauss or inverse uniform transformation
189
191{
192 if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
193 //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
194 //EVT if (cls <0 || cls > GetNClasses() ) {
195 //EVT cls = GetNClasses();
196 //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
197 //EVT}
198 if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
199 //EVT workaround end
200
201 // get the variable vector of the current event
202 UInt_t inputSize = fGet.size();
203
204 std::vector<Float_t> input(0);
205 std::vector<Float_t> output(0);
206
207 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
208 GetInput( ev, input, mask, kTRUE );
209
210 std::vector<Char_t>::iterator itMask = mask.begin();
211
212 // TVectorD vec( inputSize );
213 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
215 //transformation
216 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
217
218 if ( (*itMask) ){
219 ++itMask;
220 continue;
221 }
222
223 if (0 != fCumulativePDF[ivar][cls]) {
224 invCumulant = input.at(ivar);
225
226 // first de-gauss ist if gaussianized
227 if (!fFlatNotGauss)
228 invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
229
230 // then de-uniform the values
231 if(fTMVAVersion>TMVA_VERSION(4,0,0))
232 invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
233 else
234 Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
235
236 output.push_back(invCumulant);
237 }
238 }
239
240 if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev );
241
242 SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
243
244 return fBackTransformedEvent;
245}
246
247////////////////////////////////////////////////////////////////////////////////
248/// fill the cumulative distributions
249
250void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
251{
252 const UInt_t inputSize = fGet.size();
253 // const UInt_t nCls = GetNClasses();
254
255 // const UInt_t nvar = GetNVariables();
256 UInt_t nevt = events.size();
257
258 const UInt_t nClasses = GetNClasses();
259 UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes separately + one where all classes are treated (added) together
260
261 if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
262
263 UInt_t **nbins = new UInt_t*[numDist];
264
265 std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
266 std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
267 for (UInt_t i=0; i < numDist; i++) {
268 listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
269 vsForBinning[i] = new std::vector<Float_t> [inputSize];
270 nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
271 }
272
273 std::vector<Float_t> input;
274 std::vector<Char_t> mask; // entries with kTRUE must not be transformed
275
276 // perform event loop
280 for (UInt_t i=0; i<numDist; i++) {
281 sumOfWeights[i]=0;
282 minWeight[i]=10E10; // TODO: change this to std::max ?
283 maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
284 }
285 for (UInt_t ievt=0; ievt < nevt; ievt++) {
286 const Event* ev= events[ievt];
287 Int_t cls = ev->GetClass();
288 Float_t eventWeight = ev->GetWeight();
289 sumOfWeights[cls] += eventWeight;
290 if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
291 if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
292 if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
293
294 Bool_t hasMaskedEntries = GetInput( ev, input, mask );
295 if( hasMaskedEntries ){
296 Log() << kWARNING << "Incomplete event" << Endl;
297 std::ostringstream oss;
298 ev->Print(oss);
299 Log() << oss.str();
300 Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
301 }
302
303
304 Int_t ivar = 0;
305 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
306 Float_t value = (*itInput);
307 listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
308 if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
309 ++ivar;
310 }
311 }
312 if (numDist > 1) {
313 for (UInt_t icl=0; icl<numDist-1; icl++){
316 }
317 }
318
319 // Sorting the lists, getting nbins ...
320 const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
321 const UInt_t nbinsmax=2000; // maximum number of bins
322
323 for (UInt_t icl=0; icl< numDist; icl++){
324 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
325 listsForBinning[icl][ivar].sort();
326 std::list< TMVA::TMVAGaussPair >::iterator it;
329 Float_t sum=0;
332 const Float_t eps = 1.e-4;
333 vsForBinning[icl][ivar].push_back(ev_value-eps);
334 vsForBinning[icl][ivar].push_back(ev_value);
335
336 for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); ++it){
337 sum+= it->GetWeight();
338 if (sum >= sumPerBin) {
339 ev_value=it->GetValue();
340 if (ev_value>lastev_value) { // protection against bin width of 0
341 vsForBinning[icl][ivar].push_back(ev_value);
342 sum = 0.;
344 }
345 }
346 }
347 if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
348 nbins[icl][ivar] = vsForBinning[icl][ivar].size();
349 }
350 }
351
352 delete[] sumOfWeights;
353 delete[] minWeight;
354 delete[] maxWeight;
355
356 // create histogram for the cumulative distribution.
357 fCumulativeDist.resize(inputSize);
358 for (UInt_t icls = 0; icls < numDist; icls++) {
359 for (UInt_t ivar=0; ivar < inputSize; ivar++){
360 Float_t* binnings = new Float_t[nbins[icls][ivar]];
361 //the binning for this particular histogram:
362 for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
363 binnings[k] = vsForBinning[icls][ivar][k];
364 }
365 fCumulativeDist[ivar].resize(numDist);
366 if (fCumulativeDist[ivar][icls] ) {
367 delete fCumulativeDist[ivar][icls];
368 }
369 fCumulativeDist[ivar][icls] = new TH1F(TString::Format("Cumulative_Var%d_cls%d",ivar,icls),
370 TString::Format("Cumulative_Var%d_cls%d",ivar,icls),
371 nbins[icls][ivar] -1, // class icls
372 binnings);
373 fCumulativeDist[ivar][icls]->SetDirectory(nullptr);
374 delete [] binnings;
375 }
376 }
377
378 // Deallocation
379 for (UInt_t i=0; i<numDist; i++) {
380 delete [] listsForBinning[numDist-i-1];
381 delete [] vsForBinning[numDist-i-1];
382 delete [] nbins[numDist-i-1];
383 }
384 delete [] listsForBinning;
385 delete [] vsForBinning;
386 delete [] nbins;
387
388 // perform event loop
389 std::vector<Int_t> ic(numDist);
390 for (UInt_t ievt=0; ievt<nevt; ievt++) {
391
392 const Event* ev= events[ievt];
393 Int_t cls = ev->GetClass();
394 Float_t eventWeight = ev->GetWeight();
395
396 GetInput( ev, input, mask );
397
398 Int_t ivar = 0;
399 for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
400 Float_t value = (*itInput);
401 fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
402 if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
403
404 ++ivar;
405 }
406 }
407
408 // clean up
409 CleanUpCumulativeArrays("PDF");
410
411 // now sum up in order to get the real cumulative distribution
412 Double_t sum = 0, total=0;
413 fCumulativePDF.resize(inputSize);
414 for (UInt_t ivar=0; ivar<inputSize; ivar++) {
415 // fCumulativePDF.resize(ivar+1);
416 for (UInt_t icls=0; icls<numDist; icls++) {
417 (fCumulativeDist[ivar][icls])->Smooth();
418 sum = 0;
419 total = 0.;
420 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
421 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
422 if (val>0) total += val;
423 }
424 for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
425 Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
426 if (val>0) sum += val;
427 (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
428 }
429 // create PDf
430 fCumulativePDF[ivar].push_back(new PDF( TString::Format("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
431 }
432 }
433}
434
435////////////////////////////////////////////////////////////////////////////////
436
438{
439 Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
440}
441
442////////////////////////////////////////////////////////////////////////////////
443/// clean up of cumulative arrays
444
446 if (opt == "ALL" || opt == "PDF"){
447 for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
448 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
449 if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
450 }
451 }
452 fCumulativePDF.clear();
453 }
454 if (opt == "ALL" || opt == "Dist"){
455 for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
456 for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
457 if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
458 }
459 }
460 fCumulativeDist.clear();
461 }
462}
463////////////////////////////////////////////////////////////////////////////////
464/// create XML description of Gauss transformation
465
467 void* trfxml = gTools().AddChild(parent, "Transform");
468 gTools().AddAttr(trfxml, "Name", "Gauss");
469 gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
470
472
473 UInt_t nvar = fGet.size();
474 for (UInt_t ivar=0; ivar<nvar; ivar++) {
475 void* varxml = gTools().AddChild( trfxml, "Variable");
476 // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
477 gTools().AddAttr( varxml, "VarIndex", ivar );
478
479 if ( fCumulativePDF[ivar][0]==0 ||
480 (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
481 Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
482
483 for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
484 void* pdfxml = gTools().AddChild( varxml, TString::Format("CumulativePDF_cls%d",icls));
485 (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
486 }
487 }
488}
489
490////////////////////////////////////////////////////////////////////////////////
491/// Read the transformation matrices from the xml node
492
494 // clean up first
495 CleanUpCumulativeArrays();
497
498 gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
499
500 if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
501 else fFlatNotGauss = kFALSE;
502
504
505 void* inpnode = NULL;
506
507 inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
508 if( inpnode!=NULL )
509 newFormat = kTRUE; // new xml format
510
511 void* varnode = NULL;
512 if( newFormat ){
513 // ------------- new format --------------------
514 // read input
516
518 }else
520
521 // Read the cumulative distribution
522
523 TString varname, histname, classname;
524 UInt_t ivar;
525 while(varnode) {
526 if( gTools().HasAttr(varnode,"Name") )
527 gTools().ReadAttr(varnode, "Name", varname);
528 gTools().ReadAttr(varnode, "VarIndex", ivar);
529
530 void* clsnode = gTools().GetChild( varnode);
531
532 while(clsnode) {
533 void* pdfnode = gTools().GetChild( clsnode);
534 PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
535 pdfToRead->ReadXML(pdfnode); // pdfnode
536 // push_back PDF
537 fCumulativePDF.resize( ivar+1 );
538 fCumulativePDF[ivar].push_back(pdfToRead);
540 }
541
543 }
544 SetCreated();
545}
546
547////////////////////////////////////////////////////////////////////////////////
548/// Read the cumulative distribution
549
551{
552 TDirectory::TContext dirCtx{nullptr}; // Don't register histograms to current directory
553 char buf[512];
554 istr.getline(buf,512);
555
556 TString strvar, dummy;
557
558 while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
559 char* p = buf;
560 while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
561 if (*p=='#' || *p=='\0') {
562 istr.getline(buf,512);
563 continue; // if comment or empty line, read the next line
564 }
565 std::stringstream sstr(buf);
566 sstr >> strvar;
567
568 if (strvar=="CumulativeHistogram") {
569 UInt_t type(0), ivar(0);
570 TString devnullS(""),hname("");
571 Int_t nbins(0);
572
573 // coverity[tainted_data_argument]
574 sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
575
576 Float_t *Binnings = new Float_t[nbins+1];
577 Float_t val;
578 istr >> devnullS; // read the line "BinBoundaries" ..
579 for (Int_t ibin=0; ibin<nbins+1; ibin++) {
580 istr >> val;
581 Binnings[ibin]=val;
582 }
583
584 if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
585 if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
586
587 TH1F * histToRead = fCumulativeDist[ivar][type];
588 if ( histToRead !=0 ) delete histToRead;
589 // recreate the cumulative histogram to be filled with the values read
590 histToRead = new TH1F( hname, hname, nbins, Binnings );
591 histToRead->SetDirectory(nullptr);
592 fCumulativeDist[ivar][type]=histToRead;
593
594 istr >> devnullS; // read the line "BinContent" ..
595 for (Int_t ibin=0; ibin<nbins; ibin++) {
596 istr >> val;
597 histToRead->SetBinContent(ibin+1,val);
598 }
599
600 PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
601 // push_back PDF
602 fCumulativePDF.resize(ivar+1);
603 fCumulativePDF[ivar].resize(type+1);
604 fCumulativePDF[ivar][type] = pdf;
605 delete [] Binnings;
606 }
607
608 // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
609 if (strvar=="Uniform") { // don't correct this spelling mistake
610 sstr >> fFlatNotGauss;
611 istr.getline(buf,512);
612 break;
613 }
614
615 istr.getline(buf,512); // reading the next line
616 }
617
618 UInt_t classIdx=(classname=="signal")?0:1;
619 for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
620 PDF* src = fCumulativePDF[ivar][classIdx];
621 fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
622 }
623
624 SetTMVAVersion(TMVA_VERSION(3,9,7));
625
626 SetCreated();
627}
628
629////////////////////////////////////////////////////////////////////////////////
630
632
633 Int_t bin = h->FindBin(x);
634 bin = TMath::Max(bin,1);
635 bin = TMath::Min(bin,h->GetNbinsX());
636
638 Double_t x0, x1, y0, y1;
639 Double_t total = h->GetNbinsX()*fElementsperbin;
640 Double_t supmin = 0.5/total;
641
642 x0 = h->GetBinLowEdge(TMath::Max(bin,1));
643 x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
644
645 y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
646 y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
647
648 if (bin == 0) {
649 y0 = supmin;
650 y1 = supmin;
651 }
652 if (bin == 1) {
653 y0 = supmin;
654 }
655 if (bin > h->GetNbinsX()) {
656 y0 = 1.-supmin;
657 y1 = 1.-supmin;
658 }
659 if (bin == h->GetNbinsX()) {
660 y1 = 1.-supmin;
661 }
662
663 if (x0 == x1) {
664 cumulant = y1;
665 } else {
666 cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
667 }
668
669 if (x <= h->GetBinLowEdge(1)){
671 }
672 if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
673 cumulant = 1-supmin;
674 }
675 return cumulant;
676}
677
678////////////////////////////////////////////////////////////////////////////////
679/// prints the transformation
680
682{
683 Int_t cls = 0;
684 Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
685 cls++;
686}
687
688////////////////////////////////////////////////////////////////////////////////
689/// creates the transformation function
690///
691
694{
695 const UInt_t nvar = fGet.size();
696 UInt_t numDist = GetNClasses() + 1;
697 Int_t nBins = -1;
698 for (UInt_t icls=0; icls<numDist; icls++) {
699 for (UInt_t ivar=0; ivar<nvar; ivar++) {
700 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
701 if (nbin > nBins) nBins=nbin;
702 }
703 }
704
705 // creates the gauss transformation function
706 if (part==1) {
707 fout << std::endl;
708 fout << " int nvar;" << std::endl;
709 fout << std::endl;
710 // declare variables
711 fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
712 fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
713 fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
714 fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
715 fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
716 }
717 if (part==2) {
718 fout << std::endl;
719 fout << "#include \"math.h\"" << std::endl;
720 fout << std::endl;
721 fout << "//_______________________________________________________________________" << std::endl;
722 fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
723 fout << "{" << std::endl;
724 fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
725 fout << " nvar=" << nvar << ";" << std::endl;
726 for (UInt_t icls=0; icls<numDist; icls++) {
727 for (UInt_t ivar=0; ivar<nvar; ivar++) {
728 Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
729 fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
730 }
731 }
732
733 // fill meat here
734 // loop over nvar , cls, loop over nBins
735 // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
736 for (UInt_t icls=0; icls<numDist; icls++) {
737 for (UInt_t ivar=0; ivar<nvar; ivar++) {
738 // Int_t idx = 0;
739 try{
740 // idx = fGet.at(ivar).second;
741 Char_t type = fGet.at(ivar).first;
742 if( type != 'v' ){
743 Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
744 }
745 }catch( std::out_of_range &){
746 Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
747 }
748
749 // Double_t xmn=Variables()[idx].GetMin();
750 // Double_t xmx=Variables()[idx].GetMax();
751 Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
752 Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
753
754 fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
755 fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
756 for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
757 fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
758 fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
759
760 }
761 }
762 }
763 fout << "}" << std::endl;
764 fout << std::endl;
765 fout << "//_______________________________________________________________________" << std::endl;
766 fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
767 fout << "{" << std::endl;
768 fout << " // Gauss/Uniform transformation" << std::endl;
769 fout << " int cls=clsIn;" << std::endl;
770 fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
771 fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
772 fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
773 fout << " }"<< std::endl;
774
775 fout << " // copy the variables which are going to be transformed "<< std::endl;
777 fout << " static std::vector<double> dv; "<< std::endl;
778 fout << " dv.resize(nvar); "<< std::endl;
779 fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
780 fout << " "<< std::endl;
781 fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
782 fout << " double cumulant; "<< std::endl;
783 fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
784 fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
785 fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
786 fout << " int ibin=0; "<< std::endl;
787 fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
788 fout << " "<< std::endl;
789 fout << " if (ibin<0) { ibin=0;} "<< std::endl;
790 fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
791 fout << " int nextbin = ibin; "<< std::endl;
792 fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
793 fout << " nextbin++; "<< std::endl;
794 fout << " else "<< std::endl;
795 fout << " nextbin--; "<< std::endl;
796 fout << " "<< std::endl;
797 fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
798 fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
799 fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
800 fout << " "<< std::endl;
801 fout << " "<< std::endl;
802 fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
803 fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
804 fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
805 fout << " else { "<< std::endl;
806 fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
807 fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
808 fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
809 fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
810 fout << " double inverf=0., stp=1. ; "<< std::endl;
811 fout << " while (stp >1.e-10){; "<< std::endl;
812 fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
813 fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
814 fout << " else inverf += stp; "<< std::endl;
815 fout << " } ; "<< std::endl;
816 fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
817 fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
818 fout << " } "<< std::endl;
819 fout << " } "<< std::endl;
820 fout << " // copy the transformed variables back "<< std::endl;
821 fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
822 fout << "} "<< std::endl;
823 }
824}
#define h(i)
Definition RSha256.hxx:106
#define e(i)
Definition RSha256.hxx:103
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
char Char_t
Character 1 byte (char)
Definition RtypesCore.h:51
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
static unsigned int total
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t mask
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char y1
#define TMVA_VERSION(a, b, c)
Definition Version.h:48
const_iterator begin() const
const_iterator end() const
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:878
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
Class that contains all the data information.
Definition DataSetInfo.h:62
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
@ kSpline1
Definition PDF.h:70
@ kSpline0
Definition PDF.h:70
TString StringFromDouble(Double_t d)
string tools
Definition Tools.cxx:1208
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1125
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1099
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1137
Singleton class for Global types used by TMVA.
Definition Types.h:71
VariableGaussTransform(DataSetInfo &dsi, TString strcor="")
constructor can only be applied one after the other when they are created.
void ReadFromXML(void *trfnode) override
Read the transformation matrices from the xml node.
const Event * InverseTransform(const Event *const, Int_t cls) const override
apply the inverse Gauss or inverse uniform transformation
void AttachXMLTo(void *parent) override
create XML description of Gauss transformation
const Event * Transform(const Event *const, Int_t cls) const override
apply the Gauss transformation
Bool_t PrepareTransformation(const std::vector< Event * > &) override
calculate the cumulative distributions
virtual ~VariableGaussTransform(void)
destructor
void WriteTransformationToStream(std::ostream &) const override
void GetCumulativeDist(const std::vector< Event * > &)
fill the cumulative distributions
void ReadTransformationFromStream(std::istream &, const TString &) override
Read the cumulative distribution.
void PrintTransformation(std::ostream &o) override
prints the transformation
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls) override
creates the transformation function
Double_t OldCumulant(Float_t x, TH1 *h) const
Linear interpolation class.
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Basic string class.
Definition TString.h:138
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
Double_t x[n]
Definition legend1.C:17
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:249
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition TMath.cxx:190
Double_t ErfInverse(Double_t x)
Returns the inverse error function.
Definition TMath.cxx:208
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:197
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2338
static void output()