Logo ROOT   6.07/09
Reference Guide
VariableGaussTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableGaussTransform *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * U. of Bonn, Germany *
24  * *
25  * Redistribution and use in source and binary forms, with or without *
26  * modification, are permitted according to the terms listed in LICENSE *
27  * (http://tmva.sourceforge.net/LICENSE) *
28  **********************************************************************************/
29 
30 ///////////////////////////////////////////////////////////////////////////
31 // //
32 // Gaussian Transformation of input variables. //
33 // //
34 ///////////////////////////////////////////////////////////////////////////
35 
37 
38 #include "TMVA/DataSetInfo.h"
39 #include "TMVA/MsgLogger.h"
40 #include "TMVA/PDF.h"
41 #include "TMVA/Tools.h"
42 #include "TMVA/Types.h"
43 #include "TMVA/Version.h"
44 
45 #include "TCanvas.h"
46 #include "TGraph.h"
47 #include "TH1F.h"
48 #include "TMath.h"
49 #include "TVectorF.h"
50 #include "TVectorD.h"
51 
52 #include <exception>
53 #include <iostream>
54 #include <iomanip>
55 #include <list>
56 #include <limits>
57 #include <stdexcept>
58 
60 
61 ////////////////////////////////////////////////////////////////////////////////
62 /// constructor
63 /// can only be applied one after the other when they are created. But in order to
64 /// determine the Gauss transformation
65 
67 : VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
68  fFlatNotGauss(kFALSE),
69  fPdfMinSmooth(0),
70  fPdfMaxSmooth(0),
71  fElementsperbin(0)
72 {
73  if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
74  SetName("Uniform");
75  }
76 }
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 /// destructor
80 
82 {
84 }
85 
86 ////////////////////////////////////////////////////////////////////////////////
87 
89 {
90 }
91 
92 ////////////////////////////////////////////////////////////////////////////////
93 /// calculate the cumulative distributions
94 
96 {
97  Initialize();
98 
99  if (!IsEnabled() || IsCreated()) return kTRUE;
100 
101  Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
102 
103  UInt_t inputSize = fGet.size();
104  SetNVariables(inputSize);
105 
106  if (inputSize > 200) {
107  Log() << kWARNING << "----------------------------------------------------------------------------"
108  << Endl;
109  Log() << kWARNING
110  << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
111  Log() << kWARNING << "----------------------------------------------------------------------------"
112  << Endl;
113  // return kFALSE;
114  }
115 
116  GetCumulativeDist( events );
117 
118  SetCreated( kTRUE );
119 
120  return kTRUE;
121 }
122 
123 ////////////////////////////////////////////////////////////////////////////////
124 /// apply the Gauss transformation
125 
127 {
128  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
129  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
130  //EVT if (cls <0 || cls > GetNClasses() ) {
131  //EVT cls = GetNClasses();
132  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
133  //EVT}
134  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
135  //EVT workaround end
136 
137  // get the variable vector of the current event
138  UInt_t inputSize = fGet.size();
139 
140  std::vector<Float_t> input(0);
141  std::vector<Float_t> output(0);
142 
143  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
144  GetInput( ev, input, mask );
145 
146  std::vector<Char_t>::iterator itMask = mask.begin();
147 
148  // TVectorD vec( inputSize );
149  // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
150  Double_t cumulant;
151  //transformation
152  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
153 
154  if ( (*itMask) ){
155  ++itMask;
156  continue;
157  }
158 
159  if (0 != fCumulativePDF[ivar][cls]) {
160  // first make it flat
161  if(fTMVAVersion>TMVA_VERSION(3,9,7))
162  cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
163  else
164  cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
165  cumulant = TMath::Min(cumulant,1.-10e-10);
166  cumulant = TMath::Max(cumulant,0.+10e-10);
167 
168  if (fFlatNotGauss)
169  output.push_back( cumulant );
170  else {
171  // sanity correction for out-of-range values
172  Double_t maxErfInvArgRange = 0.99999999;
173  Double_t arg = 2.0*cumulant - 1.0;
174  arg = TMath::Min(+maxErfInvArgRange,arg);
175  arg = TMath::Max(-maxErfInvArgRange,arg);
176 
177  output.push_back( 1.414213562*TMath::ErfInverse(arg) );
178  }
179  }
180  }
181 
183  if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
184  fTransformedEvent = new Event();
185  }
186 
187  SetOutput( fTransformedEvent, output, mask, ev );
188 
189  return fTransformedEvent;
190 }
191 
192 ////////////////////////////////////////////////////////////////////////////////
193 /// apply the inverse Gauss or inverse uniform transformation
194 
196 {
197  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
198  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
199  //EVT if (cls <0 || cls > GetNClasses() ) {
200  //EVT cls = GetNClasses();
201  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
202  //EVT}
203  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
204  //EVT workaround end
205 
206  // get the variable vector of the current event
207  UInt_t inputSize = fGet.size();
208 
209  std::vector<Float_t> input(0);
210  std::vector<Float_t> output(0);
211 
212  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
213  GetInput( ev, input, mask, kTRUE );
214 
215  std::vector<Char_t>::iterator itMask = mask.begin();
216 
217  // TVectorD vec( inputSize );
218  // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
219  Double_t invCumulant;
220  //transformation
221  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
222 
223  if ( (*itMask) ){
224  ++itMask;
225  continue;
226  }
227 
228  if (0 != fCumulativePDF[ivar][cls]) {
229  invCumulant = input.at(ivar);
230 
231  // first de-gauss ist if gaussianized
232  if (!fFlatNotGauss)
233  invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
234 
235  // then de-uniform the values
236  if(fTMVAVersion>TMVA_VERSION(4,0,0))
237  invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
238  else
239  Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
240 
241  output.push_back(invCumulant);
242  }
243  }
244 
246 
247  SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
248 
249  return fBackTransformedEvent;
250 }
251 
252 ////////////////////////////////////////////////////////////////////////////////
253 /// fill the cumulative distributions
254 
255 void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
256 {
257  const UInt_t inputSize = fGet.size();
258  // const UInt_t nCls = GetNClasses();
259 
260  // const UInt_t nvar = GetNVariables();
261  UInt_t nevt = events.size();
262 
263  const UInt_t nClasses = GetNClasses();
264  UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes seperately + one where all classes are treated (added) together
265 
266  if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
267 
268  UInt_t **nbins = new UInt_t*[numDist];
269 
270  std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
271  std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
272  for (UInt_t i=0; i < numDist; i++) {
273  listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
274  vsForBinning[i] = new std::vector<Float_t> [inputSize];
275  nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
276  }
277 
278  std::vector<Float_t> input;
279  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
280 
281  // perform event loop
282  Float_t *sumOfWeights = new Float_t[numDist];
283  Float_t *minWeight = new Float_t[numDist];
284  Float_t *maxWeight = new Float_t[numDist];
285  for (UInt_t i=0; i<numDist; i++) {
286  sumOfWeights[i]=0;
287  minWeight[i]=10E10; // TODO: change this to std::max ?
288  maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
289  }
290  for (UInt_t ievt=0; ievt < nevt; ievt++) {
291  const Event* ev= events[ievt];
292  Int_t cls = ev->GetClass();
293  Float_t eventWeight = ev->GetWeight();
294  sumOfWeights[cls] += eventWeight;
295  if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
296  if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
297  if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
298 
299  Bool_t hasMaskedEntries = GetInput( ev, input, mask );
300  if( hasMaskedEntries ){
301  Log() << kWARNING << "Incomplete event" << Endl;
302  std::ostringstream oss;
303  ev->Print(oss);
304  Log() << oss.str();
305  Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
306  }
307 
308 
309  Int_t ivar = 0;
310  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
311  Float_t value = (*itInput);
312  listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
313  if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
314  ++ivar;
315  }
316  }
317  if (numDist > 1) {
318  for (UInt_t icl=0; icl<numDist-1; icl++){
319  minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
320  maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
321  }
322  }
323 
324  // Sorting the lists, getting nbins ...
325  const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
326  const UInt_t nbinsmax=2000; // maximum number of bins
327 
328  for (UInt_t icl=0; icl< numDist; icl++){
329  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
330  listsForBinning[icl][ivar].sort();
331  std::list< TMVA::TMVAGaussPair >::iterator it;
332  Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
333  sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
334  Float_t sum=0;
335  Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
336  Float_t lastev_value=ev_value;
337  const Float_t eps = 1.e-4;
338  vsForBinning[icl][ivar].push_back(ev_value-eps);
339  vsForBinning[icl][ivar].push_back(ev_value);
340 
341  for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); it++){
342  sum+= it->GetWeight();
343  if (sum >= sumPerBin) {
344  ev_value=it->GetValue();
345  if (ev_value>lastev_value) { // protection against bin width of 0
346  vsForBinning[icl][ivar].push_back(ev_value);
347  sum = 0.;
348  lastev_value=ev_value;
349  }
350  }
351  }
352  if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
353  nbins[icl][ivar] = vsForBinning[icl][ivar].size();
354  }
355  }
356 
357  delete[] sumOfWeights;
358  delete[] minWeight;
359  delete[] maxWeight;
360 
361  // create histogram for the cumulative distribution.
362  fCumulativeDist.resize(inputSize);
363  for (UInt_t icls = 0; icls < numDist; icls++) {
364  for (UInt_t ivar=0; ivar < inputSize; ivar++){
365  Float_t* binnings = new Float_t[nbins[icls][ivar]];
366  //the binning for this particular histogram:
367  for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
368  binnings[k] = vsForBinning[icls][ivar][k];
369  }
370  fCumulativeDist[ivar].resize(numDist);
371  if (0 != fCumulativeDist[ivar][icls] ) {
372  delete fCumulativeDist[ivar][icls];
373  }
374  fCumulativeDist[ivar][icls] = new TH1F(Form("Cumulative_Var%d_cls%d",ivar,icls),
375  Form("Cumulative_Var%d_cls%d",ivar,icls),
376  nbins[icls][ivar] -1, // class icls
377  binnings);
378  fCumulativeDist[ivar][icls]->SetDirectory(0);
379  delete [] binnings;
380  }
381  }
382 
383  // Deallocation
384  for (UInt_t i=0; i<numDist; i++) {
385  delete [] listsForBinning[numDist-i-1];
386  delete [] vsForBinning[numDist-i-1];
387  delete [] nbins[numDist-i-1];
388  }
389  delete [] listsForBinning;
390  delete [] vsForBinning;
391  delete [] nbins;
392 
393  // perform event loop
394  std::vector<Int_t> ic(numDist);
395  for (UInt_t ievt=0; ievt<nevt; ievt++) {
396 
397  const Event* ev= events[ievt];
398  Int_t cls = ev->GetClass();
399  Float_t eventWeight = ev->GetWeight();
400 
401  GetInput( ev, input, mask );
402 
403  Int_t ivar = 0;
404  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
405  Float_t value = (*itInput);
406  fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
407  if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
408 
409  ++ivar;
410  }
411  }
412 
413  // clean up
415 
416  // now sum up in order to get the real cumulative distribution
417  Double_t sum = 0, total=0;
418  fCumulativePDF.resize(inputSize);
419  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
420  // fCumulativePDF.resize(ivar+1);
421  for (UInt_t icls=0; icls<numDist; icls++) {
422  (fCumulativeDist[ivar][icls])->Smooth();
423  sum = 0;
424  total = 0.;
425  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
426  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
427  if (val>0) total += val;
428  }
429  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
430  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
431  if (val>0) sum += val;
432  (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
433  }
434  // create PDf
435  fCumulativePDF[ivar].push_back(new PDF( Form("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
436  }
437  }
438 }
439 
440 ////////////////////////////////////////////////////////////////////////////////
441 
443 {
444  Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
445 }
446 
447 ////////////////////////////////////////////////////////////////////////////////
448 /// clean up of cumulative arrays
449 
451  if (opt == "ALL" || opt == "PDF"){
452  for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
453  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
454  if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
455  }
456  }
457  fCumulativePDF.clear();
458  }
459  if (opt == "ALL" || opt == "Dist"){
460  for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
461  for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
462  if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
463  }
464  }
465  fCumulativeDist.clear();
466  }
467 }
468 ////////////////////////////////////////////////////////////////////////////////
469 /// create XML description of Gauss transformation
470 
472  void* trfxml = gTools().AddChild(parent, "Transform");
473  gTools().AddAttr(trfxml, "Name", "Gauss");
474  gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
475 
477 
478  UInt_t nvar = fGet.size();
479  for (UInt_t ivar=0; ivar<nvar; ivar++) {
480  void* varxml = gTools().AddChild( trfxml, "Variable");
481  // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
482  gTools().AddAttr( varxml, "VarIndex", ivar );
483 
484  if ( fCumulativePDF[ivar][0]==0 ||
485  (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
486  Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
487 
488  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
489  void* pdfxml = gTools().AddChild( varxml, Form("CumulativePDF_cls%d",icls));
490  (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
491  }
492  }
493 }
494 
495 ////////////////////////////////////////////////////////////////////////////////
496 /// Read the transformation matrices from the xml node
497 
499  // clean up first
501  TString FlatOrGauss;
502 
503  gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
504 
505  if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
506  else fFlatNotGauss = kFALSE;
507 
508  Bool_t newFormat = kFALSE;
509 
510  void* inpnode = NULL;
511 
512  inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
513  if( inpnode!=NULL )
514  newFormat = kTRUE; // new xml format
515 
516  void* varnode = NULL;
517  if( newFormat ){
518  // ------------- new format --------------------
519  // read input
521 
522  varnode = gTools().GetNextChild(inpnode);
523  }else
524  varnode = gTools().GetChild(trfnode);
525 
526  // Read the cumulative distribution
527 
528  TString varname, histname, classname;
529  UInt_t ivar;
530  while(varnode) {
531  if( gTools().HasAttr(varnode,"Name") )
532  gTools().ReadAttr(varnode, "Name", varname);
533  gTools().ReadAttr(varnode, "VarIndex", ivar);
534 
535  void* clsnode = gTools().GetChild( varnode);
536 
537  while(clsnode) {
538  void* pdfnode = gTools().GetChild( clsnode);
539  PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
540  pdfToRead->ReadXML(pdfnode); // pdfnode
541  // push_back PDF
542  fCumulativePDF.resize( ivar+1 );
543  fCumulativePDF[ivar].push_back(pdfToRead);
544  clsnode = gTools().GetNextChild(clsnode);
545  }
546 
547  varnode = gTools().GetNextChild(varnode);
548  }
549  SetCreated();
550 }
551 
552 ////////////////////////////////////////////////////////////////////////////////
553 /// Read the cumulative distribution
554 
555 void TMVA::VariableGaussTransform::ReadTransformationFromStream( std::istream& istr, const TString& classname)
556 {
557  Bool_t addDirStatus = TH1::AddDirectoryStatus();
558  TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
559  char buf[512];
560  istr.getline(buf,512);
561 
562  TString strvar, dummy;
563 
564  while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
565  char* p = buf;
566  while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
567  if (*p=='#' || *p=='\0') {
568  istr.getline(buf,512);
569  continue; // if comment or empty line, read the next line
570  }
571  std::stringstream sstr(buf);
572  sstr >> strvar;
573 
574  if (strvar=="CumulativeHistogram") {
575  UInt_t type(0), ivar(0);
576  TString devnullS(""),hname("");
577  Int_t nbins(0);
578 
579  // coverity[tainted_data_argument]
580  sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
581 
582  Float_t *Binnings = new Float_t[nbins+1];
583  Float_t val;
584  istr >> devnullS; // read the line "BinBoundaries" ..
585  for (Int_t ibin=0; ibin<nbins+1; ibin++) {
586  istr >> val;
587  Binnings[ibin]=val;
588  }
589 
590  if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
591  if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
592 
593  TH1F * histToRead = fCumulativeDist[ivar][type];
594  if ( histToRead !=0 ) delete histToRead;
595  // recreate the cumulative histogram to be filled with the values read
596  histToRead = new TH1F( hname, hname, nbins, Binnings );
597  histToRead->SetDirectory(0);
598  fCumulativeDist[ivar][type]=histToRead;
599 
600  istr >> devnullS; // read the line "BinContent" ..
601  for (Int_t ibin=0; ibin<nbins; ibin++) {
602  istr >> val;
603  histToRead->SetBinContent(ibin+1,val);
604  }
605 
606  PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
607  // push_back PDF
608  fCumulativePDF.resize(ivar+1);
609  fCumulativePDF[ivar].resize(type+1);
610  fCumulativePDF[ivar][type] = pdf;
611  delete [] Binnings;
612  }
613 
614  // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
615  if (strvar=="Uniform") { // don't correct this spelling mistake
616  sstr >> fFlatNotGauss;
617  istr.getline(buf,512);
618  break;
619  }
620 
621  istr.getline(buf,512); // reading the next line
622  }
623  TH1::AddDirectory(addDirStatus);
624 
625  UInt_t classIdx=(classname=="signal")?0:1;
626  for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
627  PDF* src = fCumulativePDF[ivar][classIdx];
628  fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
629  }
630 
632 
633  SetCreated();
634 }
635 
637 
638  Int_t bin = h->FindBin(x);
639  bin = TMath::Max(bin,1);
640  bin = TMath::Min(bin,h->GetNbinsX());
641 
642  Double_t cumulant;
643  Double_t x0, x1, y0, y1;
645  Double_t supmin = 0.5/total;
646 
647  x0 = h->GetBinLowEdge(TMath::Max(bin,1));
648  x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
649 
650  y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
651  y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
652 
653  if (bin == 0) {
654  y0 = supmin;
655  y1 = supmin;
656  }
657  if (bin == 1) {
658  y0 = supmin;
659  }
660  if (bin > h->GetNbinsX()) {
661  y0 = 1.-supmin;
662  y1 = 1.-supmin;
663  }
664  if (bin == h->GetNbinsX()) {
665  y1 = 1.-supmin;
666  }
667 
668  if (x0 == x1) {
669  cumulant = y1;
670  } else {
671  cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
672  }
673 
674  if (x <= h->GetBinLowEdge(1)){
675  cumulant = supmin;
676  }
677  if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
678  cumulant = 1-supmin;
679  }
680  return cumulant;
681 }
682 
683 
684 
685 ////////////////////////////////////////////////////////////////////////////////
686 /// prints the transformation
687 
689 {
690  Int_t cls = 0;
691  Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
692  cls++;
693 }
694 
695 ////////////////////////////////////////////////////////////////////////////////
696 /// creates the transformation function
697 ///
698 
699 void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName,
700  Int_t part, UInt_t trCounter, Int_t )
701 {
702  const UInt_t nvar = fGet.size();
703  UInt_t numDist = GetNClasses() + 1;
704  Int_t nBins = -1;
705  for (UInt_t icls=0; icls<numDist; icls++) {
706  for (UInt_t ivar=0; ivar<nvar; ivar++) {
707  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
708  if (nbin > nBins) nBins=nbin;
709  }
710  }
711 
712  // creates the gauss transformation function
713  if (part==1) {
714  fout << std::endl;
715  fout << " int nvar;" << std::endl;
716  fout << std::endl;
717  // declare variables
718  fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
719  fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
720  fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
721  fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
722  fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
723  }
724  if (part==2) {
725  fout << std::endl;
726  fout << "#include \"math.h\"" << std::endl;
727  fout << std::endl;
728  fout << "//_______________________________________________________________________" << std::endl;
729  fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
730  fout << "{" << std::endl;
731  fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
732  fout << " nvar=" << nvar << ";" << std::endl;
733  for (UInt_t icls=0; icls<numDist; icls++) {
734  for (UInt_t ivar=0; ivar<nvar; ivar++) {
735  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
736  fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
737  }
738  }
739 
740  // fill meat here
741  // loop over nvar , cls, loop over nBins
742  // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
743  for (UInt_t icls=0; icls<numDist; icls++) {
744  for (UInt_t ivar=0; ivar<nvar; ivar++) {
745  // Int_t idx = 0;
746  try{
747  // idx = fGet.at(ivar).second;
748  Char_t type = fGet.at(ivar).first;
749  if( type != 'v' ){
750  Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
751  }
752  }catch( std::out_of_range except ){
753  Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
754  }
755 
756  // Double_t xmn=Variables()[idx].GetMin();
757  // Double_t xmx=Variables()[idx].GetMax();
758  Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
759  Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
760 
761  fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
762  fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
763  for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
764  fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
765  fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
766 
767  }
768  }
769  }
770  fout << "}" << std::endl;
771  fout << std::endl;
772  fout << "//_______________________________________________________________________" << std::endl;
773  fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
774  fout << "{" << std::endl;
775  fout << " // Gauss/Uniform transformation" << std::endl;
776  fout << " int cls=clsIn;" << std::endl;
777  fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
778  fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
779  fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
780  fout << " }"<< std::endl;
781 
782  fout << " // copy the variables which are going to be transformed "<< std::endl;
783  VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
784  fout << " static std::vector<double> dv; "<< std::endl;
785  fout << " dv.resize(nvar); "<< std::endl;
786  fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
787  fout << " "<< std::endl;
788  fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
789  fout << " double cumulant; "<< std::endl;
790  fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
791  fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
792  fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
793  fout << " int ibin=0; "<< std::endl;
794  fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
795  fout << " "<< std::endl;
796  fout << " if (ibin<0) { ibin=0;} "<< std::endl;
797  fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
798  fout << " int nextbin = ibin; "<< std::endl;
799  fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
800  fout << " nextbin++; "<< std::endl;
801  fout << " else "<< std::endl;
802  fout << " nextbin--; "<< std::endl;
803  fout << " "<< std::endl;
804  fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
805  fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
806  fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
807  fout << " "<< std::endl;
808  fout << " "<< std::endl;
809  fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
810  fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
811  fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
812  fout << " else { "<< std::endl;
813  fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
814  fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
815  fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
816  fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
817  fout << " double inverf=0., stp=1. ; "<< std::endl;
818  fout << " while (stp >1.e-10){; "<< std::endl;
819  fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
820  fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
821  fout << " else inverf += stp; "<< std::endl;
822  fout << " } ; "<< std::endl;
823  fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
824  fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
825  fout << " } "<< std::endl;
826  fout << " } "<< std::endl;
827  fout << " // copy the transformed variables back "<< std::endl;
828  fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
829  fout << "} "<< std::endl;
830  }
831 }
void GetCumulativeDist(const std::vector< Event * > &)
fill the cumulative distributions
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3442
void SetTMVAVersion(TMVAVersion_t v)
Double_t ErfInverse(Double_t x)
returns the inverse error function x must be <-1<x<1
Definition: TMath.cxx:206
static long int sum(long int i)
Definition: Factory.cxx:1785
virtual const Event * Transform(const Event *const, Int_t cls) const
apply the Gauss transformation
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:957
virtual ~VariableGaussTransform(void)
destructor
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4640
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8008
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:302
TH1 * h
Definition: legend2.C:5
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
Basic string class.
Definition: TString.h:137
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition: TH1.cxx:700
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
int nbins[3]
virtual Int_t GetNbinsX() const
Definition: TH1.h:301
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:309
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1220
Tools & gTools()
Definition: Tools.cxx:79
Double_t x[n]
Definition: legend1.C:17
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8219
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
Definition: PDF.h:71
void ReadTransformationFromStream(std::istream &, const TString &)
Read the cumulative distribution.
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:305
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1241
Bool_t PrepareTransformation(const std::vector< Event * > &)
calculate the cumulative distributions
virtual void SetOutput(Event *event, std::vector< Float_t > &output, std::vector< Char_t > &mask, const Event *oldEvent=0, Bool_t backTransform=kFALSE) const
select the values from the event
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:187
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8280
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:296
void Print(std::ostream &o) const
print method
Definition: Event.cxx:348
static unsigned int total
#define TMVA_VERSION(a, b, c)
Definition: Version.h:48
Double_t OldCumulant(Float_t x, TH1 *h) const
static const double x1[5]
#define ClassImp(name)
Definition: Rtypes.h:279
double f(double x)
double Double_t
Definition: RtypesCore.h:55
int type
Definition: TGX11.cxx:120
static RooMathCoreReg dummy
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
static std::shared_ptr< std::function< double(double)> > Gauss
Definition: NeuralNet.icc:71
The TH1 histogram class.
Definition: TH1.h:80
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
UInt_t GetClass() const
Definition: Event.h:89
virtual Bool_t GetInput(const Event *event, std::vector< Float_t > &input, std::vector< Char_t > &mask, Bool_t backTransform=kFALSE) const
select the values from the event
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)
creates the transformation function
char Char_t
Definition: RtypesCore.h:29
Abstract ClassifierFactory template that handles arbitrary types.
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
const char * GetName() const
Returns name of object.
Definition: PDF.h:124
#define NULL
Definition: Rtypes.h:82
TString()
TString default ctor.
Definition: TString.cxx:88
std::vector< std::vector< PDF * > > fCumulativePDF
The Cummulative distributions.
virtual void ReadFromXML(void *trfnode)
Read the transformation matrices from the xml node.
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
virtual void AttachXMLTo(void *parent)
create XML description of Gauss transformation
virtual const Event * InverseTransform(const Event *const, Int_t cls) const
apply the inverse Gauss or inverse uniform transformation
static void output(int code)
Definition: gifencode.c:226
const Bool_t kTRUE
Definition: Rtypes.h:91
const int except
virtual void PrintTransformation(std::ostream &o)
prints the transformation
gr SetName("gr")
std::vector< std::vector< TH1F * > > fCumulativeDist
void WriteTransformationToStream(std::ostream &) const