ROOT  6.06/09
Reference Guide
VariableGaussTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableGaussTransform *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * U. of Bonn, Germany *
24  * *
25  * Redistribution and use in source and binary forms, with or without *
26  * modification, are permitted according to the terms listed in LICENSE *
27  * (http://tmva.sourceforge.net/LICENSE) *
28  **********************************************************************************/
29 
30 ///////////////////////////////////////////////////////////////////////////
31 // //
32 // Gaussian Transformation of input variables. //
33 // //
34 ///////////////////////////////////////////////////////////////////////////
35 
36 #include <iostream>
37 #include <iomanip>
38 #include <list>
39 #include <limits>
40 #include <exception>
41 #include <stdexcept>
42 
43 #include "TVectorF.h"
44 #include "TVectorD.h"
45 #include "TMath.h"
46 #include "TCanvas.h"
47 
49 #ifndef ROOT_TMVA_MsgLogger
50 #include "TMVA/MsgLogger.h"
51 #endif
52 #ifndef ROOT_TMVA_Tools
53 #include "TMVA/Tools.h"
54 #endif
55 #ifndef ROOT_TMVA_Version
56 #include "TMVA/Version.h"
57 #endif
58 
60 
61 ////////////////////////////////////////////////////////////////////////////////
62 /// constructor
63 /// can only be applied one after the other when they are created. But in order to
64 /// determine the Gauss transformation
65 
66 TMVA::VariableGaussTransform::VariableGaussTransform( DataSetInfo& dsi, TString strcor )
67  : VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
68  fFlatNotGauss(kFALSE),
69  fPdfMinSmooth(0),
70  fPdfMaxSmooth(0),
71  fElementsperbin(0)
72 {
73  if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
74  SetName("Uniform");
75  }
76 }
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 /// destructor
80 
82 {
84 }
85 
86 ////////////////////////////////////////////////////////////////////////////////
87 
89 {
90 }
91 
92 ////////////////////////////////////////////////////////////////////////////////
93 /// calculate the cumulative distributions
94 
96 {
97  Initialize();
98 
99  if (!IsEnabled() || IsCreated()) return kTRUE;
100 
101  Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
102 
103  UInt_t inputSize = fGet.size();
104  SetNVariables(inputSize);
105 
106  if (inputSize > 200) {
107  Log() << kWARNING << "----------------------------------------------------------------------------"
108  << Endl;
109  Log() << kWARNING
110  << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
111  Log() << kWARNING << "----------------------------------------------------------------------------"
112  << Endl;
113  // return kFALSE;
114  }
115 
116  GetCumulativeDist( events );
117 
118  SetCreated( kTRUE );
119 
120  return kTRUE;
121 }
122 
123 ////////////////////////////////////////////////////////////////////////////////
124 /// apply the Gauss transformation
125 
127 {
128  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
129  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
130  //EVT if (cls <0 || cls > GetNClasses() ) {
131  //EVT cls = GetNClasses();
132  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
133  //EVT}
134  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
135  //EVT workaround end
136 
137  // get the variable vector of the current event
138  UInt_t inputSize = fGet.size();
139 
140  std::vector<Float_t> input(0);
141  std::vector<Float_t> output(0);
142 
143  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
144  GetInput( ev, input, mask );
145 
146  std::vector<Char_t>::iterator itMask = mask.begin();
147 
148 // TVectorD vec( inputSize );
149 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
150  Double_t cumulant;
151  //transformation
152  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
153 
154  if ( (*itMask) ){
155  ++itMask;
156  continue;
157  }
158 
159  if (0 != fCumulativePDF[ivar][cls]) {
160  // first make it flat
161  if(fTMVAVersion>TMVA_VERSION(3,9,7))
162  cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
163  else
164  cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
165  cumulant = TMath::Min(cumulant,1.-10e-10);
166  cumulant = TMath::Max(cumulant,0.+10e-10);
167 
168  if (fFlatNotGauss)
169  output.push_back( cumulant );
170  else {
171  // sanity correction for out-of-range values
172  Double_t maxErfInvArgRange = 0.99999999;
173  Double_t arg = 2.0*cumulant - 1.0;
174  arg = TMath::Min(+maxErfInvArgRange,arg);
175  arg = TMath::Max(-maxErfInvArgRange,arg);
176 
177  output.push_back( 1.414213562*TMath::ErfInverse(arg) );
178  }
179  }
180  }
181 
182  if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
183  if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
184  fTransformedEvent = new Event();
185  }
186 
187  SetOutput( fTransformedEvent, output, mask, ev );
188 
189  return fTransformedEvent;
190 }
191 
192 ////////////////////////////////////////////////////////////////////////////////
193 /// apply the inverse Gauss or inverse uniform transformation
194 
196 {
197  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
198  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
199  //EVT if (cls <0 || cls > GetNClasses() ) {
200  //EVT cls = GetNClasses();
201  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
202  //EVT}
203  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
204  //EVT workaround end
205 
206  // get the variable vector of the current event
207  UInt_t inputSize = fGet.size();
208 
209  std::vector<Float_t> input(0);
210  std::vector<Float_t> output(0);
211 
212  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
213  GetInput( ev, input, mask, kTRUE );
214 
215  std::vector<Char_t>::iterator itMask = mask.begin();
216 
217 // TVectorD vec( inputSize );
218 // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
219  Double_t invCumulant;
220  //transformation
221  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
222 
223  if ( (*itMask) ){
224  ++itMask;
225  continue;
226  }
227 
228  if (0 != fCumulativePDF[ivar][cls]) {
229  invCumulant = input.at(ivar);
230 
231  // first de-gauss ist if gaussianized
232  if (!fFlatNotGauss)
233  invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
234 
235  // then de-uniform the values
236  if(fTMVAVersion>TMVA_VERSION(4,0,0))
237  invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
238  else
239  Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
240 
241  output.push_back(invCumulant);
242  }
243  }
244 
245  if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev );
246 
247  SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
248 
249  return fBackTransformedEvent;
250 }
251 
252 ////////////////////////////////////////////////////////////////////////////////
253 /// fill the cumulative distributions
254 
255 void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
256 {
257  const UInt_t inputSize = fGet.size();
258 // const UInt_t nCls = GetNClasses();
259 
260 // const UInt_t nvar = GetNVariables();
261  UInt_t nevt = events.size();
262 
263  const UInt_t nClasses = GetNClasses();
264  UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes seperately + one where all classes are treated (added) together
265 
266  if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
267 
268  UInt_t **nbins = new UInt_t*[numDist];
269 
270  std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
271  std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
272  for (UInt_t i=0; i < numDist; i++) {
273  listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
274  vsForBinning[i] = new std::vector<Float_t> [inputSize];
275  nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
276  }
277 
278  std::vector<Float_t> input;
279  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
280 
281  // perform event loop
282  Float_t *sumOfWeights = new Float_t[numDist];
283  Float_t *minWeight = new Float_t[numDist];
284  Float_t *maxWeight = new Float_t[numDist];
285  for (UInt_t i=0; i<numDist; i++) {
286  sumOfWeights[i]=0;
287  minWeight[i]=10E10; // TODO: change this to std::max ?
288  maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
289  }
290  for (UInt_t ievt=0; ievt < nevt; ievt++) {
291  const Event* ev= events[ievt];
292  Int_t cls = ev->GetClass();
293  Float_t eventWeight = ev->GetWeight();
294  sumOfWeights[cls] += eventWeight;
295  if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
296  if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
297  if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
298 
299  Bool_t hasMaskedEntries = GetInput( ev, input, mask );
300  if( hasMaskedEntries ){
301  Log() << kWARNING << "Incomplete event" << Endl;
302  ev->Print(Log());
303  Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
304  }
305 
306 
307  Int_t ivar = 0;
308  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
309  Float_t value = (*itInput);
310  listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
311  if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
312  ++ivar;
313  }
314  }
315  if (numDist > 1) {
316  for (UInt_t icl=0; icl<numDist-1; icl++){
317  minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
318  maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
319  }
320  }
321 
322  // Sorting the lists, getting nbins ...
323  const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
324  const UInt_t nbinsmax=2000; // maximum number of bins
325 
326  for (UInt_t icl=0; icl< numDist; icl++){
327  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
328  listsForBinning[icl][ivar].sort();
329  std::list< TMVA::TMVAGaussPair >::iterator it;
330  Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
331  sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
332  Float_t sum=0;
333  Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
334  Float_t lastev_value=ev_value;
335  const Float_t eps = 1.e-4;
336  vsForBinning[icl][ivar].push_back(ev_value-eps);
337  vsForBinning[icl][ivar].push_back(ev_value);
338 
339  for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); it++){
340  sum+= it->GetWeight();
341  if (sum >= sumPerBin) {
342  ev_value=it->GetValue();
343  if (ev_value>lastev_value) { // protection against bin width of 0
344  vsForBinning[icl][ivar].push_back(ev_value);
345  sum = 0.;
346  lastev_value=ev_value;
347  }
348  }
349  }
350  if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
351  nbins[icl][ivar] = vsForBinning[icl][ivar].size();
352  }
353  }
354 
355  delete[] sumOfWeights;
356  delete[] minWeight;
357  delete[] maxWeight;
358 
359  // create histogram for the cumulative distribution.
360  fCumulativeDist.resize(inputSize);
361  for (UInt_t icls = 0; icls < numDist; icls++) {
362  for (UInt_t ivar=0; ivar < inputSize; ivar++){
363  Float_t* binnings = new Float_t[nbins[icls][ivar]];
364  //the binning for this particular histogram:
365  for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
366  binnings[k] = vsForBinning[icls][ivar][k];
367  }
368  fCumulativeDist[ivar].resize(numDist);
369  if (0 != fCumulativeDist[ivar][icls] ) {
370  delete fCumulativeDist[ivar][icls];
371  }
372  fCumulativeDist[ivar][icls] = new TH1F(Form("Cumulative_Var%d_cls%d",ivar,icls),
373  Form("Cumulative_Var%d_cls%d",ivar,icls),
374  nbins[icls][ivar] -1, // class icls
375  binnings);
376  fCumulativeDist[ivar][icls]->SetDirectory(0);
377  delete [] binnings;
378  }
379  }
380 
381  // Deallocation
382  for (UInt_t i=0; i<numDist; i++) {
383  delete [] listsForBinning[numDist-i-1];
384  delete [] vsForBinning[numDist-i-1];
385  delete [] nbins[numDist-i-1];
386  }
387  delete [] listsForBinning;
388  delete [] vsForBinning;
389  delete [] nbins;
390 
391  // perform event loop
392  std::vector<Int_t> ic(numDist);
393  for (UInt_t ievt=0; ievt<nevt; ievt++) {
394 
395  const Event* ev= events[ievt];
396  Int_t cls = ev->GetClass();
397  Float_t eventWeight = ev->GetWeight();
398 
399  GetInput( ev, input, mask );
400 
401  Int_t ivar = 0;
402  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
403  Float_t value = (*itInput);
404  fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
405  if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
406 
407  ++ivar;
408  }
409  }
410 
411  // clean up
412  CleanUpCumulativeArrays("PDF");
413 
414  // now sum up in order to get the real cumulative distribution
415  Double_t sum = 0, total=0;
416  fCumulativePDF.resize(inputSize);
417  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
418 // fCumulativePDF.resize(ivar+1);
419  for (UInt_t icls=0; icls<numDist; icls++) {
420  (fCumulativeDist[ivar][icls])->Smooth();
421  sum = 0;
422  total = 0.;
423  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
424  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
425  if (val>0) total += val;
426  }
427  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
428  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
429  if (val>0) sum += val;
430  (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
431  }
432  // create PDf
433  fCumulativePDF[ivar].push_back(new PDF( Form("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
434  }
435  }
436 }
437 
438 ////////////////////////////////////////////////////////////////////////////////
439 
441 {
442  Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
443 }
444 
445 ////////////////////////////////////////////////////////////////////////////////
446 /// clean up of cumulative arrays
447 
449  if (opt == "ALL" || opt == "PDF"){
450  for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
451  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
452  if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
453  }
454  }
455  fCumulativePDF.clear();
456  }
457  if (opt == "ALL" || opt == "Dist"){
458  for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
459  for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
460  if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
461  }
462  }
463  fCumulativeDist.clear();
464  }
465 }
466 ////////////////////////////////////////////////////////////////////////////////
467 /// create XML description of Gauss transformation
468 
470  void* trfxml = gTools().AddChild(parent, "Transform");
471  gTools().AddAttr(trfxml, "Name", "Gauss");
472  gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
473 
475 
476  UInt_t nvar = fGet.size();
477  for (UInt_t ivar=0; ivar<nvar; ivar++) {
478  void* varxml = gTools().AddChild( trfxml, "Variable");
479 // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
480  gTools().AddAttr( varxml, "VarIndex", ivar );
481 
482  if ( fCumulativePDF[ivar][0]==0 ||
483  (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
484  Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
485 
486  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
487  void* pdfxml = gTools().AddChild( varxml, Form("CumulativePDF_cls%d",icls));
488  (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
489  }
490  }
491 }
492 
493 ////////////////////////////////////////////////////////////////////////////////
494 /// Read the transformation matrices from the xml node
495 
497  // clean up first
498  CleanUpCumulativeArrays();
499  TString FlatOrGauss;
500 
501  gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
502 
503  if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
504  else fFlatNotGauss = kFALSE;
505 
506  Bool_t newFormat = kFALSE;
507 
508  void* inpnode = NULL;
509 
510  inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
511  if( inpnode!=NULL )
512  newFormat = kTRUE; // new xml format
513 
514  void* varnode = NULL;
515  if( newFormat ){
516  // ------------- new format --------------------
517  // read input
519 
520  varnode = gTools().GetNextChild(inpnode);
521  }else
522  varnode = gTools().GetChild(trfnode);
523 
524  // Read the cumulative distribution
525 
526  TString varname, histname, classname;
527  UInt_t ivar;
528  while(varnode) {
529  if( gTools().HasAttr(varnode,"Name") )
530  gTools().ReadAttr(varnode, "Name", varname);
531  gTools().ReadAttr(varnode, "VarIndex", ivar);
532 
533  void* clsnode = gTools().GetChild( varnode);
534 
535  while(clsnode) {
536  void* pdfnode = gTools().GetChild( clsnode);
537  PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
538  pdfToRead->ReadXML(pdfnode); // pdfnode
539  // push_back PDF
540  fCumulativePDF.resize( ivar+1 );
541  fCumulativePDF[ivar].push_back(pdfToRead);
542  clsnode = gTools().GetNextChild(clsnode);
543  }
544 
545  varnode = gTools().GetNextChild(varnode);
546  }
547  SetCreated();
548 }
549 
550 ////////////////////////////////////////////////////////////////////////////////
551 /// Read the cumulative distribution
552 
553 void TMVA::VariableGaussTransform::ReadTransformationFromStream( std::istream& istr, const TString& classname)
554 {
555  Bool_t addDirStatus = TH1::AddDirectoryStatus();
556  TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
557  char buf[512];
558  istr.getline(buf,512);
559 
560  TString strvar, dummy;
561 
562  while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
563  char* p = buf;
564  while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
565  if (*p=='#' || *p=='\0') {
566  istr.getline(buf,512);
567  continue; // if comment or empty line, read the next line
568  }
569  std::stringstream sstr(buf);
570  sstr >> strvar;
571 
572  if (strvar=="CumulativeHistogram") {
573  UInt_t type(0), ivar(0);
574  TString devnullS(""),hname("");
575  Int_t nbins(0);
576 
577  // coverity[tainted_data_argument]
578  sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
579 
580  Float_t *Binnings = new Float_t[nbins+1];
581  Float_t val;
582  istr >> devnullS; // read the line "BinBoundaries" ..
583  for (Int_t ibin=0; ibin<nbins+1; ibin++) {
584  istr >> val;
585  Binnings[ibin]=val;
586  }
587 
588  if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
589  if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
590 
591  TH1F * histToRead = fCumulativeDist[ivar][type];
592  if ( histToRead !=0 ) delete histToRead;
593  // recreate the cumulative histogram to be filled with the values read
594  histToRead = new TH1F( hname, hname, nbins, Binnings );
595  histToRead->SetDirectory(0);
596  fCumulativeDist[ivar][type]=histToRead;
597 
598  istr >> devnullS; // read the line "BinContent" ..
599  for (Int_t ibin=0; ibin<nbins; ibin++) {
600  istr >> val;
601  histToRead->SetBinContent(ibin+1,val);
602  }
603 
604  PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
605  // push_back PDF
606  fCumulativePDF.resize(ivar+1);
607  fCumulativePDF[ivar].resize(type+1);
608  fCumulativePDF[ivar][type] = pdf;
609  delete [] Binnings;
610  }
611 
612  // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
613  if (strvar=="Uniform") { // don't correct this spelling mistake
614  sstr >> fFlatNotGauss;
615  istr.getline(buf,512);
616  break;
617  }
618 
619  istr.getline(buf,512); // reading the next line
620  }
621  TH1::AddDirectory(addDirStatus);
622 
623  UInt_t classIdx=(classname=="signal")?0:1;
624  for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
625  PDF* src = fCumulativePDF[ivar][classIdx];
626  fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
627  }
628 
629  SetTMVAVersion(TMVA_VERSION(3,9,7));
630 
631  SetCreated();
632 }
633 
635 
636  Int_t bin = h->FindBin(x);
637  bin = TMath::Max(bin,1);
638  bin = TMath::Min(bin,h->GetNbinsX());
639 
640  Double_t cumulant;
641  Double_t x0, x1, y0, y1;
642  Double_t total = h->GetNbinsX()*fElementsperbin;
643  Double_t supmin = 0.5/total;
644 
645  x0 = h->GetBinLowEdge(TMath::Max(bin,1));
646  x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
647 
648  y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
649  y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
650 
651  if (bin == 0) {
652  y0 = supmin;
653  y1 = supmin;
654  }
655  if (bin == 1) {
656  y0 = supmin;
657  }
658  if (bin > h->GetNbinsX()) {
659  y0 = 1.-supmin;
660  y1 = 1.-supmin;
661  }
662  if (bin == h->GetNbinsX()) {
663  y1 = 1.-supmin;
664  }
665 
666  if (x0 == x1) {
667  cumulant = y1;
668  } else {
669  cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
670  }
671 
672  if (x <= h->GetBinLowEdge(1)){
673  cumulant = supmin;
674  }
675  if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
676  cumulant = 1-supmin;
677  }
678  return cumulant;
679 }
680 
681 
682 
683 ////////////////////////////////////////////////////////////////////////////////
684 /// prints the transformation
685 
687 {
688  Int_t cls = 0;
689  Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
690  cls++;
691 }
692 
693 ////////////////////////////////////////////////////////////////////////////////
694 /// creates the transformation function
695 ///
696 
697 void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName,
698  Int_t part, UInt_t trCounter, Int_t )
699 {
700  const UInt_t nvar = fGet.size();
701  UInt_t numDist = GetNClasses() + 1;
702  Int_t nBins = -1;
703  for (UInt_t icls=0; icls<numDist; icls++) {
704  for (UInt_t ivar=0; ivar<nvar; ivar++) {
705  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
706  if (nbin > nBins) nBins=nbin;
707  }
708  }
709 
710  // creates the gauss transformation function
711  if (part==1) {
712  fout << std::endl;
713  fout << " int nvar;" << std::endl;
714  fout << std::endl;
715  // declare variables
716  fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
717  fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
718  fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
719  fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
720  fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
721  }
722  if (part==2) {
723  fout << std::endl;
724  fout << "#include \"math.h\"" << std::endl;
725  fout << std::endl;
726  fout << "//_______________________________________________________________________" << std::endl;
727  fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
728  fout << "{" << std::endl;
729  fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
730  fout << " nvar=" << nvar << ";" << std::endl;
731  for (UInt_t icls=0; icls<numDist; icls++) {
732  for (UInt_t ivar=0; ivar<nvar; ivar++) {
733  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
734  fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
735  }
736  }
737 
738  // fill meat here
739  // loop over nvar , cls, loop over nBins
740  // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
741  for (UInt_t icls=0; icls<numDist; icls++) {
742  for (UInt_t ivar=0; ivar<nvar; ivar++) {
743  // Int_t idx = 0;
744  try{
745  // idx = fGet.at(ivar).second;
746  Char_t type = fGet.at(ivar).first;
747  if( type != 'v' ){
748  Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
749  }
750  }catch( std::out_of_range except ){
751  Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
752  }
753 
754 // Double_t xmn=Variables()[idx].GetMin();
755 // Double_t xmx=Variables()[idx].GetMax();
756  Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
757  Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
758 
759  fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
760  fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
761  for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
762  fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
763  fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
764 
765  }
766  }
767  }
768  fout << "}" << std::endl;
769  fout << std::endl;
770  fout << "//_______________________________________________________________________" << std::endl;
771  fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
772  fout << "{" << std::endl;
773  fout << " // Gauss/Uniform transformation" << std::endl;
774  fout << " int cls=clsIn;" << std::endl;
775  fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
776  fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
777  fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
778  fout << " }"<< std::endl;
779 
780  fout << " // copy the variables which are going to be transformed "<< std::endl;
781  VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
782  fout << " static std::vector<double> dv; "<< std::endl;
783  fout << " dv.resize(nvar); "<< std::endl;
784  fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
785  fout << " "<< std::endl;
786  fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
787  fout << " double cumulant; "<< std::endl;
788  fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
789  fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
790  fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
791  fout << " int ibin=0; "<< std::endl;
792  fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
793  fout << " "<< std::endl;
794  fout << " if (ibin<0) { ibin=0;} "<< std::endl;
795  fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
796  fout << " int nextbin = ibin; "<< std::endl;
797  fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
798  fout << " nextbin++; "<< std::endl;
799  fout << " else "<< std::endl;
800  fout << " nextbin--; "<< std::endl;
801  fout << " "<< std::endl;
802  fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
803  fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
804  fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
805  fout << " "<< std::endl;
806  fout << " "<< std::endl;
807  fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
808  fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
809  fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
810  fout << " else { "<< std::endl;
811  fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
812  fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
813  fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
814  fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
815  fout << " double inverf=0., stp=1. ; "<< std::endl;
816  fout << " while (stp >1.e-10){; "<< std::endl;
817  fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
818  fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
819  fout << " else inverf += stp; "<< std::endl;
820  fout << " } ; "<< std::endl;
821  fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
822  fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
823  fout << " } "<< std::endl;
824  fout << " } "<< std::endl;
825  fout << " // copy the transformed variables back "<< std::endl;
826  fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
827  fout << "} "<< std::endl;
828  }
829 }
void GetCumulativeDist(const std::vector< Event * > &)
fill the cumulative distributions
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3478
Double_t ErfInverse(Double_t x)
returns the inverse error function x must be <-1
Definition: TMath.cxx:206
virtual const Event * Transform(const Event *const, Int_t cls) const
apply the Gauss transformation
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:952
virtual ~VariableGaussTransform(void)
destructor
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4629
float Float_t
Definition: RtypesCore.h:53
virtual void SetDirectory(TDirectory *dir)
By default when an histogram is created, it is added to the list of histogram objects in the current ...
Definition: TH1.cxx:8266
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)=0
getinput and setoutput equivalent
TH1 * h
Definition: legend2.C:5
ClassImp(TMVA::VariableGaussTransform) TMVA
constructor can only be applied one after the other when they are created.
virtual void AttachXMLTo(void *parent)=0
create XML description the transformation (write out info of selected variables)
THist< 1, float > TH1F
Definition: THist.h:315
Basic string class.
Definition: TString.h:137
static Bool_t AddDirectoryStatus()
static function: cannot be inlined on Windows/NT
Definition: TH1.cxx:709
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:570
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
int nbins[3]
virtual Int_t GetNbinsX() const
Definition: TH1.h:296
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:376
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:308
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1231
Tools & gTools()
Definition: Tools.cxx:79
Double_t x[n]
Definition: legend1.C:17
virtual Double_t GetBinLowEdge(Int_t bin) const
return bin lower edge for 1D historam Better to use h1.GetXaxis().GetBinLowEdge(bin) ...
Definition: TH1.cxx:8481
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
Definition: PDF.h:71
void ReadTransformationFromStream(std::istream &, const TString &)
Read the cumulative distribution.
virtual void ReadFromXML(void *trfnode)=0
Read the input variables from the XML node.
UInt_t GetNVariables() const
accessor to the number of variables
Definition: Event.cxx:303
TString StringFromDouble(Double_t d)
string tools
Definition: Tools.cxx:1241
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Bool_t PrepareTransformation(const std::vector< Event * > &)
calculate the cumulative distributions
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition: TMath.cxx:187
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8543
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:295
void Print(std::ostream &o) const
print method
Definition: Event.cxx:346
static unsigned int total
#define TMVA_VERSION(a, b, c)
Definition: Version.h:48
Double_t OldCumulant(Float_t x, TH1 *h) const
static const double x1[5]
double f(double x)
double Double_t
Definition: RtypesCore.h:55
int type
Definition: TGX11.cxx:120
static RooMathCoreReg dummy
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
The TH1 histogram class.
Definition: TH1.h:80
UInt_t GetClass() const
Definition: Event.h:86
virtual void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part, UInt_t trCounter, Int_t cls)
creates the transformation function
char Char_t
Definition: RtypesCore.h:29
Abstract ClassifierFactory template that handles arbitrary types.
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
const char * GetName() const
Returns name of object.
Definition: PDF.h:124
#define NULL
Definition: Rtypes.h:82
virtual void ReadFromXML(void *trfnode)
Read the transformation matrices from the xml node.
void CleanUpCumulativeArrays(TString opt="ALL")
clean up of cumulative arrays
virtual void AttachXMLTo(void *parent)
create XML description of Gauss transformation
virtual const Event * InverseTransform(const Event *const, Int_t cls) const
apply the inverse Gauss or inverse uniform transformation
static void output(int code)
Definition: gifencode.c:226
const Bool_t kTRUE
Definition: Rtypes.h:91
float value
Definition: math.cpp:443
const int except
virtual void PrintTransformation(std::ostream &o)
prints the transformation
gr SetName("gr")
Definition: math.cpp:60
void WriteTransformationToStream(std::ostream &) const