Logo ROOT   6.10/09
Reference Guide
VariableTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Omar Zapata
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableTransformBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
32 #include "TMVA/VariableInfo.h"
36 
37 #include "TMVA/Config.h"
38 #include "TMVA/DataSetInfo.h"
39 #include "TMVA/MsgLogger.h"
40 #include "TMVA/Ranking.h"
41 #include "TMVA/Tools.h"
42 #include "TMVA/Types.h"
43 #include "TMVA/VariableInfo.h"
44 #include "TMVA/Version.h"
46 #include "TMVA/MsgLogger.h"
47 
48 #include "TH1.h"
49 #include "TH2.h"
50 #include "THashTable.h"
51 #include "TList.h"
52 #include "TMath.h"
53 #include "TProfile.h"
54 #include "TVectorD.h"
55 
56 #include <algorithm>
57 #include <cassert>
58 #include <exception>
59 #include <iomanip>
60 #include <stdexcept>
61 #include <set>
62 
63 ////////////////////////////////////////////////////////////////////////////////
64 /// create variable transformations
65 
66 namespace TMVA {
67 void CreateVariableTransforms( const TString& trafoDefinitionIn,
68  TMVA::DataSetInfo& dataInfo,
69  TMVA::TransformationHandler& transformationHandler,
71 {
72  TString trafoDefinition(trafoDefinitionIn);
73  if (trafoDefinition == "None") return; // no transformations
74 
75  // workaround for transformations to complicated to be handled by makeclass
76  // count number of transformations with incomplete set of variables
77  TString trafoDefinitionCheck(trafoDefinitionIn);
78  int npartial = 0, ntrafo=0;
79  for (Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos) {
80  TString ch = trafoDefinition(pos,1);
81  if ( ch == "(" ) npartial++;
82  if ( ch == "+" || ch == ",") ntrafo++;
83  }
84  if (npartial>1) {
85  log << kWARNING << "The use of multiple partial variable transformations during the application phase can be properly invoked via the \"Reader\", but it is not yet implemented in \"MakeClass\", the creation mechanism for standalone C++ application classes. The standalone C++ class produced by this training job is thus INCOMPLETE AND MUST NOT BE USED! The transformation in question is: " << trafoDefinitionIn << Endl; // ToDo make info and do not write the standalone class
86  //
87  // this does not work since this function is static
88  // fDisableWriting=true; // disable creation of stand-alone class
89  // ToDo we need to tell the transformation that it cannot write itself
90  }
91  // workaround end
92 
93  Int_t parenthesisCount = 0;
94  for (Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position) {
95  TString ch = trafoDefinition(position,1);
96  if (ch == "(") ++parenthesisCount;
97  else if (ch == ")") --parenthesisCount;
98  else if (ch == "," && parenthesisCount == 0) trafoDefinition.Replace(position,1,'+');
99  }
100 
101  TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" );
102  TListIter trIt(trList);
103  while (TObjString* os = (TObjString*)trIt()) {
104  TString tdef = os->GetString();
105  Int_t idxCls = -1;
106 
107  TString variables = "";
108  if (tdef.Contains("(")) { // contains selection of variables
109  Ssiz_t parStart = tdef.Index( "(" );
110  Ssiz_t parLen = tdef.Index( ")", parStart )-parStart+1;
111 
112  variables = tdef(parStart,parLen);
113  tdef.Remove(parStart,parLen);
114  variables.Remove(parLen-1,1);
115  variables.Remove(0,1);
116  }
117 
118  TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name
119  TListIter trClsIt(trClsList);
120  if (trClsList->GetSize() < 1) log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Incorrect transformation string provided." << Endl;
121  const TString& trName = ((TObjString*)trClsList->At(0))->GetString();
122 
123  if (trClsList->GetEntries() > 1) {
124  TString trCls = "AllClasses";
125  ClassInfo *ci = NULL;
126  trCls = ((TObjString*)trClsList->At(1))->GetString();
127  if (trCls != "AllClasses") {
128  ci = dataInfo.GetClassInfo( trCls );
129  if (ci == NULL)
130  log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Class " << trCls << " not known for variable transformation "
131  << trName << ", please check." << Endl;
132  else
133  idxCls = ci->GetNumber();
134  }
135  }
136 
137  VariableTransformBase* transformation = NULL;
138  if (trName == "I" || trName == "Ident" || trName == "Identity") {
139  if (variables.Length() == 0) variables = "_V_";
140  transformation = new VariableIdentityTransform( dataInfo);
141  }
142  else if (trName == "D" || trName == "Deco" || trName == "Decorrelate") {
143  if (variables.Length() == 0) variables = "_V_";
144  transformation = new VariableDecorrTransform( dataInfo);
145  }
146  else if (trName == "P" || trName == "PCA") {
147  if (variables.Length() == 0) variables = "_V_";
148  transformation = new VariablePCATransform ( dataInfo);
149  }
150  else if (trName == "U" || trName == "Uniform") {
151  if (variables.Length() == 0) variables = "_V_,_T_";
152  transformation = new VariableGaussTransform ( dataInfo, "Uniform" );
153  }
154  else if (trName == "G" || trName == "Gauss") {
155  if (variables.Length() == 0) variables = "_V_";
156  transformation = new VariableGaussTransform ( dataInfo);
157  }
158  else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") {
159  if (variables.Length() == 0) variables = "_V_,_T_";
160  transformation = new VariableNormalizeTransform( dataInfo);
161  }
162  else log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "<ProcessOptions> Variable transform '"
163  << trName << "' unknown." << Endl;
164 
165 
166  if (transformation) {
167  ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls);
168  if (clsInfo )
169  log << kHEADER <<Form("[%s] : ",dataInfo.GetName())
170  << "Create Transformation \"" << trName << "\" with reference class "
171  << clsInfo->GetName() << "=("<< idxCls <<")"<<Endl << Endl;
172 else
173  log << kHEADER <<Form("[%s] : ",dataInfo.GetName())
174  << "Create Transformation \"" << trName << "\" with events from all classes." << Endl << Endl;
175 
176  transformation->SelectInput( variables );
177  transformationHandler.AddTransformation(transformation, idxCls);
178  }
179  }
180  return;
181 }
182 
183 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Collectable string class.
Definition: TObjString.h:28
virtual Int_t GetEntries() const
Definition: TCollection.h:86
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:587
Basic string class.
Definition: TString.h:129
int Int_t
Definition: RtypesCore.h:41
#define NULL
Definition: RtypesCore.h:88
Iterator of linked list.
Definition: TList.h:183
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:630
Class that contains all the information of a class.
Definition: ClassInfo.h:49
Class that contains all the data information.
Definition: DataSetInfo.h:60
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
A doubly linked list.
Definition: TList.h:43
Linear interpolation class.
ClassInfo * GetClassInfo(Int_t clNum) const
char * Form(const char *fmt,...)
Ssiz_t Length() const
Definition: TString.h:388
Linear interpolation class.
Class that contains all the data information.
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:315
Tools & gTools()
Gaussian Transformation of input variables.
Linear interpolation class.
TString & Remove(Ssiz_t pos)
Definition: TString.h:621
int Ssiz_t
Definition: RtypesCore.h:63
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1308
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:572
UInt_t GetNumber() const
Definition: ClassInfo.h:65
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Abstract ClassifierFactory template that handles arbitrary types.
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:412
virtual void SelectInput(const TString &inputVariables, Bool_t putIntoVariables=kFALSE)
select the variables/targets/spectators which serve as input to the transformation ...
virtual Int_t GetSize() const
Definition: TCollection.h:89
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
double log(double)
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)