Logo ROOT  
Reference Guide
VariableTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Omar Zapata
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableTransformBase *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * MPI-K Heidelberg, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
31#include "TMVA/VariableInfo.h"
35
36#include "TMVA/Config.h"
37#include "TMVA/DataSetInfo.h"
38#include "TMVA/MsgLogger.h"
39#include "TMVA/Ranking.h"
40#include "TMVA/Tools.h"
41#include "TMVA/Types.h"
42#include "TMVA/Version.h"
44
45#include "THashTable.h"
46#include "TList.h"
47#include "TObjString.h"
48
49#include <algorithm>
50#include <cassert>
51#include <exception>
52#include <stdexcept>
53#include <set>
54
////////////////////////////////////////////////////////////////////////////////
/// Create variable transformations.
57
58namespace TMVA {
59void CreateVariableTransforms(const TString& trafoDefinitionIn,
60 TMVA::DataSetInfo& dataInfo,
61 TMVA::TransformationHandler& transformationHandler,
63{
64 TString trafoDefinition(trafoDefinitionIn);
65 if (trafoDefinition == "None") return; // no transformations
66
67 // workaround for transformations to complicated to be handled by makeclass
68 // count number of transformations with incomplete set of variables
69 TString trafoDefinitionCheck(trafoDefinitionIn);
70 int npartial = 0, ntrafo = 0;
71 for (Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos) {
72 TString ch = trafoDefinition(pos,1);
73 if ( ch == "(" ) npartial++;
74 if ( ch == "+" || ch == ",") ntrafo++;
75 }
76 if (npartial>1) {
77 log << kWARNING
78 << "The use of multiple partial variable transformations during the "
79 "application phase can be properly invoked via the \"Reader\", but "
80 "it is not yet implemented in \"MakeClass\", the creation mechanism "
81 "for standalone C++ application classes. The standalone C++ class "
82 "produced by this training job is thus INCOMPLETE AND MUST NOT BE USED! "
83 "The transformation in question is: " << trafoDefinitionIn << Endl;
84 // ToDo make info and do not write the standalone class
85 //
86 // this does not work since this function is static
87 // fDisableWriting=true; // disable creation of stand-alone class
88 // ToDo we need to tell the transformation that it cannot write itself
89 }
90 // workaround end
91
92 Int_t parenthesisCount = 0;
93 for (Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position) {
94 TString ch = trafoDefinition(position,1);
95 if (ch == "(") ++parenthesisCount;
96 else if (ch == ")") --parenthesisCount;
97 else if (ch == "," && parenthesisCount == 0) trafoDefinition.Replace(position,1,'+');
98 }
99
100 TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" );
101 TListIter trIt(trList);
102 while (TObjString* os = (TObjString*)trIt()) {
103 TString tdef = os->GetString();
104 Int_t idxCls = -1;
105
106 TString variables = "";
107 if (tdef.Contains("(")) { // contains selection of variables
108 Ssiz_t parStart = tdef.Index( "(" );
109 Ssiz_t parLen = tdef.Index( ")", parStart )-parStart+1;
110
111 variables = tdef(parStart,parLen);
112 tdef.Remove(parStart,parLen);
113 variables.Remove(parLen-1,1);
114 variables.Remove(0,1);
115 }
116
117 TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name
118 TListIter trClsIt(trClsList);
119 if (trClsList->GetSize() < 1)
120 log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Incorrect transformation string provided." << Endl;
121 const TString& trName = ((TObjString*)trClsList->At(0))->GetString();
122
123 if (trClsList->GetEntries() > 1) {
124 TString trCls = "AllClasses";
125 ClassInfo *ci = NULL;
126 trCls = ((TObjString*)trClsList->At(1))->GetString();
127 if (trCls != "AllClasses") {
128 ci = dataInfo.GetClassInfo( trCls );
129 if (ci == NULL)
130 log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Class " << trCls << " not known for variable transformation "
131 << trName << ", please check." << Endl;
132 else
133 idxCls = ci->GetNumber();
134 }
135 }
136
137 VariableTransformBase* transformation = NULL;
138 if (trName == "I" || trName == "Ident" || trName == "Identity") {
139 if (variables.Length() == 0) variables = "_V_";
140 transformation = new VariableIdentityTransform(dataInfo);
141 }
142 else if (trName == "D" || trName == "Deco" || trName == "Decorrelate") {
143 if (variables.Length() == 0) variables = "_V_";
144 transformation = new VariableDecorrTransform(dataInfo);
145 }
146 else if (trName == "P" || trName == "PCA") {
147 if (variables.Length() == 0) variables = "_V_";
148 transformation = new VariablePCATransform(dataInfo);
149 }
150 else if (trName == "U" || trName == "Uniform") {
151 if (variables.Length() == 0) variables = "_V_,_T_";
152 transformation = new VariableGaussTransform(dataInfo, "Uniform" );
153 }
154 else if (trName == "G" || trName == "Gauss") {
155 if (variables.Length() == 0) variables = "_V_";
156 transformation = new VariableGaussTransform(dataInfo);
157 }
158 else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") {
159 if (variables.Length() == 0) variables = "_V_,_T_";
160 transformation = new VariableNormalizeTransform(dataInfo);
161 }
162 else
163 log << kFATAL << Form("Dataset[%s] : ",dataInfo.GetName())
164 << "<ProcessOptions> Variable transform '"
165 << trName << "' unknown." << Endl;
166
167
168 if (transformation) {
169 ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls);
170 if (clsInfo)
171 log << kHEADER << Form("[%s] : ",dataInfo.GetName())
172 << "Create Transformation \"" << trName << "\" with reference class "
173 << clsInfo->GetName() << "=("<< idxCls <<")" << Endl << Endl;
174 else
175 log << kHEADER << Form("[%s] : ",dataInfo.GetName())
176 << "Create Transformation \"" << trName << "\" with events from all classes."
177 << Endl << Endl;
178
179 transformation->SelectInput(variables);
180 transformationHandler.AddTransformation(transformation, idxCls);
181 }
182 }
183}
184
185}
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition: TString.cxx:2447
virtual Int_t GetEntries() const
Definition: TCollection.h:179
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:184
Iterator of linked list.
Definition: TList.h:191
A doubly linked list.
Definition: TList.h:38
TObject * At(Int_t idx) const override
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:357
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
Class that contains all the data information.
Definition: DataSetInfo.h:62
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:71
ClassInfo * GetClassInfo(Int_t clNum) const
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:57
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:401
Class that contains all the data information.
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)
Linear interpolation class.
Gaussian Transformation of input variables.
Linear interpolation class.
Linear interpolation class.
virtual void SelectInput(const TString &inputVariables, Bool_t putIntoVariables=kFALSE)
select the variables/targets/spectators which serve as input to the transformation
@ kHEADER
Definition: Types.h:63
@ kWARNING
Definition: Types.h:59
@ kFATAL
Definition: Types.h:61
const char * GetName() const override
Returns name of object.
Definition: TNamed.h:47
Collectable string class.
Definition: TObjString.h:28
Basic string class.
Definition: TString.h:136
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition: TString.h:682
TString & Remove(Ssiz_t pos)
Definition: TString.h:673
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition: TString.cxx:1359
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:624
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:639
RVec< PromoteType< T > > log(const RVec< T > &v)
Definition: RVec.hxx:1748
create variable transformations
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:148