Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
VariableTransform.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Omar Zapata
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : VariableTransformBase *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * MPI-K Heidelberg, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
31#include "TMVA/VariableInfo.h"
35
36#include "TMVA/Config.h"
37#include "TMVA/DataSetInfo.h"
38#include "TMVA/MsgLogger.h"
39#include "TMVA/Ranking.h"
40#include "TMVA/Tools.h"
41#include "TMVA/Types.h"
42#include "TMVA/Version.h"
44
45#include "THashTable.h"
46#include "TList.h"
47#include "TObjString.h"
48
49#include <algorithm>
50#include <cassert>
51#include <exception>
52#include <stdexcept>
53#include <set>
54
55////////////////////////////////////////////////////////////////////////////////
56/// create variable transformations
57
58namespace TMVA {
59void CreateVariableTransforms(const TString& trafoDefinitionIn,
60 TMVA::DataSetInfo& dataInfo,
61 TMVA::TransformationHandler& transformationHandler,
62 TMVA::MsgLogger& log)
63{
64 TString trafoDefinition(trafoDefinitionIn);
65 if (trafoDefinition == "None") return; // no transformations
66
67 // workaround for transformations to complicated to be handled by makeclass
68 // count number of transformations with incomplete set of variables
69 TString trafoDefinitionCheck(trafoDefinitionIn);
70 int npartial = 0;
71 for (Int_t pos = 0, siz = trafoDefinition.Sizeof(); pos < siz; ++pos) {
72 TString ch = trafoDefinition(pos,1);
73 if ( ch == "(" ) npartial++;
74 }
75 if (npartial>1) {
76 log << kWARNING
77 << "The use of multiple partial variable transformations during the "
78 "application phase can be properly invoked via the \"Reader\", but "
79 "it is not yet implemented in \"MakeClass\", the creation mechanism "
80 "for standalone C++ application classes. The standalone C++ class "
81 "produced by this training job is thus INCOMPLETE AND MUST NOT BE USED! "
82 "The transformation in question is: " << trafoDefinitionIn << Endl;
83 // ToDo make info and do not write the standalone class
84 //
85 // this does not work since this function is static
86 // fDisableWriting=true; // disable creation of stand-alone class
87 // ToDo we need to tell the transformation that it cannot write itself
88 }
89 // workaround end
90
91 Int_t parenthesisCount = 0;
92 for (Int_t position = 0, size = trafoDefinition.Sizeof(); position < size; ++position) {
93 TString ch = trafoDefinition(position,1);
94 if (ch == "(") ++parenthesisCount;
95 else if (ch == ")") --parenthesisCount;
96 else if (ch == "," && parenthesisCount == 0) trafoDefinition.Replace(position,1,'+');
97 }
98
99 TList* trList = gTools().ParseFormatLine( trafoDefinition, "+" );
100 TListIter trIt(trList);
101 while (TObjString* os = (TObjString*)trIt()) {
102 TString tdef = os->GetString();
103 Int_t idxCls = -1;
104
105 TString variables = "";
106 if (tdef.Contains("(")) { // contains selection of variables
107 Ssiz_t parStart = tdef.Index( "(" );
108 Ssiz_t parLen = tdef.Index( ")", parStart )-parStart+1;
109
110 variables = tdef(parStart,parLen);
111 tdef.Remove(parStart,parLen);
112 variables.Remove(parLen-1,1);
113 variables.Remove(0,1);
114 }
115
116 TList* trClsList = gTools().ParseFormatLine( tdef, "_" ); // split entry to get trf-name and class-name
117 TListIter trClsIt(trClsList);
118 if (trClsList->GetSize() < 1)
119 log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Incorrect transformation string provided." << Endl;
120 const TString& trName = ((TObjString*)trClsList->At(0))->GetString();
121
122 if (trClsList->GetEntries() > 1) {
123 TString trCls = "AllClasses";
124 ClassInfo *ci = NULL;
125 trCls = ((TObjString*)trClsList->At(1))->GetString();
126 if (trCls != "AllClasses") {
127 ci = dataInfo.GetClassInfo( trCls );
128 if (ci == NULL)
129 log << kFATAL <<Form("Dataset[%s] : ",dataInfo.GetName())<< "Class " << trCls << " not known for variable transformation "
130 << trName << ", please check." << Endl;
131 else
132 idxCls = ci->GetNumber();
133 }
134 }
135
136 VariableTransformBase* transformation = NULL;
137 if (trName == "I" || trName == "Ident" || trName == "Identity") {
138 if (variables.Length() == 0) variables = "_V_";
139 transformation = new VariableIdentityTransform(dataInfo);
140 }
141 else if (trName == "D" || trName == "Deco" || trName == "Decorrelate") {
142 if (variables.Length() == 0) variables = "_V_";
143 transformation = new VariableDecorrTransform(dataInfo);
144 }
145 else if (trName == "P" || trName == "PCA") {
146 if (variables.Length() == 0) variables = "_V_";
147 transformation = new VariablePCATransform(dataInfo);
148 }
149 else if (trName == "U" || trName == "Uniform") {
150 if (variables.Length() == 0) variables = "_V_,_T_";
151 transformation = new VariableGaussTransform(dataInfo, "Uniform" );
152 }
153 else if (trName == "G" || trName == "Gauss") {
154 if (variables.Length() == 0) variables = "_V_";
155 transformation = new VariableGaussTransform(dataInfo);
156 }
157 else if (trName == "N" || trName == "Norm" || trName == "Normalise" || trName == "Normalize") {
158 if (variables.Length() == 0) variables = "_V_,_T_";
159 transformation = new VariableNormalizeTransform(dataInfo);
160 }
161 else
162 log << kFATAL << Form("Dataset[%s] : ",dataInfo.GetName())
163 << "<ProcessOptions> Variable transform '"
164 << trName << "' unknown." << Endl;
165
166
167 if (transformation) {
168 ClassInfo* clsInfo = dataInfo.GetClassInfo(idxCls);
169 if (clsInfo)
170 log << kHEADER << Form("[%s] : ",dataInfo.GetName())
171 << "Create Transformation \"" << trName << "\" with reference class "
172 << clsInfo->GetName() << "=("<< idxCls <<")" << Endl << Endl;
173 else
174 log << kHEADER << Form("[%s] : ",dataInfo.GetName())
175 << "Create Transformation \"" << trName << "\" with events from all classes."
176 << Endl << Endl;
177
178 transformation->SelectInput(variables);
179 transformationHandler.AddTransformation(transformation, idxCls);
180 }
181 }
182}
183
184}
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2467
virtual Int_t GetEntries() const
virtual Int_t GetSize() const
Return the capacity of the collection, i.e. the current total amount of space that has been allocated so far.
Iterator of linked list.
Definition TList.h:191
A doubly linked list.
Definition TList.h:38
TObject * At(Int_t idx) const override
Returns the object at position idx. Returns 0 if idx is out of range.
Definition TList.cxx:357
Class that contains all the information of a class.
Definition ClassInfo.h:49
UInt_t GetNumber() const
Definition ClassInfo.h:65
Class that contains all the data information.
Definition DataSetInfo.h:62
virtual const char * GetName() const
Returns name of object.
Definition DataSetInfo.h:71
ClassInfo * GetClassInfo(Int_t clNum) const
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition Tools.cxx:401
Class that contains all the data information.
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)
Linear interpolation class.
Gaussian Transformation of input variables.
Linear interpolation class.
Linear interpolation class.
virtual void SelectInput(const TString &inputVariables, Bool_t putIntoVariables=kFALSE)
select the variables/targets/spectators which serve as input to the transformation
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Collectable string class.
Definition TObjString.h:28
Basic string class.
Definition TString.h:139
TString & Replace(Ssiz_t pos, Ssiz_t n, const char *s)
Definition TString.h:694
TString & Remove(Ssiz_t pos)
Definition TString.h:685
virtual Int_t Sizeof() const
Returns size string will occupy on I/O buffer.
Definition TString.cxx:1379
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition TString.h:636
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:651
create variable transformations
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
void variables(TString dataset, TString fin="TMVA.root", TString dirName="InputVariables_Id", TString title="TMVA Input Variables", Bool_t isRegression=kFALSE, Bool_t useTMVAStyle=kTRUE)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148