Logo ROOT  
Reference Guide
MethodDL.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski, Saurav Shekhar
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodDL *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Deep Neural Network Method *
12 * *
13 * Authors (alphabetical): *
14 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#ifndef ROOT_TMVA_MethodDL
29#define ROOT_TMVA_MethodDL
30
31//////////////////////////////////////////////////////////////////////////
32// //
33// MethodDL //
34// //
35// Method class for all Deep Learning Networks //
36// //
37//////////////////////////////////////////////////////////////////////////
38
39#include "TString.h"
40
41#include "TMVA/MethodBase.h"
42#include "TMVA/Types.h"
43
45
46//#ifdef R__HAS_TMVACPU
48//#endif
49
50#ifdef R__HAS_TMVAGPU
52#ifdef R__HAS_CUDNN
54#endif
55#endif
56
57#include "TMVA/DNN/Functions.h"
58#include "TMVA/DNN/DeepNet.h"
59
60#include <vector>
61
62namespace TMVA {
63
64/*! All of the options that can be specified in the training string */
66 size_t batchSize;
69 size_t maxEpochs;
76 std::vector<Double_t> dropoutProbabilities;
78};
79
80
81class MethodDL : public MethodBase {
82
83private:
84 // Key-Value vector type, containing the values for the training options
85 using KeyValueVector_t = std::vector<std::map<TString, TString>>;
86// #ifdef R__HAS_TMVAGPU
87// #ifdef R__HAS_CUDNN
88// using ArchitectureImpl_t = TMVA::DNN::TCudnn<Float_t>;
89// #else
90// using ArchitectureImpl_t = TMVA::DNN::TCuda<Float_t>;
91// #endif
92// #else
93// do not use arch GPU for evaluation. It is too slow for batch size=1
95// #else
96// using ArchitectureImpl_t = TMVA::DNN::TReference<Float_t>;
97// #endif
98//#endif
104
105 /*! The option handling methods */
106 void DeclareOptions();
107 void ProcessOptions();
108
109 void Init();
110
111 // Function to parse the layout of the input
112 void ParseInputLayout();
113 void ParseBatchLayout();
114
115 /*! After calling ProcessOptions(), all of the options are parsed,
116 * so using the parsed options, and given the architecture and the
117 * type of the layers, we build the Deep Network passed as
118 * a reference in the function. */
119 template <typename Architecture_t, typename Layer_t>
121 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets);
122
123 template <typename Architecture_t, typename Layer_t>
125 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
126
127 template <typename Architecture_t, typename Layer_t>
129 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
130
131 template <typename Architecture_t, typename Layer_t>
133 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
134 TString delim);
135
136 template <typename Architecture_t, typename Layer_t>
138 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
139 TString delim);
140
141 template <typename Architecture_t, typename Layer_t>
143 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
144 TString delim);
145
146
147 template <typename Architecture_t, typename Layer_t>
149 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
150
151 template <typename Architecture_t, typename Layer_t>
153 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
154
155 /// train the deep neural network using the defined architecture
156 template <typename Architecture_t>
157 void TrainDeepNet();
158
159 /// perform prediction of the deep neural network
160 /// using batches (called by GetMvaValues)
161 template <typename Architecture_t>
162 std::vector<Double_t> PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress);
163
164 /// parse the validation string and return the number of events used for validation
166
167 // cudnn implementation needs this format
168 /** Contains the batch size (no. of images in the batch), input depth (no. of channels)
169 * and further input dimensions of the data (image height, width ...)*/
170 std::vector<size_t> fInputShape;
171
172 // The size of the batch, i.e. the number of images that are contained in the batch, is either set to be the depth
173 // or the height of the batch
174 size_t fBatchDepth; ///< The depth of the batch used to train the deep net.
175 size_t fBatchHeight; ///< The height of the batch used to train the deep net.
176 size_t fBatchWidth; ///< The width of the batch used to train the deep net.
177
178 size_t fRandomSeed; ///< The random seed used to initialize the weights and shuffle the batches (default is zero)
179
180 DNN::EInitialization fWeightInitialization; ///< The initialization method
181 DNN::EOutputFunction fOutputFunction; ///< The output function for making the predictions
182 DNN::ELossFunction fLossFunction; ///< The loss function
183
184 TString fInputLayoutString; ///< The string defining the layout of the input
185 TString fBatchLayoutString; ///< The string defining the layout of the batch
186 TString fLayoutString; ///< The string defining the layout of the deep net
187 TString fErrorStrategy; ///< The string defining the error strategy for training
188 TString fTrainingStrategyString; ///< The string defining the training strategy
189 TString fWeightInitializationString; ///< The string defining the weight initialization method
190 TString fArchitectureString; ///< The string defining the architecture: CPU or GPU
191 TString fNumValidationString; ///< The string defining the number (or percentage) of training data used for validation
193 bool fBuildNet; ///< Flag to control whether to build fNet, the stored network used for the evaluation
194
195 KeyValueVector_t fSettings; ///< Map for the training strategy
196 std::vector<TTrainingSettings> fTrainingSettings; ///< The vector defining each training strategy
197
198 TensorImpl_t fXInput; // input tensor used to evaluate fNet
199 HostBufferImpl_t fXInputBuffer; // input host buffer corresponding to X (needed for GPU implementation)
200 std::unique_ptr<MatrixImpl_t> fYHat; // output prediction matrix of fNet
201 std::unique_ptr<DeepNetImpl_t> fNet;
202
203
204 ClassDef(MethodDL, 0);
205
206protected:
207 // provide a help message
208 void GetHelpMessage() const;
209
210 virtual std::vector<Double_t> GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress);
211
212
213public:
214 /*! Constructor */
215 MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption);
216
217 /*! Constructor */
218 MethodDL(DataSetInfo &theData, const TString &theWeightFile);
219
220 /*! Virtual Destructor */
221 virtual ~MethodDL();
222
223 /*! Function for parsing the training settings, provided as a string
224 * in a key-value form. */
225 KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim);
226
227 /*! Check the type of analysis the deep learning network can do */
228 Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets);
229
230 /*! Methods for training the deep learning network */
231 void Train();
232
233 Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0);
234 virtual const std::vector<Float_t>& GetRegressionValues();
235 virtual const std::vector<Float_t>& GetMulticlassValues();
236
237 /*! Methods for writing and reading weights */
239 void AddWeightsXMLTo(void *parent) const;
240 void ReadWeightsFromXML(void *wghtnode);
241 void ReadWeightsFromStream(std::istream &);
242
243 /* Create ranking */
244 const Ranking *CreateRanking();
245
246 /* Getters */
247 size_t GetInputDepth() const { return fInputShape[1]; } ///< no. of channels for an image
248 size_t GetInputHeight() const { return fInputShape[2]; }
249 size_t GetInputWidth() const { return fInputShape[3]; }
250 size_t GetInputDim() const { return fInputShape.size() - 2; }
251 std::vector<size_t> GetInputShape() const { return fInputShape; }
252
253 size_t GetBatchSize() const { return fInputShape[0]; }
254 size_t GetBatchDepth() const { return fBatchDepth; }
255 size_t GetBatchHeight() const { return fBatchHeight; }
256 size_t GetBatchWidth() const { return fBatchWidth; }
257
258 const DeepNetImpl_t & GetDeepNet() const { return *fNet; }
259
263
271
272 const std::vector<TTrainingSettings> &GetTrainingSettings() const { return fTrainingSettings; }
273 std::vector<TTrainingSettings> &GetTrainingSettings() { return fTrainingSettings; }
276
277 /** Setters */
278 void SetInputDepth (int inputDepth) { fInputShape[1] = inputDepth; }
279 void SetInputHeight(int inputHeight) { fInputShape[2] = inputHeight; }
280 void SetInputWidth (int inputWidth) { fInputShape[3] = inputWidth; }
281 void SetInputShape (std::vector<size_t> inputShape) { fInputShape = std::move(inputShape); }
282
283 void SetBatchSize (size_t batchSize) { fInputShape[0] = batchSize; }
284 void SetBatchDepth (size_t batchDepth) { fBatchDepth = batchDepth; }
285 void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
286 void SetBatchWidth (size_t batchWidth) { fBatchWidth = batchWidth; }
287
289 {
290 fWeightInitialization = weightInitialization;
291 }
292 void SetOutputFunction (DNN::EOutputFunction outputFunction) { fOutputFunction = outputFunction; }
293 void SetErrorStrategyString (TString errorStrategy) { fErrorStrategy = errorStrategy; }
294 void SetTrainingStrategyString (TString trainingStrategyString) { fTrainingStrategyString = trainingStrategyString; }
295 void SetWeightInitializationString(TString weightInitializationString)
296 {
297 fWeightInitializationString = weightInitializationString;
298 }
299 void SetArchitectureString (TString architectureString) { fArchitectureString = architectureString; }
300 void SetLayoutString (TString layoutString) { fLayoutString = layoutString; }
301};
302
303} // namespace TMVA
304
305#endif
unsigned int UInt_t
Definition: RtypesCore.h:42
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
long long Long64_t
Definition: RtypesCore.h:69
#define ClassDef(name, id)
Definition: Rtypes.h:326
int type
Definition: TGX11.cxx:120
The TCpu architecture class.
Definition: Cpu.h:63
AReal Scalar_t
Definition: Cpu.h:68
TCpuTensor< AReal > Tensor_t
Definition: Cpu.h:69
TCpuBuffer< AReal > HostBuffer_t
Definition: Cpu.h:71
TCpuMatrix< AReal > Matrix_t
Definition: Cpu.h:70
Generic Deep Neural Network class.
Definition: DeepNet.h:72
Class that contains all the data information.
Definition: DataSetInfo.h:60
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
virtual void ReadWeightsFromStream(std::istream &)=0
KeyValueVector_t & GetKeyValueSettings()
Definition: MethodDL.h:275
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition: MethodDL.h:101
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition: MethodDL.h:175
void GetHelpMessage() const
Definition: MethodDL.cxx:2314
DNN::ELossFunction fLossFunction
The loss function.
Definition: MethodDL.h:182
size_t GetInputDim() const
Definition: MethodDL.h:250
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodDL.cxx:1987
TString GetErrorStrategyString() const
Definition: MethodDL.h:267
std::vector< size_t > fInputShape
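Contains the batch size (no. of images in the batch), input depth (no. of channels) and further input dimensions of the data (image height, width ...).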
Definition: MethodDL.h:170
void SetErrorStrategyString(TString errorStrategy)
Definition: MethodDL.h:293
TString fLayoutString
The string defining the layout of the deep net.
Definition: MethodDL.h:186
std::vector< TTrainingSettings > & GetTrainingSettings()
Definition: MethodDL.h:273
void SetInputDepth(int inputDepth)
Setters.
Definition: MethodDL.h:278
std::vector< size_t > GetInputShape() const
Definition: MethodDL.h:251
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:200
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1631
size_t GetBatchHeight() const
Definition: MethodDL.h:255
TString GetTrainingStrategyString() const
Definition: MethodDL.h:268
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class.
Definition: MethodDL.cxx:2013
void ParseRnnLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:930
TString fWeightInitializationString
The string defining the weight initialization method.
Definition: MethodDL.h:189
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:767
TensorImpl_t fXInput
Definition: MethodDL.h:198
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:178
TString fArchitectureString
The string defining the architecture: CPU or GPU.
Definition: MethodDL.h:190
void Init()
default initializations
Definition: MethodDL.cxx:448
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:1010
void TrainDeepNet()
train the deep neural network using the defined architecture
Definition: MethodDL.cxx:1155
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:272
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:261
void ParseLstmLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate lstm layer.
Definition: MethodDL.cxx:991
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:588
UInt_t GetNumValidationSamples()
parse the validation string and return the number of events used for validation
TString GetBatchLayoutString() const
Definition: MethodDL.h:265
void SetInputWidth(int inputWidth)
Definition: MethodDL.h:280
void SetArchitectureString(TString architectureString)
Definition: MethodDL.h:299
void ProcessOptions()
Definition: MethodDL.cxx:226
HostBufferImpl_t fXInputBuffer
Definition: MethodDL.h:199
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition: MethodDL.h:176
size_t GetInputDepth() const
Definition: MethodDL.h:247
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodDL.cxx:1949
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:201
TString GetWeightInitializationString() const
Definition: MethodDL.h:269
TString GetInputLayoutString() const
Definition: MethodDL.h:264
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:285
size_t GetInputHeight() const
Definition: MethodDL.h:248
TString GetArchitectureString() const
Definition: MethodDL.h:270
void ParseBatchLayout()
Parse the batch layout.
Definition: MethodDL.cxx:487
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate batch normalization layer.
Definition: MethodDL.cxx:889
void ReadWeightsFromStream(std::istream &)
Definition: MethodDL.cxx:2302
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:2122
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition: MethodDL.h:191
const KeyValueVector_t & GetKeyValueSettings() const
Definition: MethodDL.h:274
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:85
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:181
DNN::EInitialization fWeightInitialization
The initialization method.
Definition: MethodDL.h:180
void SetOutputFunction(DNN::EOutputFunction outputFunction)
Definition: MethodDL.h:292
size_t GetBatchDepth() const
Definition: MethodDL.h:254
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:196
size_t GetInputWidth() const
Definition: MethodDL.h:249
void SetInputShape(std::vector< size_t > inputShape)
Definition: MethodDL.h:281
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:262
TString fBatchLayoutString
The string defining the layout of the batch.
Definition: MethodDL.h:185
void SetWeightInitializationString(TString weightInitializationString)
Definition: MethodDL.h:295
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1082
size_t GetBatchSize() const
Definition: MethodDL.h:253
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:668
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:828
TString fTrainingStrategyString
The string defining the training strategy.
Definition: MethodDL.h:188
void SetTrainingStrategyString(TString trainingStrategyString)
Definition: MethodDL.h:294
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2307
void SetLayoutString(TString layoutString)
Definition: MethodDL.h:300
const DeepNetImpl_t & GetDeepNet() const
Definition: MethodDL.h:258
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition: MethodDL.h:103
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:284
KeyValueVector_t fSettings
Map for the training strategy.
Definition: MethodDL.h:195
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:1043
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:286
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1819
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:260
void SetBatchSize(size_t batchSize)
Definition: MethodDL.h:283
void SetWeightInitialization(DNN::EInitialization weightInitialization)
Definition: MethodDL.h:288
TString GetLayoutString() const
Definition: MethodDL.h:266
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition: MethodDL.h:174
size_t GetBatchWidth() const
Definition: MethodDL.h:256
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:2061
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:100
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1697
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:1036
typename ArchitectureImpl_t::Scalar_t ScalarImpl_t
Definition: MethodDL.h:102
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:455
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:193
void SetInputHeight(int inputHeight)
Definition: MethodDL.h:279
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:534
TString fErrorStrategy
The string defining the error strategy for training.
Definition: MethodDL.h:187
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:161
TString fInputLayoutString
The string defining the layout of the input.
Definition: MethodDL.h:184
Ranking for variables in method (implementation)
Definition: Ranking.h:48
EAnalysisType
Definition: Types.h:127
Basic string class.
Definition: TString.h:131
EInitialization
Definition: Functions.h:70
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:80
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:44
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:63
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:55
create variable transformations
All of the options that can be specified in the training string.
Definition: MethodDL.h:65
DNN::EOptimizer optimizer
Definition: MethodDL.h:71
DNN::ERegularization regularization
Definition: MethodDL.h:70
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:76