Logo ROOT  
Reference Guide
MethodDL.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski, Saurav Shekhar
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodDL *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Deep Neural Network Method *
12 * *
13 * Authors (alphabetical): *
14 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
16 * *
17 * Copyright (c) 2005-2015: *
18 * CERN, Switzerland *
19 * U. of Victoria, Canada *
20 * MPI-K Heidelberg, Germany *
21 * U. of Bonn, Germany *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#ifndef ROOT_TMVA_MethodDL
29#define ROOT_TMVA_MethodDL
30
31//////////////////////////////////////////////////////////////////////////
32// //
33// MethodDL //
34// //
35// Method class for all Deep Learning Networks //
36// //
37//////////////////////////////////////////////////////////////////////////
38
39#include "TString.h"
40
41#include "TMVA/MethodBase.h"
42#include "TMVA/Types.h"
43
45
46//#ifdef R__HAS_TMVACPU
48//#endif
49
50#if 0
51#ifdef R__HAS_TMVAGPU
53#ifdef R__HAS_CUDNN
55#endif
56#endif
57#endif
58
59#include "TMVA/DNN/Functions.h"
60#include "TMVA/DNN/DeepNet.h"
61
62#include <vector>
63#include <map>
64
65#ifdef R__HAS_TMVAGPU
66//#define USE_GPU_INFERENCE
67#endif
68
69namespace TMVA {
70
71/*! All of the options that can be specified in the training string */
73 size_t batchSize; ///< presumably the number of events per training batch -- TODO confirm
76 size_t maxEpochs; ///< presumably the upper limit on the number of training epochs -- TODO confirm
83 std::vector<Double_t> dropoutProbabilities; ///< list of dropout probabilities; presumably one entry per layer -- TODO confirm
84 std::map<TString,double> optimizerParams; ///< optimizer parameters stored as name (TString) -> value (double)
86};
87
88
89class MethodDL : public MethodBase { // TMVA method class for all deep learning networks
90
91private:
92 // Key-Value vector type, containing the values for the training options
93 using KeyValueVector_t = std::vector<std::map<TString, TString>>;
94
95// #ifdef R__HAS_TMVAGPU
96// #ifdef R__HAS_CUDNN
97// using ArchitectureImpl_t = TMVA::DNN::TCudnn<Float_t>;
98// #else
99// using ArchitectureImpl_t = TMVA::DNN::TCuda<Float_t>;
100// #endif
101// #else
102 // do not use GPU architecture for evaluation. It is too slow for batch size=1
104// #endif
105
111
112 /*! The option handling methods */
113 void DeclareOptions();
114 void ProcessOptions();
115
116 void Init();
117
118 // Functions to parse the layout of the input and the layout of the batch
119 void ParseInputLayout();
120 void ParseBatchLayout();
121
122 /*! After calling ProcessOptions(), all of the options are parsed,
123 * so using the parsed options, and given the architecture and the
124 * type of the layers, we build the Deep Network passed as
125 * a reference in the function. */
126 template <typename Architecture_t, typename Layer_t>
128 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets);
129
130 template <typename Architecture_t, typename Layer_t>
132 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
133
134 template <typename Architecture_t, typename Layer_t>
136 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
137
138 template <typename Architecture_t, typename Layer_t>
140 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
141 TString delim);
142
143 template <typename Architecture_t, typename Layer_t>
145 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
146 TString delim);
147
148 template <typename Architecture_t, typename Layer_t>
150 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
151 TString delim);
152
154 template <typename Architecture_t, typename Layer_t>
156 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
157
158
159 /// train the deep neural network using the defined architecture
160 template <typename Architecture_t>
161 void TrainDeepNet();
162
163 /// perform prediction of the deep neural network
164 /// using batches (called by GetMvaValues)
165 template <typename Architecture_t>
166 std::vector<Double_t> PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress);
167
168 /// Get the input event tensor for evaluation
169 /// Internal function to fill the fXInput tensor with the correct shape from TMVA current Event class
170 void FillInputTensor();
171
172 /// parse the validation string and return the number of events used for validation
174
175 // cudnn implementation needs this format
176 /** Contains the batch size (no. of images in the batch), input depth (no. channels)
177 * and further input dimensions of the data (image height, width ...)*/
178 std::vector<size_t> fInputShape;
179
180 // The size of the batch, i.e. the number of images that are contained in the batch, is either set to be the depth
181 // or the height of the batch
182 size_t fBatchDepth; ///< The depth of the batch used to train the deep net.
183 size_t fBatchHeight; ///< The height of the batch used to train the deep net.
184 size_t fBatchWidth; ///< The width of the batch used to train the deep net.
185
186 size_t fRandomSeed; ///< The random seed used to initialize the weights and shuffling batches (default is zero)
187
188 DNN::EInitialization fWeightInitialization; ///< The initialization method
189 DNN::EOutputFunction fOutputFunction; ///< The output function for making the predictions
190 DNN::ELossFunction fLossFunction; ///< The loss function
191
192 TString fInputLayoutString; ///< The string defining the layout of the input
193 TString fBatchLayoutString; ///< The string defining the layout of the batch
194 TString fLayoutString; ///< The string defining the layout of the deep net
195 TString fErrorStrategy; ///< The string defining the error strategy for training
196 TString fTrainingStrategyString; ///< The string defining the training strategy
197 TString fWeightInitializationString; ///< The string defining the weight initialization method
198 TString fArchitectureString; ///< The string defining the architecture: CPU or GPU
199 TString fNumValidationString; ///< The string defining the number (or percentage) of training data used for validation
201 bool fBuildNet; ///< Flag to control whether to build fNet, the stored network used for the evaluation
202
203 KeyValueVector_t fSettings; ///< Map for the training strategy
204 std::vector<TTrainingSettings> fTrainingSettings; ///< The vector defining each training strategy
205
206 TensorImpl_t fXInput; // input tensor used to evaluate fNet
207 HostBufferImpl_t fXInputBuffer; // input host buffer corresponding to X (needed for GPU implementation)
208 std::unique_ptr<MatrixImpl_t> fYHat; // output prediction matrix of fNet
209 std::unique_ptr<DeepNetImpl_t> fNet; // the stored network used for the evaluation
210
211
213
214protected:
215 // provide a help message
216 void GetHelpMessage() const;
217
218 virtual std::vector<Double_t> GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress);
219
220
221public:
222 /*! Constructor taking the job name, the method title, the data set info and the option string */
223 MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption);
224
225 /*! Constructor taking the data set info and a weight file */
226 MethodDL(DataSetInfo &theData, const TString &theWeightFile);
227
228 /*! Virtual Destructor */
229 virtual ~MethodDL();
230
231 /*! Function for parsing the training settings, provided as a string
232 * in a key-value form. */
233 KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim);
234
235 /*! Check the type of analysis the deep learning network can do */
236 Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets);
237
238 /*! Methods for training the deep learning network */
239 void Train();
240
241 Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0);
242 virtual const std::vector<Float_t>& GetRegressionValues();
243 virtual const std::vector<Float_t>& GetMulticlassValues();
244
245 /*! Methods for writing and reading weights */
247 void AddWeightsXMLTo(void *parent) const;
248 void ReadWeightsFromXML(void *wghtnode);
249 void ReadWeightsFromStream(std::istream &);
250
251 /* Create ranking */
252 const Ranking *CreateRanking();
253
254 /* Getters */
255 size_t GetInputDepth() const { return fInputShape[1]; } ///< no. of channels for an image
256 size_t GetInputHeight() const { return fInputShape[2]; } ///< input height, read from fInputShape[2]
257 size_t GetInputWidth() const { return fInputShape[3]; } ///< input width, read from fInputShape[3]
258 size_t GetInputDim() const { return fInputShape.size() - 2; } ///< no. of entries of fInputShape beyond batch size and depth; NOTE(review): unsigned, wraps if fInputShape has fewer than 2 entries
259 std::vector<size_t> GetInputShape() const { return fInputShape; } ///< returns a copy of fInputShape
260
261 size_t GetBatchSize() const { return fInputShape[0]; } ///< the batch size is the first entry of fInputShape
262 size_t GetBatchDepth() const { return fBatchDepth; }
263 size_t GetBatchHeight() const { return fBatchHeight; }
264 size_t GetBatchWidth() const { return fBatchWidth; }
265
266 const DeepNetImpl_t & GetDeepNet() const { return *fNet; } // dereferences fNet; NOTE(review): confirm fNet is always set before this is called
267
271
279
280 const std::vector<TTrainingSettings> &GetTrainingSettings() const { return fTrainingSettings; }
281 std::vector<TTrainingSettings> &GetTrainingSettings() { return fTrainingSettings; }
284
285 /** Setters */
286 void SetInputDepth (int inputDepth) { fInputShape[1] = inputDepth; } // NOTE(review): int is stored into a size_t entry (negative values would wrap); the batch setters below take size_t
287 void SetInputHeight(int inputHeight) { fInputShape[2] = inputHeight; } // same int -> size_t conversion as SetInputDepth
288 void SetInputWidth (int inputWidth) { fInputShape[3] = inputWidth; } // same int -> size_t conversion as SetInputDepth
289 void SetInputShape (std::vector<size_t> inputShape) { fInputShape = std::move(inputShape); } // replaces the whole shape vector, including the batch size entry
290
291 void SetBatchSize (size_t batchSize) { fInputShape[0] = batchSize; } // the batch size is the first entry of fInputShape
292 void SetBatchDepth (size_t batchDepth) { fBatchDepth = batchDepth; }
293 void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
294 void SetBatchWidth (size_t batchWidth) { fBatchWidth = batchWidth; }
295
297 {
298 fWeightInitialization = weightInitialization;
299 }
300 void SetOutputFunction (DNN::EOutputFunction outputFunction) { fOutputFunction = outputFunction; }
301 void SetErrorStrategyString (TString errorStrategy) { fErrorStrategy = errorStrategy; }
302 void SetTrainingStrategyString (TString trainingStrategyString) { fTrainingStrategyString = trainingStrategyString; }
303 void SetWeightInitializationString(TString weightInitializationString)
304 {
305 fWeightInitializationString = weightInitializationString;
306 }
307 void SetArchitectureString (TString architectureString) { fArchitectureString = architectureString; }
308 void SetLayoutString (TString layoutString) { fLayoutString = layoutString; }
309};
310
311} // namespace TMVA
312
313#endif
bool Bool_t
Definition: RtypesCore.h:63
long long Long64_t
Definition: RtypesCore.h:80
#define ClassDef(name, id)
Definition: Rtypes.h:335
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The TCpu architecture class.
Definition: Cpu.h:65
AReal Scalar_t
Definition: Cpu.h:69
TCpuTensor< AReal > Tensor_t
Definition: Cpu.h:70
TCpuBuffer< AReal > HostBuffer_t
Definition: Cpu.h:72
TCpuMatrix< AReal > Matrix_t
Definition: Cpu.h:71
Generic Deep Neural Network class.
Definition: DeepNet.h:73
Class that contains all the data information.
Definition: DataSetInfo.h:62
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
virtual void ReadWeightsFromStream(std::istream &)=0
KeyValueVector_t & GetKeyValueSettings()
Definition: MethodDL.h:283
typename ArchitectureImpl_t::Tensor_t TensorImpl_t
Definition: MethodDL.h:108
size_t fBatchHeight
The height of the batch used to train the deep net.
Definition: MethodDL.h:183
void GetHelpMessage() const
Definition: MethodDL.cxx:2344
DNN::ELossFunction fLossFunction
The loss function.
Definition: MethodDL.h:190
size_t GetInputDim() const
Definition: MethodDL.h:258
TString GetErrorStrategyString() const
Definition: MethodDL.h:275
std::vector< size_t > fInputShape
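Contains the batch size (no. of images in the batch), input depth (no. channels) and further input dimensions of the data (image height, width ...).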
Definition: MethodDL.h:178
void SetErrorStrategyString(TString errorStrategy)
Definition: MethodDL.h:301
TString fLayoutString
The string defining the layout of the deep net.
Definition: MethodDL.h:194
std::vector< TTrainingSettings > & GetTrainingSettings()
Definition: MethodDL.h:281
void SetInputDepth(int inputDepth)
Setters.
Definition: MethodDL.h:286
std::vector< size_t > GetInputShape() const
Definition: MethodDL.h:259
std::unique_ptr< MatrixImpl_t > fYHat
Definition: MethodDL.h:208
void Train()
Methods for training the deep learning network.
Definition: MethodDL.cxx:1661
size_t GetBatchHeight() const
Definition: MethodDL.h:263
TString GetTrainingStrategyString() const
Definition: MethodDL.h:276
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
Evaluate the DeepNet on a vector of input values stored in the TMVA Event class. Here we will evaluate...
Definition: MethodDL.cxx:2024
TString fWeightInitializationString
The string defining the weight initialization method.
Definition: MethodDL.h:197
void ParseMaxPoolLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate max pool layer.
Definition: MethodDL.cxx:770
TensorImpl_t fXInput
Definition: MethodDL.h:206
size_t fRandomSeed
The random seed used to initialize the weights and shuffling batches (default is zero)
Definition: MethodDL.h:186
virtual const std::vector< Float_t > & GetMulticlassValues()
TString fArchitectureString
The string defining the architecture: CPU or GPU.
Definition: MethodDL.h:198
void Init()
default initializations
Definition: MethodDL.cxx:432
MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
Constructor.
Definition: MethodDL.cxx:1021
void TrainDeepNet()
train the deep neural network using the defined architecture
Definition: MethodDL.cxx:1166
const std::vector< TTrainingSettings > & GetTrainingSettings() const
Definition: MethodDL.h:280
DNN::EOutputFunction GetOutputFunction() const
Definition: MethodDL.h:269
void ParseDenseLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate dense layer.
Definition: MethodDL.cxx:583
UInt_t GetNumValidationSamples()
parse the validation string and return the number of events used for validation
TString GetBatchLayoutString() const
Definition: MethodDL.h:273
void SetInputWidth(int inputWidth)
Definition: MethodDL.h:288
void SetArchitectureString(TString architectureString)
Definition: MethodDL.h:307
void ProcessOptions()
Definition: MethodDL.cxx:219
HostBufferImpl_t fXInputBuffer
Definition: MethodDL.h:207
size_t fBatchWidth
The width of the batch used to train the deep net.
Definition: MethodDL.h:184
size_t GetInputDepth() const
Definition: MethodDL.h:255
std::unique_ptr< DeepNetImpl_t > fNet
Definition: MethodDL.h:209
TString GetWeightInitializationString() const
Definition: MethodDL.h:277
TString GetInputLayoutString() const
Definition: MethodDL.h:272
void SetBatchHeight(size_t batchHeight)
Definition: MethodDL.h:293
size_t GetInputHeight() const
Definition: MethodDL.h:256
TString GetArchitectureString() const
Definition: MethodDL.h:278
void ParseBatchLayout()
Parse the batch layout.
Definition: MethodDL.cxx:482
void ParseBatchNormLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate batch normalization layer.
Definition: MethodDL.cxx:892
void ReadWeightsFromStream(std::istream &)
Definition: MethodDL.cxx:2332
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDL.cxx:2114
TString fNumValidationString
The string defining the number (or percentage) of training data used for validation.
Definition: MethodDL.h:199
const KeyValueVector_t & GetKeyValueSettings() const
Definition: MethodDL.h:282
std::vector< std::map< TString, TString > > KeyValueVector_t
Definition: MethodDL.h:93
DNN::EOutputFunction fOutputFunction
The output function for making the predictions.
Definition: MethodDL.h:189
DNN::EInitialization fWeightInitialization
The initialization method.
Definition: MethodDL.h:188
void SetOutputFunction(DNN::EOutputFunction outputFunction)
Definition: MethodDL.h:300
size_t GetBatchDepth() const
Definition: MethodDL.h:262
void ParseRecurrentLayer(ERecurrentLayerType type, DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate rnn layer.
Definition: MethodDL.cxx:933
std::vector< TTrainingSettings > fTrainingSettings
The vector defining each training strategy.
Definition: MethodDL.h:204
size_t GetInputWidth() const
Definition: MethodDL.h:257
void SetInputShape(std::vector< size_t > inputShape)
Definition: MethodDL.h:289
DNN::ELossFunction GetLossFunction() const
Definition: MethodDL.h:270
TString fBatchLayoutString
The string defining the layout of the batch.
Definition: MethodDL.h:193
void SetWeightInitializationString(TString weightInitializationString)
Definition: MethodDL.h:303
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
Check the type of analysis the deep learning network can do.
Definition: MethodDL.cxx:1093
size_t GetBatchSize() const
Definition: MethodDL.h:261
void ParseConvLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate convolutional layer.
Definition: MethodDL.cxx:671
void ParseReshapeLayer(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets, TString layerString, TString delim)
Parses the layer string and creates the appropriate reshape layer.
Definition: MethodDL.cxx:831
virtual const std::vector< Float_t > & GetRegressionValues()
TString fTrainingStrategyString
The string defining the training strategy.
Definition: MethodDL.h:196
void SetTrainingStrategyString(TString trainingStrategyString)
Definition: MethodDL.h:302
const Ranking * CreateRanking()
Definition: MethodDL.cxx:2337
void SetLayoutString(TString layoutString)
Definition: MethodDL.h:308
const DeepNetImpl_t & GetDeepNet() const
Definition: MethodDL.h:266
typename ArchitectureImpl_t::HostBuffer_t HostBufferImpl_t
Definition: MethodDL.h:110
void SetBatchDepth(size_t batchDepth)
Definition: MethodDL.h:292
KeyValueVector_t fSettings
Map for the training strategy.
Definition: MethodDL.h:203
KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim)
Function for parsing the training settings, provided as a string in a key-value form.
Definition: MethodDL.cxx:1054
void SetBatchWidth(size_t batchWidth)
Definition: MethodDL.h:294
std::vector< Double_t > PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
perform prediction of the deep neural network using batches (called by GetMvaValues)
Definition: MethodDL.cxx:1830
DNN::EInitialization GetWeightInitialization() const
Definition: MethodDL.h:268
void SetBatchSize(size_t batchSize)
Definition: MethodDL.h:291
void SetWeightInitialization(DNN::EInitialization weightInitialization)
Definition: MethodDL.h:296
TString GetLayoutString() const
Definition: MethodDL.h:274
size_t fBatchDepth
The depth of the batch used to train the deep net.
Definition: MethodDL.h:182
size_t GetBatchWidth() const
Definition: MethodDL.h:264
void AddWeightsXMLTo(void *parent) const
Definition: MethodDL.cxx:2053
typename ArchitectureImpl_t::Matrix_t MatrixImpl_t
Definition: MethodDL.h:107
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Definition: MethodDL.cxx:1774
virtual ~MethodDL()
Virtual Destructor.
Definition: MethodDL.cxx:1047
typename ArchitectureImpl_t::Scalar_t ScalarImpl_t
Definition: MethodDL.h:109
void ParseInputLayout()
Parse the input layout.
Definition: MethodDL.cxx:439
void FillInputTensor()
Get the input event tensor for evaluation. Internal function to fill the fXInput tensor with the corre...
Definition: MethodDL.cxx:1706
bool fBuildNet
Flag to control whether to build fNet, the stored network used for the evaluation.
Definition: MethodDL.h:201
void SetInputHeight(int inputHeight)
Definition: MethodDL.h:287
void CreateDeepNet(DNN::TDeepNet< Architecture_t, Layer_t > &deepNet, std::vector< DNN::TDeepNet< Architecture_t, Layer_t > > &nets)
After calling ProcessOptions(), all of the options are parsed, so using the parsed options,...
Definition: MethodDL.cxx:529
TString fErrorStrategy
The string defining the error strategy for training.
Definition: MethodDL.h:195
void DeclareOptions()
The option handling methods.
Definition: MethodDL.cxx:167
TString fInputLayoutString
The string defining the layout of the input.
Definition: MethodDL.h:192
Ranking for variables in method (implementation)
Definition: Ranking.h:48
EAnalysisType
Definition: Types.h:126
Basic string class.
Definition: TString.h:136
EInitialization
Definition: Functions.h:72
EOptimizer
Enum representing the optimizer used for training.
Definition: Functions.h:82
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:46
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:65
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:57
create variable transformations
All of the options that can be specified in the training string.
Definition: MethodDL.h:72
std::map< TString, double > optimizerParams
Definition: MethodDL.h:84
DNN::EOptimizer optimizer
Definition: MethodDL.h:78
DNN::ERegularization regularization
Definition: MethodDL.h:77
std::vector< Double_t > dropoutProbabilities
Definition: MethodDL.h:83