Cuda.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 05/07/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

///////////////////////////////////////////////////////////////////
// Definition of the TCuda architecture class, which provides an //
// implementation of the low-level functionality for neural      //
// networks for the CUDA computing architectures.                //
///////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_ARCHITECTURES_CUDA
#define TMVA_DNN_ARCHITECTURES_CUDA

#include "cuda.h"
#include "Cuda/CudaBuffers.h"
#include "Cuda/CudaMatrix.h"
#include "TMVA/DNN/DataLoader.h"
#include <utility>

namespace TMVA
{
namespace DNN
{

/** The TCuda architecture class.
 *
 * Low-level interface class for CUDA computing architectures. Contains as
 * public types the declarations of the scalar, matrix and buffer types
 * for this architecture, as well as the remaining functions of the low-level
 * interface in the form of static members.
 */
template<typename AFloat = Real_t>
class TCuda
{

public:

   using Scalar_t       = AFloat;
   using Matrix_t       = TCudaMatrix<AFloat>;
   using DeviceBuffer_t = TCudaDeviceBuffer<AFloat>;
   using HostBuffer_t   = TCudaHostBuffer<AFloat>;

   //____________________________________________________________________________
   //
   // Propagation
   //____________________________________________________________________________

   /** @name Forward Propagation
    * Low-level functions required for the forward propagation of activations
    * through the network.
    */
   ///@{
   /** Matrix-multiply \p input with the transpose of \p weights and
    *  write the results into \p output. */
   static void MultiplyTranspose(TCudaMatrix<AFloat> &output,
                                 const TCudaMatrix<AFloat> &input,
                                 const TCudaMatrix<AFloat> &weights);
   /** Add the vectors \p biases row-wise to the matrix \p output. */
   static void AddRowWise(TCudaMatrix<AFloat> &output,
                          const TCudaMatrix<AFloat> &biases);
   ///@}
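
   /* Usage sketch (not part of the interface): a dense-layer forward pass
    * composes the two calls above. The names X, W, b, Y and the assumed
    * batchSize x nFeatures layout of the activation matrices are
    * illustrative assumptions only.
    *
    *    TCudaMatrix<AFloat> X(batchSize, nInputs);   // input activations
    *    TCudaMatrix<AFloat> W(nOutputs, nInputs);    // weight matrix
    *    TCudaMatrix<AFloat> b(nOutputs, 1);          // bias vector
    *    TCudaMatrix<AFloat> Y(batchSize, nOutputs);  // linear output
    *    MultiplyTranspose(Y, X, W);                  // Y = X * W^T
    *    AddRowWise(Y, b);                            // add biases to each row
    */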

   /** @name Backward Propagation
    * Low-level functions required for the backward propagation of activations
    * through the network.
    */
   ///@{
   /** Perform the complete backward propagation step. If the provided
    * \p activationGradientsBackward matrix is not empty, compute the
    * gradients of the objective function with respect to the activations
    * of the previous layer (backward direction).
    * Also compute the weight and the bias gradients. Modifies the values
    * in \p df and thus produces a valid result only the first time it is
    * applied after the corresponding forward propagation has been
    * performed. */
   static void Backward(TCudaMatrix<AFloat> & activationGradientsBackward,
                        TCudaMatrix<AFloat> & weightGradients,
                        TCudaMatrix<AFloat> & biasGradients,
                        TCudaMatrix<AFloat> & df,
                        const TCudaMatrix<AFloat> & activationGradients,
                        const TCudaMatrix<AFloat> & weights,
                        const TCudaMatrix<AFloat> & activationBackward);
   /** Add the elements in matrix \p B scaled by \p beta to the elements in
    * the matrix \p A. This is required for the weight update in the gradient
    * descent step. */
   static void ScaleAdd(TCudaMatrix<AFloat> & A,
                        const TCudaMatrix<AFloat> & B,
                        Scalar_t beta = 1.0);
   /** Copy the elements of matrix \p A into matrix \p B. */
   static void Copy(TCudaMatrix<AFloat> & B,
                    const TCudaMatrix<AFloat> & A);
   ///@}
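
   /* Usage sketch (an assumption about intended use, not prescribed by this
    * interface): once Backward has filled the gradient matrices, a plain
    * gradient-descent update W <- W - learningRate * dW is one ScaleAdd call.
    *
    *    ScaleAdd(weights, weightGradients, -learningRate);
    *    ScaleAdd(biases,  biasGradients,  -learningRate);
    */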

   //____________________________________________________________________________
   //
   // Activation Functions
   //____________________________________________________________________________

   /** @name Activation Functions
    * For each activation function, the low-level interface contains two
    * routines: one that applies the activation function to a matrix, and one
    * that evaluates the derivatives of the activation function at the elements
    * of a given matrix and writes the results into the result matrix.
    */
   ///@{
   static void Identity(TCudaMatrix<AFloat> & B);
   static void IdentityDerivative(TCudaMatrix<AFloat> & B,
                                  const TCudaMatrix<AFloat> & A);

   static void Relu(TCudaMatrix<AFloat> & B);
   static void ReluDerivative(TCudaMatrix<AFloat> & B,
                              const TCudaMatrix<AFloat> & A);

   static void Sigmoid(TCudaMatrix<AFloat> & B);
   static void SigmoidDerivative(TCudaMatrix<AFloat> & B,
                                 const TCudaMatrix<AFloat> & A);

   static void Tanh(TCudaMatrix<AFloat> & B);
   static void TanhDerivative(TCudaMatrix<AFloat> & B,
                              const TCudaMatrix<AFloat> & A);

   static void SymmetricRelu(TCudaMatrix<AFloat> & B);
   static void SymmetricReluDerivative(TCudaMatrix<AFloat> & B,
                                       const TCudaMatrix<AFloat> & A);

   static void SoftSign(TCudaMatrix<AFloat> & B);
   static void SoftSignDerivative(TCudaMatrix<AFloat> & B,
                                  const TCudaMatrix<AFloat> & A);

   static void Gauss(TCudaMatrix<AFloat> & B);
   static void GaussDerivative(TCudaMatrix<AFloat> & B,
                               const TCudaMatrix<AFloat> & A);
   ///@}
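
   /* Usage sketch of the two-routine pattern (illustrative only): because the
    * forward routines operate in place, the derivatives are evaluated on the
    * pre-activation values first.
    *
    *    ReluDerivative(dA, A);   // dA(i,j) = f'(A(i,j)), A still linear
    *    Relu(A);                 // A(i,j)  = max(0, A(i,j)), in place
    */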

   //____________________________________________________________________________
   //
   // Loss Functions
   //____________________________________________________________________________

   /** @name Loss Functions
    * Loss functions compute a scalar value given the \p output of the network
    * for a given training input and the expected network prediction \p Y that
    * quantifies the quality of the prediction. For each function, a routine
    * that computes the gradients (suffixed by Gradients) must also be provided
    * to start the backpropagation algorithm.
    */
   ///@{

   static AFloat MeanSquaredError(const TCudaMatrix<AFloat> &Y, const TCudaMatrix<AFloat> &output,
                                  const TCudaMatrix<AFloat> &weights);
   static void MeanSquaredErrorGradients(TCudaMatrix<AFloat> &dY, const TCudaMatrix<AFloat> &Y,
                                         const TCudaMatrix<AFloat> &output, const TCudaMatrix<AFloat> &weights);

   /** Sigmoid transformation is implicitly applied, thus \p output should
    * hold the linear activations of the last layer in the net. */
   static AFloat CrossEntropy(const TCudaMatrix<AFloat> &Y, const TCudaMatrix<AFloat> &output,
                              const TCudaMatrix<AFloat> &weights);

   static void CrossEntropyGradients(TCudaMatrix<AFloat> &dY, const TCudaMatrix<AFloat> &Y,
                                     const TCudaMatrix<AFloat> &output, const TCudaMatrix<AFloat> &weights);

   /** Softmax transformation is implicitly applied, thus \p output should
    * hold the linear activations of the last layer in the net. */
   static AFloat SoftmaxCrossEntropy(const TCudaMatrix<AFloat> &Y, const TCudaMatrix<AFloat> &output,
                                     const TCudaMatrix<AFloat> &weights);
   static void SoftmaxCrossEntropyGradients(TCudaMatrix<AFloat> &dY, const TCudaMatrix<AFloat> &Y,
                                            const TCudaMatrix<AFloat> &output, const TCudaMatrix<AFloat> &weights);
   ///@}
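
   /* Usage sketch (assumed call order): the scalar loss is evaluated for
    * monitoring, and the gradient routine seeds the backpropagation pass.
    *
    *    AFloat loss = CrossEntropy(Y, output, weights);  // scalar loss value
    *    CrossEntropyGradients(dY, Y, output, weights);   // dY = dL/d(output)
    *    // dY is then passed as activationGradients to Backward(...)
    */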

   //____________________________________________________________________________
   //
   // Output Functions
   //____________________________________________________________________________

   /** @name Output Functions
    * Output functions transform the activations \p output of the
    * output layer in the network to a valid prediction \p YHat for
    * the desired usage of the network, e.g. the identity function
    * for regression or the sigmoid transformation for two-class
    * classification.
    */
   ///@{
   static void Sigmoid(TCudaMatrix<AFloat> &YHat,
                       const TCudaMatrix<AFloat> & );
   static void Softmax(TCudaMatrix<AFloat> &YHat,
                       const TCudaMatrix<AFloat> & );
   ///@}
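
   /* Usage sketch (illustrative only): for two-class classification, the
    * prediction is obtained by passing the last layer's linear activations
    * through the sigmoid, matching the transformation applied implicitly
    * by CrossEntropy above.
    *
    *    Sigmoid(YHat, output);   // YHat(i,j) = 1 / (1 + exp(-output(i,j)))
    */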

   //____________________________________________________________________________
   //
   // Regularization
   //____________________________________________________________________________

   /** @name Regularization
    * For each regularization type two functions are required: one named
    * <tt><Type>Regularization</tt> that evaluates the corresponding
    * regularization functional for a given weight matrix, and one named
    * <tt>Add<Type>RegularizationGradients</tt> that adds the regularization
    * component of the gradients to the provided matrix.
    */
   ///@{

209 
210  static AFloat L1Regularization(const TCudaMatrix<AFloat> & W);
212  const TCudaMatrix<AFloat> & W,
213  AFloat weightDecay);
214 
215  static AFloat L2Regularization(const TCudaMatrix<AFloat> & W);
217  const TCudaMatrix<AFloat> & W,
218  AFloat weightDecay);
219  ///@}
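
   /* Usage sketch (an assumption about how the two routines combine): the
    * regularization functional contributes to the total loss, and the
    * matching routine accumulates its gradient component into the weight
    * gradients.
    *
    *    loss += weightDecay * L2Regularization(W);
    *    AddL2RegularizationGradients(weightGradients, W, weightDecay);
    */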

   //____________________________________________________________________________
   //
   // Initialization
   //____________________________________________________________________________

   /** @name Initialization
    * For each initialization method, one function in the low-level interface
    * is provided. The naming scheme is <tt>Initialize<Type></tt> for a given
    * initialization method Type.
    */
   ///@{

   static void InitializeGauss(TCudaMatrix<AFloat> & A);
   static void InitializeUniform(TCudaMatrix<AFloat> & A);
   static void InitializeIdentity(TCudaMatrix<AFloat> & A);
   static void InitializeZero(TCudaMatrix<AFloat> & A);

   ///@}

   //____________________________________________________________________________
   //
   // Dropout
   //____________________________________________________________________________

   /** @name Dropout
    */
   ///@{

   /** Apply dropout with activation probability \p p to the given
    * matrix \p A and scale the result by the reciprocal of \p p. */
   static void Dropout(TCudaMatrix<AFloat> & A, AFloat p);

   ///@}
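
   /* Usage sketch (illustrative only): inverted dropout at training time.
    * Scaling the surviving activations by 1/p keeps their expectation
    * unchanged, so no rescaling is needed at inference time.
    *
    *    Dropout(A, p);   // zero elements with probability 1 - p,
    *                     // scale survivors by 1/p
    */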

   //____________________________________________________________________________
   //
   // Additional Arithmetic Functions
   //____________________________________________________________________________

   /** @name Additional Arithmetic Functions
    *
    * Additional arithmetic on CUDA matrices used to implement the low-level
    * interface.
    */
   ///@{

   /** Standard multiplication of two matrices \p A and \p B with the result being
    *  written into \p C.
    */
   static void Multiply(TCudaMatrix<AFloat> & C,
                        const TCudaMatrix<AFloat> & A,
                        const TCudaMatrix<AFloat> & B);
   /** Matrix multiplication of two matrices \p A and \p B^T (transposed) with the
    *  result being written into \p C.
    */
   static void TransposeMultiply(TCudaMatrix<AFloat> & output,
                                 const TCudaMatrix<AFloat> & input,
                                 const TCudaMatrix<AFloat> & Weights);
   /** In-place Hadamard (element-wise) product of matrices \p A and \p B
    *  with the result being written into \p A.
    */
   static void Hadamard(TCudaMatrix<AFloat> & A, const TCudaMatrix<AFloat> & B);

   /** Sum columns of (m x n) matrix \p A and write the results into the first
    *  m elements of \p B.
    */
   static void SumColumns(TCudaMatrix<AFloat> & B, const TCudaMatrix<AFloat> & A);

   /** Compute the sum of all elements in \p A. */
   static AFloat Sum(const TCudaMatrix<AFloat> &A);
   ///@}
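
   /* Usage sketch (an assumption about typical use in backpropagation): the
    * in-place Hadamard product combines incoming activation gradients with
    * the activation derivatives stored in df.
    *
    *    Hadamard(df, activationGradients);   // df(i,j) *= dY(i,j)
    */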
};

} // namespace DNN
} // namespace TMVA

#endif