Logo ROOT   6.12/07
Reference Guide
Cpu.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 05/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12  //////////////////////////////////////////////////////////////////
13  // Definition of the TCpu architecture, which provides a        //
14  // multi-threaded CPU implementation of the low-level interface //
15  // for networks on CPUs, using BLAS and ROOT's TThreadExecutor. //
16  //////////////////////////////////////////////////////////////////
17 
18 #ifndef TMVA_DNN_ARCHITECTURES_CPU
19 #define TMVA_DNN_ARCHITECTURES_CPU
20 
21 #include "Cpu/CpuBuffer.h"
22 #include "Cpu/CpuMatrix.h"
23 
24 namespace TMVA
25 {
26 namespace DNN
27 {
28 
29 /** The TCpu architecture class.
30  *
31  * Low-level interface class for multi-threaded CPU architectures. Contains as
32  * public types the declaration of the scalar, matrix and data loader types
33  * for this architecture as well as the remaining functions in the low-level
34  * interface in the form of static members.
35  */
36 template<typename AReal = Real_t>
37 class TCpu
38 {
39 public:
40 
41  using Scalar_t = AReal;
45 
46  //____________________________________________________________________________
47  //
48  // Propagation
49  //____________________________________________________________________________
50 
51  /** @name Forward Propagation
52  * Low-level functions required for the forward propagation of activations
53  * through the network.
54  */
55  ///@{
56  /** Matrix-multiply \p input with the transpose of \pweights and
57  * write the results into \p output. */
59  const TCpuMatrix<Scalar_t> &input,
60  const TCpuMatrix<Scalar_t> &weights);
61  /** Add the vectors biases row-wise to the matrix output */
63  const TCpuMatrix<Scalar_t> &biases);
64  ///@}
65 
66  /** @name Backward Propagation
67  * Low-level functions required for the forward propagation of activations
68  * through the network.
69  */
70  ///@{
71  /** Perform the complete backward propagation step. If the provided
72  * \p activationGradientsBackward matrix is not empty, compute the
73  * gradients of the objective function with respect to the activations
74  * of the previous layer (backward direction).
75  * Also compute the weight and the bias gradients. Modifies the values
76  * in \p df and thus produces only a valid result, if it is applied the
77  * first time after the corresponding forward propagation has been per-
78  * formed. */
79  static void Backward(TCpuMatrix<Scalar_t> & activationGradientsBackward,
80  TCpuMatrix<Scalar_t> & weightGradients,
81  TCpuMatrix<Scalar_t> & biasGradients,
83  const TCpuMatrix<Scalar_t> & activationGradients,
84  const TCpuMatrix<Scalar_t> & weights,
85  const TCpuMatrix<Scalar_t> & activationBackward);
86  /** Adds a the elements in matrix B scaled by c to the elements in
87  * the matrix A. This is required for the weight update in the gradient
88  * descent step.*/
89  static void ScaleAdd(TCpuMatrix<Scalar_t> & A,
90  const TCpuMatrix<Scalar_t> & B,
91  Scalar_t beta = 1.0);
92 
93  static void Copy(TCpuMatrix<Scalar_t> & B,
94  const TCpuMatrix<Scalar_t> & A);
95  ///@}
96 
97  //____________________________________________________________________________
98  //
99  // Activation Functions
100  //____________________________________________________________________________
101 
102  /** @name Activation Functions
103  * For each activation function, the low-level interface contains two routines.
104  * One that applies the acitvation function to a matrix and one that evaluate
105  * the derivatives of the activation function at the elements of a given matrix
106  * and writes the results into the result matrix.
107  */
108  ///@{
110  const TCpuMatrix<Scalar_t> &A);
111 
112  static void Relu(TCpuMatrix<Scalar_t> & B);
113  static void ReluDerivative(TCpuMatrix<Scalar_t> & B,
114  const TCpuMatrix<Scalar_t> & A);
115 
116  static void Sigmoid(TCpuMatrix<Scalar_t> & B);
118  const TCpuMatrix<Scalar_t> & A);
119 
120  static void Tanh(TCpuMatrix<Scalar_t> & B);
121  static void TanhDerivative(TCpuMatrix<Scalar_t> & B,
122  const TCpuMatrix<Scalar_t> & A);
123 
124  static void SymmetricRelu(TCpuMatrix<Scalar_t> & B);
126  const TCpuMatrix<Scalar_t> & A);
127 
128  static void SoftSign(TCpuMatrix<Scalar_t> & B);
130  const TCpuMatrix<Scalar_t> & A);
131 
132  static void Gauss(TCpuMatrix<Scalar_t> & B);
133  static void GaussDerivative(TCpuMatrix<Scalar_t> & B,
134  const TCpuMatrix<Scalar_t> & A);
135  ///@}
136 
137  //____________________________________________________________________________
138  //
139  // Loss Functions
140  //____________________________________________________________________________
141 
142  /** @name Loss Functions
143  * Loss functions compute a scalar value given the \p output of the network
144  * for a given training input and the expected network prediction \p Y that
145  * quantifies the quality of the prediction. For each function also a routing
146  * that computes the gradients (suffixed by Gradients) must be provided for
147  * the starting of the backpropagation algorithm.
148  */
149  ///@{
150 
152  const TCpuMatrix<Scalar_t> &weights);
154  const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);
155 
156  /** Sigmoid transformation is implicitly applied, thus \p output should
157  * hold the linear activations of the last layer in the net. */
159  const TCpuMatrix<Scalar_t> &weights);
160 
162  const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);
163 
164  /** Softmax transformation is implicitly applied, thus \p output should
165  * hold the linear activations of the last layer in the net. */
167  const TCpuMatrix<Scalar_t> &weights);
169  const TCpuMatrix<Scalar_t> &output, const TCpuMatrix<Scalar_t> &weights);
170  ///@}
171 
172  //____________________________________________________________________________
173  //
174  // Output Functions
175  //____________________________________________________________________________
176 
177  /** @name Output Functions
178  * Output functions transform the activations \p output of the
179  * output layer in the network to a valid prediction \p YHat for
180  * the desired usage of the network, e.g. the identity function
181  * for regression or the sigmoid transformation for two-class
182  * classification.
183  */
184  ///@{
185  static void Sigmoid(TCpuMatrix<Scalar_t> &YHat,
186  const TCpuMatrix<Scalar_t> & );
187  static void Softmax(TCpuMatrix<Scalar_t> &YHat,
188  const TCpuMatrix<Scalar_t> & );
189  ///@}
190 
191  //____________________________________________________________________________
192  //
193  // Regularization
194  //____________________________________________________________________________
195 
196  /** @name Regularization
197  * For each regularization type two functions are required, one named
198  * <tt><Type>Regularization</tt> that evaluates the corresponding
199  * regularization functional for a given weight matrix and the
200  * <tt>Add<Type>RegularizationGradients</tt>, that adds the regularization
201  * component in the gradients to the provided matrix.
202  */
203  ///@{
204 
207  const TCpuMatrix<Scalar_t> & W,
209 
212  const TCpuMatrix<Scalar_t> & W,
214  ///@}
215 
216  //____________________________________________________________________________
217  //
218  // Initialization
219  //____________________________________________________________________________
220 
221  /** @name Initialization
222  * For each initialization method, one function in the low-level interface
223  * is provided. The naming scheme is <p>Initialize<Type></p> for a given
224  * initialization method Type.
225  */
226  ///@{
227 
228  static void InitializeGauss(TCpuMatrix<Scalar_t> & A);
231  static void InitializeZero(TCpuMatrix<Scalar_t> & A);
232 
233  ///@}
234 
235  //____________________________________________________________________________
236  //
237  // Dropout
238  //____________________________________________________________________________
239 
240  /** @name Dropout
241  */
242  ///@{
243 
244  /** Apply dropout with activation probability \p p to the given
245  * matrix \p A and scale the result by reciprocal of \p p. */
246  static void Dropout(TCpuMatrix<Scalar_t> & A, Scalar_t p);
247 
248  ///@}
249 
250  //____________________________________________________________________________
251  //
252  // Additional Arithmetic Functions
253  //____________________________________________________________________________
254 
255  /** @name Additional Arithmetic Functions
256  *
257  * Additional arithmetic on CUDA matrices used to implement the low-level
258  * interface.
259  */
260  ///@{
261 
262  /** Standard multiplication of two matrices \p A and \p B with the result being
263  * written into C.
264  */
265  static void Multiply(TCpuMatrix<Scalar_t> &C,
266  const TCpuMatrix<Scalar_t> &A,
267  const TCpuMatrix<Scalar_t> &B);
268  /** Matrix multiplication of two matrices \p A and \p B^T (transposed) with the
269  * result being written into C.
270  */
272  const TCpuMatrix<Scalar_t> &input,
273  const TCpuMatrix<Scalar_t> &Weights);
274  /** In-place Hadamard (element-wise) product of matrices \p A and \p B
275  * with the result being written into \p A.
276  */
277  static void Hadamard(TCpuMatrix<Scalar_t> &A,
278  const TCpuMatrix<Scalar_t> &B);
279 
280  /** Sum columns of (m x n) matrixx \p A and write the results into the first
281  * m elements in \p A.
282  */
283  static void SumColumns(TCpuMatrix<Scalar_t> &B,
284  const TCpuMatrix<Scalar_t> &A);
285 
286  /** Compute the sum of all elements in \p A */
287  static Scalar_t Sum(const TCpuMatrix<Scalar_t> &A);
288 
289 };
290 
291 } // namespace DNN
292 } // namespace TMVA
293 
294 #endif
static double B[]
static void Sigmoid(TCpuMatrix< Scalar_t > &B)
static void MeanSquaredErrorGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
The TCpuMatrix class.
Definition: CpuMatrix.h:46
The TCpu architecture class.
Definition: Cpu.h:37
static void MultiplyTranspose(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
Definition: Propagation.cxx:27
static void TanhDerivative(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
static Scalar_t CrossEntropy(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
static void InitializeIdentity(TCpuMatrix< Scalar_t > &A)
static void InitializeUniform(TCpuMatrix< Scalar_t > &A)
static double A[]
static Scalar_t MeanSquaredError(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
double beta(double x, double y)
Calculates the beta function.
static void AddRowWise(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.cxx:50
static Scalar_t L1Regularization(const TCpuMatrix< Scalar_t > &W)
static void SoftSign(TCpuMatrix< Scalar_t > &B)
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:497
static void Backward(TCpuMatrix< Scalar_t > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, TCpuMatrix< Scalar_t > &df, const TCpuMatrix< Scalar_t > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.cxx:68
static void Multiply(TCpuMatrix< Scalar_t > &C, const TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
Standard multiplication of two matrices A and B with the result being written into C...
Definition: Arithmetic.cxx:28
static void InitializeGauss(TCpuMatrix< Scalar_t > &A)
static void SymmetricReluDerivative(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
static void AddL1RegularizationGradients(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &W, Scalar_t weightDecay)
static void Hadamard(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A...
Definition: Arithmetic.cxx:76
static void Dropout(TCpuMatrix< Scalar_t > &A, Scalar_t p)
Apply dropout with activation probability p to the given matrix A and scale the result by reciprocal ...
Definition: Dropout.cxx:24
static double C[]
static void ReluDerivative(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
TCpuBuffer.
Definition: CpuBuffer.h:43
static void SumColumns(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
Sum columns of (m x n) matrix A and write the results into the first m elements in B...
Definition: Arithmetic.cxx:93
static void AddL2RegularizationGradients(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &W, Scalar_t weightDecay)
static void InitializeZero(TCpuMatrix< Scalar_t > &A)
static void Copy(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
Definition: Arithmetic.cxx:129
static void SymmetricRelu(TCpuMatrix< Scalar_t > &B)
static void IdentityDerivative(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
static void SoftSignDerivative(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
static Scalar_t L2Regularization(const TCpuMatrix< Scalar_t > &W)
static Scalar_t SoftmaxCrossEntropy(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
Abstract ClassifierFactory template that handles arbitrary types.
static void SoftmaxCrossEntropyGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
static void Softmax(TCpuMatrix< Scalar_t > &YHat, const TCpuMatrix< Scalar_t > &)
static void ScaleAdd(TCpuMatrix< Scalar_t > &A, const TCpuMatrix< Scalar_t > &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in matrix A.
Definition: Arithmetic.cxx:114
static void Gauss(TCpuMatrix< Scalar_t > &B)
static void Tanh(TCpuMatrix< Scalar_t > &B)
static void Relu(TCpuMatrix< Scalar_t > &B)
AReal Scalar_t
Definition: Cpu.h:41
static void CrossEntropyGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
static void SigmoidDerivative(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
static Scalar_t Sum(const TCpuMatrix< Scalar_t > &A)
Compute the sum of all elements in A.
static void GaussDerivative(TCpuMatrix< Scalar_t > &B, const TCpuMatrix< Scalar_t > &A)
static void TransposeMultiply(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &Weights)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C...
Definition: Arithmetic.cxx:52