Logo ROOT   6.07/09
Reference Guide
TestActivationFunctions.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Simon Pfreundschuh
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////
13 // Generic tests of the layer activation functions //
14 // //
15 // Contains tests for each of the layer activation functions that //
16 // test the evaluation of the function using the evaluate(...) //
17 // method and the computation of the derivatives using //
18 // evaluate_derivative(...) on a randomly generated matrix. Each //
19 // function returns the maximum relative error between the expected //
20 // result and the result obtained for the given architecture. //
21 //////////////////////////////////////////////////////////////////////
22 
23 #ifndef TMVA_TEST_DNN_TEST_ACTIVATION_FUNCTIONS
24 #define TMVA_TEST_DNN_TEST_ACTIVATION_FUNCTIONS
25 
26 #include "TMatrixT.h"
28 #include "TMVA/DNN/Functions.h"
29 #include "TMVA/DNN/Net.h"
30 #include "Utility.h"
31 
32 using namespace TMVA::DNN;
33 
34 //______________________________________________________________________________
35 //
36 // Identity Activation Function
37 //______________________________________________________________________________
38 
39 /*! Test application of identity function to matrix. */
40 //______________________________________________________________________________
41 template <typename Architecture>
42 auto testIdentity(size_t ntests)
43 -> typename Architecture::Scalar_t
44 {
45  using Matrix_t = typename Architecture::Matrix_t;
46  Double_t maximumError = 0.0;
47 
48  for (size_t i = 0; i < ntests; i++) {
49  size_t m = rand() % 100 + 1;
50  size_t n = rand() % 100 + 1;
51 
52  TMatrixT<Double_t> ARef(m, n);
53  randomMatrix(ARef);
54  Matrix_t AArch(ARef);
55 
56  evaluate<Architecture>(AArch, EActivationFunction::kIdentity);
57 
58  TMatrixT<Double_t> A = AArch;
59  Double_t error = maximumRelativeError(A, ARef);
60  maximumError = std::max(error, maximumError);
61  }
62  return maximumError;
63 }
64 
65 /*! Test computation of the first derivative of the identity function. */
66 //______________________________________________________________________________
67 template <typename Architecture>
68 auto testIdentityDerivative(size_t ntests)
69  -> typename Architecture::Scalar_t
70 {
71  using Matrix_t = typename Architecture::Matrix_t;
72  Double_t maximumError = 0.0;
73 
74  for (size_t i = 0; i < ntests; i++) {
75  size_t m = rand() % 100 + 1;
76  size_t n = rand() % 100 + 1;
77 
78  TMatrixT<Double_t> ARef(m, n), BRef(m, n);
79  randomMatrix(ARef);
80  Matrix_t AArch(ARef), BArch(BRef);
81 
82  evaluateDerivative<Architecture>(BArch, EActivationFunction::kIdentity, AArch);
83  evaluateDerivative<TReference<Double_t>>(BRef, EActivationFunction::kIdentity,
84  ARef);
85 
86  TMatrixT<Double_t> B = BArch;
87  Double_t error = maximumRelativeError(B, BRef);
88  maximumError = std::max(error, maximumError);
89  }
90  return maximumError;
91 }
92 
93 //______________________________________________________________________________
94 //
95 // ReLU Activation Function
96 //______________________________________________________________________________
97 
98 /*! Test application of ReLU function to matrix. */
99 //______________________________________________________________________________
100 template <typename Architecture>
101 auto testRelu(size_t ntests)
102 -> typename Architecture::Scalar_t
103 {
104  using Matrix_t = typename Architecture::Matrix_t;
105  Double_t maximumError = 0.0;
106 
107  for (size_t i = 0; i < ntests; i++) {
108  size_t m = rand() % 100 + 1;
109  size_t n = rand() % 100 + 1;
110 
111  TMatrixT<Double_t> ARef(m, n);
112  randomMatrix(ARef);
113  Matrix_t AArch(ARef);
114 
115  evaluate<Architecture>(AArch, EActivationFunction::kRelu);
116  applyMatrix(ARef, [](double x){return x < 0.0 ? 0.0 : x;});
117 
118  TMatrixT<Double_t> A = AArch;
119  Double_t error = maximumRelativeError(A, ARef);
120  maximumError = std::max(error, maximumError);
121  }
122  return maximumError;
123 }
124 
125 /*! Test computation of the first derivative of the ReLU function. */
126 //______________________________________________________________________________
127 template <typename Architecture>
128 auto testReluDerivative(size_t ntests)
129 -> typename Architecture::Scalar_t
130 {
131  using Matrix_t = typename Architecture::Matrix_t;
132  Double_t maximumError = 0.0;
133 
134  for (size_t i = 0; i < ntests; i++) {
135  size_t m = rand() % 100 + 1;
136  size_t n = rand() % 100 + 1;
137 
138  TMatrixT<Double_t> ARef(m, n), BRef(m, n);
139  randomMatrix(ARef);
140  Matrix_t AArch(ARef), BArch(BRef);
141 
142  evaluateDerivative<Architecture>(BArch, EActivationFunction::kRelu, AArch);
143  applyMatrix(ARef, [](double x){return x > 0.0 ? 1.0 : 0.0;});
144 
145  TMatrixT<Double_t> B = BArch;
146  Double_t error = maximumRelativeError(B, ARef);
147  maximumError = std::max(error, maximumError);
148  }
149  return maximumError;
150 }
151 
152 //______________________________________________________________________________
153 //
154 // Sigmoid Activation Function
155 //______________________________________________________________________________
156 
157 /*! Test application of Sigmoid function to matrix. */
158 //______________________________________________________________________________
159 template <typename Architecture>
160 auto testSigmoid(size_t ntests)
161 -> typename Architecture::Scalar_t
162 {
163  using Matrix_t = typename Architecture::Matrix_t;
164  Double_t maximumError = 0.0;
165 
166  for (size_t i = 0; i < ntests; i++) {
167  size_t m = rand() % 100 + 1;
168  size_t n = rand() % 100 + 1;
169 
170  TMatrixT<Double_t> ARef(m, n);
171  randomMatrix(ARef);
172  Matrix_t AArch(ARef);
173 
174  evaluate<Architecture>(AArch, EActivationFunction::kSigmoid);
175  applyMatrix(ARef, [](double x){return 1.0 / (1.0 + std::exp(-x));});
176 
177  TMatrixT<Double_t> A = AArch;
178  Double_t error = maximumRelativeError(A, ARef);
179  maximumError = std::max(error, maximumError);
180  }
181  return maximumError;
182 }
183 
184 /*! Test computation of the first derivative of the ReLU function. */
185 //______________________________________________________________________________
186 template <typename Architecture>
187 auto testSigmoidDerivative(size_t ntests)
188 -> typename Architecture::Scalar_t
189 {
190  using Matrix_t = typename Architecture::Matrix_t;
191  Double_t maximumError = 0.0;
192 
193  for (size_t i = 0; i < ntests; i++) {
194  size_t m = rand() % 100 + 1;
195  size_t n = rand() % 100 + 1;
196 
197  TMatrixT<Double_t> ARef(m, n), BRef(m, n);
198  randomMatrix(ARef);
199  Matrix_t AArch(ARef), BArch(BRef);
200 
201  evaluateDerivative<Architecture>(BArch, EActivationFunction::kSigmoid, AArch);
202  applyMatrix(ARef, [](Double_t x){
203  Double_t sig = 1.0 / (1.0 + std::exp(-x));
204  return sig * (1.0 - sig);
205  });
206 
207  TMatrixT<Double_t> B = BArch;
208  Double_t error = maximumRelativeError(B, ARef);
209  maximumError = std::max(error, maximumError);
210  }
211  return maximumError;
212 }
213 
214 //______________________________________________________________________________
215 //
216 // Tanh Activation Function
217 //______________________________________________________________________________
218 
219 /*! Test application of tanh function to matrix. */
220 //______________________________________________________________________________
221 template <typename Architecture>
222 auto testTanh(size_t ntests)
223 -> typename Architecture::Scalar_t
224 {
225  using Matrix_t = typename Architecture::Matrix_t;
226  Double_t maximumError = 0.0;
227 
228  for (size_t i = 0; i < ntests; i++) {
229  size_t m = rand() % 100 + 1;
230  size_t n = rand() % 100 + 1;
231 
232  TMatrixT<Double_t> ARef(m, n);
233  randomMatrix(ARef);
234  Matrix_t AArch(ARef);
235 
236  evaluate<Architecture>(AArch, EActivationFunction::kTanh);
237  applyMatrix(ARef, [](double x){return tanh(x);});
238 
239  TMatrixT<Double_t> A = AArch;
240  Double_t error = maximumRelativeError(A, ARef);
241  maximumError = std::max(error, maximumError);
242  }
243  return maximumError;
244 }
245 
246 /*! Test computation of the first derivative of the tanh function. */
247 //______________________________________________________________________________
248 template <typename Architecture>
249 auto testTanhDerivative(size_t ntests)
250 -> typename Architecture::Scalar_t
251 {
252  using Matrix_t = typename Architecture::Matrix_t;
253  Double_t maximumError = 0.0;
254 
255  for (size_t i = 0; i < ntests; i++) {
256  size_t m = rand() % 100 + 1;
257  size_t n = rand() % 100 + 1;
258 
259  TMatrixT<Double_t> ARef(m, n), BRef(m, n);
260  randomMatrix(ARef);
261  Matrix_t AArch(ARef), BArch(BRef);
262 
263  evaluateDerivative<Architecture>(BArch, EActivationFunction::kTanh, AArch);
264  applyMatrix(ARef, [](Double_t x){
265  Double_t t = tanh(x);
266  return 1 - t * t;
267  });
268 
269  TMatrixT<Double_t> B = BArch;
270  Double_t error = maximumRelativeError(B, ARef);
271  maximumError = std::max(error, maximumError);
272  }
273  return maximumError;
274 }
275 
276 //______________________________________________________________________________
277 //
278 // Symmetric ReLU Activation Function
279 //______________________________________________________________________________
280 
281 /*! Test application of symmetric ReLU function to matrix. */
282 //______________________________________________________________________________
283 template <typename Architecture>
284 auto testSymmetricRelu(size_t ntests)
285 -> typename Architecture::Scalar_t
286 {
287  using Matrix_t = typename Architecture::Matrix_t;
288  Double_t maximumError = 0.0;
289 
290  for (size_t i = 0; i < ntests; i++) {
291  size_t m = rand() % 100 + 1;
292  size_t n = rand() % 100 + 1;
293 
294  TMatrixT<Double_t> ARef(m, n);
295  randomMatrix(ARef);
296  Matrix_t AArch(ARef);
297 
298  evaluate<Architecture>(AArch, EActivationFunction::kSymmRelu);
299  applyMatrix(ARef, [](double x){return fabs(x);});
300 
301  TMatrixT<Double_t> A = AArch;
302  Double_t error = maximumRelativeError(A, ARef);
303  maximumError = std::max(error, maximumError);
304  }
305  return maximumError;
306 }
307 
308 /*! Test computation of the first derivative of the symmetric ReLU function. */
309 //______________________________________________________________________________
310 template <typename Architecture>
311 auto testSymmetricReluDerivative(size_t ntests)
312 -> typename Architecture::Scalar_t
313 {
314  using Matrix_t = typename Architecture::Matrix_t;
315  Double_t maximumError = 0.0;
316 
317  for (size_t i = 0; i < ntests; i++) {
318  size_t m = rand() % 100 + 1;
319  size_t n = rand() % 100 + 1;
320 
321  TMatrixT<Double_t> ARef(m, n), BRef(m, n);
322  randomMatrix(ARef);
323  Matrix_t AArch(ARef), BArch(BRef);
324 
325  evaluateDerivative<Architecture>(BArch, EActivationFunction::kSymmRelu, AArch);
326  applyMatrix(ARef, [](Double_t x){
327  return (x < 0) ? -1.0 : 1.0;
328  });
329 
330  TMatrixT<Double_t> B = BArch;
331  Double_t error = maximumRelativeError(B, ARef);
332  maximumError = std::max(error, maximumError);
333  }
334  return maximumError;
335 }
336 
337 //______________________________________________________________________________
338 //
339 // Soft Sign Activation Function
340 //______________________________________________________________________________
341 
342 /*! Test application of symmetric soft sign function to matrix. */
343 //______________________________________________________________________________
344 template <typename Architecture>
345 auto testSoftSign(size_t ntests)
346 -> typename Architecture::Scalar_t
347 {
348  using Matrix_t = typename Architecture::Matrix_t;
349  Double_t maximumError = 0.0;
350 
351  for (size_t i = 0; i < ntests; i++) {
352  size_t m = rand() % 100 + 1;
353  size_t n = rand() % 100 + 1;
354 
355  TMatrixT<Double_t> ARef(m, n);
356  randomMatrix(ARef);
357  Matrix_t AArch(ARef);
358 
359  evaluate<Architecture>(AArch, EActivationFunction::kSoftSign);
360  applyMatrix(ARef, [](double x){return x / (1 + fabs(x));});
361 
362  TMatrixT<Double_t> A = AArch;
363  Double_t error = maximumRelativeError(A, ARef);
364  maximumError = std::max(error, maximumError);
365  }
366  return maximumError;
367 }
368 
369 /*! Test computation of the first derivative of the soft sign function. */
370 //______________________________________________________________________________
371 template <typename Architecture>
372 auto testSoftSignDerivative(size_t ntests)
373 -> typename Architecture::Scalar_t
374 {
375  using Matrix_t = typename Architecture::Matrix_t;
376  Double_t maximumError = 0.0;
377 
378  for (size_t i = 0; i < ntests; i++) {
379  size_t m = rand() % 100 + 1;
380  size_t n = rand() % 100 + 1;
381 
382  TMatrixT<Double_t> ARef(m, n), BRef(m, n);
383  randomMatrix(ARef);
384  Matrix_t AArch(ARef), BArch(BRef);
385 
386  evaluateDerivative<Architecture>(BArch, EActivationFunction::kSoftSign, AArch);
387  applyMatrix(ARef, [](Double_t x){
388  Double_t y = 1 + fabs(x);
389  return 1.0 / (y * y);
390  });
391 
392  TMatrixT<Double_t> B = BArch;
393  Double_t error = maximumRelativeError(B, ARef);
394  maximumError = std::max(error, maximumError);
395  }
396  return maximumError;
397 }
398 
399 //______________________________________________________________________________
400 //
401 // Gauss Activation Functions
402 //______________________________________________________________________________
403 
404 /*! Test application of Gauss activation function to matrix. */
405 //______________________________________________________________________________
406 template <typename Architecture>
407 auto testGauss(size_t ntests)
408 -> typename Architecture::Scalar_t
409 {
410  using Matrix_t = typename Architecture::Matrix_t;
411  Double_t maximumError = 0.0;
412 
413  for (size_t i = 0; i < ntests; i++) {
414  size_t m = rand() % 100 + 1;
415  size_t n = rand() % 100 + 1;
416 
417  TMatrixT<Double_t> ARef(m, n);
418  randomMatrix(ARef);
419  Matrix_t AArch(ARef);
420 
421  evaluate<Architecture>(AArch, EActivationFunction::kGauss);
422  applyMatrix(ARef, [](double x){return exp(- x * x);});
423 
424  TMatrixT<Double_t> A = AArch;
425  Double_t error = maximumRelativeError(A, ARef);
426  maximumError = std::max(error, maximumError);
427  }
428  return maximumError;
429 }
430 
431 /*! Test computation of the first derivative of the Gauss activation function. */
432 //______________________________________________________________________________
433 template <typename Architecture>
434 auto testGaussDerivative(size_t ntests)
435 -> typename Architecture::Scalar_t
436 {
437  using Matrix_t = typename Architecture::Matrix_t;
438  Double_t maximumError = 0.0;
439 
440  for (size_t i = 0; i < ntests; i++) {
441  size_t m = rand() % 100 + 1;
442  size_t n = rand() % 100 + 1;
443 
444  TMatrixT<Double_t> ARef(m, n), BRef(m, n);
445  randomMatrix(ARef);
446  Matrix_t AArch(ARef), BArch(BRef);
447 
448  evaluateDerivative<Architecture>(BArch, EActivationFunction::kGauss, AArch);
449  applyMatrix(ARef, [](Double_t x){return -2.0 * x * exp(- x * x);});
450 
451  TMatrixT<Double_t> B = BArch;
452  Double_t error = maximumRelativeError(B, ARef);
453  maximumError = std::max(error, maximumError);
454  }
455  return maximumError;
456 }
457 #endif
auto testSigmoid(size_t ntests) -> typename Architecture::Scalar_t
Test application of Sigmoid function to matrix.
static double B[]
void randomMatrix(AMatrix &X)
Fill matrix with random, Gaussian-distributed values.
Definition: Utility.h:59
auto maximumRelativeError(const AMatrix &X, const AMatrix &Y) -> decltype(X(0, 0))
Compute the maximum, element-wise relative error of the matrices X and Y normalized by the element of...
Definition: Utility.h:213
void applyMatrix(AMatrix &X, F f)
Apply functional to each element in the matrix.
Definition: Utility.h:103
double tanh(double)
auto testGauss(size_t ntests) -> typename Architecture::Scalar_t
Test application of Gauss activation function to matrix.
Definition: Blas.h:58
static double A[]
auto testSymmetricReluDerivative(size_t ntests) -> typename Architecture::Scalar_t
Test computation of the first derivative of the symmetric ReLU function.
Double_t x[n]
Definition: legend1.C:17
auto testReluDerivative(size_t ntests) -> typename Architecture::Scalar_t
Test computation of the first derivative of the ReLU function.
auto testSoftSign(size_t ntests) -> typename Architecture::Scalar_t
Test application of symmetric soft sign function to matrix.
auto testSymmetricRelu(size_t ntests) -> typename Architecture::Scalar_t
Test application of symmetric ReLU function to matrix.
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
auto testGaussDerivative(size_t ntests) -> typename Architecture::Scalar_t
Test computation of the first derivative of the Gauss activation function.
auto testTanh(size_t ntests) -> typename Architecture::Scalar_t
Test application of tanh function to matrix.
auto testSoftSignDerivative(size_t ntests) -> typename Architecture::Scalar_t
Test computation of the first derivative of the soft sign function.
auto testTanhDerivative(size_t ntests) -> typename Architecture::Scalar_t
Test computation of the first derivative of the tanh function.
auto testSigmoidDerivative(size_t ntests) -> typename Architecture::Scalar_t
Test computation of the first derivative of the Sigmoid function.
TMarker * m
Definition: textangle.C:8
auto testRelu(size_t ntests) -> typename Architecture::Scalar_t
Test application of ReLU function to matrix.
auto testIdentityDerivative(size_t ntests) -> typename Architecture::Scalar_t
Test computation of the first derivative of the identity function.
double Double_t
Definition: RtypesCore.h:55
Double_t y[n]
Definition: legend1.C:17
double exp(double)
const Int_t n
Definition: legend1.C:16
auto testIdentity() -> typename Architecture_t::Scalar_t
Test the data loader by loading identical input and output data, running it through an identity neura...