TestBackpropagation.h
// @(#)root/tmva $Id$
// Author: Simon Pfreundschuh

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

////////////////////////////////////////////////////////////////////
// Generic tests of the backpropagation algorithm.                 //
//                                                                  //
// All tests randomly generate a net with identity activation      //
// functions, i.e. one that is completely linear, and then check   //
// the gradients computed for each layer against numerical         //
// derivatives. The restriction to linear nets avoids the          //
// precision loss that would otherwise be caused by the division   //
// by the finite-difference interval used to approximate the       //
// numerical derivatives.                                           //
////////////////////////////////////////////////////////////////////
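//
// Numerical reference used below (sketch): the finiteDifference helper
// from Utility.h is expected to return the symmetric difference
// f(dx) - f(-dx) of the functional f, so the weight tests form the
// central-difference estimate
//
//    dL/dw  ~=  ( L(w + dx) - L(w - dx) ) / (2 * dx)
//
// and compare it (or its undivided form, in the bias test) against the
// analytic gradients computed by TNet::Backward.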

#include <iostream>
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/Net.h"
#include "Utility.h"

using namespace TMVA::DNN;

/*! Compute the loss of the net as a function of the weight at index (i,j) in
 * layer l. dx is added as an offset to the current value of the weight. */
//______________________________________________________________________________
template <typename Architecture>
auto evaluate_net_weight(TNet<Architecture> &net,
                         typename Architecture::Matrix_t &X,
                         const typename Architecture::Matrix_t &Y,
                         size_t l,
                         size_t i,
                         size_t j,
                         typename Architecture::Scalar_t dx)
    -> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;

   net.GetLayer(l).GetWeights().operator()(i,j) += dx;
   Scalar_t res = net.Loss(X, Y);
   net.GetLayer(l).GetWeights().operator()(i,j) -= dx;
   return res;
}

/*! Compute the loss of the net as a function of the bias at index i in
 * layer l. dx is added as an offset to the current value of the bias. */
//______________________________________________________________________________
template <typename Architecture>
auto evaluate_net_bias(TNet<Architecture> &net,
                       typename Architecture::Matrix_t &X,
                       const typename Architecture::Matrix_t &Y,
                       size_t l,
                       size_t i,
                       typename Architecture::Scalar_t dx)
    -> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;

   net.GetLayer(l).GetBiases().operator()(i,0) += dx;
   Scalar_t res = net.Loss(X, Y);
   net.GetLayer(l).GetBiases().operator()(i,0) -= dx;
   return res;
}

/*! Generate a random net, perform forward and backward propagation and check
 * the weight gradients using numerical differentiation. Returns the maximum
 * relative gradient error and also prints it to stdout. */
//______________________________________________________________________________
template <typename Architecture>
auto testBackpropagationWeightsLinear(typename Architecture::Scalar_t dx)
    -> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;
   using Matrix_t = typename Architecture::Matrix_t;
   using Net_t    = TNet<Architecture>;

   Net_t net(50, 50, ELossFunction::kMeanSquaredError);

   // Random net.
   constructRandomLinearNet(net);
   net.Initialize(EInitialization::kGauss);

   // Random training data.
   Matrix_t X(50, 50);
   randomBatch(X);

   Matrix_t Y(50, net.GetOutputWidth());
   randomMatrix(Y);

   net.Forward(X);
   net.Backward(X, Y);

   Scalar_t maximum_error = 0.0;

   // Compute derivatives for all weights using finite differences and
   // compare to the result obtained from backpropagation.
   for (size_t l = 0; l < net.GetDepth(); l++)
   {
      std::cout << "\rTesting weight gradients: layer: "
                << l << " / " << net.GetDepth();
      std::cout << std::flush;
      auto &layer = net.GetLayer(l);
      auto &W     = layer.GetWeightGradients();

      for (size_t i = 0; i < layer.GetWidth(); i++)
      {
         for (size_t j = 0; j < layer.GetInputWidth(); j++)
         {
            auto f = [&net, &X, &Y, l, i, j](Scalar_t x)
            {
               return evaluate_net_weight(net, X, Y, l, i, j, x);
            };
            Scalar_t dy     = finiteDifference(f, dx) / (2.0 * dx);
            Scalar_t dy_ref = W(i,j);

            // Compute the relative error if dy_ref != 0, the absolute error otherwise.
            Scalar_t error;
            if (std::fabs(dy_ref) > 1e-15)
            {
               error = std::fabs((dy - dy_ref) / dy_ref);
            }
            else
            {
               error = std::fabs(dy - dy_ref);
            }

            maximum_error = std::max(error, maximum_error);
         }
      }
   }

   std::cout << "\rTesting weight gradients: ";
   std::cout << "maximum relative error: " << print_error(maximum_error) << std::endl;
   return maximum_error;
}

/*! Generate a random, linear net, perform forward and backward propagation with
 * L1 regularization and check the weight gradients using numerical
 * differentiation. Returns the maximum relative gradient error and
 * also prints it to stdout. */
//______________________________________________________________________________
template <typename Architecture>
auto testBackpropagationL1Regularization(typename Architecture::Scalar_t dx)
    -> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;
   using Matrix_t = typename Architecture::Matrix_t;
   using Net_t    = TNet<Architecture>;

   // Net with L1 regularization; the weight-decay strength is an assumed value.
   Net_t net(50, 50, ELossFunction::kMeanSquaredError, ERegularization::kL1, 1e-2);

   // Random net.
   constructRandomLinearNet(net);
   net.Initialize(EInitialization::kGauss);

   // Random training data.
   Matrix_t X(50, 50);
   randomBatch(X);

   Matrix_t Y(50, net.GetOutputWidth());
   randomMatrix(Y);

   net.Forward(X);
   net.Backward(X, Y);

   Scalar_t maximum_error = 0.0;

   // Compute derivatives for all weights using finite differences and
   // compare to the result obtained from backpropagation.
   for (size_t l = 0; l < net.GetDepth(); l++)
   {
      std::cout << "\rTesting weight gradients (L1): layer: "
                << l << " / " << net.GetDepth();
      std::cout << std::flush;
      auto &layer = net.GetLayer(l);
      auto &W     = layer.GetWeights();
      auto &dW    = layer.GetWeightGradients();

      for (size_t i = 0; i < layer.GetWidth(); i++) {
         for (size_t j = 0; j < layer.GetInputWidth(); j++) {
            // Avoid the point 0.0, where the L1 penalty |w| is not differentiable.
            if (std::abs(W(i,j)) > dx) {
               auto f = [&net, &X, &Y, l, i, j](Scalar_t x)
               {
                  return evaluate_net_weight(net, X, Y, l, i, j, x);
               };
               Scalar_t dy     = finiteDifference(f, dx) / (2.0 * dx);
               Scalar_t dy_ref = dW(i,j);

               // Compute the relative error if dy_ref != 0, the absolute error otherwise.
               Scalar_t error;
               if (std::fabs(dy_ref) > 1e-15)
               {
                  error = std::fabs((dy - dy_ref) / dy_ref);
               }
               else
               {
                  error = std::fabs(dy - dy_ref);
               }

               maximum_error = std::max(error, maximum_error);
            }
         }
      }
   }

   std::cout << "\rTesting weight gradients (L1): ";
   std::cout << "maximum relative error: " << print_error(maximum_error) << std::endl;
   return maximum_error;
}

/*! Generate a random, linear net, perform forward and backward propagation with
 * L2 regularization and check the weight gradients using numerical
 * differentiation. Returns the maximum relative gradient error and
 * also prints it to stdout. */
//______________________________________________________________________________
template <typename Architecture>
auto testBackpropagationL2Regularization(typename Architecture::Scalar_t dx)
    -> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;
   using Matrix_t = typename Architecture::Matrix_t;
   using Net_t    = TNet<Architecture>;

   // Net with L2 regularization; the weight-decay strength is an assumed value.
   Net_t net(50, 50, ELossFunction::kMeanSquaredError, ERegularization::kL2, 1e-2);

   // Random net.
   constructRandomLinearNet(net);
   net.Initialize(EInitialization::kGauss);

   // Random training data.
   Matrix_t X(50, 50);
   randomBatch(X);

   Matrix_t Y(50, net.GetOutputWidth());
   randomMatrix(Y);

   net.Forward(X);
   net.Backward(X, Y);

   Scalar_t maximum_error = 0.0;

   // Compute derivatives for all weights using finite differences and
   // compare to the result obtained from backpropagation.
   for (size_t l = 0; l < net.GetDepth(); l++)
   {
      std::cout << "\rTesting weight gradients (L2): layer: "
                << l << " / " << net.GetDepth();
      std::cout << std::flush;
      auto &layer = net.GetLayer(l);
      auto &W     = layer.GetWeightGradients();

      for (size_t i = 0; i < layer.GetWidth(); i++)
      {
         for (size_t j = 0; j < layer.GetInputWidth(); j++)
         {
            auto f = [&net, &X, &Y, l, i, j](Scalar_t x)
            {
               return evaluate_net_weight(net, X, Y, l, i, j, x);
            };
            Scalar_t dy     = finiteDifference(f, dx) / (2.0 * dx);
            Scalar_t dy_ref = W(i,j);

            // Compute the relative error if dy_ref != 0, the absolute error otherwise.
            Scalar_t error;
            if (std::fabs(dy_ref) > 1e-15)
            {
               error = std::fabs((dy - dy_ref) / dy_ref);
            }
            else
            {
               error = std::fabs(dy - dy_ref);
            }

            maximum_error = std::max(error, maximum_error);
         }
      }
   }

   std::cout << "\rTesting weight gradients (L2): ";
   std::cout << "maximum relative error: " << print_error(maximum_error) << std::endl;
   return maximum_error;
}

/*! Generate a random net, perform forward and backward propagation and check
 * the bias gradients using numerical differentiation. Returns the maximum
 * relative gradient error and also prints it to stdout. */
//______________________________________________________________________________
template <typename Architecture>
auto testBackpropagationBiasesLinear(typename Architecture::Scalar_t dx)
    -> typename Architecture::Scalar_t
{
   using Net_t    = TNet<Architecture>;
   using Scalar_t = typename Architecture::Scalar_t;
   using Matrix_t = typename Architecture::Matrix_t;

   Net_t net(50, 50, ELossFunction::kMeanSquaredError);

   // Random net.
   constructRandomLinearNet(net);
   net.Initialize(EInitialization::kGauss);

   // Random training data.
   Matrix_t X(50, 50);
   randomBatch(X);

   Matrix_t Y(50, net.GetOutputWidth());
   randomMatrix(Y);

   net.Forward(X);
   net.Backward(X, Y);

   Scalar_t maximum_error = 0.0;

   // Compute derivatives for all bias terms using finite differences and
   // compare to the result obtained from backpropagation.
   for (size_t l = 0; l < net.GetDepth(); l++)
   {
      std::cout << "\rTesting bias gradients: layer: "
                << l << " / " << net.GetDepth();
      std::cout << std::flush;
      auto &layer  = net.GetLayer(l);
      auto &dtheta = layer.GetBiasGradients();

      for (size_t i = 0; i < layer.GetWidth(); i++)
      {
         auto f = [&net, &X, &Y, l, i](Scalar_t x)
         {
            return evaluate_net_bias(net, X, Y, l, i, x);
         };
         // Compare the undivided central difference against dtheta * 2 * dx
         // to avoid dividing by the small finite-difference interval.
         Scalar_t dy     = finiteDifference(f, dx);
         Scalar_t dy_ref = dtheta(i,0) * 2.0 * dx;

         // Compute the relative error if dy_ref != 0, the absolute error otherwise.
         Scalar_t error;
         if (std::fabs(dy_ref) > 1e-10)
         {
            error = std::fabs((dy - dy_ref) / dy_ref);
         }
         else
         {
            error = std::fabs(dy - dy_ref);
         }

         maximum_error = std::max(error, maximum_error);
      }
   }

   std::cout << "\rTesting bias gradients: ";
   std::cout << "maximum relative error: " << print_error(maximum_error) << std::endl;
   return maximum_error;
}
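
// Usage sketch: the tests above are templated on the low-level architecture
// and are typically driven from a small test program. A minimal driver,
// assuming the reference backend TReference<double> from
// "TMVA/DNN/Architectures/Reference.h" and illustrative values for the step
// size and the error threshold, could look like this:
//
//    #include "TMVA/DNN/Architectures/Reference.h"
//    #include "TestBackpropagation.h"
//
//    int main()
//    {
//       using Architecture_t = TMVA::DNN::TReference<double>;
//
//       double error = testBackpropagationWeightsLinear<Architecture_t>(1e-5);
//       if (error > 1e-3) return 1;
//
//       error = testBackpropagationBiasesLinear<Architecture_t>(1e-5);
//       if (error > 1e-3) return 1;
//
//       return 0;
//    }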