27#ifndef TMVA_DNN_DLMINIMIZERS
28#define TMVA_DNN_DLMINIMIZERS
64template <
typename Architecture_t>
68 using Scalar_t =
typename Architecture_t::Scalar_t;
69 using Matrix_t =
typename Architecture_t::Matrix_t;
162template <
typename Architecture_t>
164 : fBatchSize(0), fStepCount(0), fConvergenceSteps(0), fConvergenceCount(0), fTestInterval(0), fLearningRate(0),
165 fMinimumError(std::numeric_limits<
Scalar_t>::infinity())
171template <
typename Architecture_t>
174 : fBatchSize(0), fStepCount(0), fConvergenceSteps(convergenceSteps), fConvergenceCount(0),
175 fTestInterval(testInterval), fLearningRate(learningRate), fMinimumError(std::numeric_limits<
Scalar_t>::infinity())
181template <
typename Architecture_t>
188 deepNet.
Update(fLearningRate);
192template <
typename Architecture_t>
200 for (
size_t i = 0; i < deepNet.
GetDepth(); i++) {
203 layer->UpdateWeights(layer->GetWeightGradients(), fLearningRate);
205 layer->UpdateBiases(layer->GetBiasGradients(), fLearningRate);
211template <
typename Architecture_t>
217 deepNet.Update(fLearningRate);
223template <
typename Architecture_t>
229 fTrainingError = loss;
232 for (
size_t i = 0; i < deepNet.GetDepth(); i++) {
233 auto *layer = deepNet.GetLayerAt(i);
235 layer->UpdateWeights(layer->GetWeightGradients(), fLearningRate);
237 layer->UpdateBiases(layer->GetBiasGradients(), fLearningRate);
245template <
typename Architecture_t>
250 master.ParallelForward(nets, batches);
251 master.ParallelBackward(nets, batches, fLearningRate);
255template <
typename Architecture_t>
260 master.ParallelForward(nets, batches);
261 master.ParallelBackwardMomentum(nets, batches, fLearningRate, momentum);
265template <
typename Architecture_t>
270 master.ParallelForward(nets, batches);
271 master.ParallelBackwardNestorov(nets, batches, fLearningRate, momentum);
275template <
typename Architecture_t>
278 if (fTestError < fMinimumError * 0.999) {
279 fConvergenceCount = 0;
280 fMinimumError = fTestError;
285 return (fConvergenceCount >= fConvergenceSteps);
289template <
typename Architecture_t>
292 fTestError = testError;
293 if (fTestError < fMinimumError * 0.999) {
294 fConvergenceCount = 0;
295 fMinimumError = fTestError;
297 fConvergenceCount += fTestInterval;
299 return (fConvergenceCount >= fConvergenceSteps);
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Scalar_t fMinimumError
The minimum loss achieved on the training set during the current training session.
void SetBatchSize(Scalar_t rate)
typename Architecture_t::Scalar_t Scalar_t
Scalar_t fTestError
Holds the most recently computed test loss.
void StepNesterov(DeepNet_t &master, std::vector< DeepNet_t > &nets, std::vector< TTensorBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses Nesterov momentum.
size_t GetConvergenceSteps() const
bool HasConverged()
Increases the minimization step counter by the test error evaluation period and uses the current internal value of the test error to determine whether the minimization has converged.
void SetTestInterval(size_t interval)
Scalar_t StepReducedWeightsLoss(DeepNet_t &deepNet, std::vector< Matrix_t > &input, const Matrix_t &output, const Matrix_t &weights)
Similar to StepReducedWeights(...) but also evaluates the loss.
size_t fStepCount
Number of steps performed in the current training session.
void Reset()
Reset minimizer object to default state.
size_t fBatchSize
Batch size to use for the training.
void StepReducedWeights(DeepNet_t &deepNet, std::vector< Matrix_t > &input, const Matrix_t &output, const Matrix_t &weights)
Does not evaluate the loss and therefore not trigger a possible synchronization with the device.
void SetConvergenceSteps(size_t steps)
Setters.
size_t fConvergenceCount
Current number of training epochs without a considerable decrease in the test error.
void SetLearningRate(Scalar_t rate)
Scalar_t StepLoss(DeepNet_t &deepNet, std::vector< Matrix_t > &input, const Matrix_t &output, const Matrix_t &weights)
Same as Step(...) but also evaluate the loss on the given training data.
size_t fConvergenceSteps
Number of training epochs without a considerable decrease in the test error that is required for convergence.
TDeepNet< Architecture_t > DeepNet_t
size_t GetTestInterval() const
size_t GetConvergenceCount() const
Getters.
size_t fTestInterval
Interval for the computation of the test error.
typename Architecture_t::Matrix_t Matrix_t
Scalar_t GetTrainingError() const
Scalar_t fLearningRate
Learning rate used for the gradient-descent weight and bias updates.
Scalar_t fTrainingError
Holds the most recently computed training loss.
void Step(DeepNet_t &deepNet, std::vector< Matrix_t > &input, const Matrix_t &output, const Matrix_t &weights)
Perform a single optimization step on a given batch.
Scalar_t GetTestError() const
void StepMomentum(DeepNet_t &master, std::vector< DeepNet_t > &nets, std::vector< TTensorBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses momentum.
Generic Deep Neural Network class.
void Forward(Tensor_t &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at position i.
void Update(Scalar_t learningRate)
Function that will update the weights and biases in the layers that contain weights and biases.
create variable transformations