11#ifndef TMVA_DNN_MINIMIZERS
12#define TMVA_DNN_MINIMIZERS
54template<
typename Architecture_t>
58 using Scalar_t =
typename Architecture_t::Scalar_t;
59 using Matrix_t =
typename Architecture_t::Matrix_t;
77 size_t convergenceSteps,
90 template <
typename Data_t,
typename Net_t>
96 template <
typename Data_t,
typename Net_t>
106 template <
typename Net_t>
111 template <
typename Net_t>
120 template <
typename Net_t>
122 std::vector<Net_t> &
nets,
126 template <
typename Net_t>
128 std::vector<Net_t> &
nets,
131 template <
typename Net_t>
136 std::vector<Net_t> &
nets,
143 template <
typename Net_t>
148 template <
typename Net_t>
174template <
typename Architecture_t>
176 : fBatchSize(0), fStepCount(0), fConvergenceSteps(0), fConvergenceCount(0), fTestInterval(0),
177 fTrainingError(0), fTestError(0), fLearningRate(0),
178 fMinimumError(std::numeric_limits<
Scalar_t>::infinity())
184template <
typename Architecture_t>
186 : fBatchSize(0), fStepCount(0), fConvergenceSteps(convergenceSteps), fConvergenceCount(0),
187 fTestInterval(testInterval), fTrainingError(0), fTestError(0),
188 fLearningRate(learningRate), fMinimumError(std::numeric_limits<
Scalar_t>::infinity())
194template<
typename Architecture_t>
195template <
typename Data_t,
typename Net_t>
215 net.GetOutputWidth());
216 std::vector<Net_t>
nets{};
218 for (
size_t i = 0; i <
nThreads; i++) {
220 for (
size_t j = 0;
j < net.GetDepth();
j++)
224 Architecture_t::Copy(
layer.GetWeights(),
226 Architecture_t::Copy(
layer.GetBiases(),
232 std::vector<TBatch<Architecture_t>>
batches{};
236 for (fStepCount = 0; fStepCount < fTestInterval; fStepCount++) {
246 auto inputMatrix =
b.GetInput();
247 auto outputMatrix =
b.GetOutput();
248 auto weightMatrix =
b.GetWeights();
249 fTestError =
testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
251 }
while (!HasConverged());
253 return fMinimumError;
257template<
typename Architecture_t>
258template <
typename Data_t,
typename Net_t>
275 auto testNet = net.CreateClone(net.GetBatchSize());
279 net.GetOutputWidth());
281 net.InitializeGradients();
282 std::vector<Net_t>
nets{};
284 for (
size_t i = 0; i <
nThreads; i++) {
286 for (
size_t j = 0;
j < net.GetDepth();
j++)
290 Architecture_t::Copy(
layer.GetWeights(),
292 Architecture_t::Copy(
layer.GetBiases(),
298 std::vector<TBatch<Architecture_t>>
batches{};
302 for (fStepCount = 0; fStepCount < fTestInterval; fStepCount++) {
307 if (momentum != 0.0) {
318 auto inputMatrix =
b.GetInput();
319 auto outputMatrix =
b.GetOutput();
320 auto weightMatrix =
b.GetWeights();
321 fTestError +=
testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
324 }
while (!HasConverged());
325 return fMinimumError;
329template <
typename Architecture_t>
330template <
typename Net_t>
334 net.Forward(
input,
true);
337 for (
size_t i = 0; i < net.GetDepth(); i++)
339 auto &
layer = net.GetLayer(i);
340 Architecture_t::ScaleAdd(
layer.GetWeights(),
341 layer.GetWeightGradients(),
343 Architecture_t::ScaleAdd(
layer.GetBiases(),
344 layer.GetBiasGradients(),
350template <
typename Architecture_t>
351template <
typename Net_t>
358 for (
size_t i = 0; i < net.GetDepth(); i++)
360 auto &
layer = net.GetLayer(i);
361 Architecture_t::ScaleAdd(
layer.GetWeights(),
362 layer.GetWeightGradients(),
364 Architecture_t::ScaleAdd(
layer.GetBiases(),
365 layer.GetBiasGradients(),
372template<
typename Architecture_t>
373 template <
typename Net_t>
376 std::vector<Net_t> &
nets,
379 typename Architecture_t::Matrix_t dummy(0,0);
383 for (
size_t j = 0;
j <
nets.size();
j++) {
387 for (
size_t i = 1; i <
depth; i++)
389 for (
size_t j = 0;
j <
nets.size();
j++) {
390 nets[
j].GetLayer(i).Forward(
nets[
j].GetLayer(i-1).GetOutput(),
true);
394 for (
size_t j = 0;
j <
nets.size();
j++) {
400 for (
size_t i =
depth - 1; i > 0; i--)
402 for (
size_t j = 0;
j <
nets.size();
j++) {
403 nets[
j].GetLayer(i).Backward(
nets[
j].GetLayer(i-1).GetActivationGradients(),
404 nets[
j].GetLayer(i-1).GetOutput(),
405 nets[
j].GetRegularization(),
406 nets[
j].GetWeightDecay());
409 for (
size_t j = 0;
j <
nets.size();
j++) {
410 nets[
j].GetLayer(0).Backward(dummy,
412 nets[
j].GetRegularization(),
413 nets[
j].GetWeightDecay());
416 for (
size_t j = 0;
j <
nets.size();
j++) {
417 for (
size_t i = 0; i <
depth; i++)
422 layer.GetWeightGradients(),
424 Architecture_t::Copy(
layer.GetWeights(),
427 layer.GetBiasGradients(),
429 Architecture_t::Copy(
layer.GetBiases(),
436template<
typename Architecture_t>
437template <
typename Net_t>
440 std::vector<Net_t> &
nets,
444 typename Architecture_t::Matrix_t dummy(0,0);
448 for (
size_t j = 0;
j <
nets.size();
j++) {
452 for (
size_t i = 1; i <
depth; i++)
454 for (
size_t j = 0;
j <
nets.size();
j++) {
455 nets[
j].GetLayer(i).Forward(
nets[
j].GetLayer(i-1).GetOutput(),
true);
459 for (
size_t j = 0;
j <
nets.size();
j++) {
465 for (
size_t i =
depth - 1; i > 0; i--)
467 for (
size_t j = 0;
j <
nets.size();
j++) {
468 nets[
j].GetLayer(i).Backward(
nets[
j].GetLayer(i-1).GetActivationGradients(),
469 nets[
j].GetLayer(i-1).GetOutput(),
470 nets[
j].GetRegularization(),
471 nets[
j].GetWeightDecay());
472 Architecture_t::ScaleAdd(
master.GetLayer(i).GetWeightGradients(),
473 nets[
j].GetLayer(i).GetWeightGradients(),
474 - fLearningRate / momentum);
475 Architecture_t::ScaleAdd(
master.GetLayer(i).GetBiasGradients(),
476 nets[
j].GetLayer(i).GetBiasGradients(),
477 - fLearningRate / momentum);
479 Architecture_t::ScaleAdd(
master.GetLayer(i).GetWeightGradients(),
480 master.GetLayer(i).GetWeightGradients(),
482 Architecture_t::ScaleAdd(
master.GetLayer(i).GetBiasGradients(),
483 master.GetLayer(i).GetBiasGradients(),
486 for (
size_t j = 0;
j <
nets.size();
j++) {
487 nets[
j].GetLayer(0).Backward(dummy,
489 nets[
j].GetRegularization(),
490 nets[
j].GetWeightDecay());
491 Architecture_t::ScaleAdd(
master.GetLayer(0).GetWeightGradients(),
492 nets[
j].GetLayer(0).GetWeightGradients(),
493 - fLearningRate / momentum);
494 Architecture_t::ScaleAdd(
master.GetLayer(0).GetBiasGradients(),
495 nets[
j].GetLayer(0).GetBiasGradients(),
496 - fLearningRate / momentum);
499 Architecture_t::ScaleAdd(
master.GetLayer(0).GetWeightGradients(),
500 master.GetLayer(0).GetWeightGradients(),
502 Architecture_t::ScaleAdd(
master.GetLayer(0).GetBiasGradients(),
503 master.GetLayer(0).GetBiasGradients(),
506 for (
size_t i = 0; i <
depth; i++)
515 for (
size_t j = 0;
j <
nets.size();
j++) {
517 Architecture_t::Copy(
layer.GetWeights(),
519 Architecture_t::Copy(
layer.GetBiases(),
526template<
typename Architecture_t>
527template <
typename Net_t>
530 std::vector<Net_t> &
nets,
534 typename Architecture_t::Matrix_t dummy(0,0);
538 for (
size_t j = 0;
j <
nets.size();
j++) {
542 for (
size_t i = 1; i <
depth; i++)
544 for (
size_t j = 0;
j <
nets.size();
j++) {
545 nets[
j].GetLayer(i).Forward(
nets[
j].GetLayer(i-1).GetOutput(),
true);
550 for (
size_t j = 0;
j <
nets.size();
j++) {
557 for (
size_t i =
depth - 1; i > 0; i--)
559 for (
size_t j = 0;
j <
nets.size();
j++) {
560 nets[
j].GetLayer(i).Backward(
nets[
j].GetLayer(i-1).GetActivationGradients(),
561 nets[
j].GetLayer(i-1).GetOutput(),
562 nets[
j].GetRegularization(),
563 nets[
j].GetWeightDecay());
567 for (
size_t j = 0;
j <
nets.size();
j++) {
568 nets[
j].GetLayer(0).Backward(dummy,
570 nets[
j].GetRegularization(),
571 nets[
j].GetWeightDecay());
574 for (
size_t i = 0; i <
depth; i++)
577 for (
size_t j = 0;
j <
nets.size();
j++) {
579 Architecture_t::Copy(
layer.GetWeights(),
581 Architecture_t::Copy(
layer.GetBiases(),
583 Architecture_t::ScaleAdd(
layer.GetWeights(),
586 Architecture_t::ScaleAdd(
layer.GetBiases(),
590 for (
size_t j = 0;
j <
nets.size();
j++) {
592 Architecture_t::ScaleAdd(
masterLayer.GetWeightGradients(),
593 layer.GetWeightGradients(),
594 - fLearningRate / momentum);
595 Architecture_t::ScaleAdd(
masterLayer.GetBiasGradients(),
596 layer.GetBiasGradients(),
597 - fLearningRate / momentum);
599 Architecture_t::ScaleAdd(
masterLayer.GetWeightGradients(),
602 Architecture_t::ScaleAdd(
masterLayer.GetBiasGradients(),
615template<
typename Architecture_t>
616template <
typename Net_t>
622 net.Forward(
input,
true);
625 for (
size_t i = 0; i < net.GetDepth(); i++)
627 auto &
layer = net.GetLayer(i);
628 Architecture_t::ScaleAdd(
layer.GetWeights(),
629 layer.GetWeightGradients(),
632 Architecture_t::ScaleAdd(
layer.GetBiases(),
633 layer.GetBiasGradients(),
640template <
typename Architecture_t>
641template <
typename Net_t>
647 fTrainingError =
loss;
650 for (
size_t i = 0; i < net.GetDepth(); i++)
652 auto &
layer = net.GetLayer(i);
653 Architecture_t::ScaleAdd(
layer.GetWeights(),
654 layer.GetWeightGradients(),
657 Architecture_t::ScaleAdd(
layer.GetBiases(),
658 layer.GetBiasGradients(),
666template<
typename Architecture_t>
669 if (fTestError < fMinimumError * 0.999) {
670 fConvergenceCount = 0;
671 fMinimumError = fTestError;
676 return (fConvergenceCount >= fConvergenceSteps);
680template<
typename Architecture_t>
684 if (fTestError < fMinimumError * 0.999) {
685 fConvergenceCount = 0;
686 fMinimumError = fTestError;
688 fConvergenceCount += fTestInterval;
690 return (fConvergenceCount >= fConvergenceSteps);
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
const_iterator begin() const
size_t fConvergenceCount
Current number of training epochs without considerable improvement in the test error.
bool HasConverged()
Increases the minimization step counter by the test error evaluation period and uses the current internal value of the test error to decide whether convergence has been reached.
Scalar_t fTrainingError
Holds the most recently computed training loss.
void Step(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Perform a single optimization step on a given batch.
size_t fConvergenceSteps
Number of training epochs without considerable improvement in the test error that are required before training is considered converged.
Scalar_t StepReducedWeightsLoss(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Similar to StepReducedWeights(...) but also evaluates the loss.
Scalar_t StepLoss(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Same as Step(...) but also evaluate the loss on the given training data.
void Reset()
Reset minimizer object to default state.
Scalar_t GetTrainingError() const
size_t fTestInterval
Interval for the computation of the test error.
void StepNesterov(Net_t &master, std::vector< Net_t > &nets, std::vector< TBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses Nesterov momentum.
void SetBatchSize(Scalar_t rate)
Scalar_t fTestError
Holds the most recently computed test loss.
typename Architecture_t::Matrix_t Matrix_t
size_t GetTestInterval() const
size_t fStepCount
Number of steps performed in the current training session.
void StepMomentum(Net_t &master, std::vector< Net_t > &nets, std::vector< TBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses momentum.
void SetConvergenceSteps(size_t steps)
Scalar_t TrainMomentum(const Data_t &TrainingDataIn, size_t nTrainingSamples, const Data_t &TestDataIn, size_t nTestSamples, Net_t &net, Scalar_t momentum, size_t nThreads=1)
Same as Train(...) but uses the given momentum.
size_t GetConvergenceCount() const
Scalar_t fMinimumError
The minimum loss achieved on the test set during the current training session; updated from the test error whenever it drops below 99.9% of the previous minimum.
void SetLearningRate(Scalar_t rate)
Scalar_t fLearningRate
Learning rate .
size_t fBatchSize
Batch size to use for the training.
size_t GetConvergenceSteps() const
Scalar_t Train(const Data_t &TrainingDataIn, size_t nTrainingSamples, const Data_t &TestDataIn, size_t nTestSamples, Net_t &net, size_t nThreads=1)
Train the given net using the given training input data (events), training output data (labels),...
void SetTestInterval(size_t interval)
void StepReducedWeights(Net_t &net, Matrix_t &input, const Matrix_t &output)
Does not evaluate the loss and therefore not trigger a possible synchronization with the device.
Scalar_t GetTestError() const
typename Architecture_t::Scalar_t Scalar_t
create variable transformations