11#ifndef TMVA_DNN_MINIMIZERS 
   12#define TMVA_DNN_MINIMIZERS 
   54template<
typename Architecture_t>
 
   58   using Scalar_t = 
typename Architecture_t::Scalar_t;
 
   59   using Matrix_t = 
typename Architecture_t::Matrix_t;
 
   77                    size_t   convergenceSteps,
 
   90   template <
typename Data_t, 
typename Net_t>
 
   96   template <
typename Data_t, 
typename Net_t>
 
  106   template <
typename Net_t>
 
  111   template <
typename Net_t>
 
  120   template <
typename Net_t>
 
  122             std::vector<Net_t> &
nets,
 
  126   template <
typename Net_t>
 
  128                     std::vector<Net_t> &
nets,
 
  131   template <
typename Net_t>
 
  136                     std::vector<Net_t> &
nets,
 
  143   template <
typename Net_t>
 
  148   template <
typename Net_t>
 
 
  174template <
typename Architecture_t>
 
  176   : fBatchSize(0), fStepCount(0), fConvergenceSteps(0), fConvergenceCount(0), fTestInterval(0),
 
  177     fTrainingError(0), fTestError(0), fLearningRate(0),
 
  178     fMinimumError(std::numeric_limits<
Scalar_t>::infinity())
 
 
  184template <
typename Architecture_t>
 
  186   : fBatchSize(0), fStepCount(0), fConvergenceSteps(convergenceSteps), fConvergenceCount(0),
 
  187     fTestInterval(testInterval), fTrainingError(0), fTestError(0),
 
  188     fLearningRate(learningRate), fMinimumError(std::numeric_limits<
Scalar_t>::infinity())
 
 
  194template<
typename Architecture_t>
 
  195template <
typename Data_t, 
typename Net_t>
 
  215                                                  net.GetOutputWidth());
 
  216   std::vector<Net_t> 
nets{};
 
  218   for (
size_t i = 0; i < 
nThreads; i++) {
 
  220       for (
size_t j = 0; 
j < 
net.GetDepth(); 
j++)
 
  224           Architecture_t::Copy(
layer.GetWeights(),
 
  226           Architecture_t::Copy(
layer.GetBiases(),
 
  232   std::vector<TBatch<Architecture_t>> 
batches{};
 
  236      for (fStepCount = 0; fStepCount < fTestInterval; fStepCount++) {
 
  246      auto inputMatrix = 
b.GetInput();
 
  247      auto outputMatrix = 
b.GetOutput();
 
  248      auto weightMatrix = 
b.GetWeights();
 
  249      fTestError = 
testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
 
  251   } 
while (!HasConverged());
 
  253   return fMinimumError;
 
 
  257template<
typename Architecture_t>
 
  258template <
typename Data_t, 
typename Net_t>
 
  279                                                  net.GetOutputWidth());
 
  281   net.InitializeGradients();
 
  282   std::vector<Net_t> 
nets{};
 
  284   for (
size_t i = 0; i < 
nThreads; i++) {
 
  286       for (
size_t j = 0; 
j < 
net.GetDepth(); 
j++)
 
  290           Architecture_t::Copy(
layer.GetWeights(),
 
  292           Architecture_t::Copy(
layer.GetBiases(),
 
  298   std::vector<TBatch<Architecture_t>> 
batches{};
 
  302      for (fStepCount = 0; fStepCount < fTestInterval; fStepCount++) {
 
  307            if (momentum != 0.0) {
 
  318         auto inputMatrix = 
b.GetInput();
 
  319         auto outputMatrix = 
b.GetOutput();
 
  320         auto weightMatrix = 
b.GetWeights();
 
  321         fTestError += 
testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
 
  324   } 
while (!HasConverged());
 
  325   return fMinimumError;
 
 
  329template <
typename Architecture_t>
 
  330template <
typename Net_t>
 
  337   for (
size_t i = 0; i < 
net.GetDepth(); i++)
 
  340      Architecture_t::ScaleAdd(
layer.GetWeights(),
 
  341                               layer.GetWeightGradients(),
 
  343      Architecture_t::ScaleAdd(
layer.GetBiases(),
 
  344                               layer.GetBiasGradients(),
 
 
  350template <
typename Architecture_t>
 
  351template <
typename Net_t>
 
  358   for (
size_t i = 0; i < 
net.GetDepth(); i++)
 
  361      Architecture_t::ScaleAdd(
layer.GetWeights(),
 
  362                               layer.GetWeightGradients(),
 
  364      Architecture_t::ScaleAdd(
layer.GetBiases(),
 
  365                               layer.GetBiasGradients(),
 
 
  372template<
typename Architecture_t>
 
  373    template <
typename Net_t>
 
  376        std::vector<Net_t> & 
nets,
 
  379   typename Architecture_t::Matrix_t dummy(0,0);
 
  383   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  387   for (
size_t i = 1; i < 
depth; i++)
 
  389      for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  394   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  400   for (
size_t i = 
depth - 1; i > 0; i--)
 
  402      for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  403         nets[
j].GetLayer(i).Backward(
nets[
j].GetLayer(i-1).GetActivationGradients(),
 
  405                                      nets[
j].GetRegularization(),
 
  406                                      nets[
j].GetWeightDecay());
 
  409   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  410      nets[
j].GetLayer(0).Backward(dummy,
 
  412                                   nets[
j].GetRegularization(),
 
  413                                   nets[
j].GetWeightDecay());
 
  416   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  417      for (
size_t i = 0; i < 
depth; i++)
 
  422                                  layer.GetWeightGradients(),
 
  424         Architecture_t::Copy(
layer.GetWeights(),
 
  427                                  layer.GetBiasGradients(),
 
  429         Architecture_t::Copy(
layer.GetBiases(),
 
 
  436template<
typename Architecture_t>
 
  437template <
typename Net_t>
 
  440        std::vector<Net_t> & 
nets,
 
  444   typename Architecture_t::Matrix_t dummy(0,0);
 
  448   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  452   for (
size_t i = 1; i < 
depth; i++)
 
  454      for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  459   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  465   for (
size_t i = 
depth - 1; i > 0; i--)
 
  467      for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  468         nets[
j].GetLayer(i).Backward(
nets[
j].GetLayer(i-1).GetActivationGradients(),
 
  470                                      nets[
j].GetRegularization(),
 
  471                                      nets[
j].GetWeightDecay());
 
  472         Architecture_t::ScaleAdd(
master.GetLayer(i).GetWeightGradients(),
 
  473                                  nets[
j].GetLayer(i).GetWeightGradients(),
 
  474                                  - fLearningRate / momentum);
 
  475         Architecture_t::ScaleAdd(
master.GetLayer(i).GetBiasGradients(),
 
  476                                  nets[
j].GetLayer(i).GetBiasGradients(),
 
  477                                  - fLearningRate / momentum);
 
  479      Architecture_t::ScaleAdd(
master.GetLayer(i).GetWeightGradients(),
 
  480                               master.GetLayer(i).GetWeightGradients(),
 
  482      Architecture_t::ScaleAdd(
master.GetLayer(i).GetBiasGradients(),
 
  483                               master.GetLayer(i).GetBiasGradients(),
 
  486   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  487      nets[
j].GetLayer(0).Backward(dummy,
 
  489                                   nets[
j].GetRegularization(),
 
  490                                   nets[
j].GetWeightDecay());
 
  491      Architecture_t::ScaleAdd(
master.GetLayer(0).GetWeightGradients(),
 
  492                               nets[
j].GetLayer(0).GetWeightGradients(),
 
  493                               - fLearningRate / momentum);
 
  494      Architecture_t::ScaleAdd(
master.GetLayer(0).GetBiasGradients(),
 
  495                               nets[
j].GetLayer(0).GetBiasGradients(),
 
  496                               - fLearningRate / momentum);
 
  499   Architecture_t::ScaleAdd(
master.GetLayer(0).GetWeightGradients(),
 
  500                            master.GetLayer(0).GetWeightGradients(),
 
  502   Architecture_t::ScaleAdd(
master.GetLayer(0).GetBiasGradients(),
 
  503                            master.GetLayer(0).GetBiasGradients(),
 
  506   for (
size_t i = 0; i < 
depth; i++)
 
  515       for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  517         Architecture_t::Copy(
layer.GetWeights(),
 
  519         Architecture_t::Copy(
layer.GetBiases(),
 
 
  526template<
typename Architecture_t>
 
  527template <
typename Net_t>
 
  530        std::vector<Net_t> & 
nets,
 
  534   typename Architecture_t::Matrix_t dummy(0,0);
 
  538   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  542   for (
size_t i = 1; i < 
depth; i++)
 
  544      for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  550   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  557   for (
size_t i = 
depth - 1; i > 0; i--)
 
  559      for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  560         nets[
j].GetLayer(i).Backward(
nets[
j].GetLayer(i-1).GetActivationGradients(),
 
  562                                      nets[
j].GetRegularization(),
 
  563                                      nets[
j].GetWeightDecay());
 
  567   for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  568      nets[
j].GetLayer(0).Backward(dummy,
 
  570                                   nets[
j].GetRegularization(),
 
  571                                   nets[
j].GetWeightDecay());
 
  574   for (
size_t i = 0; i < 
depth; i++)
 
  577      for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  579         Architecture_t::Copy(
layer.GetWeights(),
 
  581         Architecture_t::Copy(
layer.GetBiases(),
 
  583         Architecture_t::ScaleAdd(
layer.GetWeights(),
 
  586         Architecture_t::ScaleAdd(
layer.GetBiases(),
 
  590      for (
size_t j = 0; 
j < 
nets.size(); 
j++) {
 
  592         Architecture_t::ScaleAdd(
masterLayer.GetWeightGradients(),
 
  593                                  layer.GetWeightGradients(),
 
  594                                  - fLearningRate / momentum);
 
  595         Architecture_t::ScaleAdd(
masterLayer.GetBiasGradients(),
 
  596                                  layer.GetBiasGradients(),
 
  597                                  - fLearningRate / momentum);
 
  599      Architecture_t::ScaleAdd(
masterLayer.GetWeightGradients(),
 
  602      Architecture_t::ScaleAdd(
masterLayer.GetBiasGradients(),
 
 
  615template<
typename Architecture_t>
 
  616template <
typename Net_t>
 
  625   for (
size_t i = 0; i < 
net.GetDepth(); i++)
 
  628      Architecture_t::ScaleAdd(
layer.GetWeights(),
 
  629                               layer.GetWeightGradients(),
 
  632         Architecture_t::ScaleAdd(
layer.GetBiases(),
 
  633                                  layer.GetBiasGradients(),
 
 
  640template <
typename Architecture_t>
 
  641template <
typename Net_t>
 
  647   fTrainingError = 
loss;
 
  650   for (
size_t i = 0; i < 
net.GetDepth(); i++)
 
  653      Architecture_t::ScaleAdd(
layer.GetWeights(),
 
  654                               layer.GetWeightGradients(),
 
  657         Architecture_t::ScaleAdd(
layer.GetBiases(),
 
  658                                  layer.GetBiasGradients(),
 
 
  666template<
typename Architecture_t>
 
  669   if (fTestError < fMinimumError * 0.999) {
 
  670      fConvergenceCount = 0;
 
  671      fMinimumError     = fTestError;
 
  676   return (fConvergenceCount >= fConvergenceSteps);
 
 
  680template<
typename Architecture_t>
 
  684   if (fTestError < fMinimumError * 0.999) {
 
  685      fConvergenceCount = 0;
 
  686      fMinimumError     = fTestError;
 
  688      fConvergenceCount += fTestInterval;
 
  690   return (fConvergenceCount >= fConvergenceSteps);
 
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
 
TObject * GetOutput(const char *name)
 
static void Reset(const char *url, Bool_t hard=kFALSE)
 
const_iterator begin() const
 
size_t fConvergenceCount
Current number of training epochs without considerable decrease in the test error.
 
bool HasConverged()
Increases the minimization step counter by the test error evaluation period and uses the current internal value of the test error to determine whether the minimization has converged.
 
Scalar_t fTrainingError
Holds the most recently computed training loss.
 
void Step(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Perform a single optimization step on a given batch.
 
size_t fConvergenceSteps
Number of training epochs without considerable decrease in the test error required for convergence.
 
Scalar_t StepReducedWeightsLoss(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Similar to StepReducedWeights(...) but also evaluates the loss.
 
Scalar_t StepLoss(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Same as Step(...) but also evaluate the loss on the given training data.
 
void Reset()
Reset minimizer object to default state.
 
Scalar_t GetTrainingError() const
 
size_t fTestInterval
Interval for the computation of the test error.
 
void StepNesterov(Net_t &master, std::vector< Net_t > &nets, std::vector< TBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses Nesterov momentum.
 
void SetBatchSize(Scalar_t rate)
 
Scalar_t fTestError
Holds the most recently computed test loss.
 
typename Architecture_t::Matrix_t Matrix_t
 
size_t GetTestInterval() const
 
size_t fStepCount
Number of steps performed in the current training session.
 
void StepMomentum(Net_t &master, std::vector< Net_t > &nets, std::vector< TBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses momentum.
 
void SetConvergenceSteps(size_t steps)
 
Scalar_t TrainMomentum(const Data_t &TrainingDataIn, size_t nTrainingSamples, const Data_t &TestDataIn, size_t nTestSamples, Net_t &net, Scalar_t momentum, size_t nThreads=1)
Same as Train(...) but uses the given momentum.
 
size_t GetConvergenceCount() const
 
Scalar_t fMinimumError
The minimum test loss achieved during the current training session (updated whenever the freshly computed test error improves on it).
 
void SetLearningRate(Scalar_t rate)
 
Scalar_t fLearningRate
Learning rate used for the gradient-descent weight updates.
 
size_t fBatchSize
Batch size to use for the training.
 
size_t GetConvergenceSteps() const
 
Scalar_t Train(const Data_t &TrainingDataIn, size_t nTrainingSamples, const Data_t &TestDataIn, size_t nTestSamples, Net_t &net, size_t nThreads=1)
Train the given net using the given training input data (events) and training output data (labels), periodically evaluating the test error on the provided test data until convergence.
 
void SetTestInterval(size_t interval)
 
void StepReducedWeights(Net_t &net, Matrix_t &input, const Matrix_t &output)
Does not evaluate the loss and therefore does not trigger a possible synchronization with the device.
 
Scalar_t GetTestError() const
 
typename Architecture_t::Scalar_t Scalar_t
 
create variable transformations