doc/v616/NeuralNet_8icc_source.html

#ifndef TMVA_NEURAL_NET_I

#define TMVA_NEURAL_NET_I


#ifndef TMVA_NEURAL_NET

#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."

#endif // TMVA_NEURAL_NET

#pragma once

#pragma GCC diagnostic ignored "-Wunused-variable"


#include "Math/Util.h"


#include "TMVA/Pattern.h"

#include "TMVA/MethodBase.h"


#include <tuple>

#include <future>

#include <random>


namespace TMVA

{

    namespace DNN

    {


        /*! \brief draw a pseudo-random value between "from" and "to"
         *
         * The value is derived from the C library generator rand (), so the
         * produced sequence is governed by the seed set through srand ().
         *
         * \param from lower edge of the interval
         * \param to upper edge of the interval
         * \return from + rand () * (to - from) / RAND_MAX
         */
        template <typename T>
            T uniformFromTo (T from, T to)
        {
            const auto width = to - from;
            return from + (rand () * width / RAND_MAX);
        }


        /*! \brief overwrite every element of a container with a random value
         *
         * Each element is assigned an independent draw of
         * uniformFromTo (-1.0*maxValue, 1.0*maxValue).
         *
         * \param container the container whose elements are overwritten
         * \param maxValue magnitude of the interval edges
         */
        template <typename Container, typename T>
            void uniformDouble (Container& container, T maxValue)
        {
            const auto lowerEdge = -1.0*maxValue;
            const auto upperEdge = 1.0*maxValue;
            for (auto&& element : container)
                element = TMVA::DNN::uniformFromTo (lowerEdge, upperEdge);
        }


        // Shared handles to scalar functions (double -> double). They are only
        // declared here; the definitions live in another translation unit.
        // The functions in this file invoke such handles as (*fnc.get ()) (value).
        // NOTE(review): each "InvX" presumably is the counterpart of "X" that is
        // used to obtain the gradient (see the applyFunctions overload taking
        // invFnc) -- confirm against the definitions.

        // function handle named "ZeroFnc" (has no "Inv" partner declared here)
        extern std::shared_ptr<std::function<double(double)>> ZeroFnc;


        // Sigmoid pair
        extern std::shared_ptr<std::function<double(double)>> Sigmoid;

        extern std::shared_ptr<std::function<double(double)>> InvSigmoid;


        // Tanh pair
        extern std::shared_ptr<std::function<double(double)>> Tanh;

        extern std::shared_ptr<std::function<double(double)>> InvTanh;


        // Linear pair
        extern std::shared_ptr<std::function<double(double)>> Linear;

        extern std::shared_ptr<std::function<double(double)>> InvLinear;


        // SymmReLU pair
        extern std::shared_ptr<std::function<double(double)>> SymmReLU;

        extern std::shared_ptr<std::function<double(double)>> InvSymmReLU;


        // ReLU pair
        extern std::shared_ptr<std::function<double(double)>> ReLU;

        extern std::shared_ptr<std::function<double(double)>> InvReLU;


        // SoftPlus pair
        extern std::shared_ptr<std::function<double(double)>> SoftPlus;

        extern std::shared_ptr<std::function<double(double)>> InvSoftPlus;


        // TanhShift pair
        extern std::shared_ptr<std::function<double(double)>> TanhShift;

        extern std::shared_ptr<std::function<double(double)>> InvTanhShift;


        // SoftSign pair
        extern std::shared_ptr<std::function<double(double)>> SoftSign;

        extern std::shared_ptr<std::function<double(double)>> InvSoftSign;


        // Gauss pair
        extern std::shared_ptr<std::function<double(double)>> Gauss;

        extern std::shared_ptr<std::function<double(double)>> InvGauss;


        // GaussComplement pair
        extern std::shared_ptr<std::function<double(double)>> GaussComplement;

        extern std::shared_ptr<std::function<double(double)>> InvGaussComplement;


/*! \brief add the weighted source values to the target nodes, optionally honouring drop-out
 *
 * The weight iterator advances once per (source, target) pair with the target
 * index running fastest, i.e. all weights of the first source node come first.
 * The products are accumulated onto the targets; the targets are not reset here.
 *
 * \tparam HasDropOut when false, itDrop is neither dereferenced nor advanced
 * \param itDrop one flag per source node (correlates with itSourceBegin); a flag
 *               evaluating to false skips the contribution of that source node.
 *               For no drop out, provide (&bool = true) such that *itDrop becomes "true"
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
            void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                               ItWeight itWeight,
                               ItTarget itTargetBegin, ItTarget itTargetEnd,
                               ItDrop itDrop)
        {
            ItSource itSource = itSourceBegin;
            while (itSource != itSourceEnd)
            {
                // the weight iterator moves on even for dropped source nodes
                for (ItTarget itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget, ++itWeight)
                {
                    const bool isActive = !HasDropOut || *itDrop;
                    if (isActive)
                        (*itTarget) += (*itSource) * (*itWeight);
                }

                if (HasDropOut)
                    ++itDrop;
                ++itSource;
            }
        }


/*! \brief propagate values backwards through the weights (for backprop), optionally honouring drop-out
 *
 * For every node of the previous layer the weighted values of all current-layer
 * nodes are accumulated onto it. The weight iterator advances once per
 * (prev, curr) pair with the current-layer index running fastest.
 *
 * \tparam HasDropOut when false, itDrop is neither dereferenced nor advanced
 * \param itDrop one flag per previous-layer node (correlates with itPrev, in agreement
 *               with "applyWeights" where it correlates with the sources); a flag
 *               evaluating to false leaves that node untouched.
 *               For no drop out, provide (&bool = true) such that *itDrop becomes "true"
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
            void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                                        ItWeight itWeight,
                                        ItPrev itPrevBegin, ItPrev itPrevEnd,
                                        ItDrop itDrop)
        {
            ItPrev itPrev = itPrevBegin;
            while (itPrev != itPrevEnd)
            {
                // the weight iterator moves on even for dropped nodes
                for (ItSource itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr, ++itWeight)
                {
                    const bool isActive = !HasDropOut || *itDrop;
                    if (isActive)
                        (*itPrev) += (*itCurr) * (*itWeight);
                }

                if (HasDropOut)
                    ++itDrop;
                ++itPrev;
            }
        }


/*! \brief apply the activation function to every value of a range (in place)
 *
 * \param itValue begin of the values; each element is replaced by the function result
 * \param itValueEnd end of the values
 * \param fnc handle to the function; it is invoked as (*fnc.get ()) (value)
 */
        template <typename ItValue, typename Fnc>
            void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
        {
            for (; itValue != itValueEnd; ++itValue)
            {
                auto& node = (*itValue);
                node = (*fnc.get ()) (node);
            }
        }


/*! \brief apply the activation function to every value (in place) and compute the gradient
 *
 * The gradient function is evaluated on the already activated value, i.e.
 * on the result of fnc, not on the original input.
 *
 * \param itValue begin of the values; each element is replaced by the function result
 * \param itValueEnd end of the values
 * \param fnc handle to the activation function, invoked as (*fnc.get ()) (value)
 * \param invFnc handle to the gradient function, invoked as (*invFnc.get ()) (value)
 * \param itGradient begin of the output range receiving one gradient per value
 */
        template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
            void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
        {
            for (; itValue != itValueEnd; ++itValue, ++itGradient)
            {
                auto& node = (*itValue);
                node = (*fnc.get ()) (node);
                // gradient is taken at the activated value
                (*itGradient) = (*invFnc.get ()) (node);
            }
        }


/*! \brief update the gradients (no regularization)
 *
 * For every (source, target) pair the product
 * targetDelta * source * targetGradient is subtracted from the corresponding
 * gradient element. The gradient iterator advances once per pair with the
 * target index running fastest.
 *
 * \param itSource begin of the source node values
 * \param itSourceEnd end of the source node values
 * \param itTargetDeltaBegin begin of the target deltas
 * \param itTargetDeltaEnd end of the target deltas
 * \param itTargetGradientBegin begin of the target value-gradients (parallel to the deltas)
 * \param itGradient begin of the gradients which are modified
 */
        template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
            void update (ItSource itSource, ItSource itSourceEnd,
                         ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
                         ItTargetGradient itTargetGradientBegin,
                         ItGradient itGradient)
        {
            for (; itSource != itSourceEnd; ++itSource)
            {
                auto itValueGradient = itTargetGradientBegin;
                for (auto itDelta = itTargetDeltaBegin; itDelta != itTargetDeltaEnd;
                     ++itDelta, ++itValueGradient, ++itGradient)
                {
                    (*itGradient) -= (*itDelta) * (*itSource) * (*itValueGradient);
                }
            }
        }


/*! \brief compute the regularization term for a single weight (generic case)
 *
 * The primary template is the fallback for every regularization kind without
 * a dedicated specialization: both arguments are ignored and 0 is returned.
 *
 * \param weight the weight (unused here)
 * \param factorWeightDecay the weight decay factor (unused here)
 * \return always 0
 */
        template <EnumRegularization Regularization>
            inline double computeRegularization (double weight, const double& factorWeightDecay)
        {
            // silence "unused parameter" diagnostics
            MATH_UNUSED(factorWeightDecay);
            MATH_UNUSED(weight);

            return 0.0;
        }


// L1 regularization: factorWeightDecay carrying the sign of the weight; 0 for a zero weight
        template <>
            inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
        {
            if (weight == 0.0)
                return 0.0;

            return std::copysign (factorWeightDecay, weight);
        }


// L2 regularization: the weight scaled by factorWeightDecay
        template <>
            inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
        {
            const double scaledWeight = factorWeightDecay * weight;
            return scaledWeight;
        }


/*! \brief update the gradients, using regularization
 *
 * For every (source, target) pair the sum of
 * targetDelta * source * targetGradient and the regularization term of the
 * corresponding weight is subtracted from the gradient element. Gradient and
 * weight iterators advance once per pair with the target index running fastest.
 *
 * \tparam Regularization selects the computeRegularization variant
 * \param itSource begin of the source node values
 * \param itSourceEnd end of the source node values
 * \param itTargetDeltaBegin begin of the target deltas
 * \param itTargetDeltaEnd end of the target deltas
 * \param itTargetGradientBegin begin of the target value-gradients (parallel to the deltas)
 * \param itGradient begin of the gradients which are modified
 * \param itWeight begin of the weights (parallel to the gradients)
 * \param weightDecay decay factor; has to be already scaled by 1/n where n is the number of weights
 */
        template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
            void update (ItSource itSource, ItSource itSourceEnd,
                         ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
                         ItTargetGradient itTargetGradientBegin,
                         ItGradient itGradient,
                         ItWeight itWeight, double weightDecay)
        {
            for (; itSource != itSourceEnd; ++itSource)
            {
                auto itValueGradient = itTargetGradientBegin;
                for (auto itDelta = itTargetDeltaBegin; itDelta != itTargetDeltaEnd;
                     ++itDelta, ++itValueGradient, ++itGradient, ++itWeight)
                {
                    (*itGradient) -= (*itDelta) * (*itSource) * (*itValueGradient)
                                     + computeRegularization<Regularization> (*itWeight, weightDecay);
                }
            }
        }


#define USELOCALWEIGHTS 1


/*! \brief implementation of the steepest gradient descent algorithm
 *
 * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle
 *
 * Performs up to m_repetitions iterations. In each iteration
 *  - the previous gradients are scaled by m_beta and added to the local copy
 *    of the weights (nesterov momentum),
 *  - the fitness function is evaluated on the local weights, filling the local gradients,
 *  - the gradients are scaled by a learning rate obtained from
 *    gaussDouble (m_alpha, m_alpha/2.0) and the momentum term is added,
 *  - if the largest absolute step exceeds 1, m_alpha is halved, the weights are
 *    divided by that maximum and the momentum is reset; otherwise the steps are
 *    added to the weights.
 *
 * \param fitnessFunction called as fitnessFunction (passThrough, weights, gradients); returns the error
 * \param weights the weights to be optimized (modified in place)
 * \param passThrough forwarded unchanged to the fitness function
 * \return the error of the last evaluation (1e10 if no repetition has been executed)
 */
        template <typename Function, typename Weights, typename PassThrough>
            double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
        {
            size_t numWeights = weights.size ();
            m_localGradients.assign (numWeights, 0.0);
            m_localWeights.assign (begin (weights), end (weights));

            double E = 1e10;
            if (m_prevGradients.size () != numWeights)
            {
                m_prevGradients.clear ();
                m_prevGradients.assign (weights.size (), 0);
            }

            for (size_t currentRepetition = 0; currentRepetition < m_repetitions; ++currentRepetition)
            {
                m_localGradients.assign (numWeights, 0.0);

                // --- nesterov momentum ---
                // apply momentum before computing the new gradient
                auto itPrevG = begin (m_prevGradients);
                auto itPrevGEnd = end (m_prevGradients);
                auto itLocWeight = begin (m_localWeights);
                for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
                {
                    (*itPrevG) *= m_beta;
                    (*itLocWeight) += (*itPrevG);
                }

                E = fitnessFunction (passThrough, m_localWeights, m_localGradients);

                double alpha = gaussDouble (m_alpha, m_alpha/2.0);

                auto itG = begin (m_localGradients);
                auto itGEnd = end (m_localGradients);
                itPrevG = begin (m_prevGradients);
                double maxGrad = 0.0;
                for (; itG != itGEnd; ++itG, ++itPrevG)
                {
                    double currGrad = (*itG);
                    double prevGrad = (*itPrevG);
                    currGrad *= alpha;

                    currGrad += prevGrad;
                    (*itG) = currGrad;
                    (*itPrevG) = currGrad;

                    // track the largest magnitude; storing the signed value would let a
                    // negative step corrupt the maximum and disable the guard below
                    const double absGrad = std::fabs (currGrad);
                    if (absGrad > maxGrad)
                        maxGrad = absGrad;
                }

                if (maxGrad > 1)
                {
                    m_alpha /= 2;
                    std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
                    std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
                                   {
                                       w /= maxGrad;
                                   });
                    // reset the momentum but keep the size: the next repetition indexes
                    // m_prevGradients in parallel to the gradients, an empty vector
                    // would be accessed out of bounds
                    m_prevGradients.assign (numWeights, 0.0);
                }
                else
                {
                    auto itW = std::begin (weights);
                    std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
                                   {
                                       *itW += g;
                                       ++itW;
                                   });
                }
            }
            return E;
        }


/*! \brief sum of squares error function
 *
 * Walks outputs and truths in parallel. If the delta range is not empty, one
 * delta per output is written: invFnc (output) * (output - truth) * patternWeight.
 *
 * \param itOutputBegin begin of the outputs
 * \param itOutputEnd end of the outputs
 * \param itTruthBegin begin of the truth values (the end iterator is not used)
 * \param itDelta begin of the deltas to be written
 * \param itDeltaEnd end of the deltas; itDelta == itDeltaEnd disables writing deltas
 * \param invFnc handle to the gradient function, invoked as (*invFnc.get ()) (output)
 * \param patternWeight weight of the pattern
 * \return 0.5 * sum ((output - truth)^2 * patternWeight)
 */
        template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
            double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
        {
            const bool writeDeltas = (itDelta != itDeltaEnd);
            double weightedSquares = 0.0;

            ItTruth itTruth = itTruthBegin;
            ItOutput itOutput = itOutputBegin;
            while (itOutput != itOutputEnd)
            {
                const double output = (*itOutput);
                // output - truth
                const double error = output - (*itTruth);
                if (writeDeltas)
                {
                    (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
                    ++itDelta;
                }
                weightedSquares += error*error  * patternWeight;

                ++itOutput;
                ++itTruth;
            }

            return 0.5*weightedSquares;
        }


/*! \brief cross entropy error function
 *
 * The truth is always read from the first truth element (the truth iterator is
 * never advanced) and mapped to 0.1 (if below 0.5) or 0.9 (otherwise).
 * If the delta range is not empty, one delta per probability is written:
 * (probability - truth) * patternWeight.
 * A probability of exactly 0 or 1 is handled separately to avoid log (0):
 * it contributes an error of 1 if it contradicts the truth, 0 otherwise.
 *
 * \param itProbabilityBegin begin of the probabilities
 * \param itProbabilityEnd end of the probabilities
 * \param itTruthBegin iterator to the truth value (the end iterator is not used)
 * \param itDelta begin of the deltas to be written
 * \param itDeltaEnd end of the deltas; itDelta == itDeltaEnd disables writing deltas
 * \param patternWeight weight of the pattern
 * \return sum of the per-node errors, each multiplied by patternWeight
 */
        template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
            double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
        {
            const bool writeDeltas = (itDelta != itDeltaEnd);
            double errorSum = 0.0;

            ItProbability itProbability = itProbabilityBegin;
            while (itProbability != itProbabilityEnd)
            {
                const double probability = *itProbability;
                const double rawTruth = *itTruthBegin;
                const double truth = rawTruth < 0.5 ? 0.1 : 0.9;

                if (writeDeltas)
                {
                    (*itDelta) = (probability - truth)*patternWeight;
                    ++itDelta;
                }

                double error = 0.0;
                if (probability == 0) // protection against log (0)
                {
                    if (truth >= 0.5)
                        error = 1.0;
                }
                else if (probability == 1) // protection against log (0)
                {
                    if (truth < 0.5)
                        error = 1.0;
                }
                else
                {
                    // cross entropy function
                    error = - (truth * log (probability) + (1.0-truth) * log (1.0-probability));
                }
                errorSum += error * patternWeight;

                ++itProbability;
            }
            return errorSum;
        }


/*! \brief soft-max-cross-entropy error function (for mutual exclusive cross-entropy)
 *
 * Walks probabilities and truths in parallel. If the delta range is not empty,
 * one delta per probability is written: probability - truth (the pattern
 * weight is not applied to the deltas).
 *
 * \param itProbabilityBegin begin of the probabilities
 * \param itProbabilityEnd end of the probabilities
 * \param itTruthBegin begin of the truth values (the end iterator is not used)
 * \param itDelta begin of the deltas to be written
 * \param itDeltaEnd end of the deltas; itDelta == itDeltaEnd disables writing deltas
 * \param patternWeight weight of the pattern
 * \return -sum (truth * log (probability)) * patternWeight
 */
        template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
            double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
        {
            const bool writeDeltas = (itDelta != itDeltaEnd);
            double logLikelihood = 0.0;

            ItTruth itTruth = itTruthBegin;
            auto itProbability = itProbabilityBegin;
            while (itProbability != itProbabilityEnd)
            {
                const double probability = (*itProbability);
                const double truth = (*itTruth);
                if (writeDeltas)
                {
                    // output - truth
                    (*itDelta) = probability - truth;
                    ++itDelta;
                }
                logLikelihood += truth * log (probability);

                ++itProbability;
                ++itTruth;
            }

            return -logLikelihood * patternWeight;
        }


/*! \brief compute the weight decay for regularization (L1 or L2)
 *
 * Adds the penalty 0.5 * s * factorWeightDecay / n to the error, where n is the
 * number of weights and s is sum (|w|) for L1 or sum (w^2) for L2.
 * For any other regularization kind, or for an empty weight range, the error is
 * returned unchanged (an empty range would otherwise produce 0/0 = NaN).
 *
 * \param error the error without regularization
 * \param itWeight begin of the weights
 * \param itWeightEnd end of the weights
 * \param factorWeightDecay strength of the regularization
 * \param eRegularization the kind of regularization
 * \return the error including the regularization penalty
 */
        template <typename ItWeight>
            double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
        {
            const bool isL1 = (eRegularization == EnumRegularization::L1);
            const bool isL2 = (eRegularization == EnumRegularization::L2);
            if (!isL1 && !isL2)
                return error;

            // weight decay (regularization)
            double w = 0;
            size_t n = 0;
            for (; itWeight != itWeightEnd; ++itWeight, ++n)
            {
                const double weight = (*itWeight);
                w += isL1 ? std::fabs (weight) : weight*weight;
            }

            // no weights: nothing to penalize, and dividing by n == 0 would yield NaN
            if (n == 0)
                return error;

            return error + 0.5 * w * factorWeightDecay / n;
        }


/*! \brief apply the weights in forward direction of the DNN
 *
 * Accumulates the weighted values of the previous layer onto the values of the
 * current layer using the weights of the current layer. If the previous layer
 * reports drop-out, its drop-out flags are honoured; otherwise all of its
 * nodes contribute.
 *
 * \param prevLayerData the layer providing the source values (and the drop-out flags)
 * \param currLayerData the layer providing the weights and receiving the values
 */
        template <typename LAYERDATA>
            void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
        {
            if (!prevLayerData.hasDropOut ())
            {
                // a single "true" flag which is never advanced turns on all nodes (no drop out)
                bool allNodesOn = true;
                applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                     currLayerData.weightsBegin (),
                                     currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                                     &allNodesOn);
                return;
            }

            applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                currLayerData.weightsBegin (),
                                currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                                prevLayerData.dropOut ());
        }


/*! \brief backward application of the weights (back-propagation of the error)
 *
 * Accumulates the weighted deltas of the current layer onto the deltas of the
 * previous layer using the weights of the current layer. If the previous layer
 * reports drop-out, its drop-out flags are honoured; otherwise all of its
 * nodes receive a contribution.
 *
 * \param prevLayerData the layer whose deltas are updated (and which provides the drop-out flags)
 * \param currLayerData the layer providing the deltas and the weights
 */
template <typename LAYERDATA>
    void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (!prevLayerData.hasDropOut ())
    {
        // a single "true" flag which is never advanced uses all nodes (no drop out)
        bool allNodesOn = true;
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      &allNodesOn);
        return;
    }

    applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                 currLayerData.weightsBegin (),
                                 prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                 prevLayerData.dropOut ());
}


/*! \brief update the gradients of a layer, dispatching on the regularization
 *
 * Selects the iterator-based update: the L1 or L2 variant if the weight decay
 * factor is non-zero and the corresponding regularization is requested, the
 * plain variant (no regularization) in every other case.
 *
 * \param prevLayerData the layer providing the source values
 * \param currLayerData the layer providing deltas, value-gradients, gradients and weights
 * \param factorWeightDecay decay factor; has to be already scaled by 1/n where n is the number of weights
 * \param regularization the kind of regularization
 */
        template <typename LAYERDATA>
            void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
        {
            const bool hasWeightDecay = (factorWeightDecay != 0.0);

            if (hasWeightDecay && regularization == EnumRegularization::L1)
            {
                // L1 regularization ( sum(|w|) )
                update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                                currLayerData.weightsBegin (), factorWeightDecay);
            }
            else if (hasWeightDecay && regularization == EnumRegularization::L2)
            {
                // L2 regularization ( sum(w^2) )
                update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                                currLayerData.weightsBegin (), factorWeightDecay);
            }
            else
            {
                // no weight regularization
                update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                        currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                        currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
            }
        }


/*! \brief compute the drop-out-weight factor and apply it to the weights
 *
 * when using drop-out a fraction of the nodes is turned off at each cycle of the computation
 * once all nodes are turned on again (for instances when the test samples are evaluated),
 * the weights have to be adjusted to account for the different number of active nodes
 *
 * The first drop fraction belongs to the input, each following one to the next
 * layer. The weights of a layer are multiplied by
 * (1 - dropFraction of the layer) * (1 - dropFraction of the preceding layer),
 * or by the reciprocal of that product if "inverse" is set. Processing stops
 * as soon as the drop fractions or the weights are exhausted.
 *
 * \param weights the weights which are scaled in place
 * \param drops the drop fractions
 * \param inverse if true, the reciprocal factor is applied
 */
        template <typename WeightsType, typename DropProbabilities>
            void Net::dropOutWeightFactor (WeightsType& weights,
                                           const DropProbabilities& drops,
                                           bool inverse)
        {
            if (drops.empty () || weights.empty ())
                return;

            auto itWeight = std::begin (weights);
            const auto itWeightEnd = std::end (weights);
            auto itDrop = std::begin (drops);
            const auto itDropEnd = std::end (drops);

            // the first entry is the drop fraction of the input
            size_t numNodesPrev = inputSize ();
            double dropFractionPrev = *itDrop;
            ++itDrop;

            for (auto& layer : layers ())
            {
                if (itDrop == itDropEnd)
                    break;

                const size_t numNodesCurr = layer.numNodes ();
                const double dropFraction = *itDrop;

                double factor = (1.0 - dropFraction) * (1.0 - dropFractionPrev);
                if (inverse)
                    factor = 1.0/factor;

                // scale the weights of this layer, but never run past the end of the weights
                size_t remaining = layer.numWeights (numNodesPrev);
                while (remaining > 0 && itWeight != itWeightEnd)
                {
                    *itWeight *= factor;
                    ++itWeight;
                    --remaining;
                }

                numNodesPrev = numNodesCurr;
                dropFractionPrev = dropFraction;
                ++itDrop;
            }
        }


/*! \brief execute the training until convergence emerges

 *

 * \param weights the container with the weights (synapses)

 * \param trainPattern the pattern for the training

 * \param testPattern the pattern for the testing

 * \param minimizer the minimizer (e.g. steepest gradient descent) to be used

 * \param settings the settings for the training (e.g. multithreading or not, regularization etc.)

 */

        template <typename Minimizer>

            double Net::train (std::vector<double>& weights,

                               std::vector<Pattern>& trainPattern,

                               const std::vector<Pattern>& testPattern,

                           Minimizer& minimizer,

                           Settings& settings)

        {

//        std::cout << "START TRAINING" << std::endl;

            settings.startTrainCycle ();


            // JsMVA progress bar maximum (100%)

            if (fIPyMaxIter) *fIPyMaxIter = 100;


            settings.pads (4);

            settings.create ("trainErrors", 100, 0, 100, 100, 0,1);

            settings.create ("testErrors", 100, 0, 100, 100, 0,1);


            size_t cycleCount = 0;

            size_t testCycleCount = 0;

            double testError = 1e20;

            double trainError = 1e20;

            size_t dropOutChangeCount = 0;


            DropContainer dropContainer;

            DropContainer dropContainerTest;

            const std::vector<double>& dropFractions = settings.dropFractions ();

            bool isWeightsForDrop = false;


            // until convergence

            do

            {

                ++cycleCount;


                // if dropOut enabled

                size_t dropIndex = 0;

                if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)

                {

                    // fill the dropOut-container

                    dropContainer.clear ();

                    size_t _numNodes = inputSize ();

                    double dropFraction = 0.0;

                    dropFraction = dropFractions.at (dropIndex);

                    ++dropIndex;

                    fillDropContainer (dropContainer, dropFraction, _numNodes);

                    for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)

                    {

                        auto& layer = *itLayer;

                        _numNodes = layer.numNodes ();

                        // how many nodes have to be dropped

                        dropFraction = 0.0;

                        if (dropFractions.size () > dropIndex)

                            dropFraction = dropFractions.at (dropIndex);


                        fillDropContainer (dropContainer, dropFraction, _numNodes);

                    }

                    isWeightsForDrop = true;

                }


                // execute training cycle

                trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);


       // ------ check if we have to execute a test ------------------

                bool hasConverged = false;

            if (testCycleCount % settings.testRepetitions () == 0) // we test only everye "testRepetitions" repetition

                {

                    if (isWeightsForDrop)

                    {

                        dropOutWeightFactor (weights, dropFractions);

                        isWeightsForDrop = false;

                    }


                    testError = 0;

                    //double weightSum = 0;

                    settings.startTestCycle ();

                    if (settings.useMultithreading ())

                    {

                        size_t numThreads = std::thread::hardware_concurrency ();

                        size_t patternPerThread = testPattern.size () / numThreads;

                        std::vector<Batch> batches;

                        auto itPat = testPattern.begin ();

                        // auto itPatEnd = testPattern.end ();

                        for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)

                        {

                            batches.push_back (Batch (itPat, itPat + patternPerThread));

                            itPat += patternPerThread;

                        }

                        if (itPat != testPattern.end ())

                            batches.push_back (Batch (itPat, testPattern.end ()));


                        std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;

                        for (auto& batch : batches)

                        {

                            // -------------------- execute each of the batch ranges on a different thread -------------------------------

                            futures.push_back (

                                std::async (std::launch::async, [&]()

                                            {

                                                std::vector<double> localOutput;

                                                pass_through_type passThrough (settings, batch, dropContainerTest);

                                                double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);

                                                return std::make_tuple (testBatchError, localOutput);

                                            })

                                );

                        }


                        auto itBatch = batches.begin  ();

                        for (auto& f : futures)

                        {

                            std::tuple<double,std::vector<double>> result = f.get ();

                            testError += std::get<0>(result) / batches.size ();

                            std::vector<double> output = std::get<1>(result);

                            if (output.size() == (outputSize() - 1) * itBatch->size())

                            {

                                auto output_iterator = output.begin();

                                for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)

                                {

                                    for (size_t output_index = 1; output_index < outputSize(); ++output_index)

                                    {

                                        settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),

                                                                                  (*pattern_it).weight ());

                                        ++output_iterator;

                                    }

                                }

                            }

                        ++itBatch;

                        }


                    }

                    else

                    {

                        std::vector<double> output;

                    //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it)

                        {

                        //const Pattern& p = (*it);

                        //double weight = p.weight ();

                        //Batch batch (it, it+1);

                        Batch batch (begin (testPattern), end (testPattern));

                            output.clear ();

                        pass_through_type passThrough (settings, batch, dropContainerTest);

                            double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);

                        if (output.size() == (outputSize() - 1) * batch.size())

                        {

                            auto output_iterator = output.begin();

                            for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)

                            {

                                for (size_t output_index = 1; output_index < outputSize(); ++output_index)

                                {

                                    settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),

                                                                              (*pattern_it).weight ());

                                    ++output_iterator;

                                }

                            }

                        }

                        testError += testPatternError; /// batch.size ();

                        }

                    // testError /= testPattern.size ();

                    }

                    settings.endTestCycle ();

//                    testError /= weightSum;


                    settings.computeResult (*this, weights);


                    hasConverged = settings.hasConverged (testError);

                    if (!hasConverged && !isWeightsForDrop)

                    {

                        dropOutWeightFactor (weights, dropFractions, true); // inverse

                        isWeightsForDrop = true;

                    }

                }

                ++testCycleCount;

                ++dropOutChangeCount;


                static double x = -1.0;

                x += 1.0;

//            settings.resetPlot ("errors");

                settings.addPoint ("trainErrors", cycleCount, trainError);

                settings.addPoint ("testErrors", cycleCount, testError);

                settings.plot ("trainErrors", "C", 1, kBlue);

                settings.plot ("testErrors", "C", 1, kMagenta);


                // setup error plots and progress bar variables for JsMVA

                if (fInteractive){

                  fInteractive->AddPoint(cycleCount, trainError, testError);

                  if (*fExitFromTraining) break;

                  *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();

                }


                if (hasConverged)

                    break;


                if ((int)cycleCount % 10 == 0) {


                   TString convText = Form( "(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",

                                            trainError,

                                            testError,

                                            (int)cycleCount,

                                            (int)settings.convergenceCount (),

                                            (int)settings.maxConvergenceCount ());

                   double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();

                   settings.cycle (progress, convText);

                }

            }

            while (true);

            settings.endTrainCycle (trainError);


            TString convText = Form( "(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);

            double progress = 100*(double)settings.maxConvergenceCount() /(double)settings.convergenceSteps ();

            settings.cycle (progress, convText);


            return testError;

        }


/*! \brief execute a single training cycle

 *

 * uses multithreading if turned on

 *

 * \param minimizer the minimizer to be used (e.g. SGD)

 * \param weights the weight container with all the synapse weights

 * \param itPatternBegin begin of the pattern container

 * \parama itPatternEnd the end of the pattern container

 * \param settings the settings for this training (e.g. multithreading or not, regularization, etc.)

 * \param dropContainer the data for dropping-out nodes (regularization technique)

 */

        template <typename Iterator, typename Minimizer>

            inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,

                                           Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)

        {

            double error = 0.0;

            size_t numPattern = std::distance (itPatternBegin, itPatternEnd);

            size_t numBatches = numPattern/settings.batchSize ();

            size_t numBatches_stored = numBatches;


            std::shuffle(itPatternBegin, itPatternEnd, std::default_random_engine{});

            Iterator itPatternBatchBegin = itPatternBegin;

            Iterator itPatternBatchEnd = itPatternBatchBegin;


            // create batches

            std::vector<Batch> batches;

            while (numBatches > 0)

            {

                std::advance (itPatternBatchEnd, settings.batchSize ());

                batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));

                itPatternBatchBegin = itPatternBatchEnd;

                --numBatches;

            }


            // add the last pattern to the last batch

            if (itPatternBatchEnd != itPatternEnd)

                batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));


            ///< turn on multithreading if requested

            if (settings.useMultithreading ())

            {

                // -------------------- divide the batches into bunches for each thread --------------

                size_t numThreads = std::thread::hardware_concurrency ();

                size_t batchesPerThread = batches.size () / numThreads;

                typedef std::vector<Batch>::iterator batch_iterator;

                std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;

                batch_iterator itBatchBegin = std::begin (batches);

                batch_iterator itBatchCurrEnd = std::begin (batches);

                batch_iterator itBatchEnd = std::end (batches);

                for (size_t iT = 0; iT < numThreads; ++iT)

                {

                    if (iT == numThreads-1)

                        itBatchCurrEnd = itBatchEnd;

                    else

                        std::advance (itBatchCurrEnd, batchesPerThread);

                    batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));

                    itBatchBegin = itBatchCurrEnd;

                }


                // -------------------- loop  over batches -------------------------------------------

                std::vector<std::future<double>> futures;

                for (auto& batchRange : batchVec)

                {

                    // -------------------- execute each of the batch ranges on a different thread -------------------------------

                    futures.push_back (

                        std::async (std::launch::async, [&]()

                                    {

                                        double localError = 0.0;

                                        for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)

                                        {

                                            Batch& batch = *it;

                                            pass_through_type settingsAndBatch (settings, batch, dropContainer);

                                            Minimizer minimizerClone (minimizer);

                                            localError += minimizerClone ((*this), weights, settingsAndBatch); /// call the minimizer

                                        }

                                        return localError;

                                    })

                        );

                }


                for (auto& f : futures)

                    error += f.get ();

            }

            else

            {

                for (auto& batch : batches)

                {

                    std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);

                    error += minimizer ((*this), weights, settingsAndBatch);

                }

            }


            numBatches_stored = std::max (numBatches_stored, size_t(1)); /// normalize the error

            error /= numBatches_stored;

            settings.testIteration ();


            return error;

        }


/*! \brief compute the neural net

 *

 * \param input the input data

 * \param weights the weight data

 */

        template <typename Weights>

            std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const

        {

            std::vector<LayerData> layerData;

            layerData.reserve (m_layers.size ()+1);

            auto itWeight = begin (weights);

            auto itInputBegin = begin (input);

            auto itInputEnd = end (input);

            layerData.push_back (LayerData (itInputBegin, itInputEnd));

            size_t numNodesPrev = input.size ();


        // -------------------- prepare layer data with one pattern -------------------------------

            for (auto& layer: m_layers)

            {

                layerData.push_back (LayerData (layer.numNodes (), itWeight,

                                                layer.activationFunction (),

                                                layer.modeOutputValues ()));

                size_t _numWeights = layer.numWeights (numNodesPrev);

                itWeight += _numWeights;

                numNodesPrev = layer.numNodes ();

            }


            // --------- forward -------------

        forwardPattern (m_layers, layerData);


            // ------------- fetch output ------------------

                std::vector<double> output;

        fetchOutput (layerData.back (), output);

            return output;

        }


        /*! \brief forward pass only; returns the error computed by forward_backward
         *
         * \param settingsAndBatch tuple-like bundle handed on to forward_backward
         * \param weights the weights of the net; size must equal numWeights ()
         */
        template <typename Weights, typename PassThrough>
            double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
        {
            assert (numWeights () == weights.size ());

            // serves both as the empty gradient range and as the (unused) output container;
            // no backpropagation is done, just forward
            std::vector<double> empty;
            return forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (empty), std::end (empty),
                                     10000, empty, false);
        }


        /*! \brief forward pass only, additionally fetching the net output
         *
         * \param settingsAndBatch tuple-like bundle handed on to forward_backward
         * \param weights the weights of the net; size must equal numWeights ()
         * \param outputContainer receives the output fetched by forward_backward
         */
        template <typename Weights, typename PassThrough, typename OutContainer>
            double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
        {
            assert (numWeights () == weights.size ());

            // empty gradients; no backpropagation is done, just forward
            std::vector<double> noGradients;
            return forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (noGradients), std::end (noGradients),
                                     10000, outputContainer, true);
        }


        /*! \brief forward and backward pass filling the gradients; no output is fetched
         *
         * \param settingsAndBatch tuple-like bundle handed on to forward_backward
         * \param weights the weights of the net; size must equal numWeights ()
         * \param gradients the gradients; must have the same size as weights
         */
        template <typename Weights, typename Gradients, typename PassThrough>
        double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
        {
            assert (numWeights () == weights.size ());
            assert (weights.size () == gradients.size ());

            // output container that forward_backward is told not to fill
            std::vector<double> discarded;
            return forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients),
                                     0, discarded, false);
        }


        /*! \brief forward and backward pass filling the gradients and fetching the net output
         *
         * \param settingsAndBatch tuple-like bundle handed on to forward_backward
         * \param weights the weights of the net; size must equal numWeights ()
         * \param gradients the gradients; must have the same size as weights
         * \param eFetch not used
         * \param outputContainer receives the output fetched by forward_backward
         */
        template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
        double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
        {
            MATH_UNUSED(eFetch);
            assert (numWeights () == weights.size ());
            assert (weights.size () == gradients.size ());

            return forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients),
                                     0, outputContainer, true);
        }


    template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>

        std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,

                                                                   Batch& batch,

                                                                   const DropContainer& dropContainer,

                                                                   ItWeight itWeightBegin,

                                                                   ItWeight /*itWeightEnd*/,

                                                                   ItGradient itGradientBegin,

                                                                   ItGradient itGradientEnd,

                                                                   size_t& totalNumWeights) const

    {

        LayerData::const_dropout_iterator itDropOut;

        bool usesDropOut = !dropContainer.empty ();

        if (usesDropOut)

            itDropOut = std::begin (dropContainer);


   if (_layers.empty ())

       throw std::string ("no layers in this net");


        // ----------- create layer data -------------------------------------------------------

        //LM- This assert not needed anymore (outputsize is actually numNodes+1)

        //assert (_layers.back ().numNodes () == outputSize ());

        totalNumWeights = 0;

        size_t totalNumNodes = 0;

        std::vector<std::vector<LayerData>> layerPatternData;

        layerPatternData.reserve (_layers.size ()+1);

        ItWeight itWeight = itWeightBegin;

        ItGradient itGradient = itGradientBegin;

        size_t numNodesPrev = inputSize ();

        typename Pattern::const_iterator itInputBegin;

        typename Pattern::const_iterator itInputEnd;


        // ItWeight itGammaBegin = itWeightBegin + numWeights ();

        // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes ();

        // ItGradient itGradGammaBegin = itGradientBegin + numWeights ();

        // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes ();


        // --------------------- prepare layer data for input layer ----------------------------

        layerPatternData.push_back (std::vector<LayerData>());

   for (const Pattern& _pattern : batch)

        {

            std::vector<LayerData>& layerData = layerPatternData.back ();

            layerData.push_back (LayerData (numNodesPrev));


            itInputBegin = _pattern.beginInput ();

            itInputEnd = _pattern.endInput ();

            layerData.back ().setInput (itInputBegin, itInputEnd);


            if (usesDropOut)

                layerData.back ().setDropOut (itDropOut);


        }


        if (usesDropOut)

            itDropOut += _layers.back ().numNodes ();


        // ---------------- prepare subsequent layers ---------------------------------------------

        // for each of the layers

        for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)

        {

            bool isOutputLayer = (itLayer+1 == itLayerEnd);

            bool isFirstHiddenLayer = (itLayer == begin (_layers));


            auto& layer = *itLayer;

            layerPatternData.push_back (std::vector<LayerData>());

            // for each pattern, prepare a layerData

            for (const Pattern& _pattern : batch)

            {

                std::vector<LayerData>& layerData = layerPatternData.back ();

                //layerData.push_back (LayerData (numNodesPrev));


                if (itGradientBegin == itGradientEnd)

                {

                    layerData.push_back (LayerData (layer.numNodes (), itWeight,

                                                    layer.activationFunction (),

                                                    layer.modeOutputValues ()));

                }

                else

                {

                    layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,

                                                    layer.activationFunction (),

                                                    layer.inverseActivationFunction (),

                                                    layer.modeOutputValues ()));

                }


                if (usesDropOut)

                {

                    layerData.back ().setDropOut (itDropOut);

                }


            }


            if (usesDropOut)

            {

                itDropOut += layer.numNodes ();

            }

            size_t _numWeights = layer.numWeights (numNodesPrev);

            totalNumWeights += _numWeights;

            itWeight += _numWeights;

            itGradient += _numWeights;

            numNodesPrev = layer.numNodes ();

            totalNumNodes += numNodesPrev;


        }

   assert (totalNumWeights > 0);

        return layerPatternData;

}


    template <typename LayerContainer>

        void Net::forwardPattern (const LayerContainer& _layers,

                                  std::vector<LayerData>& layerData) const

    {

   size_t idxLayer = 0, idxLayerEnd = _layers.size ();

        size_t cumulativeNodeCount = 0;

   for (; idxLayer < idxLayerEnd; ++idxLayer)

   {

       LayerData& prevLayerData = layerData.at (idxLayer);

       LayerData& currLayerData = layerData.at (idxLayer+1);


       forward (prevLayerData, currLayerData);


            applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());

   }

    }


    /*! \brief forward-propagate all patterns of a batch through the layers
     *
     * For each pair of consecutive entries of layerPatternData, first every pattern
     * is fed forward, then the activation functions are applied to every pattern.
     *
     * \param (unnamed LayerContainer) the layers; not needed by this function
     * \param layerPatternData per layer (input layer first) one LayerData per pattern
     * \param valuesMean cleared; nothing is written to it here
     * \param valuesStdDev cleared; nothing is written to it here
     * \param trainFromLayer for layer indices >= this value the inverse activation
     *        function is applied as well to fill the value gradients
     */
    template <typename LayerContainer, typename LayerPatternContainer>
        void Net::forwardBatch (const LayerContainer& /*_layers*/,
                                LayerPatternContainer& layerPatternData,
                                std::vector<double>& valuesMean,
                                std::vector<double>& valuesStdDev,
                                size_t trainFromLayer) const
    {
        valuesMean.clear ();
        valuesStdDev.clear ();

        // ---------------------------------- loop over layers and pattern -------------------------------------------------------
        // "idxLayer + 1 < idxLayerEnd" instead of "idxLayer < idxLayerEnd-1": the latter
        // wraps around when layerPatternData is empty
        for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer + 1 < idxLayerEnd; ++idxLayer)
        {
            bool doTraining = idxLayer >= trainFromLayer;

            // get layer-pattern data for this and the corresponding one from the next layer
            std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
            std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

            size_t numPattern = prevLayerPatternData.size ();

            // ---------------- loop over layerDatas of pattern compute forward ----------------------------
            for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
            {
                const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
                LayerData& currLayerData = currLayerPatternData.at (idxPattern);

                forward (prevLayerData, currLayerData); // feed forward
            }

            // ---------------- loop over layerDatas of pattern apply non-linearities ----------------------------
            for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
            {
                LayerData& currLayerData = currLayerPatternData.at (idxPattern);

                if (doTraining)
                    applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
                                    currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
                else
                    applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
            }
        }
    }


    template <typename OutputContainer>

        void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const

    {

        ModeOutputValues eModeOutput = lastLayerData.outputMode ();

        if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))

        {

            outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());

        }

        else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||

                 isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))

        {

            const auto& prob = lastLayerData.probabilities ();

            outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ;

        }

        else

            assert (false);

    }


    template <typename OutputContainer>

        void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const

    {

        for (const LayerData& lastLayerData : lastLayerPatternData)

            fetchOutput (lastLayerData, outputContainer);

    }


    template <typename ItWeight>

        std::tuple</*sumError*/double,/*sumWeights*/double> Net::computeError (const Settings& settings,

                                                                               std::vector<LayerData>& lastLayerData,

                                                                               Batch& batch,

                                                                               ItWeight itWeightBegin,

                                                                               ItWeight itWeightEnd) const

    {

        typename std::vector<LayerData>::iterator itLayerData    = lastLayerData.begin ();

//        typename std::vector<LayerData>::iterator itLayerDataEnd = lastLayerData.end ();


        typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();

        typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();


        double sumWeights (0.0);

        double sumError (0.0);


        size_t idxPattern = 0;

// FIXME: check that iteration doesn't go beyond itLayerDataEnd!

        for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)

        {

            ++idxPattern;


            // compute E and the deltas of the computed output and the true output

            LayerData& layerData = (*itLayerData);

            const Pattern& _pattern = (*itPattern);

            double error = errorFunction (layerData, _pattern.output (),

                                          itWeightBegin, itWeightEnd,

                                          _pattern.weight (), settings.factorWeightDecay (),

                                          settings.regularization ());

            sumWeights += fabs (_pattern.weight ());

            sumError += error;

        }

        return std::make_tuple (sumError, sumWeights);

    }


    /*! \brief propagate backward through the layers and update the gradients
     *
     * Walks layerPatternData from the last entry towards the first and stops as soon
     * as the entry index is <= trainFromLayer. For every pattern, backward () and
     * update () are called on the (previous layer, current layer) pair.
     *
     * \param layerPatternData per layer (input layer first) one LayerData per pattern
     * \param settings provides factorWeightDecay () and regularization ()
     * \param trainFromLayer entries with an index <= this value are not trained
     * \param totalNumWeights number of weights; the weight decay factor is divided by it
     */
    template <typename Settings>
        void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                                 const Settings& settings,
                                 size_t trainFromLayer,
                                 size_t totalNumWeights) const
    {
        bool doTraining = layerPatternData.size () > trainFromLayer;
        if (doTraining) // training
        {
            // ------------- backpropagation -------------
            size_t idxLayer = layerPatternData.size ();
            for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
                 itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
            {
                --idxLayer;
                // also ends the loop at idxLayer == 0, i.e. before "+1" below could step past rend ()
                if (idxLayer <= trainFromLayer) // no training
                    break;

                std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
                std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

                auto itPrevLayerData = begin (prevLayerDataColl);
                const auto itPrevLayerDataEnd = end (prevLayerDataColl);
                // bounded by both collections so that itPrevLayerData is never dereferenced
                // at or beyond its end
                for (auto itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl);
                     itCurrLayerData != itCurrLayerDataEnd && itPrevLayerData != itPrevLayerDataEnd;
                     ++itCurrLayerData, ++itPrevLayerData)
                {
                    LayerData& currLayerData = (*itCurrLayerData);
                    LayerData& prevLayerData = *(itPrevLayerData);

                    backward (prevLayerData, currLayerData);

                    // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses)
                    // because L1 and L2 regularization
                    //
                    //  http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization
                    //
                    // L1 : -factorWeightDecay*sgn(w)/numWeights
                    // L2 : -factorWeightDecay/numWeights
                    update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
                }
            }
        }
    }


/*! \brief forward propagation and backward propagation

 *

 *

 */

        template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>

            double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,

                                      ItWeight itWeightBegin, ItWeight itWeightEnd,

                                          ItGradient itGradientBegin, ItGradient itGradientEnd,

                                          size_t trainFromLayer,

                                      OutContainer& outputContainer, bool doFetchOutput) const

        {

            Settings& settings = std::get<0>(settingsAndBatch);

            Batch& batch = std::get<1>(settingsAndBatch);

            DropContainer& dropContainer = std::get<2>(settingsAndBatch);


            double sumError = 0.0;

            double sumWeights = 0.0;   // -------------


        // ----------------------------- prepare layer data -------------------------------------

        size_t totalNumWeights (0);

        std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,

                                                                                 batch,

                                                                                 dropContainer,

                                                                                 itWeightBegin,

                                                                                 itWeightEnd,

                                                                                 itGradientBegin,

                                                                                 itGradientEnd,

                                                                                 totalNumWeights);


        // ---------------------------------- propagate forward ------------------------------------------------------------------

        std::vector<double> valuesMean;

        std::vector<double> valuesStdDev;

        forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);


            // ------------- fetch output ------------------

        if (doFetchOutput)

            {

            fetchOutput (layerPatternData.back (), outputContainer);

            }


            // ------------- error computation -------------

        std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);


                // ------------- backpropagation -------------

        backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);


        // --- compile the measures

            double batchSize = std::distance (std::begin (batch), std::end (batch));

            for (auto it = itGradientBegin; it != itGradientEnd; ++it)

                (*it) /= batchSize;


            sumError /= sumWeights;

            return sumError;

        }


/*! \brief initialization of the weights
 *
 * Writes layer.numWeights (nodes of the previous layer) values per layer, starting at
 * itWeight, with the input size as "previous layer" of the first layer:
 *  - XAVIER        : gaussDouble (0, sqrt (2/nIn))
 *  - XAVIERUNIFORM : uniformDouble (-sqrt (2/nIn), sqrt (2/nIn))
 *  - TEST          : gaussDouble (0, 0.1)
 *  - LAYERSIZE     : gaussDouble (0, sqrt (number of weights of the layer))
 * where nIn is the number of nodes of the previous layer. Any other strategy
 * writes nothing.
 *
 * \param eInitStrategy the strategy selecting the random distribution
 * \param itWeight output position for the first weight
 */
        template <typename OutIterator>
            void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
        {
            switch (eInitStrategy)
            {
            case WeightInitializationStrategy::XAVIER:
            {
                int fanIn = inputSize ();
                for (auto& layer: layers ())
                {
                    const double nIn = fanIn;
                    const double sigma = sqrt (2.0/nIn); // factor 2.0 for ReLU
                    for (size_t remaining = layer.numWeights (fanIn); remaining > 0; --remaining)
                    {
                        (*itWeight) = DNN::gaussDouble (0.0, sigma);
                        ++itWeight;
                    }
                    fanIn = layer.numNodes ();
                }
                break;
            }

            case WeightInitializationStrategy::XAVIERUNIFORM:
            {
                int fanIn = inputSize ();
                for (auto& layer: layers ())
                {
                    const double nIn = fanIn;
                    const double limit = sqrt (2.0/nIn); // factor 2.0 for ReLU
                    for (size_t remaining = layer.numWeights (fanIn); remaining > 0; --remaining)
                    {
                        (*itWeight) = DNN::uniformDouble (-limit, limit);
                        ++itWeight;
                    }
                    fanIn = layer.numNodes ();
                }
                break;
            }

            case WeightInitializationStrategy::TEST:
            {
                int fanIn = inputSize ();
                for (auto& layer: layers ())
                {
                    for (size_t remaining = layer.numWeights (fanIn); remaining > 0; --remaining)
                    {
                        (*itWeight) = DNN::gaussDouble (0.0, 0.1);
                        ++itWeight;
                    }
                    fanIn = layer.numNodes ();
                }
                break;
            }

            case WeightInitializationStrategy::LAYERSIZE:
            {
                int fanIn = inputSize ();
                for (auto& layer: layers ())
                {
                    const double nIn = fanIn;
                    // the width of the distribution is the square root of the layer's weight count
                    const double sigma = sqrt (layer.numWeights (nIn));
                    for (size_t remaining = layer.numWeights (fanIn); remaining > 0; --remaining)
                    {
                        (*itWeight) = DNN::gaussDouble (0.0, sigma);
                        ++itWeight;
                    }
                    fanIn = layer.numNodes ();
                }
                break;
            }

            default:
                // no other strategy is handled here
                break;
            }
        }


/*! \brief compute the error function
 *
 * Evaluates the error of the output layer for a single pattern using the error
 * function selected by m_eErrorFunction and, when regularization is enabled,
 * passes the result through weightDecay.
 *
 * \param layerData data of the output layer; its node values (SUMOFSQUARES) or its
 *                  probabilities (both cross-entropy modes) are compared with the truth,
 *                  and its delta range and inverse activation function are handed to
 *                  the selected error function
 * \param truth the target values for this pattern
 * \param itWeight begin of the weight range (only used for the weight decay)
 * \param itWeightEnd end of the weight range (only used for the weight decay)
 * \param patternWeight weight of the pattern, forwarded to the selected error function
 * \param factorWeightDecay weight-decay factor; a value of 0 skips the weight decay
 * \param eRegularization type of regularization; NONE skips the weight decay
 * \return the value of the selected error function (the error starts at 0 if no mode
 *         matches), passed through weightDecay when regularization is enabled
 */
        template <typename Container, typename ItWeight>
            double Net::errorFunction (LayerData& layerData,
                                       Container truth,
                                       ItWeight itWeight,
                                       ItWeight itWeightEnd,
                                       double patternWeight,
                                       double factorWeightDecay,
                                       EnumRegularization eRegularization) const
        {
            double error (0);
            // no default label on purpose: the compiler can then warn about unhandled modes
            switch (m_eErrorFunction)
            {
            case ModeErrorFunction::SUMOFSQUARES:
            {
                // works directly on the node values of the layer
                error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
                                      layerData.deltasBegin (), layerData.deltasEnd (),
                                      layerData.inverseActivationFunction (),
                                      patternWeight);
                break;
            }
            case ModeErrorFunction::CROSSENTROPY:
            {
                // cross entropy needs probabilities, hence the output mode must not be DIRECT
                assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
                std::vector<double> probabilities = layerData.probabilities ();
                error = crossEntropy (begin (probabilities), end (probabilities),
                                      begin (truth), end (truth),
                                      layerData.deltasBegin (), layerData.deltasEnd (),
                                      layerData.inverseActivationFunction (),
                                      patternWeight);
                break;
            }
            case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
            {
                // note: the former debug output ("softmax.") has been removed; it was written
                // to stdout (and flushed) on every single call of this function
                assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
                std::vector<double> probabilities = layerData.probabilities ();
                error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                             begin (truth), end (truth),
                                             layerData.deltasBegin (), layerData.deltasEnd (),
                                             layerData.inverseActivationFunction (),
                                             patternWeight);
                break;
            }
            }

            // add the regularization term only if it is switched on and has a non-zero factor
            if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
            {
                error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
            }
            return error;
        }


// /*! \brief pre-training

//  *

//  * in development

//  */

//         template <typename Minimizer>

//             void Net::preTrain (std::vector<double>& weights,

//                                 std::vector<Pattern>& trainPattern,

//                                 const std::vector<Pattern>& testPattern,

//                                 Minimizer& minimizer, Settings& settings)

//         {

//             auto itWeightGeneral = std::begin (weights);

//             std::vector<Pattern> prePatternTrain (trainPattern.size ());

//             std::vector<Pattern> prePatternTest (testPattern.size ());


//             size_t _inputSize = inputSize ();


//             // transform pattern using the created preNet

//             auto initializePrePattern = [&](const std::vector<Pattern>& pttrnInput, std::vector<Pattern>& pttrnOutput)

//                 {

//                     pttrnOutput.clear ();

//                     std::transform (std::begin (pttrnInput), std::end (pttrnInput),

//                                     std::back_inserter (pttrnOutput),

//                                     [](const Pattern& p)

//             {

//                 Pattern pat (p.input (), p.input (), p.weight ());

//                 return pat;

//             });

//                 };


//             initializePrePattern (trainPattern, prePatternTrain);

//             initializePrePattern (testPattern, prePatternTest);


//             std::vector<double> originalDropFractions = settings.dropFractions ();


//             for (auto& _layer : layers ())

//             {

//                 // compute number of weights (as a function of the number of incoming nodes)

//                 // fetch number of nodes

//                 size_t numNodes = _layer.numNodes ();

//                 size_t _numWeights = _layer.numWeights (_inputSize);


//                 // ------------------

//                 DNN::Net preNet;

//                 if (!originalDropFractions.empty ())

//                 {

//                     originalDropFractions.erase (originalDropFractions.begin ());

//                     settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ());

//                 }

//                 std::vector<double> preWeights;


//                 // define the preNet (pretraining-net) for this layer

//                 // outputSize == inputSize, because this is an autoencoder;

//                 preNet.setInputSize (_inputSize);

//                 preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ()));

//                 preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT));

//                 preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES);

//                 preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder)


//                 // initialize weights

//                 preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM,

//                                           std::back_inserter (preWeights));


//                 // overwrite already existing weights from the "general" weights

//                 std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ());

//                 std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer


//                 // train the "preNet"

//                 preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings);


//                 // fetch the pre-trained weights (without the output part of the autoencoder)

//                 std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral);


//                 // advance the iterator on the incoming weights

//                 itWeightGeneral += _numWeights;


//                 // remove the weights of the output layer of the preNet

//                 preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ());


//                 // remove the outputLayer of the preNet

//                 preNet.removeLayer ();


//                 // set the output size to the number of nodes in the new output layer (== last hidden layer)

//                 preNet.setOutputSize (numNodes);


//                 // transform pattern using the created preNet

//                 auto proceedPattern = [&](std::vector<Pattern>& pttrn)

//                     {

//                         std::vector<Pattern> newPttrn;

//                         std::for_each (std::begin (pttrn), std::end (pttrn),

//                                        [&preNet,&preWeights,&newPttrn](Pattern& p)

//                 {

//                     std::vector<double> output = preNet.compute (p.input (), preWeights);

//                     Pattern pat (output, output, p.weight ());

//                     newPttrn.push_back (pat);

// //                    p = pat;

//                 });

//                         return newPttrn;

//                     };


//                 prePatternTrain = proceedPattern (prePatternTrain);

//                 prePatternTest = proceedPattern (prePatternTest);


//                 // the new input size is the output size of the already reduced preNet

//                 _inputSize = preNet.layers ().back ().numNodes ();

//             }

//         }


    } // namespace DNN

} // namespace TMVA


#endif

MethodBase.h

Pattern.h

f
#define f(i)
Definition: RSha256.hxx:104

g
#define g(i)
Definition: RSha256.hxx:105

dummy
static RooMathCoreReg dummy
Definition: RooMathCoreReg.cxx:27

kMagenta
@ kMagenta
Definition: Rtypes.h:63

kBlue
@ kBlue
Definition: Rtypes.h:63

sqrt
double sqrt(double)

log
double log(double)

Form
char * Form(const char *fmt,...)

Util.h

MATH_UNUSED
#define MATH_UNUSED(var)
Definition: Util.h:33

Function
Double_t(* Function)(Double_t)
Definition: Functor.C:4

Pattern
Definition: Pattern.h:8

Pattern::weight
double weight() const
Definition: Pattern.h:74

Pattern::const_iterator
std::vector< double >::const_iterator const_iterator
Definition: Pattern.h:12

Pattern::output
std::vector< double > & output()
Definition: Pattern.h:84

TMVA::DNN::Batch
The Batch class encapsulates one mini-batch.
Definition: NeuralNet.h:236

TMVA::DNN::Batch::begin
const_iterator begin() const
Definition: NeuralNet.h:245

TMVA::DNN::Batch::end
const_iterator end() const
Definition: NeuralNet.h:246

TMVA::DNN::Batch::size
size_t size() const
Definition: NeuralNet.h:248

TMVA::DNN::LayerData
LayerData holds the data of one layer.
Definition: NeuralNet.h:438

TMVA::DNN::LayerData::valuesEnd
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition: NeuralNet.h:587

TMVA::DNN::LayerData::valueGradientsBegin
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition: NeuralNet.h:601

TMVA::DNN::LayerData::deltasBegin
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:595

TMVA::DNN::LayerData::outputMode
ModeOutputValues outputMode() const
returns the output mode
Definition: NeuralNet.h:592

TMVA::DNN::LayerData::inverseActivationFunction
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition: NeuralNet.h:612

TMVA::DNN::LayerData::deltasEnd
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:596

TMVA::DNN::LayerData::activationFunction
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition: NeuralNet.h:611

TMVA::DNN::LayerData::probabilities
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition: NeuralNet.h:593

TMVA::DNN::LayerData::const_dropout_iterator
DropContainer::const_iterator const_dropout_iterator
Definition: NeuralNet.h:449

TMVA::DNN::LayerData::valuesBegin
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition: NeuralNet.h:586

TMVA::DNN::Net::forwardBatch
void forwardBatch(const LayerContainer &_layers, LayerPatternContainer &layerPatternData, std::vector< double > &valuesMean, std::vector< double > &valuesStdDev, size_t trainFromLayer) const
Definition: NeuralNet.icc:1243

TMVA::DNN::Net::fExitFromTraining
bool * fExitFromTraining
Definition: NeuralNet.h:1284

TMVA::DNN::Net::m_layers
std::vector< Layer > m_layers
layer-structure-data
Definition: NeuralNet.h:1279

TMVA::DNN::Net::fIPyMaxIter
UInt_t * fIPyMaxIter
Definition: NeuralNet.h:1285

TMVA::DNN::Net::compute
std::vector< double > compute(const std::vector< double > &input, const Weights &weights) const
compute the net with the given input and the given weights
Definition: NeuralNet.icc:1037

TMVA::DNN::Net::fetchOutput
void fetchOutput(const LayerData &lastLayerData, OutputContainer &outputContainer) const
Definition: NeuralNet.icc:1298

TMVA::DNN::Net::inputSize
size_t inputSize() const
input size of the DNN
Definition: NeuralNet.h:1105

TMVA::DNN::Net::m_eErrorFunction
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition: NeuralNet.h:1276

TMVA::DNN::Net::train
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
Definition: NeuralNet.icc:710

TMVA::DNN::Net::layers
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition: NeuralNet.h:1252

TMVA::DNN::Net::prepareLayerData
std::vector< std::vector< LayerData > > prepareLayerData(LayerContainer &layers, Batch &batch, const DropContainer &dropContainer, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t &totalNumWeights) const
Definition: NeuralNet.icc:1111

TMVA::DNN::Net::initializeWeights
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
Definition: NeuralNet.icc:1481

TMVA::DNN::Net::outputSize
size_t outputSize() const
output size of the DNN
Definition: NeuralNet.h:1106

TMVA::DNN::Net::errorFunction
double errorFunction(LayerData &layerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const
computes the error of the DNN
Definition: NeuralNet.icc:1590

TMVA::DNN::Net::forward_backward
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
Definition: NeuralNet.icc:1416

TMVA::DNN::Net::trainCycle
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
Definition: NeuralNet.icc:939

TMVA::DNN::Net::fIPyCurrentIter
UInt_t * fIPyCurrentIter
Definition: NeuralNet.h:1285

TMVA::DNN::Net::operator()
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradient...
Definition: NeuralNet.icc:1070

TMVA::DNN::Net::dropOutWeightFactor
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop out configuration
Definition: NeuralNet.icc:650

TMVA::DNN::Net::fillDropContainer
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out-container (select the nodes which are to be dropped out)
Definition: NeuralNet.cxx:575

TMVA::DNN::Net::numWeights
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
Definition: NeuralNet.cxx:543

TMVA::DNN::Net::fInteractive
IPythonInteractive * fInteractive
Definition: NeuralNet.h:1283

TMVA::DNN::Net::computeError
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
Definition: NeuralNet.icc:1328

TMVA::DNN::Net::forwardPattern
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
Definition: NeuralNet.icc:1223

TMVA::DNN::Net::backPropagate
void backPropagate(std::vector< std::vector< LayerData > > &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Definition: NeuralNet.icc:1365

TMVA::DNN::Settings
Settings for the training of the neural net.
Definition: NeuralNet.h:737

TMVA::DNN::Settings::useMultithreading
bool useMultithreading() const
is multithreading turned on?
Definition: NeuralNet.h:822

TMVA::DNN::Settings::regularization
EnumRegularization regularization() const
some regularization of the DNN is turned on?
Definition: NeuralNet.h:820

TMVA::DNN::Settings::convergenceCount
size_t convergenceCount() const
returns the current convergence count
Definition: NeuralNet.h:834

TMVA::DNN::Settings::testRepetitions
size_t testRepetitions() const
how often is the test data tested
Definition: NeuralNet.h:775

TMVA::DNN::Settings::endTestCycle
virtual void endTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:812

TMVA::DNN::Settings::testIteration
virtual void testIteration()
callback for monitoring and logging
Definition: NeuralNet.h:813

TMVA::DNN::Settings::hasConverged
virtual bool hasConverged(double testError)
has this training converged already?
Definition: NeuralNet.cxx:488

TMVA::DNN::Settings::cycle
virtual void cycle(double progress, TString text)
Definition: NeuralNet.h:806

TMVA::DNN::Settings::endTrainCycle
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition: NeuralNet.h:795

TMVA::DNN::Settings::dropFractions
const std::vector< double > & dropFractions() const
Definition: NeuralNet.h:769

TMVA::DNN::Settings::addPoint
void addPoint(std::string histoName, double x)
for monitoring
Definition: NeuralNet.h:828

TMVA::DNN::Settings::testSample
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition: NeuralNet.h:788

TMVA::DNN::Settings::plot
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition: NeuralNet.h:830

TMVA::DNN::Settings::startTrainCycle
virtual void startTrainCycle()
Definition: NeuralNet.h:789

TMVA::DNN::Settings::convergenceSteps
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition: NeuralNet.h:773

TMVA::DNN::Settings::factorWeightDecay
double factorWeightDecay() const
get the weight-decay factor
Definition: NeuralNet.h:776

TMVA::DNN::Settings::maxConvergenceCount
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition: NeuralNet.h:835

TMVA::DNN::Settings::pads
void pads(int numPads)
preparation for monitoring
Definition: NeuralNet.h:825

TMVA::DNN::Settings::batchSize
size_t batchSize() const
mini-batch size
Definition: NeuralNet.h:774

TMVA::DNN::Settings::computeResult
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and logging
Definition: NeuralNet.h:816

TMVA::DNN::Settings::dropRepetitions
size_t dropRepetitions() const
Definition: NeuralNet.h:768

TMVA::DNN::Settings::create
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition: NeuralNet.h:826

TMVA::DNN::Settings::startTestCycle
virtual void startTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:811

TMVA::DNN::Steepest::m_repetitions
size_t m_repetitions
Definition: NeuralNet.h:338

TMVA::DNN::Steepest::m_beta
double m_beta
internal parameter (momentum)
Definition: NeuralNet.h:373

TMVA::DNN::Steepest::m_localGradients
std::vector< double > m_localGradients
local gradients for reuse in thread.
Definition: NeuralNet.h:377

TMVA::DNN::Steepest::m_prevGradients
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition: NeuralNet.h:374

TMVA::DNN::Steepest::m_alpha
double m_alpha
internal parameter (learningRate)
Definition: NeuralNet.h:372

TMVA::DNN::Steepest::m_localWeights
std::vector< double > m_localWeights
local weights for reuse in thread.
Definition: NeuralNet.h:376

TMVA::DNN::Steepest::operator()
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
Definition: NeuralNet.icc:269

TMVA::IPythonInteractive::AddPoint
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Definition: MethodBase.cxx:212

TString
Basic string class.
Definition: TString.h:131

x
Double_t x[n]
Definition: legend1.C:17

n
const Int_t n
Definition: legend1.C:16

ClassificationKeras.output
output
Definition: ClassificationKeras.py:16

ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:34

ROOT::Math::fabs
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
Definition: UnaryOperators.h:131

ROOT::R::function
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151

RooFit::Minimizer
RooCmdArg Minimizer(const char *type, const char *alg=0)
Definition: RooGlobalFunc.cxx:207

TMVA::DNN::InvGauss
std::shared_ptr< std::function< double(double)> > InvGauss
Definition: NeuralNet.cxx:14

TMVA::DNN::sumOfSquares
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)

TMVA::DNN::uniformDouble
double uniformDouble(double minValue, double maxValue)
Definition: NeuralNet.cxx:43

TMVA::DNN::SymmReLU
std::shared_ptr< std::function< double(double)> > SymmReLU
Definition: NeuralNet.cxx:30

TMVA::DNN::TanhShift
std::shared_ptr< std::function< double(double)> > TanhShift
Definition: NeuralNet.cxx:31

TMVA::DNN::Tanh
std::shared_ptr< std::function< double(double)> > Tanh
Definition: NeuralNet.cxx:29

TMVA::DNN::InvSigmoid
std::shared_ptr< std::function< double(double)> > InvSigmoid
Definition: NeuralNet.cxx:18

TMVA::DNN::forward
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
Definition: NeuralNet.icc:544

TMVA::DNN::applyFunctions
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)

TMVA::DNN::uniformFromTo
T uniformFromTo(T from, T to)
Definition: NeuralNet.icc:32

TMVA::DNN::ModeOutput
ModeOutput
Definition: NeuralNet.h:1037

TMVA::DNN::ModeOutput::FETCH
@ FETCH

TMVA::DNN::SoftPlus
std::shared_ptr< std::function< double(double)> > SoftPlus
Definition: NeuralNet.cxx:27

TMVA::DNN::EnumRegularization
EnumRegularization
Definition: NeuralNet.h:174

TMVA::DNN::EnumRegularization::L2
@ L2

TMVA::DNN::EnumRegularization::L1
@ L1

TMVA::DNN::EnumRegularization::NONE
@ NONE

TMVA::DNN::crossEntropy
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:410

TMVA::DNN::backward
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
Definition: NeuralNet.icc:570

TMVA::DNN::ZeroFnc
std::shared_ptr< std::function< double(double)> > ZeroFnc
Definition: NeuralNet.cxx:28

TMVA::DNN::weightDecay
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:496

TMVA::DNN::InvSoftSign
std::shared_ptr< std::function< double(double)> > InvSoftSign
Definition: NeuralNet.cxx:20

TMVA::DNN::InvGaussComplement
std::shared_ptr< std::function< double(double)> > InvGaussComplement
Definition: NeuralNet.cxx:15

TMVA::DNN::regularization
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:216

TMVA::DNN::ModeErrorFunction::CROSSENTROPY
@ CROSSENTROPY

TMVA::DNN::ModeErrorFunction::SUMOFSQUARES
@ SUMOFSQUARES

TMVA::DNN::ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE
@ CROSSENTROPY_MUTUALEXCLUSIVE

TMVA::DNN::softMaxCrossEntropy
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max-cross-entropy error function (for mutual exclusive cross-entropy)
Definition: NeuralNet.icc:456

TMVA::DNN::InvTanh
std::shared_ptr< std::function< double(double)> > InvTanh
Definition: NeuralNet.cxx:22

TMVA::DNN::Linear
std::shared_ptr< std::function< double(double)> > Linear
Definition: NeuralNet.cxx:24

TMVA::DNN::WeightInitializationStrategy
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition: NeuralNet.h:1057

TMVA::DNN::WeightInitializationStrategy::TEST
@ TEST

TMVA::DNN::WeightInitializationStrategy::XAVIERUNIFORM
@ XAVIERUNIFORM

TMVA::DNN::WeightInitializationStrategy::XAVIER
@ XAVIER

TMVA::DNN::WeightInitializationStrategy::LAYERSIZE
@ LAYERSIZE

TMVA::DNN::InvReLU
std::shared_ptr< std::function< double(double)> > InvReLU
Definition: NeuralNet.cxx:17

TMVA::DNN::GaussComplement
std::shared_ptr< std::function< double(double)> > GaussComplement
Definition: NeuralNet.cxx:13

TMVA::DNN::Gauss
std::shared_ptr< std::function< double(double)> > Gauss
Definition: NeuralNet.cxx:12

TMVA::DNN::Sigmoid
std::shared_ptr< std::function< double(double)> > Sigmoid
Definition: NeuralNet.cxx:26

TMVA::DNN::gaussDouble
double gaussDouble(double mean, double sigma)
Definition: NeuralNet.cxx:35

TMVA::DNN::ModeOutputValues
ModeOutputValues
Definition: NeuralNet.h:180

TMVA::DNN::ModeOutputValues::SOFTMAX
@ SOFTMAX

TMVA::DNN::ModeOutputValues::DIRECT
@ DIRECT

TMVA::DNN::ModeOutputValues::SIGMOID
@ SIGMOID

TMVA::DNN::SoftSign
std::shared_ptr< std::function< double(double)> > SoftSign
Definition: NeuralNet.cxx:32

TMVA::DNN::InvSoftPlus
std::shared_ptr< std::function< double(double)> > InvSoftPlus
Definition: NeuralNet.cxx:19

TMVA::DNN::ReLU
std::shared_ptr< std::function< double(double)> > ReLU
Definition: NeuralNet.cxx:25

TMVA::DNN::computeRegularization
double computeRegularization(double weight, const double &factorWeightDecay)
compute the regularization (L1, L2)
Definition: NeuralNet.icc:207

TMVA::DNN::applyWeights
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)

TMVA::DNN::pass_through_type
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition: NeuralNet.h:1301

TMVA::DNN::isFlagSet
bool isFlagSet(T flag, T value)
Definition: NeuralNet.h:213

TMVA::DNN::InvTanhShift
std::shared_ptr< std::function< double(double)> > InvTanhShift
Definition: NeuralNet.cxx:23

TMVA::DNN::update
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
Definition: NeuralNet.icc:181

TMVA::DNN::DropContainer
std::vector< char > DropContainer
Definition: NeuralNet.h:220

TMVA::DNN::applyWeightsBackwards
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)

TMVA::DNN::InvSymmReLU
std::shared_ptr< std::function< double(double)> > InvSymmReLU
Definition: NeuralNet.cxx:21

TMVA::DNN::InvLinear
std::shared_ptr< std::function< double(double)> > InvLinear
Definition: NeuralNet.cxx:16

TMVA
Abstract ClassifierFactory template that handles arbitrary types.
Definition: GeneticMinimizer.h:21

TMath::E
constexpr Double_t E()
Base of natural log:
Definition: TMath.h:97