Layer implementing Batch Normalization.
The inputs from each batch are normalized during training to have zero mean and unit variance, and they are then scaled and shifted by two parameters, different for each input variable: a scale factor gamma and an offset beta.
In addition, a running batch mean and variance are computed and stored in the class. During inference the inputs are not normalized using the batch mean but using the previously computed running mean and variance. If momentum is in [0,1), the running mean and variance are exponential averages using the momentum value: running_mean = momentum * running_mean + (1-momentum) * batch_mean. If instead momentum < 0 (the default is -1), the cumulative average is computed: running_mean = (nb/(nb+1)) * running_mean + (1/(nb+1)) * batch_mean, where nb is the number of batches trained so far.
See more at [https://arxiv.org/pdf/1502.03167v3.pdf]
Definition at line 64 of file BatchNormLayer.h.
Public Types | |
using | BNormDescriptors_t = typename Architecture_t::BNormDescriptors_t |
using | HelperDescriptor_t = typename Architecture_t::TensorDescriptor_t |
using | Matrix_t = typename Architecture_t::Matrix_t |
using | Scalar_t = typename Architecture_t::Scalar_t |
using | Tensor_t = typename Architecture_t::Tensor_t |
Public Member Functions | |
TBatchNormLayer (const TBatchNormLayer &) | |
Copy Constructor. | |
TBatchNormLayer (size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, const std::vector< size_t > &shape, int axis=-1, Scalar_t momentum=-1., Scalar_t epsilon=0.0001) | |
Constructor. | |
TBatchNormLayer (TBatchNormLayer< Architecture_t > *layer) | |
Copy the batch normalization layer provided as a pointer. | |
~TBatchNormLayer () | |
Destructor. | |
virtual void | AddWeightsXMLTo (void *parent) |
Writes the information and the weights about the layer in an XML node. | |
void | Backward (Tensor_t &gradients_backward, const Tensor_t &activations_backward) |
Compute weight, bias and activation gradients. | |
void | Forward (Tensor_t &input, bool inTraining=true) |
Compute activation of the layer for the given input. | |
Matrix_t & | GetBatchMean () |
const Matrix_t & | GetBatchMean () const |
Scalar_t | GetEpsilon () const |
std::vector< Matrix_t > | GetExtraLayerParameters () const |
Matrix_t & | GetIVariance () |
const Matrix_t & | GetIVariance () const |
Scalar_t | GetMomentum () const |
Matrix_t & | GetMuVector () |
const Matrix_t & | GetMuVector () const |
Scalar_t | GetNormAxis () const |
int & | GetNTrainedBatches () |
const int & | GetNTrainedBatches () const |
Matrix_t & | GetReshapedData () |
const Matrix_t & | GetReshapedData () const |
Matrix_t & | GetVariance () |
const Matrix_t & | GetVariance () const |
Matrix_t & | GetVarVector () |
const Matrix_t & | GetVarVector () const |
virtual void | Initialize () |
Initialize the weights and biases according to the given initialization method. | |
void | Print () const |
Printing the layer info. | |
virtual void | ReadWeightsFromXML (void *parent) |
Read the information and the weights about the layer from XML node. | |
void | ResetTraining () |
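Reset some training flags after a loop on all batches. Some layers (e.g. batch normalization) might need to implement this function in case some operations are needed after looping on all batches. | |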
void | SetExtraLayerParameters (const std::vector< Matrix_t > &params) |
Public Member Functions inherited from TMVA::DNN::VGeneralLayer< Architecture_t > | |
VGeneralLayer (const VGeneralLayer &) | |
Copy Constructor. | |
VGeneralLayer (size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init) | |
Constructor. | |
VGeneralLayer (size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, std::vector< size_t > WeightsNRows, std::vector< size_t > WeightsNCols, size_t BiasesNSlices, std::vector< size_t > BiasesNRows, std::vector< size_t > BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init) | |
General Constructor with different weights dimension. | |
VGeneralLayer (VGeneralLayer< Architecture_t > *layer) | |
Copy the layer provided as a pointer. | |
virtual | ~VGeneralLayer () |
Virtual Destructor. | |
void | CopyBiases (const std::vector< Matrix_t > &otherBiases) |
Copies the biases provided as an input. | |
template<typename Arch > | |
void | CopyParameters (const VGeneralLayer< Arch > &layer) |
Copy all trainable weights and biases from another equivalent layer with a different architecture. The function can also copy extra parameters, in addition to weights and biases, if they are returned by the function GetExtraLayerParameters. | |
void | CopyWeights (const std::vector< Matrix_t > &otherWeights) |
Copies the weights provided as an input. | |
Tensor_t & | GetActivationGradients () |
const Tensor_t & | GetActivationGradients () const |
Matrix_t | GetActivationGradientsAt (size_t i) |
const Matrix_t & | GetActivationGradientsAt (size_t i) const |
size_t | GetBatchSize () const |
Getters. | |
std::vector< Matrix_t > & | GetBiases () |
const std::vector< Matrix_t > & | GetBiases () const |
Matrix_t & | GetBiasesAt (size_t i) |
const Matrix_t & | GetBiasesAt (size_t i) const |
std::vector< Matrix_t > & | GetBiasGradients () |
const std::vector< Matrix_t > & | GetBiasGradients () const |
Matrix_t & | GetBiasGradientsAt (size_t i) |
const Matrix_t & | GetBiasGradientsAt (size_t i) const |
size_t | GetDepth () const |
size_t | GetHeight () const |
EInitialization | GetInitialization () const |
size_t | GetInputDepth () const |
size_t | GetInputHeight () const |
size_t | GetInputWidth () const |
Tensor_t & | GetOutput () |
const Tensor_t & | GetOutput () const |
Matrix_t | GetOutputAt (size_t i) |
const Matrix_t & | GetOutputAt (size_t i) const |
std::vector< Matrix_t > & | GetWeightGradients () |
const std::vector< Matrix_t > & | GetWeightGradients () const |
Matrix_t & | GetWeightGradientsAt (size_t i) |
const Matrix_t & | GetWeightGradientsAt (size_t i) const |
std::vector< Matrix_t > & | GetWeights () |
const std::vector< Matrix_t > & | GetWeights () const |
Matrix_t & | GetWeightsAt (size_t i) |
const Matrix_t & | GetWeightsAt (size_t i) const |
size_t | GetWidth () const |
bool | IsTraining () const |
void | ReadMatrixXML (void *node, const char *name, Matrix_t &matrix) |
void | SetBatchSize (size_t batchSize) |
Setters. | |
void | SetDepth (size_t depth) |
virtual void | SetDropoutProbability (Scalar_t) |
Set Dropout probability. | |
void | SetHeight (size_t height) |
void | SetInputDepth (size_t inputDepth) |
void | SetInputHeight (size_t inputHeight) |
void | SetInputWidth (size_t inputWidth) |
void | SetIsTraining (bool isTraining) |
void | SetWidth (size_t width) |
void | Update (const Scalar_t learningRate) |
Updates the weights and biases, given the learning rate. | |
void | UpdateBiases (const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate) |
Updates the biases, given the gradients and the learning rate. | |
void | UpdateBiasGradients (const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate) |
Updates the bias gradients, given some other bias gradients and the learning rate. | |
void | UpdateWeightGradients (const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate) |
Updates the weight gradients, given some other weight gradients and learning rate. | |
void | UpdateWeights (const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate) |
Updates the weights, given the gradients and the learning rate. | |
void | WriteMatrixToXML (void *node, const char *name, const Matrix_t &matrix) |
void | WriteTensorToXML (void *node, const char *name, const std::vector< Matrix_t > &tensor) |
helper functions for XML | |
Static Protected Member Functions | |
static size_t | CalculateNormDim (int axis, size_t c, size_t h, size_t w) |
Private Attributes | |
Tensor_t | fDerivatives |
First derivatives of the activations of this layer. | |
TDescriptors * | fDescriptors = nullptr |
Scalar_t | fEpsilon |
Matrix_t | fIVar |
Scalar_t | fMomentum |
The momentum used to update the running mean and variance. | |
Matrix_t | fMu |
Matrix_t | fMu_Training |
int | fNormAxis |
Normalization axis. For each element of this axis we will compute mean and stddev. | |
Tensor_t | fReshapedData |
int | fTrainedBatches = 0 |
Matrix_t | fVar |
Matrix_t | fVar_Training |
Additional Inherited Members | |
Protected Attributes inherited from TMVA::DNN::VGeneralLayer< Architecture_t > | |
Tensor_t | fActivationGradients |
Gradients w.r.t. the activations of this layer. | |
size_t | fBatchSize |
Batch size used for training and evaluation. | |
std::vector< Matrix_t > | fBiases |
The biases associated to the layer. | |
std::vector< Matrix_t > | fBiasGradients |
Gradients w.r.t. the bias values of the layer. | |
size_t | fDepth |
The depth of the layer. | |
size_t | fHeight |
The height of the layer. | |
EInitialization | fInit |
The initialization method. | |
size_t | fInputDepth |
The depth of the previous layer or input. | |
size_t | fInputHeight |
The height of the previous layer or input. | |
size_t | fInputWidth |
The width of the previous layer or input. | |
bool | fIsTraining |
Flag indicating the mode. | |
Tensor_t | fOutput |
Activations of this layer. | |
std::vector< Matrix_t > | fWeightGradients |
Gradients w.r.t. the weights of the layer. | |
std::vector< Matrix_t > | fWeights |
The weights associated to the layer. | |
size_t | fWidth |
The width of this layer. | |
#include <TMVA/DNN/BatchNormLayer.h>
using TMVA::DNN::TBatchNormLayer< Architecture_t >::BNormDescriptors_t = typename Architecture_t::BNormDescriptors_t |
Definition at line 72 of file BatchNormLayer.h.
using TMVA::DNN::TBatchNormLayer< Architecture_t >::HelperDescriptor_t = typename Architecture_t::TensorDescriptor_t |
Definition at line 71 of file BatchNormLayer.h.
using TMVA::DNN::TBatchNormLayer< Architecture_t >::Matrix_t = typename Architecture_t::Matrix_t |
Definition at line 68 of file BatchNormLayer.h.
using TMVA::DNN::TBatchNormLayer< Architecture_t >::Scalar_t = typename Architecture_t::Scalar_t |
Definition at line 67 of file BatchNormLayer.h.
using TMVA::DNN::TBatchNormLayer< Architecture_t >::Tensor_t = typename Architecture_t::Tensor_t |
Definition at line 69 of file BatchNormLayer.h.
TMVA::DNN::TBatchNormLayer< Architecture_t >::TBatchNormLayer | ( | size_t | batchSize, |
size_t | inputDepth, | ||
size_t | inputHeight, | ||
size_t | inputWidth, | ||
const std::vector< size_t > & | shape, | ||
int | axis = -1 , |
||
Scalar_t | momentum = -1. , |
||
Scalar_t | epsilon = 0.0001 |
||
) |
Constructor.
Definition at line 219 of file BatchNormLayer.h.
TMVA::DNN::TBatchNormLayer< Architecture_t >::TBatchNormLayer | ( | TBatchNormLayer< Architecture_t > * | layer | ) |
Copy the batch normalization layer provided as a pointer.
Definition at line 242 of file BatchNormLayer.h.
TMVA::DNN::TBatchNormLayer< Architecture_t >::TBatchNormLayer | ( | const TBatchNormLayer< Architecture_t > & | layer | ) |
Copy Constructor.
Definition at line 251 of file BatchNormLayer.h.
TMVA::DNN::TBatchNormLayer< Architecture_t >::~TBatchNormLayer |
Destructor.
Definition at line 259 of file BatchNormLayer.h.
|
virtual |
Writes the information and the weights about the layer in an XML node.
Implements TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 387 of file BatchNormLayer.h.
|
virtual |
Compute weight, bias and activation gradients.
Uses the precomputed first partial derivatives of the activation function computed during forward propagation and modifies them. Must only be called directly after the corresponding call to Forward(...).
Implements TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 337 of file BatchNormLayer.h.
|
inlinestaticprotected |
Definition at line 199 of file BatchNormLayer.h.
|
virtual |
Compute activation of the layer for the given input.
The input must be in 3D tensor form with the different matrices corresponding to different events in the batch. Computes activations as well as the first partial derivative of the activation function at those activations.
Implements TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 295 of file BatchNormLayer.h.
|
inline |
Definition at line 149 of file BatchNormLayer.h.
|
inline |
Definition at line 148 of file BatchNormLayer.h.
|
inline |
Definition at line 177 of file BatchNormLayer.h.
|
inlinevirtual |
Reimplemented from TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 185 of file BatchNormLayer.h.
|
inline |
Definition at line 161 of file BatchNormLayer.h.
|
inline |
Definition at line 160 of file BatchNormLayer.h.
|
inline |
Definition at line 174 of file BatchNormLayer.h.
|
inline |
Definition at line 165 of file BatchNormLayer.h.
|
inline |
Definition at line 164 of file BatchNormLayer.h.
|
inline |
Definition at line 180 of file BatchNormLayer.h.
|
inline |
Definition at line 145 of file BatchNormLayer.h.
|
inline |
Definition at line 144 of file BatchNormLayer.h.
|
inline |
Definition at line 183 of file BatchNormLayer.h.
|
inline |
Definition at line 182 of file BatchNormLayer.h.
|
inline |
Definition at line 157 of file BatchNormLayer.h.
|
inline |
Definition at line 156 of file BatchNormLayer.h.
|
inline |
Definition at line 169 of file BatchNormLayer.h.
|
inline |
Definition at line 168 of file BatchNormLayer.h.
|
virtual |
Initialize the weights and biases according to the given initialization method.
Reimplemented from TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 269 of file BatchNormLayer.h.
|
virtual |
Printing the layer info.
Implements TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 369 of file BatchNormLayer.h.
|
virtual |
Read the information and the weights about the layer from XML node.
Implements TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 412 of file BatchNormLayer.h.
|
inlinevirtual |
Reset some training flags after a loop on all batches.
Some layers (e.g. batch normalization) might need to implement this function in case some operations are needed after looping on all batches.
Reimplemented from TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 129 of file BatchNormLayer.h.
|
inlinevirtual |
Reimplemented from TMVA::DNN::VGeneralLayer< Architecture_t >.
Definition at line 192 of file BatchNormLayer.h.
|
private |
First derivatives of the activations of this layer.
Definition at line 77 of file BatchNormLayer.h.
|
private |
Definition at line 97 of file BatchNormLayer.h.
|
private |
Definition at line 82 of file BatchNormLayer.h.
|
private |
Definition at line 86 of file BatchNormLayer.h.
|
private |
The momentum used to update the running mean and variance.
Definition at line 81 of file BatchNormLayer.h.
|
private |
Definition at line 84 of file BatchNormLayer.h.
|
private |
Definition at line 88 of file BatchNormLayer.h.
|
private |
Normalization axis. For each element of this axis we will compute mean and stddev.
Definition at line 79 of file BatchNormLayer.h.
|
private |
Definition at line 92 of file BatchNormLayer.h.
|
private |
Definition at line 95 of file BatchNormLayer.h.
|
private |
Definition at line 85 of file BatchNormLayer.h.
|
private |
Definition at line 89 of file BatchNormLayer.h.