#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"

#include <iostream>
#include <string>
#include <vector>

namespace TMVA {
namespace DNN {
/** \class TDenseLayer
 *
 * Generic fully-connected layer class. It implements the forward and backward
 * pass of a dense layer on top of the matrix primitives supplied by the
 * Architecture_t backend, and caches the first derivatives of the activation
 * function needed by backpropagation.
 */
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:
   std::vector<Matrix_t> fDerivatives; ///< First derivatives of the activations of this layer.
   Scalar_t fDropoutProbability;       ///< Probability that an input is active.
   EActivationFunction fF;             ///< Activation function of the layer.
   ERegularization fReg;               ///< The regularization method.
   Scalar_t fWeightDecay;              ///< The weight decay.

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute activation of the layer for the given input. */
   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /*! Compute weight, bias and activation gradients. */
   void Backward(std::vector<Matrix_t> &gradients_backward, const std::vector<Matrix_t> &activations_backward,
                 std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);

   /*! Printing the layer info. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }
   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }

   const std::vector<Matrix_t> &GetDerivatives() const { return fDerivatives; }
   std::vector<Matrix_t> &GetDerivatives() { return fDerivatives; }
   const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }
   Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
};
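// Usage sketch: constructing a dense layer and running one forward pass. The
// reference CPU backend TReference<double> from TMVA/DNN/Architectures/Reference.h
// is assumed here; any Architecture_t providing the required static kernels is
// driven the same way. Unqualified enum names assume `using namespace TMVA::DNN`.
//
//    using Arch = TMVA::DNN::TReference<double>;
//    TMVA::DNN::TDenseLayer<Arch> layer(/*BatchSize=*/32, /*InputWidth=*/16, /*Width=*/8,
//                                       EInitialization::kGauss, /*DropoutProbability=*/1.0,
//                                       EActivationFunction::kRelu, ERegularization::kL2,
//                                       /*weightDecay=*/1e-4);
//
//    std::vector<Arch::Matrix_t> input;
//    input.emplace_back(32, 16);   // one (batchSize x inputWidth) batch matrix
//    layer.Forward(input);         // activations land in layer.GetOutputAt(0)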
//
//  The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fDerivatives(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   fDerivatives.emplace_back(batchSize, width);
}
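// The positional arguments passed to VGeneralLayer above correspond, in order, to
// (BatchSize, InputDepth, InputHeight, InputWidth, Depth, Height, Width,
// WeightsNSlices, WeightsNRows, WeightsNCols, BiasesNSlices, BiasesNRows,
// BiasesNCols, OutputNSlices, OutputNRows, OutputNCols, Init) as declared in
// GeneralLayer.h: a dense layer is the degenerate 1 x 1 x inputWidth ->
// 1 x 1 x width case, with one (width x inputWidth) weight matrix, one
// (width x 1) bias vector and one (batchSize x width) output matrix.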
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer->GetDropoutProbability()),
     fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives.emplace_back(layer->GetBatchSize(), layer->GetWidth());
}
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer.fDropoutProbability),
     fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives.emplace_back(layer.fBatchSize, layer.fWidth);
}
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
}
//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::Dropout(input[0], this->GetDropoutProbability());
   }
   Architecture_t::MultiplyTranspose(this->GetOutputAt(0), input[0], this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutputAt(0), this->GetBiasesAt(0));
   evaluateDerivative<Architecture_t>(this->GetDerivativesAt(0), this->GetActivationFunction(), this->GetOutputAt(0));
   evaluate<Architecture_t>(this->GetOutputAt(0), this->GetActivationFunction());
}
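// In matrix notation, for one batch matrix X of shape (batchSize x inputWidth),
// weights W (width x inputWidth) and bias row vector b, the calls above amount to
//
//    Z               = X * W^T + b   // MultiplyTranspose, then AddRowWise
//    fDerivatives[0] = f'(Z)         // evaluateDerivative
//    output          = f(Z)          // evaluate, in place
//
// The derivative must be computed first, since evaluate() overwrites the
// pre-activations Z stored in the output matrix.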
//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(std::vector<Matrix_t> &gradients_backward,
                                           const std::vector<Matrix_t> &activations_backward,
                                           std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> & /*inp2*/) -> void
{
   if (gradients_backward.size() == 0) {
      // The first layer of the network receives no gradients w.r.t. its input;
      // pass an empty matrix so the backend skips that computation.
      Matrix_t dummy(0, 0);
      Architecture_t::Backward(dummy, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   } else {
      Architecture_t::Backward(gradients_backward[0], this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   }

   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
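// Writing dY for the incoming activation gradients and D = fDerivatives[0] = f'(Z)
// cached by the forward pass, the backend's Backward kernel implements the standard
// dense-layer backpropagation (a sketch of the semantics, not extra library code):
//
//    dZ                    = dY (elementwise *) D
//    gradients_backward[0] = dZ * W        // gradient w.r.t. the layer input
//    weight gradients      = dZ^T * X      // X = activations_backward[0]
//    bias gradients        = column sums of dZ
//
// addRegularizationGradients then adds the derivative of the L1/L2 penalty,
// scaled by fWeightDecay, to the weight gradients.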
//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t ";
   std::cout << " ( Input = " << this->GetWeightsAt(0).GetNcols();
   std::cout << " , Width = " << this->GetWeightsAt(0).GetNrows() << " ) ";
   if (this->GetOutput().size() > 0) {
      std::cout << "\tOutput = ( " << this->GetOutput().size() << " , " << this->GetOutput()[0].GetNrows() << " , "
                << this->GetOutput()[0].GetNcols() << " ) ";
   }
   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)] << std::endl;
}
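// For a 16 -> 8 ReLU layer with one (32 x 8) output matrix, the statements above
// produce a single line of roughly this form (values illustrative):
//
//    DENSE Layer:    ( Input = 16 , Width = 8 )   Output = ( 1 , 32 , 8 )   Activation Function = Relu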
//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // Write the layer width and activation function, followed by the weight and
   // bias matrices.
   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction", TString::Itoa(activationFunction, 10));

   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
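// The resulting XML fragment has roughly the following shape; the exact matrix
// payload encoding is defined by VGeneralLayer::WriteMatrixToXML and is elided here
// (attribute names in the sketch are an assumption):
//
//    <DenseLayer Width="8" ActivationFunction="1">
//       <Weights rows="8" cols="16"> ... </Weights>
//       <Biases rows="8" cols="1"> ... </Biases>
//    </DenseLayer>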
//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read layer weights and biases from the XML node written by AddWeightsXMLTo.
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif // TMVA_DNN_DENSELAYER