#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:
   std::vector<Matrix_t> fDerivatives; ///< First derivatives of the activations of this layer.
   Scalar_t fDropoutProbability;       ///< Probability that an input is active.
   EActivationFunction fF;             ///< Activation function of the layer.
   ERegularization fReg;               ///< The regularization method.
   Scalar_t fWeightDecay;              ///< The weight decay.

public:
   /// Compute activation of the layer for the given input.
   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /// Compute weight, bias and activation gradients.
   void Backward(std::vector<Matrix_t> &gradients_backward, const std::vector<Matrix_t> &activations_backward,
                 std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);

   // ... (constructors, destructor, Print, XML I/O and getter declarations elided) ...
};
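// Usage sketch (illustrative, not part of this header): any backend exposing
// the Architecture_t interface works here; with the reference backend one
// could build a 16-unit dense layer on 8 inputs roughly as
//
//   using Arch_t = TMVA::DNN::TReference<double>;
//   TDenseLayer<Arch_t> layer(/*batchSize*/ 32, /*inputWidth*/ 8, /*width*/ 16,
//                             EInitialization::kGauss, /*dropoutProbability*/ 1.0,
//                             EActivationFunction::kRelu, ERegularization::kNone,
//                             /*weightDecay*/ 0.0);
//
// A dropout probability of 1.0 means "keep every input", i.e. dropout disabled.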
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fDerivatives(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   fDerivatives.emplace_back(batchSize, width);
}
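// Reading the flat VGeneralLayer argument list against the base-class
// constructor (interpretation assumed from the base-class geometry
// parameters): a dense layer is stored as a degenerate 1 x 1 x inputWidth ->
// 1 x 1 x width layer, with one (width x inputWidth) weight matrix, one
// (width x 1) bias matrix, and one (batchSize x width) output matrix, whose
// weights are initialized according to `init`.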
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer->GetDropoutProbability()),
     fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives.emplace_back(layer->GetBatchSize(), layer->GetWidth());
}
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer.fDropoutProbability), fF(layer.fF),
     fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives.emplace_back(layer.fBatchSize, layer.fWidth);
}
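// Both copy constructors rebuild fDerivatives as a fresh buffer rather than
// copying its contents: the cached derivatives are scratch data that Forward
// recomputes on every call, so only the buffer shape matters.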
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
}
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::Dropout(input[0], this->GetDropoutProbability());
   }
   Architecture_t::MultiplyTranspose(this->GetOutputAt(0), input[0], this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutputAt(0), this->GetBiasesAt(0));
   evaluateDerivative<Architecture_t>(this->GetDerivativesAt(0), this->GetActivationFunction(), this->GetOutputAt(0));
   evaluate<Architecture_t>(this->GetOutputAt(0), this->GetActivationFunction());
}
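// In matrix form, the forward pass above computes per batch
//
//   Y = f(X * W^T + b)   with X: (batchSize x inputWidth), W: (width x inputWidth),
//                        b broadcast row-wise, Y: (batchSize x width),
//
// while f'(X * W^T + b) is cached in fDerivatives for the backward pass. Note
// that evaluateDerivative must run before evaluate, since evaluate overwrites
// the pre-activation values in place.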
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(std::vector<Matrix_t> &gradients_backward,
                                           const std::vector<Matrix_t> &activations_backward,
                                           std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> & /*inp2*/) -> void
{
   if (gradients_backward.size() == 0) {
      // First layer of the network: no gradient needs to be propagated further back.
      Matrix_t dummy(0, 0);
      Architecture_t::Backward(dummy, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   } else {
      Architecture_t::Backward(gradients_backward[0], this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   }

   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
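// With D = fDerivatives (f' of the linear output) and G = the activation
// gradients flowing in from the next layer, the Architecture_t::Backward
// kernel computes, in the conventions of the TMVA backends,
//
//   dY = G (.) D                        element-wise product
//   gradients_backward = dY * W         gradient w.r.t. this layer's input
//   weight_gradients   = dY^T * X       with X = activations_backward[0]
//   bias_gradients     = column sums of dY
//
// addRegularizationGradients then adds the weightDecay-scaled derivative of
// the configured L1/L2 regularization term to the weight gradients.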
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) ";
   if (this->GetOutput().size() > 0) {
      std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().size() << " ," << std::setw(6)
                << this->GetOutput()[0].GetNrows() << " ," << std::setw(6) << this->GetOutput()[0].GetNcols()
                << " ) ";
   }
   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)];
   if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}
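// For the 8 -> 16 ReLU layer sketched above (batch size 32, dropout disabled),
// Print() would emit roughly:
//
//    DENSE Layer:   ( Input =     8 , Width =    16 )  Output = (  1 ,    32 ,    16 )   Activation Function = Relu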
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // Create the layer node and store the layer width and activation function as attributes.
   void *layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");
   gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction", TString::Itoa(activationFunction, 10));

   // Write weight and bias matrices.
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
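// The resulting node has roughly this shape (matrix payloads abbreviated; the
// exact encoding is defined by WriteMatrixToXML, and "1" assumes kRelu follows
// the enum order of the activationNames table above):
//
//   <DenseLayer Width="16" ActivationFunction="1">
//      <Weights ... />   <!-- (width x inputWidth) matrix -->
//      <Biases ... />    <!-- (width x 1) matrix -->
//   </DenseLayer>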
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read weight and bias matrices back from the layer's XML node.
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}
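// AddWeightsXMLTo / ReadWeightsFromXML form a round trip: only the width, the
// activation function and the weight and bias matrices are serialized, so a
// saved layer can be restored into any layer of identical geometry. Dropout,
// regularization and weight decay are training-time settings and are not part
// of this node.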
#endif