27#ifndef TMVA_DNN_GENERALLAYER
28#define TMVA_DNN_GENERALLAYER
50template <
typename Architecture_t>
53 using Tensor_t =
typename Architecture_t::Tensor_t;
54 using Matrix_t =
typename Architecture_t::Matrix_t;
55 using Scalar_t =
typename Architecture_t::Scalar_t;
124 void Update(
const Scalar_t learningRate);
147 template <
typename Arch>
238template <
typename Architecture_t>
244 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(
depth),
245 fHeight(
height), fWidth(
width), fIsTraining(
true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
263template <
typename Architecture_t>
270 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(
depth),
271 fHeight(
height), fWidth(
width), fIsTraining(
true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
294template <
typename Architecture_t>
296 : fBatchSize(
layer->GetBatchSize()), fInputDepth(
layer->GetInputDepth()), fInputHeight(
layer->GetInputHeight()),
297 fInputWidth(
layer->GetInputWidth()), fDepth(
layer->GetDepth()), fHeight(
layer->GetHeight()),
298 fWidth(
layer->GetWidth()), fIsTraining(
layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
300 fOutput(
layer->GetOutput().GetShape() ),
301 fActivationGradients(
layer->GetActivationGradients().GetShape() ),
302 fInit(
layer->GetInitialization() )
330 Architecture_t::Copy(
fBiases[i],
layer->GetBiasesAt(i));
335template <
typename Architecture_t>
337 : fBatchSize(
layer.fBatchSize), fInputDepth(
layer.fInputDepth), fInputHeight(
layer.fInputHeight),
338 fInputWidth(
layer.fInputWidth), fDepth(
layer.fDepth), fHeight(
layer.fHeight), fWidth(
layer.fWidth),
339 fIsTraining(
layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
340 fOutput(
layer.GetOutput() ),
341 fActivationGradients(
layer.GetActivationGradients() ),
342 fInit(
layer.GetInitialization())
387template <
typename Architecture_t>
394template <
typename Architecture_t>
397 for (
size_t i = 0; i < fWeights.size(); i++) {
402 for (
size_t i = 0; i < fBiases.size(); i++) {
409template <
typename Architecture_t>
412 this->UpdateWeights(fWeightGradients, learningRate);
413 this->UpdateBiases(fBiasGradients, learningRate);
417template <
typename Architecture_t>
419 const Scalar_t learningRate) ->
void
421 for (
size_t i = 0; i < fWeights.size(); i++) {
422 Architecture_t::ScaleAdd(fWeights[i],
weightGradients[i], -learningRate);
427template <
typename Architecture_t>
429 const Scalar_t learningRate) ->
void
431 for (
size_t i = 0; i < fBiases.size(); i++) {
432 Architecture_t::ScaleAdd(fBiases[i],
biasGradients[i], -learningRate);
437template <
typename Architecture_t>
439 const Scalar_t learningRate) ->
void
441 for (
size_t i = 0; i < fWeightGradients.size(); i++) {
442 Architecture_t::ScaleAdd(fWeightGradients[i],
weightGradients[i], -learningRate);
447template <
typename Architecture_t>
449 const Scalar_t learningRate) ->
void
451 for (
size_t i = 0; i < fBiasGradients.size(); i++) {
452 Architecture_t::ScaleAdd(fBiasGradients[i],
biasGradients[i], -learningRate);
457template <
typename Architecture_t>
461 for (
size_t i = 0; i < fWeights.size(); i++) {
467template <
typename Architecture_t>
470 for (
size_t i = 0; i < fBiases.size(); i++) {
476template <
typename Architecture_t>
477template <
typename Arch>
482 Architecture_t::CopyDiffArch(this->GetWeights(),
layer.GetWeights());
483 Architecture_t::CopyDiffArch(this->GetBiases(),
layer.GetBiases());
486 auto params =
layer.GetExtraLayerParameters();
487 if (params.size() > 0) {
495template <
typename Architecture_t>
500 if (
tensor.size() == 0)
return;
506 for (
size_t i = 0; i <
tensor.size(); ++i) {
508 for (
Int_t row = 0; row <
mat.GetNrows(); row++) {
509 for (
Int_t col = 0; col <
mat.GetNcols(); col++) {
512 s << std::scientific <<
mat(row, col) <<
" ";
516 xmlengine.AddRawLine(
matnode, s.str().c_str() );
520template <
typename Architecture_t>
524 void*
matnode = xmlengine.NewChild(node,
nullptr,
name);
529 s.precision( std::numeric_limits<Scalar_t>::digits10 );
532 for (
size_t row = 0; row <
nrows; row++) {
533 for (
size_t col = 0; col <
ncols; col++) {
535 s << std::scientific <<
matrix(row,col) <<
" ";
539 xmlengine.AddRawLine(
matnode, s.str().c_str() );
543template <
typename Architecture_t>
559 for (
size_t i = 0; i <
rows; i++)
561 for (
size_t j = 0;
j <
cols;
j++)
563#ifndef R__HAS_TMVAGPU
576 Architecture_t::Copy(
matrix, tmp);
581template <
typename Architecture>
582auto debugTensor(
const typename Architecture::Tensor_t & A,
const std::string
name =
"tensor") ->
void
584 Architecture::PrintTensor(A,
name);
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
Generic General Layer class.
std::vector< Matrix_t > fWeightGradients
Gradients w.r.t. the weights of the layer.
Tensor_t fOutput
Activations of this layer.
const std::vector< Matrix_t > & GetWeightGradients() const
virtual void SetDropoutProbability(Scalar_t)
Set Dropout probability.
void CopyParameters(const VGeneralLayer< Arch > &layer)
Copy all trainable weight and biases from another equivalent layer but with different architecture Th...
const Matrix_t & GetWeightsAt(size_t i) const
void SetHeight(size_t height)
void UpdateWeightGradients(const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
Updates the weight gradients, given some other weight gradients and learning rate.
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Matrix_t & GetBiasesAt(size_t i)
void SetInputHeight(size_t inputHeight)
std::vector< Matrix_t > fBiasGradients
Gradients w.r.t. the bias values of the layer.
void SetDepth(size_t depth)
virtual void SetExtraLayerParameters(const std::vector< Matrix_t > &)
virtual void ReadWeightsFromXML(void *parent)=0
Read the information and the weights about the layer from XML node.
void UpdateBiasGradients(const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
Updates the bias gradients, given some other bias gradients and learning rate.
void SetBatchSize(size_t batchSize)
Setters.
void CopyWeights(const std::vector< Matrix_t > &otherWeights)
Copies the weights provided as an input.
size_t fBatchSize
Batch size used for training and evaluation.
virtual void AddWeightsXMLTo(void *parent)=0
Writes the information and the weights about the layer in an XML node.
void UpdateWeights(const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
Updates the weights, given the gradients and the learning rate.
typename Architecture_t::Matrix_t Matrix_t
const std::vector< Matrix_t > & GetBiasGradients() const
void SetInputDepth(size_t inputDepth)
const std::vector< Matrix_t > & GetWeights() const
std::vector< Matrix_t > & GetWeights()
size_t fWidth
The width of this layer.
EInitialization fInit
The initialization method.
std::vector< Matrix_t > fBiases
The biases associated to the layer.
void SetIsTraining(bool isTraining)
size_t fInputWidth
The width of the previous layer or input.
size_t fHeight
The height of the layer.
virtual void Print() const =0
Prints the info about the layer.
size_t fInputDepth
The depth of the previous layer or input.
void SetWidth(size_t width)
bool fIsTraining
Flag indicating the mode.
const Tensor_t & GetOutput() const
const std::vector< Matrix_t > & GetBiases() const
typename Architecture_t::Scalar_t Scalar_t
std::vector< Matrix_t > & GetBiasGradients()
Tensor_t & GetActivationGradients()
std::vector< Matrix_t > fWeights
The weights associated to the layer.
EInitialization GetInitialization() const
Tensor_t fActivationGradients
Gradients w.r.t. the activations of this layer.
Matrix_t & GetWeightsAt(size_t i)
Matrix_t & GetBiasGradientsAt(size_t i)
size_t GetInputDepth() const
const Matrix_t & GetActivationGradientsAt(size_t i) const
std::vector< Matrix_t > & GetBiases()
virtual std::vector< Matrix_t > GetExtraLayerParameters() const
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
Matrix_t GetActivationGradientsAt(size_t i)
std::vector< Matrix_t > & GetWeightGradients()
const Tensor_t & GetActivationGradients() const
size_t fInputHeight
The height of the previous layer or input.
size_t fDepth
The depth of the layer.
virtual void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward)=0
Backpropagates the error.
void CopyBiases(const std::vector< Matrix_t > &otherBiases)
Copies the biases provided as an input.
void Update(const Scalar_t learningRate)
Updates the weights and biases, given the learning rate.
const Matrix_t & GetBiasesAt(size_t i) const
virtual void ResetTraining()
Reset some training flags after a loop on all batches Some layer (e.g.
size_t GetInputHeight() const
void SetInputWidth(size_t inputWidth)
const Matrix_t & GetBiasGradientsAt(size_t i) const
void WriteTensorToXML(void *node, const char *name, const std::vector< Matrix_t > &tensor)
helper functions for XML
size_t GetBatchSize() const
Getters.
Matrix_t & GetWeightGradientsAt(size_t i)
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
virtual void Forward(Tensor_t &input, bool applyDropout=false)=0
Computes activation of the layer for the given input.
Matrix_t GetOutputAt(size_t i)
const Matrix_t & GetWeightGradientsAt(size_t i) const
void UpdateBiases(const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
Updates the biases, given the gradients and the learning rate.
typename Architecture_t::Tensor_t Tensor_t
virtual ~VGeneralLayer()
Virtual Destructor.
const Matrix_t & GetOutputAt(size_t i) const
VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init)
Constructor.
size_t GetInputWidth() const
const char * GetNodeContent(XMLNodePointer_t xmlnode)
get contents (if any) of xmlnode
auto debugTensor(const typename Architecture::Tensor_t &A, const std::string name="tensor") -> void
create variable transformations