27#ifndef TMVA_DNN_GENERALLAYER
28#define TMVA_DNN_GENERALLAYER
48template <
typename Architecture_t>
51 using Tensor_t =
typename Architecture_t::Tensor_t;
52 using Matrix_t =
typename Architecture_t::Matrix_t;
53 using Scalar_t =
typename Architecture_t::Scalar_t;
81 VGeneralLayer(
size_t BatchSize,
size_t InputDepth,
size_t InputHeight,
size_t InputWidth,
size_t Depth,
82 size_t Height,
size_t Width,
size_t WeightsNSlices,
size_t WeightsNRows,
size_t WeightsNCols,
83 size_t BiasesNSlices,
size_t BiasesNRows,
size_t BiasesNCols,
size_t OutputNSlices,
size_t OutputNRows,
87 VGeneralLayer(
size_t BatchSize,
size_t InputDepth,
size_t InputHeight,
size_t InputWidth,
size_t Depth,
88 size_t Height,
size_t Width,
size_t WeightsNSlices, std::vector<size_t> WeightsNRows,
89 std::vector<size_t> WeightsNCols,
size_t BiasesNSlices, std::vector<size_t> BiasesNRows,
90 std::vector<size_t> BiasesNCols,
size_t OutputNSlices,
size_t OutputNRows,
size_t OutputNCols,
/// Copies the weights provided as an input.
/// \param otherWeights vector of weight matrices to copy from.
136 void CopyWeights(
const std::vector<Matrix_t> &otherWeights);
/// Copies the biases provided as an input.
/// \param otherBiases vector of bias matrices to copy from.
139 void CopyBiases(
const std::vector<Matrix_t> &otherBiases);
144 template <
typename Arch>
235template <
typename Architecture_t>
237 size_t depth,
size_t height,
size_t width,
size_t weightsNSlices,
238 size_t weightsNRows,
size_t weightsNCols,
size_t biasesNSlices,
239 size_t biasesNRows,
size_t biasesNCols,
size_t outputNSlices,
241 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
242 fHeight(height), fWidth(
width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
243 fOutput( outputNSlices, outputNRows, outputNCols ),
244 fActivationGradients( outputNSlices, outputNRows, outputNCols ),
248 for (
size_t i = 0; i < weightsNSlices; i++) {
249 fWeights.emplace_back(weightsNRows, weightsNCols);
253 for (
size_t i = 0; i < biasesNSlices; i++) {
254 fBiases.emplace_back(biasesNRows, biasesNCols);
260template <
typename Architecture_t>
262 size_t depth,
size_t height,
size_t width,
size_t weightsNSlices,
263 std::vector<size_t> weightsNRows, std::vector<size_t> weightsNCols,
264 size_t biasesNSlices, std::vector<size_t> biasesNRows,
265 std::vector<size_t> biasesNCols,
size_t outputNSlices,
size_t outputNRows,
267 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
268 fHeight(height), fWidth(
width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
269 fOutput( outputNSlices, outputNRows, outputNCols ),
270 fActivationGradients( outputNSlices, outputNRows, outputNCols ),
274 for (
size_t i = 0; i < weightsNSlices; i++) {
275 fWeights.emplace_back(weightsNRows[i], weightsNCols[i]);
279 for (
size_t i = 0; i < biasesNSlices; i++) {
280 fBiases.emplace_back(biasesNRows[i], biasesNCols[i]);
291template <
typename Architecture_t>
293 : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()),
294 fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()),
295 fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
297 fOutput( layer->GetOutput().GetShape() ),
298 fActivationGradients( layer->GetActivationGradients().GetShape() ),
299 fInit(layer->GetInitialization() )
302 size_t weightsNSlices = (layer->
GetWeights()).size();
303 size_t weightsNRows = 0;
304 size_t weightsNCols = 0;
306 for (
size_t i = 0; i < weightsNSlices; i++) {
310 fWeights.emplace_back(weightsNRows, weightsNCols);
316 size_t biasesNSlices = (layer->
GetBiases()).size();
317 size_t biasesNRows = 0;
318 size_t biasesNCols = 0;
320 for (
size_t i = 0; i < biasesNSlices; i++) {
324 fBiases.emplace_back(biasesNRows, biasesNCols);
332template <
typename Architecture_t>
334 : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight),
335 fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth),
336 fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
337 fOutput( layer.GetOutput() ),
338 fActivationGradients( layer.GetActivationGradients() ),
339 fInit( layer.GetInitialization())
342 size_t weightsNSlices = layer.
fWeights.size();
343 size_t weightsNRows = 0;
344 size_t weightsNCols = 0;
346 for (
size_t i = 0; i < weightsNSlices; i++) {
347 weightsNRows = (layer.
fWeights[i]).GetNrows();
348 weightsNCols = (layer.
fWeights[i]).GetNcols();
350 fWeights.emplace_back(weightsNRows, weightsNCols);
356 size_t biasesNSlices = layer.
fBiases.size();
357 size_t biasesNRows = 0;
358 size_t biasesNCols = 0;
360 for (
size_t i = 0; i < biasesNSlices; i++) {
361 biasesNRows = (layer.
fBiases[i]).GetNrows();
362 biasesNCols = (layer.
fBiases[i]).GetNcols();
364 fBiases.emplace_back(biasesNRows, biasesNCols);
370 size_t outputNSlices = layer.
fOutput.size();
371 size_t outputNRows = 0;
372 size_t outputNCols = 0;
374 for (
size_t i = 0; i < outputNSlices; i++) {
375 outputNRows = (layer.
fOutput[i]).GetNrows();
376 outputNCols = (layer.
fOutput[i]).GetNcols();
378 fOutput.emplace_back(outputNRows, outputNCols);
384template <
typename Architecture_t>
391template <
typename Architecture_t>
394 for (
size_t i = 0; i < fWeights.size(); i++) {
395 initialize<Architecture_t>(fWeights[i], this->GetInitialization());
399 for (
size_t i = 0; i < fBiases.size(); i++) {
406template <
typename Architecture_t>
409 this->UpdateWeights(fWeightGradients, learningRate);
410 this->UpdateBiases(fBiasGradients, learningRate);
414template <
typename Architecture_t>
416 const Scalar_t learningRate) ->
void
418 for (
size_t i = 0; i < fWeights.size(); i++) {
419 Architecture_t::ScaleAdd(fWeights[i], weightGradients[i], -learningRate);
424template <
typename Architecture_t>
426 const Scalar_t learningRate) ->
void
428 for (
size_t i = 0; i < fBiases.size(); i++) {
429 Architecture_t::ScaleAdd(fBiases[i], biasGradients[i], -learningRate);
434template <
typename Architecture_t>
436 const Scalar_t learningRate) ->
void
438 for (
size_t i = 0; i < fWeightGradients.size(); i++) {
439 Architecture_t::ScaleAdd(fWeightGradients[i], weightGradients[i], -learningRate);
444template <
typename Architecture_t>
446 const Scalar_t learningRate) ->
void
448 for (
size_t i = 0; i < fBiasGradients.size(); i++) {
449 Architecture_t::ScaleAdd(fBiasGradients[i], biasGradients[i], -learningRate);
454template <
typename Architecture_t>
458 for (
size_t i = 0; i < fWeights.size(); i++) {
464template <
typename Architecture_t>
467 for (
size_t i = 0; i < fBiases.size(); i++) {
473template <
typename Architecture_t>
474template <
typename Arch>
479 Architecture_t::CopyDiffArch(this->GetWeights(), layer.
GetWeights());
480 Architecture_t::CopyDiffArch(this->GetBiases(), layer.
GetBiases());
484 if (params.size() > 0) {
485 auto paramsToCopy = GetExtraLayerParameters();
486 Architecture_t::CopyDiffArch(paramsToCopy, params );
487 SetExtraLayerParameters(paramsToCopy);
492template <
typename Architecture_t>
497 if (tensor.size() == 0)
return;
498 xmlengine.NewAttr(matnode,0,
"Depth",
gTools().StringFromInt(tensor.size()) );
500 xmlengine.NewAttr(matnode,0,
"Rows",
gTools().StringFromInt(tensor[0].GetNrows()) );
501 xmlengine.NewAttr(matnode,0,
"Columns",
gTools().StringFromInt(tensor[0].GetNcols()) );
503 for (
size_t i = 0; i < tensor.size(); ++i) {
504 auto & mat = tensor[i];
505 for (
Int_t row = 0; row < mat.GetNrows(); row++) {
506 for (
Int_t col = 0; col < mat.GetNcols(); col++) {
509 s << std::scientific << mat(row, col) <<
" ";
513 xmlengine.AddRawLine( matnode,
s.str().c_str() );
517template <
typename Architecture_t>
523 xmlengine.NewAttr(matnode,0,
"Rows",
gTools().StringFromInt(matrix.GetNrows()) );
524 xmlengine.NewAttr(matnode,0,
"Columns",
gTools().StringFromInt(matrix.GetNcols()) );
526 s.precision( std::numeric_limits<Scalar_t>::digits10 );
527 size_t nrows = matrix.GetNrows();
528 size_t ncols = matrix.GetNcols();
529 for (
size_t row = 0; row < nrows; row++) {
530 for (
size_t col = 0; col < ncols; col++) {
532 s << std::scientific << matrix(row,col) <<
" ";
536 xmlengine.AddRawLine( matnode,
s.str().c_str() );
540template <
typename Architecture_t>
548 R__ASSERT((
size_t) matrix.GetNrows() == rows);
549 R__ASSERT((
size_t) matrix.GetNcols() == cols);
554 std::stringstream matrixStringStream(matrixString);
556 for (
size_t i = 0; i < rows; i++)
558 for (
size_t j = 0; j < cols; j++)
560#ifndef R__HAS_TMVAGPU
561 matrixStringStream >> tmatrix(i,j);
564 matrixStringStream >> value;
565 tmatrix(i,j) = value;
/// Debugging helper that prints a tensor through the architecture backend.
///
/// Forwards both arguments unchanged to Architecture::PrintTensor.
///
/// \tparam Architecture backend type providing a Tensor_t type and a static
///                      PrintTensor(tensor, name) function.
/// \param A    tensor to print.
/// \param name label handed to PrintTensor together with the tensor;
///             defaults to "tensor".
template <typename Architecture>
auto debugTensor(const typename Architecture::Tensor_t &A, const std::string name = "tensor") -> void
{
   Architecture::PrintTensor(A, name);
}
include TDocParser_001 C image html pict1_TDocParser_001 png width
Generic General Layer class.
std::vector< Matrix_t > fWeightGradients
Gradients w.r.t. the weights of the layer.
Tensor_t fOutput
Activations of this layer.
const std::vector< Matrix_t > & GetWeightGradients() const
virtual void SetDropoutProbability(Scalar_t)
Set Dropout probability.
void CopyParameters(const VGeneralLayer< Arch > &layer)
Copy all trainable weights and biases from another equivalent layer that uses a different architecture.
const Matrix_t & GetWeightsAt(size_t i) const
void SetHeight(size_t height)
void UpdateWeightGradients(const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
Updates the weight gradients, given some other weight gradients and learning rate.
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Matrix_t & GetBiasesAt(size_t i)
void SetInputHeight(size_t inputHeight)
std::vector< Matrix_t > fBiasGradients
Gradients w.r.t. the bias values of the layer.
void SetDepth(size_t depth)
virtual void SetExtraLayerParameters(const std::vector< Matrix_t > &)
virtual void ReadWeightsFromXML(void *parent)=0
Read the information and the weights about the layer from XML node.
void UpdateBiasGradients(const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
Updates the bias gradients, given some other bias gradients and learning rate.
void SetBatchSize(size_t batchSize)
Setters.
void CopyWeights(const std::vector< Matrix_t > &otherWeights)
Copies the weights provided as an input.
size_t fBatchSize
Batch size used for training and evaluation.
virtual void AddWeightsXMLTo(void *parent)=0
Writes the information and the weights about the layer in an XML node.
void UpdateWeights(const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
Updates the weights, given the gradients and the learning rate.
typename Architecture_t::Matrix_t Matrix_t
const std::vector< Matrix_t > & GetBiasGradients() const
void SetInputDepth(size_t inputDepth)
const std::vector< Matrix_t > & GetWeights() const
std::vector< Matrix_t > & GetWeights()
size_t fWidth
The width of this layer.
EInitialization fInit
The initialization method.
std::vector< Matrix_t > fBiases
The biases associated to the layer.
void SetIsTraining(bool isTraining)
size_t fInputWidth
The width of the previous layer or input.
size_t fHeight
The height of the layer.
virtual void Print() const =0
Prints the info about the layer.
size_t fInputDepth
The depth of the previous layer or input.
void SetWidth(size_t width)
bool fIsTraining
Flag indicating the mode.
const Tensor_t & GetOutput() const
const std::vector< Matrix_t > & GetBiases() const
typename Architecture_t::Scalar_t Scalar_t
std::vector< Matrix_t > & GetBiasGradients()
Tensor_t & GetActivationGradients()
std::vector< Matrix_t > fWeights
The weights associated to the layer.
EInitialization GetInitialization() const
Tensor_t fActivationGradients
Gradients w.r.t. the activations of this layer.
Matrix_t & GetWeightsAt(size_t i)
Matrix_t & GetBiasGradientsAt(size_t i)
size_t GetInputDepth() const
const Matrix_t & GetActivationGradientsAt(size_t i) const
std::vector< Matrix_t > & GetBiases()
virtual std::vector< Matrix_t > GetExtraLayerParameters() const
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
Matrix_t GetActivationGradientsAt(size_t i)
std::vector< Matrix_t > & GetWeightGradients()
const Tensor_t & GetActivationGradients() const
size_t fInputHeight
The height of the previous layer or input.
size_t fDepth
The depth of the layer.
virtual void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward)=0
Backpropagates the error.
void CopyBiases(const std::vector< Matrix_t > &otherBiases)
Copies the biases provided as an input.
void Update(const Scalar_t learningRate)
Updates the weights and biases, given the learning rate.
const Matrix_t & GetBiasesAt(size_t i) const
virtual void ResetTraining()
Reset some training flags after a loop over all batches. Some layers (e.g. ...
size_t GetInputHeight() const
void SetInputWidth(size_t inputWidth)
const Matrix_t & GetBiasGradientsAt(size_t i) const
void WriteTensorToXML(void *node, const char *name, const std::vector< Matrix_t > &tensor)
helper functions for XML
size_t GetBatchSize() const
Getters.
Matrix_t & GetWeightGradientsAt(size_t i)
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
virtual void Forward(Tensor_t &input, bool applyDropout=false)=0
Computes activation of the layer for the given input.
Matrix_t GetOutputAt(size_t i)
const Matrix_t & GetWeightGradientsAt(size_t i) const
void UpdateBiases(const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
Updates the biases, given the gradients and the learning rate.
typename Architecture_t::Tensor_t Tensor_t
virtual ~VGeneralLayer()
Virtual Destructor.
const Matrix_t & GetOutputAt(size_t i) const
VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init)
Constructor.
size_t GetInputWidth() const
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
const char * GetNodeContent(XMLNodePointer_t xmlnode)
get contents (if any) of xmlnode
EvaluateInfo init(std::vector< RooRealProxy > parameters, std::vector< ArrayWrapper * > wrappers, std::vector< double * > arrays, size_t begin, size_t batchSize)
void Copy(void *source, void *dest)
void Init(TClassEdit::TInterpreterLookupHelper *helper)
static constexpr double s
auto debugTensor(const typename Architecture::Tensor_t &A, const std::string name="tensor") -> void
UInt_t Depth(const Node< T > *node)
create variable transformations